码迷,mamicode.com
首页 > Web开发 > 详细

jsoup-处理html中的script数据

时间:2016-06-25 14:53:11      阅读:852      评论:0      收藏:0      [点我收藏+]

标签:

 

 

 

/**
 * Valueonline data: scrapes the left-hand category list of the laws page, e.g.
 * http://www.valueonline.cn/laws/laws?typeid=96219074211635284
 *
 * <p>The category data is embedded in the first {@code <script>} element of the page as
 * JavaScript {@code var} declarations. This utility cuts that script into fragments on the
 * text {@code "var"}, extracts the JSON array from one fragment and prints one SQL
 * {@code insert} statement per array element to standard output.
 *
 * @author hwaggLee
 */
public class UtilsHtmValueonLineType {

    /** Position, after splitting the script on "var", of the fragment with the market types. */
    private static final int MARKET_TYPE_FRAGMENT = 3;

    public static void main(String[] args) {
        String url = "http://www.valueonline.cn/laws/laws?typeid=96219074211635284";
        readHtml(url);
    }

    /**
     * Downloads {@code url}, reads the market-type array out of the first script element and
     * prints an {@code insert into n3b_vl_market_type} statement for every entry.
     *
     * <p>The method is best-effort: if the page cannot be fetched, has no script element, has
     * fewer {@code var} fragments than expected, or the fragment holds no {@code [...]} array,
     * it returns without printing any statement instead of throwing.
     *
     * @param url address of the page to scrape
     * @return the result list; nothing is ever added to it, so it is always empty
     */
    public static List<Object> readHtml(String url){
        List<Object> list = new ArrayList<Object>();

        Document doc = null;
        try {
            doc = Jsoup.connect(url).get();
        } catch (Exception e) {
            // Best-effort fetch: report the failure together with the URL and give up quietly.
            System.out.println(e.getMessage()+":--------------->"+url);
        }
        if (doc == null) {
            return list;
        }

        Elements elScripts = doc.getElementsByTag("script");
        if (elScripts.isEmpty()) {
            // No script element at all, so there is nothing to parse.
            return list;
        }

        // NOTE(review): splitting on the bare text "var" also cuts inside any identifier or
        // string value that happens to contain "var"; the fragment index below relies on the
        // current page layout.
        String[] elScriptList = elScripts.get(0).data().split("var");

        // Fragment 2 holds the plate-type array. Its export is disabled; the original logic is
        // kept here for reference:
        //   strTypeList = elScriptList[2];
        //   strTypeList = strTypeList.substring(strTypeList.indexOf("["), strTypeList.lastIndexOf("]")+1);
        //   JSONArray array = JSONArray.fromObject(strTypeList);
        //   JSONArray arrayList = JSONArray.fromObject(array.get(0));
        //   for each element: insert into n3b_vl_plate_type values
        //     ( 'id', 'parentId', level, 'declareTypeName', 'declareTypeNo',
        //       'validFlag', oftenFlag, 'showTypeName' );

        if (elScriptList.length <= MARKET_TYPE_FRAGMENT) {
            // The script does not have the expected number of "var" fragments.
            return list;
        }

        String strTypeList = elScriptList[MARKET_TYPE_FRAGMENT];
        System.out.println(strTypeList);
        if (StringUtils.isNotBlank(strTypeList)) {
            int start = strTypeList.indexOf("[");
            int end = strTypeList.lastIndexOf("]");
            if (start < 0 || end < start) {
                // No complete [...] literal in this fragment.
                return list;
            }
            JSONArray arrayList = JSONArray.fromObject(strTypeList.substring(start, end + 1));
            for (Object o : arrayList) {
                JSONObject object = JSONObject.fromObject(o);
                StringBuilder sb = new StringBuilder();
                sb.append("insert into n3b_vl_market_type values ");
                sb.append(" ( ");
                // The first column is the code value prefixed with "0".
                sb.append(sqlQuote("0" + object.get("code_value")));
                sb.append(",").append(sqlQuote(object.get("code_name")));
                sb.append(",").append(sqlQuote(object.get("code_no")));
                sb.append(",").append(sqlQuote(object.get("code_value")));
                sb.append(",").append(sqlQuote(object.get("valid_flag")));
                sb.append(",").append(sqlQuote(object.get("version")));
                sb.append(",").append(sqlQuote(object.get("code_type")));
                sb.append(" ); ");
                System.out.println(sb.toString());
            }
        }
        return list;
    }

    /**
     * Renders a value as a single-quoted SQL string literal, doubling any embedded single quote
     * so that scraped text cannot terminate the literal early.
     */
    private static String sqlQuote(Object value) {
        return "'" + String.valueOf(value).replace("'", "''") + "'";
    }

}

 

jsoup-处理html中的script数据

标签:

原文地址:http://www.cnblogs.com/hwaggLee/p/5616229.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!