码迷,mamicode.com
首页 > Web开发 > 详细

Jsoup解析网站输出gson数据

时间:2014-11-01 14:56:58      阅读:255      评论:0      收藏:0      [点我收藏+]

标签:des   style   blog   http   io   os   ar   java   for   

首先要导入jsoup跟gson的jar包。

<span style="font-size:18px;">package com.bjsxt.test;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import org.jsoup.parser.XmlTreeBuilder;
import org.jsoup.select.Elements;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;


public class Gson1 {
	/**
	 * Downloads a fixed Alibaba product page, scrapes the "quick detail" table
	 * (element id {@code J-quick-detail}) into a key/value map and prints the
	 * map as pretty-printed JSON on standard output.
	 *
	 * <p>The download is attempted a bounded number of times; every failure is
	 * reported on standard error. If no attempt succeeds the method returns
	 * without printing any JSON.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		String url="http://www.alibaba.com/product-detail/OEM-Cheapest-Plain-High-Quality-Hoodie_60035404068.html";
		// Bounded so that a bad URL or a dead network cannot spin forever.
		final int maxAttempts = 5;
		Element doc = null;
		for (int attempt = 1; attempt <= maxAttempts && doc == null; attempt++) {
			try {
				// If the site rejects jsoup's own request, fetch the page with
				// new Gson1().strToDoc(url) instead.
				doc = Jsoup.connect(url).get();
			} catch (Exception e) {
				System.err.println("Attempt " + attempt + " of " + maxAttempts
						+ " failed for " + url + ": " + e);
			}
		}
		if (doc == null) {
			System.err.println("Giving up on " + url + " after " + maxAttempts + " attempts");
			return;
		}

		// Labels are the <span> elements and values the <div> elements inside
		// the table cells; they are paired up by position.
		Elements valEl = doc.select("#J-quick-detail tbody tr td div");
		Elements keyEl = doc.select("#J-quick-detail tbody tr td span");
		Map<String, String> map = new HashMap<String, String>();
		// Only pair up to the shorter list so a label without a matching value
		// cannot trigger an IndexOutOfBoundsException.
		int pairCount = Math.min(keyEl.size(), valEl.size());
		for (int i = 0; i < pairCount; i++) {
			map.put(keyEl.get(i).text(), valEl.get(i).text());
		}

		String others = Gson1.write2response(map);
		System.out.println("======="+others+"=====");
	}
	/**
	 * Serializes an object to JSON text with Gson's pretty-printing enabled.
	 *
	 * @param obj the object to serialize
	 * @return the pretty-printed JSON produced by Gson for {@code obj}
	 */
	public static String write2response(Object obj) {
		// A fresh pretty-printing Gson is built on every call, as before.
		Gson prettyGson = new GsonBuilder().setPrettyPrinting().create();
		return prettyGson.toJson(obj);
	}
	/**
	 * Downloads a page with a plain {@link URLConnection} that sends a
	 * desktop-browser User-Agent header, then parses the text with jsoup.
	 *
	 * <p>The response body is decoded as UTF-8, echoed to standard output and
	 * parsed with jsoup's XML tree builder using an empty base URI.
	 *
	 * @param url address of the page to download
	 * @return the parsed document
	 * @throws Exception if the URL is malformed or the download fails
	 */
	public Document strToDoc(String url) throws Exception{
		URLConnection uc = new URL(url).openConnection();
		// Disguise the request as coming from a desktop Firefox browser.
		// (An "MSIE 6.0 / Windows NT 5.1" User-Agent string is a possible alternative.)
		uc.setRequestProperty("User-Agent",
				"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0");

		StringBuilder sb = new StringBuilder();
		InputStream in = uc.getInputStream();
		try {
			BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
			String line;
			while ((line = br.readLine()) != null) {
				// Line terminators are normalized to '\n'.
				sb.append(line).append('\n');
			}
		} finally {
			// Always release the connection's stream, even when reading fails.
			in.close();
		}

		String s = sb.toString();
		Document doc = Jsoup.parse(s, "", new Parser(new XmlTreeBuilder()));
		// Echo the raw page text to standard output (kept from the original).
		System.out.println(s);
		return doc;
	}
}</span>

运行结果:

bubuko.com,布布扣

Jsoup解析网站输出gson数据

标签:des   style   blog   http   io   os   ar   java   for   

原文地址:http://blog.csdn.net/yantingmei/article/details/40679995

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!