码迷,mamicode.com
首页 > Web开发 > 详细

jsoup html采集器

时间:2016-01-22 10:35:26      阅读:182      评论:0      收藏:0      [点我收藏+]

标签:

package com.forex.collect;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;

import javax.mail.MessagingException;

import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class HexunCollect {
private static final int timeout = 5 * 1000;

public static void main(String... args) throws IOException, MessagingException, InterruptedException {
HexunCollect hc = new HexunCollect();

while(true){
hc.excute();
Thread.sleep(Long.valueOf(hc.getRandom()) * 1000 * 60);
}
}


public int getRandom() {
int max = 15;
int min = 3;
Random random = new Random();
int s = random.nextInt(max) % (max - min + 1) + min;
System.out.println(s);
return s;
}

public void excute() throws IOException, MessagingException {
String url = "http://quote.hexun.com/forex/forex.aspx";

Map<String, String> cookies = new HashMap<String, String>();
cookies.put("__jsluid", "5942ae5f94a3c06666e2a4dfa745c94e");
cookies.put("__utma", "194262068.1565981099.1453342082.1453342082.1453342082.1");
cookies.put("__utmc", "194262068");
cookies.put("__utmz", "194262068.1453342082.1.1.utmcsr=hexun.com|utmccn=(referral)|utmcmd=referral|utmcct=/ ");
cookies.put("hxck_sq_common", "LoginStateCookie");

// fetch the specified URL and parse to a HTML DOM
Response res = Jsoup
.connect(url)
.cookies(cookies)
.header("User-Agent", "Mozilla/5.0 (Windows NT 5.1; zh-CN) AppleWebKit/535.12 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/535.12")
.timeout(timeout)
.data("type", "3")
.method(Method.POST)
.execute();

Document doc = res.parse();

Elements links = doc.select("tr");
Iterator<Element> iterator = links.iterator();
while (iterator.hasNext()) {
Element next = iterator.next();
String text = next.text();
String[] split = text.split(" ");
String enCode = split[0];
String zhCode = split[1];
String point = split[2];
String zhangf = split[3];
String datee = split[split.length - 1];

if ("NZDCAD".equalsIgnoreCase(enCode)) {
if(0.9200<=Double.valueOf(point) && Double.valueOf(point) <=0.92500){
SendMail.sendMessage(datee, enCode+"|"+point+"|"+zhangf+"|"+datee);
}

System.out.println(enCode);
System.out.println(zhCode);
System.out.println(point);
System.out.println(zhangf);
}
System.out.println(datee);

}
}



}

jsoup html采集器

标签:

原文地址:http://www.cnblogs.com/adolfmc/p/5150300.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!