码迷,mamicode.com
首页 > 编程语言 > 详细

Java解析采集模块

时间:2016-06-30 23:18:09      阅读:1199      评论:0      收藏:0      [点我收藏+]

标签:

技术分享
  1 package step3;
  2 
  3 import java.io.BufferedReader;
  4 import java.io.BufferedWriter;
  5 import java.io.File;
  6 import java.io.FileReader;
  7 import java.io.FileWriter;
  8 import java.io.IOException;
  9 import java.io.InputStream;
 10 import java.io.InputStreamReader;
 11 import java.io.PrintWriter;
 12 import java.sql.ResultSet;
 13 import java.sql.SQLException;
 14 import java.sql.Statement;
 15 import java.util.ArrayList;
 16 import java.util.Calendar;
 17 import java.util.List;
 18 
 19 import org.apache.commons.httpclient.HttpClient;
 20 import org.apache.commons.httpclient.methods.GetMethod;
 21 import org.apache.commons.httpclient.methods.PostMethod;
 22 import org.json.JSONObject;
 23 import org.jsoup.Jsoup;
 24 import org.jsoup.nodes.Document;
 25 import org.jsoup.select.Elements;
 26 
 27 import bean.Porn;
 28 import util.DBConnection;
 29 
 30 /**
 31  * 
 32  * @ClassName: quhao
 33  * @Description: 91porn地址解析
 34  * @author zeze
 35  * @date 2016年06月30日 下午7:55:31
 36  *
 37  */
 38 public class porn91 {
 39 
 40     private static String cookie = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; _ga=GA1.2.1738858661.1466764840; _gat=1; visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0.ClatMQ.INJmWYMZ8T220CgsSTcfpHhTxXI";
 41     private static String cookie2 = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; _ga=GA1.2.1738858661.1466764840; _gat=1; visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0.ClatMw.6MGC1jX7mgjsChpGFBd-xHTv9ZU";
 42 
 43     private static String Token = "1467296187##60ecf40d9328862cc6cd6a478adfc72ee0554050";
 44 
 45     private static String Url = "http://freeget.co/video/extraction";
 46     private static String url001 = null;
 47     private static String dirfile = "F:/91porn/91url.csv";
 48     private static String destfile = "F:/91porn/data.txt";
 49 
 50     private static int cnt0 = 0;
 51 
 52     private static String num = null;
 53     private static String title = null;
 54     private static String time = null;
 55     private static String longtime = null;
 56     private static String viewnum = null;
 57     private static String Parurl = null;// "http://www.91porn.com/view_video.php?viewkey=c5ec60d0da8c8fbdb180&page=4&viewtype=basic&category=mr";
 58 
 59     public static void main(String[] args) throws InterruptedException {
 60 
 61         File file = new File(dirfile);
 62         FileReader reader = null;
 63         BufferedReader br = null;
 64         try {
 65             reader = new FileReader(file);
 66             br = new BufferedReader(reader);
 67             String str = null;
 68             String[] strArr = null;
 69             int cnt = 0;
 70             while ((str = br.readLine()) != null) {
 71                 // System.out.println(str);
 72                 strArr = str.split(",");
 73                 if (strArr.length != 7)
 74                     continue;
 75                 num = strArr[0];
 76                 title = strArr[1];
 77                 time = strArr[2];
 78                 longtime = strArr[4];
 79                 viewnum = strArr[5];
 80                 Parurl = strArr[6];
 81                 cnt++;
 82                 System.out.println(num + "," + title + "," + time);
 83                 func_step1();
 84             }
 85             System.out.println("采集结束,总共:" + cnt + "条,成功写入" + cnt0 + "条");
 86 
 87         } catch (Exception e) {
 88             // TODO: handle exception
 89             e.printStackTrace();
 90         } finally {
 91             if (br != null) {
 92                 try {
 93                     br.close();
 94                 } catch (Exception e2) {
 95                     // TODO: handle exception
 96                     e2.printStackTrace();
 97                 }
 98             }
 99             if (reader != null) {
100                 try {
101                     reader.close();
102                 } catch (Exception e2) {
103                     // TODO: handle exception
104                     e2.printStackTrace();
105                 }
106             }
107         }
108 
109     }
110 
111     private static void func_step1() {
112         HttpClient httpClient = new HttpClient();
113         try {
114             PostMethod postMethod = new PostMethod(Url);
115             postMethod.getParams().setContentCharset("utf-8");
116             // 每次访问需授权的网址时需 cookie 作为通行证
117             postMethod.setRequestHeader("cookie", cookie);
118             postMethod.setRequestHeader("X-CSRFToken", Token);
119             postMethod.setRequestHeader("Accept-Language", "zh-CN,zh;q=0.8");
120             postMethod.setRequestHeader("Host", "freeget.co");
121             postMethod.setRequestHeader("Referer", "http://freeget.co/");
122             postMethod.setRequestHeader("User-Agent",
123                     "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
124             postMethod.setParameter("url", Parurl);
125             int statusCode = httpClient.executeMethod(postMethod);// 返回状态码200为成功,500为服务器端发生运行错误
126             System.out.println("返回状态码:" + statusCode);
127             // 打印出返回数据,检验一下是否成功
128             String result = postMethod.getResponseBodyAsString();
129             if (statusCode == 200) {
130                 // 解析成功,取得token和view_key
131                 JSONObject a = new JSONObject(result);
132                 url001 = "http://freeget.co/video/" + a.get("view_key") + "/" + a.get("token");
133                 System.out.println("视频解析地址:" + url001);
134                 func_step2(url001);
135             }
136         } catch (Exception e) {
137             e.printStackTrace();
138         }
139     }
140 
141     private static void func_step2(String url) {
142         HttpClient httpClient = new HttpClient();
143         try {
144             GetMethod getMethod = new GetMethod(url);
145             getMethod.getParams().setContentCharset("utf-8");
146             getMethod.setRequestHeader("cookie", cookie2);
147             getMethod.setRequestHeader("Accept-Language", "zh-cn");
148             getMethod.setRequestHeader("User-Agent",
149                     "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
150             int statusCode = httpClient.executeMethod(getMethod);// 返回状态码200为成功,500为服务器端发生运行错误
151             // System.out.println("返回状态码:" + statusCode);
152             // 打印出返回数据,检验一下是否成功
153             InputStream inputStream = getMethod.getResponseBodyAsStream();
154             BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
155             StringBuffer stringBuffer = new StringBuffer();
156             String str = "";
157             while ((str = br.readLine()) != null) {
158                 stringBuffer.append(str);
159             }
160             if (statusCode == 200) {
161                 Document doc = Jsoup.parse(stringBuffer.toString());
162                 Elements name = doc.select("a");
163                 String playurl = name.get(4).text();
164                 System.out.println("在线播放地址:" + playurl);
165                 writefile(playurl);
166                 cnt0++;
167             }
168         } catch (Exception e) {
169             e.printStackTrace();
170         }
171     }
172 
173     private static void writefile(String url) {
174         FileWriter fw = null;
175         BufferedWriter bw = null;
176         PrintWriter pw = null;
177         try {
178             fw = new FileWriter(new File(destfile), true);
179             bw = new BufferedWriter(fw);
180             pw = new PrintWriter(bw);
181             pw.write(num + ‘,‘ + title + ‘,‘ + time + ‘,‘ + longtime + ‘,‘ + viewnum + ‘,‘ + url + "\r\n");
182         } catch (IOException e) {
183             // TODO Auto-generated catch block
184             e.printStackTrace();
185         } finally {
186             if (pw != null) {
187                 pw.close();
188             }
189             if (bw != null) {
190                 try {
191                     bw.close();
192                 } catch (IOException e) {
193                     // TODO Auto-generated catch block
194                     e.printStackTrace();
195                 }
196             }
197             if (fw != null) {
198                 try {
199                     fw.close();
200                 } catch (IOException e) {
201                     // TODO Auto-generated catch block
202                     e.printStackTrace();
203                 }
204             }
205         }
206     }
207 
208     public List<Porn> QueryAllBook() {
209         java.sql.Connection connection = DBConnection.getConnection();
210         String sql = "select * from porn where status=0";
211         java.sql.PreparedStatement pstmt = DBConnection.getPreparedStatement(connection, sql);
212         List<Porn> pornlist = new ArrayList<Porn>();
213         System.out.println(sql);
214         try {
215             Statement stmt = connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_READ_ONLY);
216             java.sql.ResultSet rs = stmt.executeQuery(sql);
217             while (rs.next()) {
218                 Porn porn = new Porn();
219                 porn.setNum(rs.getString(1));
220                 porn.setTitle(rs.getString(2));
221                 porn.setTime(rs.getString(3));
222                 porn.setViewkey(rs.getString(4));
223                 porn.setLongtime(rs.getString(5));
224                 porn.setViewnum(rs.getString(6));
225                 porn.setParurl(rs.getString(7));
226                 pornlist.add(porn);
227             }
228             rs.last();
229         } catch (SQLException e) {
230             e.printStackTrace();
231         } finally {
232             DBConnection.close(connection, pstmt, null);
233         }
234         return pornlist;
235     }
236 }
View Code

 

Java解析采集模块

标签:

原文地址:http://www.cnblogs.com/zeze/p/5631448.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!