码迷,mamicode.com
首页 > 编程语言 > 详细

Java 爬虫:登录央行征信网站

时间:2017-09-26 01:08:18      阅读:533      评论:0      收藏:0      [点我收藏+]

标签:java爬虫   run   struts   描述   org   int   comm   common   bug   

  1 package com.entrym.crawler.test;
  2 
  3 import java.util.HashMap;
  4 import java.util.Map;
  5 
  6 import org.apache.commons.lang.StringUtils;
  7 import org.apache.http.client.methods.HttpGet;
  8 import org.apache.http.client.methods.HttpPost;
  9 import org.jsoup.Jsoup;
 10 import org.jsoup.nodes.Document;
 11 import org.slf4j.Logger;
 12 import org.slf4j.LoggerFactory;
 13 
 14 import com.entrym.crawler.constans.CommonConstants;
 15 import com.entrym.crawler.util.CommonUtil;
 16 import com.entrym.crawler.util.PowerHttpClient;
 17 import com.entrym.crawler.util.verifyCode.Captcha;
 18 
 19 /**
 20  * 央行征信报告获取爬虫
 21  * @ClassName: PbccrcCrawler
 22  * @Description: TODO(这里用一句话描述这个类的作用)
 23  *
 24  */
 25 public class PbccrcCrawler
 26 {
 27     
 28     private static Logger logger = LoggerFactory.getLogger(PbccrcCrawler.class);
 29     private static String loginUrl = "https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp";
 30     private static String loginPostUrl = "https://ipcrs.pbccrc.org.cn/login.do";
 31     private static String welcomeUrl = "https://ipcrs.pbccrc.org.cn/welcome.do";
 32     private static String reportUrl = "https://ipcrs.pbccrc.org.cn/reportAction.do";
 33     private static String summaryReportUrl = "https://ipcrs.pbccrc.org.cn/summaryReport.do";
 34     private static String simpleReportUrl = "https://ipcrs.pbccrc.org.cn/simpleReport.do";
 35    
 36     private static String host = "ipcrs.pbccrc.org.cn";
 37     private static String userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko";
 38     
 39     private PowerHttpClient powerHttpClient;
 40     private boolean isLogin=false;//登录状态
 41     private boolean report0=false;//个人信用信息提示
 42     private boolean report1=false;//个人信用信息概要
 43     private boolean report2=false;//个人信用报告
 44     
 45     private String task_id=null;
 46     
 47     /**
 48      * 登录
 49      * @Title: login
 50      * @Description: TODO(这里用一句话描述这个方法的作用)
 51      * @param @param loginname
 52      * @param @param password    参数
 53      * @return void    返回类型
 54      * @throws
 55      */
 56     public  String login(String loginname ,String password)
 57     {
 58         try 
 59         {
 60             powerHttpClient = new PowerHttpClient();
 61             //打开首页
 62             String html = powerHttpClient.getToString(loginUrl);
 63             logger.debug("----首页---",html);
 64             
 65             Document doc = Jsoup.parse(html);
 66             String token = doc.select("input[name=org.apache.struts.taglib.html.TOKEN]").val(); 
 67             String method = doc.select("input[name=method]").val(); 
 68             String date = doc.select("input[name=date]").val(); 
 69             String imgUrl = doc.select("img[id=imgrc]").attr("src");
 70             String imgData = "";
 71             if (StringUtils.isNotBlank(imgUrl)) 
 72             {
 73                 //处理验证码
 74                 imgUrl = "https://ipcrs.pbccrc.org.cn"+imgUrl;
 75                 logger.info(task_id+"Captcha imgUrl = {}",imgUrl);
 76                 HttpGet httpGet = powerHttpClient.getHttpGet(imgUrl);
 77                 httpGet.addHeader("Host", host);
 78                 httpGet.addHeader("User-Agent", userAgent);
 79                 httpGet.addHeader("Referer", loginUrl);
 80 
 81                 byte[] imgByte = powerHttpClient.getToByteArray(httpGet);
 82                 Captcha captcha = new Captcha("img");
 83                 CommonUtil.writeByteArrayToFile(imgByte, CommonConstants.RUNTIME_TEMPIMG_FOLDER+captcha.getFilePath());
 84                 System.out.println(CommonConstants.RUNTIME_TEMPIMG_FOLDER+captcha.getFilePath());
 85                 imgData = CommonUtil.consoleScanner();
 86             }
 87 
 88             //提交登录
 89             Map<String, String> map = new HashMap<>();
 90             map.put("org.apache.struts.taglib.html.TOKEN", token);
 91             map.put("method", method);
 92             map.put("date", date);
 93             map.put("_@IMGRC@_", imgData);
 94             map.put("loginname", loginname);
 95             map.put("password", password);
 96             
 97             HttpPost httpPost = powerHttpClient.getHttpPost(loginPostUrl);
 98             httpPost.addHeader("Host", host);
 99             httpPost.addHeader("User-Agent", userAgent);
100             httpPost.addHeader("Referer", loginUrl);
101             String html1 = powerHttpClient.postWithMap(map,httpPost);
102             logger.debug("----提交登录结果---",html1);
103             
104             HttpGet httpGet1 = powerHttpClient.getHttpGet(welcomeUrl);
105             httpGet1.addHeader("Host", host);
106             httpGet1.addHeader("User-Agent", userAgent);
107             httpGet1.addHeader("Referer", loginPostUrl);
108             String html2 = powerHttpClient.getToString(httpGet1, "");
109             logger.info("----欢迎页面---{}",html2);
110             parseLogin(html2);
111             return html2;
112         } catch (Exception e) {
113             logger.error(task_id+"---登录出现异常:{}",CommonUtil.getExceptionTrace(e));
114         }
115         return null;
116     }
117     
118     
119     /**
120      * 解析登录和报告状态
121      * @Title: parseLogin
122      * @Description: TODO(这里用一句话描述这个方法的作用)
123      * @param @param html    参数
124      * @return void    返回类型
125      * @throws
126      */
127     private void parseLogin(String html)
128     {
129         if (StringUtils.isBlank(html)) {
130             return;
131         }
132         if (html.contains("欢迎登录个人信用信息服务平台")) {
133             isLogin = true;
134             logger.info(task_id+"----恭喜,登录成功---");
135         }
136         
137     }
138     
139  
140     
141     public static void main(String[] args) {
142         PbccrcCrawler pbccrcCrawler = new PbccrcCrawler();
143         pbccrcCrawler.login("88888", "9999999");
144     }
145       
146 
147 }

 

Java 爬虫:登录央行征信网站

标签:java爬虫   run   struts   描述   org   int   comm   common   bug   

原文地址:http://www.cnblogs.com/ydf0509/p/7594637.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!