标签:
概要:
1.因为公司需要,就花了一点时间写了一下三大运营商通信数据的抓取,涉及到Web上你所看得到的一切数据。
代码没啥技术含量,重点在于抓包分析过程。期间遇到了很多未知的困难,都一一克服了。
2.由于抓取数据的隐私性,我们的抓包是假设在用户已知自己数据被抓取,并且同意告知短信验证码的情况下进行的,
不属于黑客范畴!
3. 整个过程(包括重建数据库表结构、解析 JSON 等)如同逆向运营商的数据库一般。总体来说,三大运营商网站的更新不算频繁,比较稳定,数据结构、网页结构等都不会有很大的变动。
整体效果如下:
所有运营商抓取到的数据都放到了一个库的三个表里面,后期做数据分析用。
下面分享几段核心源代码给正在研究这个的朋友们。
简单架构:
WebApi控制器代码:
using Crawler;
using Crawler.Common;
using Crawler.Interface;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Web.Http;
using System.Web.Providers.Entities;
using YXRepository.BLL;
using YXRepository.Log;
using YXRepository.Model;

namespace MvcWebApi.Controllers
{
    /// <summary>
    /// Web API entry points that drive a carrier crawler through three steps:
    /// <see cref="RequestLoginImg"/> (create crawler, fetch login captcha),
    /// <see cref="RequestQuerySms"/> (verify captcha, log in, request query SMS) and
    /// <see cref="ClientFinish"/> (verify SMS code, scrape and persist the data).
    /// </summary>
    /// <remarks>
    /// NOTE(review): the crawler, the business-layer object and the current phone/password are kept in
    /// static fields, so every caller of this controller shares one session. Concurrent users will
    /// overwrite each other's state; this rewrite does not change that design, it only makes the
    /// endpoints fail cleanly when the shared state is missing.
    /// NOTE(review): the service password is passed to Log.AddTXLog on every failure; confirm that
    /// persisting it in the log store is acceptable.
    /// </remarks>
    public class TongXinController : ApiController
    {
        static string currentPhoneNumber;
        static string currentPhonePwd;
        string StoreConnection = ConfigurationSettings.AppSettings["StoreConnection"];
        string CreditConnection = ConfigurationSettings.AppSettings["CreditConnection"];
        static ICrawler crawler;
        static string srcid;
        private static userbll ubll;

        /// <summary>
        /// Starts a crawl session: picks the crawler by matching the phone number against the
        /// carrier patterns in <c>Consts</c> and asks it for the login captcha image.
        /// </summary>
        /// <param name="phoneNumber">Phone number to crawl; trimmed before use.</param>
        /// <param name="servicepwd">Carrier service password; trimmed before use.</param>
        /// <returns>
        /// The value returned by <c>crawler.GetLoginImg()</c>, an error status code, or an empty
        /// string when an argument is empty or an exception was logged.
        /// </returns>
        [CrossSite]
        [HttpGet]
        public string RequestLoginImg(string phoneNumber, string servicepwd) {
            string retS = string.Empty;
            srcid = System.Guid.NewGuid().ToString();
            Log.srcid = srcid;
            ubll = new userbll(StoreConnection, CreditConnection, srcid);

            if (string.IsNullOrEmpty(phoneNumber) || string.IsNullOrEmpty(servicepwd)) {
                return retS;
            }

            // Stays null until a crawler has really been built for THIS request, so the catch block
            // never dumps the HTTP log of a crawler left over from an earlier request.
            ICrawler created = null;
            try {
                string number = phoneNumber.Trim();
                string password = servicepwd.Trim();

                if (Regex.IsMatch(number, Consts.RegExpcmcc)) {
                    created = new CrawlerCMCC(number, password);
                } else if (Regex.IsMatch(number, Consts.RegExpunc)) {
                    created = new CrawlerUNC(number, password);
                } else if (Regex.IsMatch(number, Consts.RegExpctc)) {
                    // China Telecom numbers are only recorded, not crawled.
                    return ubll.StoreCTCNumber(phoneNumber)
                        ? ReturnErrorStatusCode.当前手机号不在征信范围
                        : ReturnErrorStatusCode.系统发生异常;
                } else {
                    return ReturnErrorStatusCode.当前手机号不在征信范围;
                }

                crawler = created;
                currentPhoneNumber = phoneNumber;
                currentPhonePwd = servicepwd;

                retS = crawler.GetLoginImg();
                if (string.IsNullOrEmpty(retS)) {
                    retS = ReturnErrorStatusCode.获取登录图片验证码失败;
                }
            } catch (Exception ex) {
                // Log the credentials of this request: the static copies are only updated after the
                // crawler was created and may still belong to a previous caller.
                LogFailure(phoneNumber, servicepwd, "RequestLoginImg", ex, created, false);
            }
            return retS;
        }

        /// <summary>
        /// Verifies the login captcha, logs in and, unless the carrier is China Unicom, asks the
        /// carrier to send the query SMS code.
        /// </summary>
        /// <param name="phoneS">Phone number, used to check whether its data was already collected.</param>
        /// <param name="loginimg">Captcha text typed by the user; nothing is done when empty.</param>
        /// <param name="isUNC">True for China Unicom, which needs no query SMS.</param>
        /// <returns>A success/error status code, or an empty string when nothing was attempted or an exception was logged.</returns>
        [CrossSite]
        [HttpGet]
        public string RequestQuerySms(string phoneS, string loginimg, bool isUNC) {
            // RequestLoginImg must have run first; without it there is no session to continue.
            if (ubll == null || crawler == null) {
                return ReturnErrorStatusCode.系统发生异常;
            }

            string retS = string.Empty;
            try {
                if (ubll.isExistRecord(phoneS)) {
                    return ReturnErrorStatusCode.查询失败_已采集过数据;
                }
                if (string.IsNullOrEmpty(loginimg)) {
                    return retS;
                }
                if (!crawler.IsLoginImgVerifyOk(loginimg.Trim())) {
                    return ReturnErrorStatusCode.校验登录图片验证码失败;
                }

                string loginMessage;
                if (crawler.Login(out loginMessage)) {
                    if (isUNC || crawler.SendQuerySms()) {
                        retS = ReturnSuccessfulStatusCode.实施成功执行;
                    } else {
                        retS = ReturnErrorStatusCode.请求发送查询短信验证码失败;
                    }
                } else {
                    // Map the crawler's message to a status code; an unrecognised message is
                    // returned unchanged. The checks run against the original message so an
                    // earlier mapping cannot influence a later one.
                    retS = loginMessage;
                    if (loginMessage.Contains("账户名与密码不匹配")) { retS = ReturnErrorStatusCode.登录失败_账号密码不匹配; }
                    if (loginMessage.Contains("密码出错已达上限")) { retS = ReturnErrorStatusCode.登录失败_密码出错已达上限; }
                    if (loginMessage.Equals("所属省份系统正在升级")) { retS = ReturnErrorStatusCode.登录失败_所属省份系统正在升级; }
                }
            } catch (Exception ex) {
                LogFailure(currentPhoneNumber, currentPhonePwd, "RequestQuerySms", ex, crawler, true);
            }
            return retS;
        }

        /// <summary>
        /// Verifies the query SMS code and, when it is accepted, scrapes and persists all data.
        /// The scraped data is stored in the database and not returned to the client.
        /// </summary>
        /// <param name="smscode">SMS code typed by the user; nothing is done when empty.</param>
        /// <returns>A success/error status code, or an empty string when nothing was attempted or an exception was logged.</returns>
        [CrossSite]
        [HttpGet]
        public string ClientFinish(string smscode) {
            string retS = string.Empty;
            if (string.IsNullOrEmpty(smscode)) {
                return retS;
            }
            if (ubll == null || crawler == null) {
                return ReturnErrorStatusCode.系统发生异常;
            }

            try {
                if (crawler.IsQuerySmsVerifyOk(smscode.Trim())) {
                    return InsertAllInfo();
                }
                retS = ReturnErrorStatusCode.校验查询短信验证码失败;
            } catch (Exception ex) {
                LogFailure(currentPhoneNumber, currentPhonePwd, "ClientFinish", ex, crawler, true);
            }
            return retS;
        }

        /// <summary>
        /// Collects base info, bill summaries and call details from the crawler, logs out and
        /// persists everything through the business layer.
        /// </summary>
        /// <returns>Success status code when the business layer reports the insert succeeded, otherwise the persistence error code.</returns>
        private string InsertAllInfo() {
            TXInfoModel txinfomodel = crawler.GetInfo();

            IList<TXZhangDanModel> zdlist = crawler.GetZhangDan();
            IList<TXXiangDanModel> xdlist = crawler.GetXiangDan();
            // NOTE(review): Distinct() uses the models' default equality; unless the model classes
            // override Equals/GetHashCode this removes nothing - confirm against the model types.
            txinfomodel.zdlist = zdlist.OrderByDescending(q => q.billEndDate).Distinct().ToList();
            txinfomodel.xdlist = xdlist.OrderByDescending(q => q.startTime).Distinct().ToList();

            crawler.LogOut();

            return ubll.InsertRecord(txinfomodel)
                ? ReturnSuccessfulStatusCode.实施成功执行
                : ReturnErrorStatusCode.操作失败_数据持久化失败;
        }

        /// <summary>
        /// Shared failure path of the three endpoints: optionally logs the crawler out, records
        /// the exception and dumps the crawler's collected HTTP log (which is only written on failure).
        /// </summary>
        /// <param name="phone">Phone number to record with the exception.</param>
        /// <param name="password">Service password to record with the exception.</param>
        /// <param name="action">Name of the endpoint that failed.</param>
        /// <param name="ex">The exception being handled.</param>
        /// <param name="source">Crawler whose HTTP log is dumped; may be null when none exists yet.</param>
        /// <param name="logOut">Whether to call <c>LogOut</c> on <paramref name="source"/> first.</param>
        private static void LogFailure(string phone, string password, string action, Exception ex, ICrawler source, bool logOut) {
            if (logOut && source != null) {
                source.LogOut();
            }

            Log.Configure();
            Log.AddTXLog(phone, password, action, ex);

            if (source == null) {
                return;
            }
            IList<string> httploglist = source.GetAllJsonLog();
            if (httploglist == null) {
                return;
            }
            foreach (var httplog in httploglist) {
                Log.I(string.Format(" 【SrcId:{0}】{1}", srcid, httplog));
            }
        }
    }
}
爬虫核心代码:
代码有些乱,但基本能把联通网站上的数据抓全,大家自己优化代码吧。
(原文地址:http://www.cnblogs.com/x-poior/p/5641437.html)
/*
#############################################################################################
# TongXinZhenXin Solution
# Telecom credit data crawling solution
# =========================================================================================
#
# File: CrawlerUNC.cs   China Unicom crawler class
# =========================================================================================
#
# Credits: 0ng ching tong
# Thanks: ..
# Time: 2016-05-12 17:10:09
# Update: last packet-capture analysis of China Unicom: 2016-5-12 17:15:32
#############################################################################################
*/
using Crawler.Common;
using Crawler.Interface;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using YXRepository.Log;
using YXRepository.Model;

namespace Crawler
{
    /// <summary>
    /// Crawler for the China Unicom (10010.com) self-service site: login with captcha,
    /// personal info, the last five monthly bills and the last five months of call details.
    /// </summary>
    public class CrawlerUNC : CrawlerBase, ICrawler
    {
        /// <summary>Upper bound for "try again" loops so a permanently failing endpoint cannot hang the request.</summary>
        private const int MaxRetryCount = 10;

        /// <summary>Pause between two retries, in milliseconds.</summary>
        private const int RetryDelayMilliseconds = 500;

        /// <summary>Page size used when paging through call details.</summary>
        private const int DetailPageSize = 50;

        /// <summary>Number of months (current month included) that are queried.</summary>
        private const int MonthsToFetch = 5;

        HttpHelperNew hhn;

        // Static because CollectJsonLog is a public static entry point; initialised here so that
        // entry point works even before the first instance is constructed.
        // NOTE(review): shared by all instances and reset by every constructor call.
        static IList<String> loglist = new List<string>();

        private string currentUVC
        {
            get { return HttpHelperNew.UNCuacverifykey; }
        }

        /// <summary>
        /// Creates a China Unicom crawler and sets the login-related request URLs.
        /// </summary>
        /// <param name="number">Phone number to log in with.</param>
        /// <param name="pwd">Service password of that number.</param>
        public CrawlerUNC(string number, string pwd)
        {
            hhn = new HttpHelperNew();
            loglist = new List<string>();
            currentPhoneNumber = number;
            currentPhoneServicePwd = pwd;
            loginIsNeedVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CheckNeedVerify";
            loginVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CreateImage";
            loginToVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CtaIdyChk";
        }

        /// <summary>
        /// Asks the login service whether a captcha is required for the current number.
        /// </summary>
        /// <returns>False only when the response contains <c>"resultCode":"false"</c>; true otherwise.</returns>
        public bool IsLoginNeedVerify()
        {
            string url = loginIsNeedVerifyImgRequestUrl;
            string queryData = "callback=jQuery17205245009011952871_" + TimeStamp.GetTimeStamp_13() + "&userName=" + currentPhoneNumber + "&pwdType=01&_=" + TimeStamp.GetTimeStamp_13();
            string response = hhn.HttpGet(url, queryData, HttpForType.联通);
            CollectJsonLog(url, queryData, 0, response);

            return !response.Contains(@"""resultCode"":""false""");
        }

        /// <summary>
        /// Submits the captcha text to the verification endpoint and remembers it for <see cref="Login"/>.
        /// </summary>
        /// <param name="imgcode">Captcha text typed by the user.</param>
        /// <returns>True when the response contains <c>"resultCode":"true"</c>.</returns>
        public bool IsLoginImgVerifyOk(string imgcode)
        {
            currentLoginImgCode = imgcode;
            string url = loginToVerifyImgRequestUrl;
            string queryData = "callback=jQuery17208163765012834383_1463034583178&verifyCode=" + currentLoginImgCode + "&verifyType=1&_=1463034805373";
            string response = hhn.HttpGet(url, queryData);
            CollectJsonLog(url, queryData, 0, response);

            return response.Contains(@"""resultCode"":""true""");
        }

        /// <summary>
        /// Downloads the login captcha image.
        /// </summary>
        /// <returns>
        /// A <c>data:image/png;base64,</c> URI built from the helper's result, or an empty string
        /// when the helper returned nothing.
        /// </returns>
        public string GetLoginImg()
        {
            loginVerifyImgStream = string.Empty;
            string queryData = "t=1463034742570";
            // Per the original author's note, a cookie needed later is returned with this request.
            string imageData = hhn.HttpGetImage(loginVerifyImgRequestUrl, queryData, HttpForType.联通);
            CollectJsonLog(loginVerifyImgRequestUrl, queryData, 0, imageData);

            if (!string.IsNullOrEmpty(imageData))
            {
                loginVerifyImgStream = "data:image/png;base64," + imageData;
            }
            return loginVerifyImgStream;
        }

        /// <summary>No logout request is sent for China Unicom; always reports success.</summary>
        public bool LogOut()
        {
            return true;
        }

        /// <summary>
        /// Logs in with the service password and the captcha stored by <see cref="IsLoginImgVerifyOk"/>.
        /// While the site answers "province system is upgrading" the request is retried, at most
        /// <see cref="MaxRetryCount"/> times.
        /// </summary>
        /// <param name="loginret">
        /// Receives a failure description (one of "账户名与密码不匹配", "密码出错已达上限",
        /// "所属省份系统正在升级"), or an empty string when none was recognised.
        /// </param>
        /// <returns>True when the response contains <c>resultCode:"0000"</c>.</returns>
        public bool Login(out string loginret)
        {
            loginret = string.Empty;
            loginRequestUrl = "https://uac.10010.com/portal/Service/MallLogin?callback=jQuery17208163765012834383_1463034583180&req_time=1463034838271&redirectURL=http%3A%2F%2Fwww.10010.com&userName=" + currentPhoneNumber +
                "&password=" + currentPhoneServicePwd + "&pwdType=01&productType=01&verifyCode=" + currentLoginImgCode + "&uvc=" + currentUVC + "&redirectType=01&rememberMe=1&_=1463034838271";

            string response = string.Empty;
            for (int attempt = 1; attempt <= MaxRetryCount; attempt++)
            {
                response = hhn.HttpGet(loginRequestUrl, "", HttpForType.联通);
                if (!response.Contains(@"所属省份系统正在升级"))
                {
                    break;
                }
                if (attempt < MaxRetryCount)
                {
                    Thread.Sleep(RetryDelayMilliseconds);
                }
            }
            CollectJsonLog(loginRequestUrl, "", 0, response);

            if (response.Contains(@"resultCode:""7007"""))
            {
                loginret = "账户名与密码不匹配";
            }
            if (response.Contains(@"密码出错已达上限"))
            {
                loginret = "密码出错已达上限";
            }
            if (response.Contains(@"所属省份系统正在升级"))
            {
                // Only reachable once the retries are exhausted.
                loginret = "所属省份系统正在升级";
            }
            return response.Contains(@"resultCode:""0000""");
        }

        /// <summary>
        /// China Unicom only: the check-login call made before querying account data.
        /// Each read of this property sends one HTTP request.
        /// </summary>
        private bool checkLogin
        {
            get
            {
                string url = "http://iservice.10010.com/e3/static/check/checklogin?_=" + TimeStamp.GetTimeStamp_13();
                string response = hhn.HttpPost(url, "", HttpForType.联通);
                CollectJsonLog(url, "", 1, response);

                return response.Contains(@"""isLogin"":true");
            }
        }

        /// <summary>
        /// China Unicom only: loads the captcha page and adds a fixed <c>WT_FPC</c> cookie to the helper.
        /// </summary>
        public void UNCInitPage()
        {
            string url = "https://login.10010.com/captchazh.htm?type=05";
            string response = hhn.HttpGet(url, "", HttpForType.联通);
            CollectJsonLog(url, "", 0, response);

            // The generated value is not used: per the original author's note the server does not
            // validate this cookie, so a captured constant is sent instead.
            string wt_fpc = JsHelper.GetJsMethd("GetWT_FPC", null);

            Cookie wtcookie = new Cookie()
            {
                Expires = DateTime.Now.AddYears(10),
                Path = "/",
                Domain = ".10010.com",
                Name = "WT_FPC",
                Value = "id=2c78d939da42319e6221460629342754:lv=1460686951978:ss=1460685811376"
            };
            hhn.cookie.Add(wtcookie);
        }

        /// <summary>China Unicom needs no query SMS; always reports success.</summary>
        public bool SendQuerySms()
        {
            return true;
        }

        /// <summary>China Unicom needs no query SMS verification; always reports success.</summary>
        /// <param name="smscode">Ignored.</param>
        public bool IsQuerySmsVerifyOk(string smscode)
        {
            return true;
        }

        /// <summary>Not used for China Unicom; always returns null.</summary>
        public IList<T> GetQueryData<T>(T temp)
        {
            return null;
        }

        /// <summary>
        /// Fetches the personal-info response.
        /// </summary>
        /// <returns>The raw response, or an empty string when the check-login call reports not logged in.</returns>
        private string getMyDetails()
        {
            if (!checkLogin)
            {
                return "";
            }

            string infoUrl = "http://iservice.10010.com/e3/static/query/searchPerInfo/?_=1464073258330&menuid=000100020001";
            string response = hhn.HttpPost(infoUrl, "", HttpForType.联通);
            CollectJsonLog(infoUrl, "", 1, response);
            return response;
        }

        /// <summary>
        /// Builds the base-info model from four requests: anonymous info, personal info,
        /// account balance and header view, plus a phone attribution lookup.
        /// </summary>
        /// <returns>The populated model.</returns>
        /// <exception cref="FormatException">The balance response does not contain parsable balance values.</exception>
        public TXInfoModel GetInfo()
        {
            TXInfoModel tim = new TXInfoModel();

            // Part 1: name and sex.
            string infoUrl = "https://uac.10010.com/cust/infomgr/anonymousInfoAJAX";
            string anonymousInfo = hhn.HttpGet(infoUrl, "");
            CollectJsonLog(infoUrl, "", 0, anonymousInfo);
            tim.CustomerName = Utilities.QuMiddle(anonymousInfo, @"name"":""", @"""");
            tim.CustomerSex = Utilities.QuMiddle(anonymousInfo, @"sex"":""", @"""") == "1" ? "男" : "女";

            // Part 2: personal details.
            string details = getMyDetails();
            tim.Email = Utilities.QuMiddle(details, @"sendemail"":""", @"""");
            DateTime innettime;
            DateTime.TryParse(Utilities.QuMiddle(details, @"opendate"":""", @""""), out innettime);
            tim.InNetTime = innettime;
            tim.Grade = "";
            tim.IDCard = Utilities.QuMiddle(details, @"certnum"":""", @"""");
            tim.PhoneNumber = Utilities.QuMiddle(details, @"usernumber"":""", @"""");
            tim.ProviderName = "中国联通:" + Utilities.QuMiddle(details, @"brand"":""", @"""") + "-" + Utilities.QuMiddle(details, @"productName"":""", @"""");
            tim.RegAddress = Utilities.QuMiddle(details, @"certaddr"":""", @"""");
            tim.ContactNum = Utilities.QuMiddle(details, @"usernumber"":""", @"""");
            tim.NetAge = "";
            tim.PhoneStatus = Utilities.QuMiddle(details, @"subscrbstat"":""", @"""");
            tim.RealNameInfo = Utilities.QuMiddle(details, @"certtype"":""", @"""");
            tim.StarLevel = Utilities.QuMiddle(details, @"custlvl"":""", @"""");
            tim.LevelInfo = "";
            tim.ZipCode = "";

            // Part 3a: balances. Parsed with the invariant culture because the values come from
            // JSON, not from user input; a missing value still throws as before.
            string balanceUrl = "http://iservice.10010.com/e3/static/query/accountBalance/search?_=1464858050369&menuid=000100010002";
            string balancePostData = "type=onlyAccount";
            string balance = hhn.HttpPost(balanceUrl, balancePostData, HttpForType.联通);
            CollectJsonLog(balanceUrl, balancePostData, 1, balance);
            tim.CurFee = decimal.Parse(Utilities.QuMiddle(balance, @"userbalance"":""", @""""), CultureInfo.InvariantCulture);
            tim.CurFeeTotal = decimal.Parse(Utilities.QuMiddle(balance, @"acctbalance"":""", @""""), CultureInfo.InvariantCulture);

            // Part 3b: points. Some card types return no points field, hence TryParse.
            // NOTE(review): the key searched for is "sore", not "score" - confirm against a real response.
            string headerUrl = "http://iservice.10010.com/e3/static/query/headerView";
            string header = hhn.HttpPost(headerUrl, "", HttpForType.联通);
            CollectJsonLog(headerUrl, "", 1, header);
            int score;
            int.TryParse(Utilities.QuMiddle(header, @"sore"":""", @""""), out score);
            tim.PointValue = score;

            // Part 4: attribution of the phone number.
            tim.PhoneAttribution = PhoneAttribution.getGuiShuDiNet(tim.PhoneNumber);

            return tim;
        }

        /// <summary>
        /// Fetches the bill summary of the last five months (current month included).
        /// </summary>
        /// <returns>One entry per queried month; empty when the check-login call reports not logged in.</returns>
        public IList<TXZhangDanModel> GetZhangDan()
        {
            IList<TXZhangDanModel> bills = new List<TXZhangDanModel>();
            string infoUrl = "http://iservice.10010.com/e3/static/query/queryHistoryBill?_=" + TimeStamp.GetTimeStamp_13() + "&menuid=000100020001";

            DateTime[] periodStarts;
            DateTime[] periodEnds;
            List<string> postBodies = GetZhangDanPostData(out periodStarts, out periodEnds);

            if (!checkLogin)
            {
                return bills;
            }

            for (int i = 0; i < postBodies.Count; i++)
            {
                string postData = postBodies[i];
                string response = hhn.HttpPost(infoUrl, postData, HttpForType.联通);
                CollectJsonLog(infoUrl, postData, 1, response);

                TXZhangDanModel bill;
                string billCycle = Regex.Match(response, @"""billcycle""(:)("".*?"")").Value;
                if (!string.IsNullOrEmpty(billCycle))
                {
                    // The response states its own billing cycle as "<start>至<end>".
                    DateTime cycleStart, cycleEnd;
                    decimal fee;
                    DateTime.TryParse(Utilities.QuMiddle(billCycle, @"billcycle"":""", "至"), out cycleStart);
                    DateTime.TryParse(Utilities.QuMiddle(billCycle, "至", @""""), out cycleEnd);
                    decimal.TryParse(Utilities.QuMiddle(response, @"nowFee"":""", @""""), NumberStyles.Number, CultureInfo.InvariantCulture, out fee);
                    bill = new TXZhangDanModel()
                    {
                        billStartDate = cycleStart,
                        billEndDate = cycleEnd,
                        billFee = fee
                    };
                }
                else
                {
                    // No billing cycle in the response: fall back to the period that was requested.
                    decimal fee;
                    decimal.TryParse(Utilities.QuMiddle(response, @"allfee"":""", @""""), NumberStyles.Number, CultureInfo.InvariantCulture, out fee);
                    bill = new TXZhangDanModel()
                    {
                        billStartDate = periodStarts[i],
                        billEndDate = periodEnds[i],
                        billFee = fee
                    };
                }
                bills.Add(bill);
            }
            return bills;
        }

        /// <summary>
        /// Fetches the call details of the last five months (current month included), page by page.
        /// </summary>
        /// <returns>All parsed call records; empty when the check-login call reports not logged in.</returns>
        /// <exception cref="InvalidOperationException">
        /// The site reports too many requests, or keeps answering "temporarily unavailable" after all retries.
        /// </exception>
        public IList<TXXiangDanModel> GetXiangDan()
        {
            IList<TXXiangDanModel> details = new List<TXXiangDanModel>();
            callListRequestUrl = "http://iservice.10010.com/e3/static/query/callDetail?_=" + TimeStamp.GetTimeStamp_13() + "&menuid=000100030001";
            List<string> periods = GetXiangDanPostData();

            if (!checkLogin)
            {
                return details;
            }

            foreach (var period in periods)
            {
                string[] range = period.Split('&');
                string rangeQuery = "beginDate=" + range[0] + "&endDate=" + range[1];

                // First request only serves to learn the month's total record count.
                string postData = rangeQuery + "&pageNo=1&pageSize=20";
                string response = hhn.HttpPost(callListRequestUrl, postData, HttpForType.联通);
                CollectJsonLog(callListRequestUrl, postData, 1, response);

                for (int attempt = 1; attempt < MaxRetryCount && response.Contains("暂时无法为您提供服务"); attempt++)
                {
                    Thread.Sleep(RetryDelayMilliseconds);
                    response = hhn.HttpPost(callListRequestUrl, postData, HttpForType.联通);
                }
                if (response.Contains("暂时无法为您提供服务"))
                {
                    // Fail loudly instead of silently storing a month without records.
                    throw new InvalidOperationException("详单查询服务暂时不可用,多次重试后仍然失败");
                }
                if (response.Contains("系统检测您的访问过于频繁"))
                {
                    throw new InvalidOperationException("访问获取详单链接过于频繁!请明天再试");
                }

                string totalRecords = Utilities.QuMiddle(response, @"totalRecord"":", @",""");
                foreach (var pageNo in GetAllcurCuror(totalRecords))
                {
                    postData = rangeQuery + "&pageNo=" + pageNo + "&pageSize=" + DetailPageSize;
                    response = hhn.HttpPost(callListRequestUrl, postData, HttpForType.联通);
                    CollectJsonLog(callListRequestUrl, postData, 1, response);

                    MatchCollection dates = MatchField(response, "calldate");
                    MatchCollection times = MatchField(response, "calltime");
                    MatchCollection places = MatchField(response, "homeareaName");
                    MatchCollection modes = MatchField(response, "calltypeName");
                    MatchCollection otherNumbers = MatchField(response, "othernum");
                    MatchCollection durations = MatchField(response, "calllonghour");
                    MatchCollection types = MatchField(response, "landtype");
                    MatchCollection fees = MatchField(response, "totalfee");

                    // Records are rebuilt by position, so every field must occur equally often;
                    // a page where that is not the case is skipped.
                    int count = dates.Count;
                    bool aligned = times.Count == count && places.Count == count && modes.Count == count
                        && otherNumbers.Count == count && durations.Count == count
                        && types.Count == count && fees.Count == count;
                    if (!aligned)
                    {
                        continue;
                    }

                    for (int i = 0; i < count; i++)
                    {
                        details.Add(new TXXiangDanModel()
                        {
                            anotherNm = FieldValue(otherNumbers[i], "othernum"),
                            commFee = decimal.Parse(FieldValue(fees[i], "totalfee"), CultureInfo.InvariantCulture),
                            commMode = FieldValue(modes[i], "calltypeName"),
                            commPlac = FieldValue(places[i], "homeareaName"),
                            commTime = FieldValue(durations[i], "calllonghour"),
                            commType = FieldValue(types[i], "landtype"),
                            startTime = DateTime.Parse(FieldValue(dates[i], "calldate") + " " + FieldValue(times[i], "calltime"))
                        });
                    }
                }
            }
            return details;
        }

        /// <summary>Finds every <c>"field":"value"</c> occurrence of one field in a response.</summary>
        private static MatchCollection MatchField(string response, string field)
        {
            return Regex.Matches(response, "\"" + field + "\"(:)(\".*?\")");
        }

        /// <summary>Extracts the value part of a match produced by <see cref="MatchField"/>.</summary>
        private static string FieldValue(Match match, string field)
        {
            return Utilities.QuMiddle(match.Value, field + "\":\"", "\"");
        }

        /// <summary>
        /// Builds the page numbers (1, 2, 3, ...) needed to read all call details of one month,
        /// at <see cref="DetailPageSize"/> records per page.
        /// </summary>
        /// <param name="totalNum">Total record count of the month as text; unparsable text counts as 0.</param>
        /// <returns>Page numbers as strings; empty when there are no records.</returns>
        private List<String> GetAllcurCuror(string totalNum)
        {
            List<string> pages = new List<string>();
            int total;
            int.TryParse(totalNum, out total);
            if (total == 0)
            {
                return pages;
            }

            // e.g. 201 records -> 4 full pages + 1 partial page = 5 pages.
            int pageCount = total / DetailPageSize + (total % DetailPageSize == 0 ? 0 : 1);
            for (int page = 1; page <= pageCount; page++)
            {
                pages.Add(page.ToString());
            }
            return pages;
        }

        /// <summary>
        /// Builds the POST bodies for the bill query of the last five months and the period each covers.
        /// </summary>
        /// <param name="periodStarts">Receives the first day of each queried month.</param>
        /// <param name="periodEnds">
        /// Receives the end of each period: the current time (to the second) for the current month,
        /// the last day of the month for earlier months.
        /// </param>
        /// <returns>One POST body per month, newest first.</returns>
        private List<string> GetZhangDanPostData(out DateTime[] periodStarts, out DateTime[] periodEnds)
        {
            List<string> postBodies = new List<string>();
            periodStarts = new DateTime[MonthsToFetch];
            periodEnds = new DateTime[MonthsToFetch];

            DateTime now = DateTime.Now;
            DateTime nowToSecond = new DateTime(now.Year, now.Month, now.Day, now.Hour, now.Minute, now.Second);
            DateTime firstOfThisMonth = new DateTime(now.Year, now.Month, 1);

            for (int i = 0; i < MonthsToFetch; i++)
            {
                DateTime monthStart = firstOfThisMonth.AddMonths(-i);
                periodStarts[i] = monthStart;
                periodEnds[i] = i == 0 ? nowToSecond : monthStart.AddMonths(1).AddDays(-1);
                postBodies.Add("billdate=" + monthStart.ToString("yyyyMM", CultureInfo.InvariantCulture) + "&querycode=0001&querytype=0001");
            }
            return postBodies;
        }

        /// <summary>
        /// Builds the date ranges for the call-detail query of the last five months,
        /// each formatted as <c>yyyy-MM-dd&amp;yyyy-MM-dd</c>.
        /// </summary>
        /// <returns>
        /// One range per month, newest first; the current month ends today, earlier months end on
        /// their last day.
        /// </returns>
        private List<string> GetXiangDanPostData()
        {
            List<string> ranges = new List<string>();
            DateTime today = DateTime.Now;
            DateTime firstOfThisMonth = new DateTime(today.Year, today.Month, 1);

            for (int i = 0; i < MonthsToFetch; i++)
            {
                // AddMonths handles the year rollover and month lengths (leap years included).
                DateTime monthStart = firstOfThisMonth.AddMonths(-i);
                DateTime monthEnd = i == 0 ? today : monthStart.AddMonths(1).AddDays(-1);
                ranges.Add(monthStart.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture) + "&" + monthEnd.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture));
            }
            return ranges;
        }

        /// <summary>
        /// Appends one request/response pair to the shared HTTP log.
        /// </summary>
        /// <param name="url">Requested URL.</param>
        /// <param name="data">Query string or POST body that was sent.</param>
        /// <param name="method1">1 for POST; any other value is recorded as GET.</param>
        /// <param name="responseS">Raw response text.</param>
        public static void CollectJsonLog(string url, string data, int method1, string responseS)
        {
            string method = method1 == 1 ? "Post" : "Get";
            loglist.Add(string.Format("【请求url:{0} , 请求数据:{1} , 请求方式:{2}, 返回数据:{3} 】", url, data, method, responseS));
        }

        /// <summary>Returns the shared HTTP log collected so far.</summary>
        public IList<String> GetAllJsonLog()
        {
            return loglist;
        }
    }
}
//(原文地址:http://www.cnblogs.com/x-poior/p/5641437.html)
全部solution代码(含抓取UI,数据库脚本)下载:
【最新原创】中国移动(中国联通)_通信账单,详单,个人信息抓取爬虫代码
标签:
原文地址:http://www.cnblogs.com/x-poior/p/5641437.html