码迷,mamicode.com
首页 > 移动开发 > 详细

【最新原创】中国移动(中国联通)_通信账单,详单,个人信息抓取爬虫代码

时间:2016-07-04 20:31:08      阅读:5403      评论:0      收藏:0      [点我收藏+]

标签:

概要:

1.因为公司需要,就花了一点时间写了一下三大运营商通信数据的抓取,涉及到Web上你所看得到的一切数据。

代码没啥技术含量,重点在于抓包分析过程。期间遇到了很多未知的困难,都一一克服了。

2.由于抓取数据的隐私性,我们的抓包是假设在用户已知自己数据被抓取,并且同意告知短信验证码的情况下进行的,

不属于黑客范畴

3.整个过程,包括重建数据库表结构,解析json等如同逆向运营商的数据库一般。总体来说,三大运营商更新频率不算频繁,还算较稳定,数据结构,网页结构等都不会做很大的变动。

 

整体效果如下:

所有运营商抓取到的数据都放到了一个库的三个表里面,后期做数据分析用。

技术分享

下面分享几个核心的源代码给 正在研究这个的朋友们。

 

简单架构:

技术分享

 

WebApi控制器代码:

技术分享
  1 using Crawler;
  2 using Crawler.Common;
  3 using Crawler.Interface;
  4 using System;
  5 using System.Collections.Generic;
  6 using System.Configuration;
  7 using System.Linq;
  8 using System.Net;
  9 using System.Net.Http;
 10 using System.Text.RegularExpressions;
 11 using System.Web.Http;
 12 using System.Web.Providers.Entities;
 13 using YXRepository.BLL;
 14 using YXRepository.Log;
 15 using YXRepository.Model;
 16 
 17 namespace MvcWebApi.Controllers
 18 {
 19     public class TongXinController : ApiController
 20     {
 21         static string currentPhoneNumber;
 22         static string currentPhonePwd;
 23         string StoreConnection = ConfigurationSettings.AppSettings["StoreConnection"];
 24         string CreditConnection = ConfigurationSettings.AppSettings["CreditConnection"];
 25         static ICrawler crawler;
 26         static string srcid;
 27         //static Logger log;
 28         private static userbll ubll;
 29 
 30         [CrossSite]
 31         [HttpGet]
 32         //请求登录验证码
 33         public string RequestLoginImg(string phoneNumber, string servicepwd) {
 34             string retS = string.Empty; 
 35             srcid = System.Guid.NewGuid().ToString();
 36             Log.srcid = srcid; 
 37             ubll = new userbll(StoreConnection, CreditConnection,srcid);
 38 
 39             if (!string.IsNullOrEmpty(phoneNumber) && !string.IsNullOrEmpty(servicepwd)) {
 40 
 41                 try {
 42                     Regex belongTocmcc = new Regex(Consts.RegExpcmcc);
 43                     Regex belongTounc = new Regex(Consts.RegExpunc);
 44                     Regex belongToctc = new Regex(Consts.RegExpctc);
 45                     if (belongTocmcc.IsMatch(phoneNumber.Trim()))
 46                         crawler = new CrawlerCMCC(phoneNumber.Trim(), servicepwd.Trim());
 47                     else if (belongTounc.IsMatch(phoneNumber.Trim()))
 48                         crawler = new CrawlerUNC(phoneNumber.Trim(), servicepwd.Trim());
 49                     else if (belongToctc.IsMatch(phoneNumber.Trim())) {
 50                         //电信存记录
 51                         if(ubll.StoreCTCNumber(phoneNumber))
 52                             return ReturnErrorStatusCode.当前手机号不在征信范围;
 53                         else
 54                             return ReturnErrorStatusCode.系统发生异常; 
 55                     }
 56                     else
 57                         return ReturnErrorStatusCode.当前手机号不在征信范围;
 58                     currentPhoneNumber = phoneNumber;  
 59                     currentPhonePwd = servicepwd;
 60                     //if (crawler.IsLoginNeedVerify())
 61                     //{
 62                         retS = crawler.GetLoginImg();
 63                         if(string.IsNullOrEmpty(retS))
 64                             retS =  ReturnErrorStatusCode.获取登录图片验证码失败; 
 65                     //}
 66                 } catch (Exception ex) {
 67                     Log.Configure();
 68                     Log.AddTXLog(currentPhoneNumber, currentPhonePwd, "RequestLoginImg", ex);
 69                     IList<string> httploglist = crawler.GetAllJsonLog();//记录所有http日志
 70 
 71 
 72                     foreach (var httplog in httploglist) {
 73                         Log.I(string.Format(" 【SrcId:{0}】{1}", srcid, httplog));//成功的话不记录http历史记录
 74                     }
 75                 }
 76             }
 77             return retS;
 78         }
 79 
 80         [CrossSite]
 81         [HttpGet]
 82         //请求登录短信
 83         public string RequestQuerySms(string phoneS, string loginimg,bool isUNC) {
 84             string retS = string.Empty;
 85 
 86             try { 
 87                 if (ubll.isExistRecord(phoneS))
 88                     return ReturnErrorStatusCode.查询失败_已采集过数据;//本库验证
 89 
 90 
 91                 if (!string.IsNullOrEmpty(loginimg)) {
 92 
 93                     if (crawler.IsLoginImgVerifyOk(loginimg.Trim())) {
 94                         if (crawler.Login(out retS)) {
 95                             if (!isUNC) {
 96                                 if (crawler.SendQuerySms())
 97                                     retS = ReturnSuccessfulStatusCode.实施成功执行;
 98                                 else
 99                                     retS = ReturnErrorStatusCode.请求发送查询短信验证码失败;
100                             } else
101                                 retS = ReturnSuccessfulStatusCode.实施成功执行;//联通无需查询短信
102                         } else {
103                             if (retS.Contains("账户名与密码不匹配")) { retS = ReturnErrorStatusCode.登录失败_账号密码不匹配; }
104                             if (retS.Contains("密码出错已达上限")) { retS = ReturnErrorStatusCode.登录失败_密码出错已达上限; }
105                             if (retS.Equals("所属省份系统正在升级")) { retS = ReturnErrorStatusCode.登录失败_所属省份系统正在升级; }
106                         }
107                     } else
108                         retS = ReturnErrorStatusCode.校验登录图片验证码失败;
109                 }
110             } catch (Exception ex) {
111                 crawler.LogOut();
112                 Log.Configure();
113                 Log.AddTXLog(currentPhoneNumber, currentPhonePwd, "RequestQuerySms", ex);
114                 IList<string> httploglist = crawler.GetAllJsonLog();//记录所有http日志
115 
116 
117                 foreach (var httplog in httploglist) {
118                     Log.I(string.Format(" 【SrcId:{0}】{1}", srcid, httplog));//成功的话不记录http历史记录
119                 }
120             }
121             return retS;
122         }
123 
124         [CrossSite]
125         [HttpGet]
126         //请求抓取数据
127         public string ClientFinish(string smscode) {
128             string retS = string.Empty;
129             if (!string.IsNullOrEmpty(smscode)) {
130                 try {
131                     if (crawler.IsQuerySmsVerifyOk(smscode.Trim())) 
132                     {
133                         return InsertAllInfo();
134                          //var Data1 = crawler.GetQueryData();
135                         //TXInfoModel insertmodel = ConvertDataToModel(Data1);
136 
137 
138                         //if (Data1.Contains("暂时无法为您提供服务,请稍后再试"))
139                         //    retS =ReturnErrorStatusCode.查询失败_暂时无法为您提供服务;
140                         //if (Data1.Contains("同一时间使用该业务人数较多"))
141                         //    retS = ReturnErrorStatusCode.查询失败_同一时间使用该业务人数较多;
142                         //else {
143 
144                             //if (ubll.InsertRecord(insertmodel))
145                             //    retS = ReturnSuccessfulStatusCode.实施成功执行;
146                             //else
147                             //    retS = ReturnErrorStatusCode.操作失败_数据持久化失败; 
148                             
149                         //}
150                     } 
151                     else
152                     {
153                         retS = ReturnErrorStatusCode.校验查询短信验证码失败;
154                     }
155                 } catch (Exception ex) {
156                     crawler.LogOut();
157                     Log.Configure();
158                     Log.AddTXLog(currentPhoneNumber, currentPhonePwd, "ClientFinish", ex);
159                     IList<string> httploglist = crawler.GetAllJsonLog();//记录所有http日志
160 
161 
162                     foreach (var httplog in httploglist) {
163                         Log.I(string.Format(" 【SrcId:{0}】{1}", srcid, httplog));//成功的话不记录http历史记录
164                     }
165                 }
166             }
167             //存储查询的结果数据 存入数据库,不向前端输出
168             return retS;
169         }
170 
171      
172         //数据持久化
173         private string InsertAllInfo() { 
174             string retCode = string.Empty;
175 
176             //Insert Base info
177             TXInfoModel txinfomodel = new TXInfoModel();
178             txinfomodel = crawler.GetInfo();
179 
180             //Insert Overall&Details info
181             //首先存储Baseinfo返回主键,再存储剩余两项内容。
182              
183             IList<TXZhangDanModel> zdlist = crawler.GetZhangDan( );
184             IList<TXXiangDanModel> xdlist = crawler.GetXiangDan( );
185             txinfomodel.zdlist = zdlist.OrderByDescending(q=>q.billEndDate).Distinct().ToList();
186             txinfomodel.xdlist = xdlist.OrderByDescending(q => q.startTime).Distinct().ToList();
187 
188             crawler.LogOut();//退出登录
189             //调用bll进行事务 数据持久化
190             if (ubll.InsertRecord(txinfomodel)) {
191                 retCode = ReturnSuccessfulStatusCode.实施成功执行;
192             } else {
193                 retCode = ReturnErrorStatusCode.操作失败_数据持久化失败; 
194             } 
195 
196             return retCode;
197         }
198     }
199 }
View Code

 

爬虫核心代码:

代码有些乱,但基本能把联通网站上的数据全部抓全,大家自己优化代码吧。

(原文地址:http://www.cnblogs.com/x-poior/p/5641437.html)

/*
#############################################################################################
# TongXinZhenXin Solution
# 通信征信数据抓取解决方案
# ========================================================================================= #
# File: CrawlerUNC.cs 联通抓取类
# ========================================================================================= #
# Credits: 0ng ching tong
# Thanks:  ..
# Time: 2016年5月12日17:10:09
# Update: 联通最近一次抓包分析时间:2016-5-12 17:15:32
#############################################################################################
*/

using Crawler.Common;
using Crawler.Interface;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using YXRepository.Log;
using YXRepository.Model;

namespace Crawler {
    public class CrawlerUNC : CrawlerBase, ICrawler {

        HttpHelperNew hhn;
        static IList<String> loglist;
        /// <summary>
        /// Value sent as the <c>uvc</c> query parameter of the login request; read from the
        /// static <c>HttpHelperNew.UNCuacverifykey</c> on every access.
        /// </summary>
        private string currentUVC {
            get { return HttpHelperNew.UNCuacverifykey; }
        }
        /// <summary>
        /// Creates a China Unicom crawler for one phone number and sets the login endpoints.
        /// </summary>
        /// <remarks>
        /// Also replaces the static HTTP log list, so constructing a new crawler discards the
        /// log collected by any earlier instance.
        /// </remarks>
        /// <param name="number">Phone number to log in with.</param>
        /// <param name="pwd">Service password for that number.</param>
        public CrawlerUNC(string number, string pwd) {
            loginIsNeedVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CheckNeedVerify";
            loginVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CreateImage";
            loginToVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CtaIdyChk";

            currentPhoneNumber = number;
            currentPhoneServicePwd = pwd;

            loglist = new List<string>();
            hhn = new HttpHelperNew();
        }

        /// <summary>
        /// Asks the CheckNeedVerify endpoint whether logging in with the current phone number
        /// requires a captcha, and records the exchange in the HTTP log.
        /// </summary>
        /// <returns>
        /// False only when the response contains <c>"resultCode":"false"</c>; true otherwise.
        /// </returns>
        public bool IsLoginNeedVerify() {
            string endpoint = loginIsNeedVerifyImgRequestUrl;

            // Two separate timestamps, taken in this order: one for the JSONP callback name,
            // one for the cache-busting "_" parameter.
            var callbackStamp = TimeStamp.GetTimeStamp_13();
            var cacheStamp = TimeStamp.GetTimeStamp_13();
            string query = "callback=jQuery17205245009011952871_" + callbackStamp
                    + "&userName=" + currentPhoneNumber + "&pwdType=01&_=" + cacheStamp;

            string response = hhn.HttpGet(endpoint, query, HttpForType.联通);
            CollectJsonLog(endpoint, query, 0, response);

            return !response.Contains(@"""resultCode"":""false""");
        }

        /// <summary>
        /// Submits the captcha text to the CtaIdyChk endpoint and remembers it in
        /// <c>currentLoginImgCode</c> for the subsequent login request.
        /// </summary>
        /// <param name="imgcode">Captcha text typed by the user.</param>
        /// <returns>True when the response contains <c>"resultCode":"true"</c>.</returns>
        public bool IsLoginImgVerifyOk(string imgcode) {
            currentLoginImgCode = imgcode;

            string endpoint = loginToVerifyImgRequestUrl;
            // The callback name and "_" value are fixed strings taken from a captured request.
            string query = "callback=jQuery17208163765012834383_1463034583178&verifyCode="
                    + currentLoginImgCode + "&verifyType=1&_=1463034805373";

            string response = hhn.HttpGet(endpoint, query);
            CollectJsonLog(endpoint, query, 0, response);

            bool accepted = response.Contains(@"""resultCode"":""true""");
            return accepted;
        }

        /// <summary>
        /// Downloads the login captcha and exposes it as a PNG data URI, which is also stored
        /// in <c>loginVerifyImgStream</c>.
        /// </summary>
        /// <returns>
        /// <c>data:image/png;base64,</c> followed by the text returned by <c>HttpGetImage</c>,
        /// or an empty string when that text is null or empty.
        /// </returns>
        public string GetLoginImg() {
            const string dataUriPrefix = "data:image/png;base64,";
            const string query = "t=1463034742570";

            loginVerifyImgStream = string.Empty;

            // Original author's note: the "rdmdmd5" cookie is returned by this captcha request.
            string encodedImage = hhn.HttpGetImage(loginVerifyImgRequestUrl, query, HttpForType.联通);
            CollectJsonLog(loginVerifyImgRequestUrl, query, 0, encodedImage);

            if (string.IsNullOrEmpty(encodedImage)) {
                return loginVerifyImgStream;
            }

            loginVerifyImgStream = dataUriPrefix + encodedImage;
            return loginVerifyImgStream;
        }

        /// <summary>
        /// Logout placeholder: sends no request and always reports success.
        /// </summary>
        /// <returns>Always true.</returns>
        public bool LogOut() {
            return true;
        }
         
        /// <summary>
        /// Logs in through the MallLogin endpoint with the phone number, service password,
        /// captcha text and uvc key, and records the final exchange in the HTTP log.
        /// </summary>
        /// <remarks>
        /// While the response says the province system is being upgraded the request is
        /// repeated, at most <c>maxAttempts</c> times with a 500 ms pause in between, so a
        /// long outage is reported to the caller instead of blocking the request forever.
        /// </remarks>
        /// <param name="loginret">
        /// Failure reason recognised in the response ("账户名与密码不匹配", "密码出错已达上限" or
        /// "所属省份系统正在升级"); empty on success or when the failure is not recognised.
        /// </param>
        /// <returns>True when the response contains <c>resultCode:"0000"</c>.</returns>
        public bool Login(out string loginret) {
            const int maxAttempts = 5;
            const string upgradingMarker = @"所属省份系统正在升级";

            loginret = string.Empty;

            // The callback, req_time and "_" values are fixed strings from a captured request.
            // User-supplied values are escaped so characters such as '&' or '+' cannot break
            // the query string; null is treated as empty, matching plain concatenation.
            loginRequestUrl = "https://uac.10010.com/portal/Service/MallLogin?callback=jQuery17208163765012834383_1463034583180&req_time=1463034838271&redirectURL=http%3A%2F%2Fwww.10010.com&userName="
                + Uri.EscapeDataString(currentPhoneNumber ?? string.Empty)
                + "&password=" + Uri.EscapeDataString(currentPhoneServicePwd ?? string.Empty)
                + "&pwdType=01&productType=01&verifyCode=" + Uri.EscapeDataString(currentLoginImgCode ?? string.Empty)
                + "&uvc=" + currentUVC + "&redirectType=01&rememberMe=1&_=1463034838271";

            string retString = string.Empty;
            for (int attempt = 1; attempt <= maxAttempts; attempt++) {
                retString = hhn.HttpGet(loginRequestUrl, "", HttpForType.联通);
                if (!retString.Contains(upgradingMarker))
                    break;
                // Pause only when another attempt will follow.
                if (attempt < maxAttempts)
                    Thread.Sleep(500);
            }

            CollectJsonLog(loginRequestUrl, "", 0, retString);

            if (retString.Contains(@"resultCode:""7007"""))
                loginret = "账户名与密码不匹配";

            if (retString.Contains(@"密码出错已达上限"))
                loginret = "密码出错已达上限";

            // Reachable now that the retry loop is bounded.
            if (retString.Contains(upgradingMarker))
                loginret = "所属省份系统正在升级";

            return retString.Contains(@"resultCode:""0000""");
        }

        /// <summary>
        /// China Unicom specific: posts to the checklogin endpoint and reports whether the
        /// session is logged in. Each read of this property sends one HTTP request and adds
        /// one entry to the HTTP log.
        /// </summary>
        private bool checkLogin {
            get {
                string endpoint = "http://iservice.10010.com/e3/static/check/checklogin?_="
                          + TimeStamp.GetTimeStamp_13();

                string response = hhn.HttpPost(endpoint, "", HttpForType.联通);
                CollectJsonLog(endpoint, "", 1, response);

                return response.Contains(@"""isLogin"":true");
            }
        }

        /// <summary>
        /// China Unicom specific: requests the captcha page and adds a fixed <c>WT_FPC</c>
        /// cookie for <c>.10010.com</c> to the HTTP helper's cookie container.
        /// </summary>
        /// <remarks>
        /// The cookie value is hard-coded; per the original author's note the server does not
        /// validate it. The unused JavaScript evaluation of "GetWT_FPC" and the unused
        /// <c>CookieCollection</c> of the previous version were removed.
        /// </remarks>
        public void UNCInitPage() {
            string url = "https://login.10010.com/captchazh.htm?type=05";
            string retS = hhn.HttpGet(url, "", HttpForType.联通);
            CollectJsonLog(url, "", 0, retS);

            Cookie wtcookie = new Cookie() {
                Expires = DateTime.Now.AddYears(10),
                Path = "/",
                Domain = ".10010.com",
                Name = "WT_FPC",
                Value = "id=2c78d939da42319e6221460629342754:lv=1460686951978:ss=1460685811376"
            };
            hhn.cookie.Add(wtcookie);
        }

        /// <summary>
        /// Query-SMS placeholder: sends nothing and always reports success.
        /// </summary>
        /// <returns>Always true.</returns>
        public bool SendQuerySms()
        {
            return true;
        }

        /// <summary>
        /// China Unicom needs no query-SMS verification, so every code is accepted.
        /// </summary>
        /// <param name="smscode">Ignored.</param>
        /// <returns>Always true.</returns>
        public bool IsQuerySmsVerifyOk(string smscode)
        {
            return true;
        }

        /// <summary>
        /// Not implemented for China Unicom: returns no rows.
        /// </summary>
        /// <typeparam name="T">Row type requested by the caller.</typeparam>
        /// <param name="temp">Ignored.</param>
        /// <returns>A new empty list (never null), so callers can enumerate the result safely.</returns>
        public IList<T> GetQueryData<T>(T temp) {
            return new List<T>();
        }

        /// <summary>
        /// Posts to the searchPerInfo endpoint and returns the raw response, after
        /// confirming the session through <c>checkLogin</c>.
        /// </summary>
        /// <returns>The response text, or an empty string when <c>checkLogin</c> is false.</returns>
        private string getMyDetails() {
            if (!checkLogin) {
                return "";
            }

            // The "_" value is a fixed string from a captured request.
            string endpoint = "http://iservice.10010.com/e3/static/query/searchPerInfo/?_=1464073258330&menuid=000100020001";
            string response = hhn.HttpPost(endpoint, "", HttpForType.联通);
            CollectJsonLog(endpoint, "", 1, response);
            return response;
        }

        /// <summary>
        /// Builds the subscriber's base-info model from four endpoints (anonymous info,
        /// personal details, account balance, header view) plus a phone-attribution lookup.
        /// Every exchange is recorded in the HTTP log.
        /// </summary>
        /// <remarks>
        /// Fields are cut out of the raw responses with <c>Utilities.QuMiddle</c>. Dates and
        /// numbers are parsed with TryParse, so a field the carrier omits leaves the default
        /// value instead of aborting the whole crawl.
        /// </remarks>
        /// <returns>The populated model.</returns>
        public TXInfoModel GetInfo() {
            TXInfoModel tim = new TXInfoModel();

            #region Part 1: name
            string infoUrl = "https://uac.10010.com/cust/infomgr/anonymousInfoAJAX";
            string retString = hhn.HttpGet(infoUrl, "");
            CollectJsonLog(infoUrl, "", 0, retString);

            tim.CustomerName = Utilities.QuMiddle(retString, @"name"":""", @"""");

            // NOTE(review): the previous code compared the "sex" field with "1" but mapped both
            // outcomes to "", so the result was always empty. The intended labels are not
            // recoverable from this file - TODO restore the mapping.
            tim.CustomerSex = "";
            #endregion

            #region Part 2: personal details
            // getMyDetails returns "" when the login check fails.
            string retString2 = getMyDetails();
            tim.Email = Utilities.QuMiddle(retString2, @"sendemail"":""", @"""");

            DateTime innettime;
            DateTime.TryParse(Utilities.QuMiddle(retString2, @"opendate"":""", @""""), out innettime);
            tim.InNetTime = innettime;

            tim.Grade = "";
            tim.IDCard = Utilities.QuMiddle(retString2, @"certnum"":""", @"""");
            tim.PhoneNumber = Utilities.QuMiddle(retString2, @"usernumber"":""", @"""");
            tim.ProviderName = "中国联通:" + Utilities.QuMiddle(retString2, @"brand"":""", @"""") + "-" + Utilities.QuMiddle(retString2, @"productName"":""", @"""");
            tim.RegAddress = Utilities.QuMiddle(retString2, @"certaddr"":""", @"""");

            tim.ContactNum = Utilities.QuMiddle(retString2, @"usernumber"":""", @"""");
            tim.NetAge = "";
            tim.PhoneStatus = Utilities.QuMiddle(retString2, @"subscrbstat"":""", @"""");
            tim.RealNameInfo = Utilities.QuMiddle(retString2, @"certtype"":""", @"""");
            tim.StarLevel = Utilities.QuMiddle(retString2, @"custlvl"":""", @"""");
            tim.LevelInfo = "";
            tim.ZipCode = "";
            #endregion

            #region Part 3: account balance
            string infoUrl11 = "http://iservice.10010.com/e3/static/query/accountBalance/search?_=1464858050369&menuid=000100010002";
            string balancePostData = "type=onlyAccount";
            string retString11 = hhn.HttpPost(infoUrl11, balancePostData, HttpForType.联通);
            // Logged as the POST it is, with the data that was actually sent.
            CollectJsonLog(infoUrl11, balancePostData, 1, retString11);

            decimal userBalance;
            decimal.TryParse(Utilities.QuMiddle(retString11, @"userbalance"":""", @""""), out userBalance);
            tim.CurFee = userBalance;

            decimal accountBalance;
            decimal.TryParse(Utilities.QuMiddle(retString11, @"acctbalance"":""", @""""), out accountBalance);
            tim.CurFeeTotal = accountBalance;
            #endregion

            #region Part 4: points
            string infoUrl22 = "http://iservice.10010.com/e3/static/query/headerView";
            string retString22 = hhn.HttpPost(infoUrl22, "", HttpForType.联通);
            CollectJsonLog(infoUrl22, "", 1, retString22);

            // Original author's note: some card types return no points field.
            // NOTE(review): the key is spelled "sore" - confirm against a real response.
            int score;
            int.TryParse(Utilities.QuMiddle(retString22, @"sore"":""", @""""), out score);
            tim.PointValue = score;
            #endregion

            #region Part 5: phone attribution
            tim.PhoneAttribution = PhoneAttribution.getGuiShuDiNet(tim.PhoneNumber);
            #endregion

            return tim;
        }


        /// <summary>
        /// Fetches one bill summary for each month produced by <c>GetZhangDanPostData</c>
        /// (five months) from the queryHistoryBill endpoint.
        /// </summary>
        /// <remarks>
        /// When a response contains a <c>billcycle</c> field the dates are taken from it and
        /// the fee from <c>nowFee</c>; otherwise the dates come from the locally computed
        /// month range and the fee from <c>allfee</c>.
        /// </remarks>
        /// <returns>The collected bills; empty when <c>checkLogin</c> is false.</returns>
        /// <exception cref="FormatException">A locally computed month range cannot be parsed as a date.</exception>
        public IList<TXZhangDanModel> GetZhangDan() {
            IList<TXZhangDanModel> listZD = new List<TXZhangDanModel>();
            string infoUrl = "http://iservice.10010.com/e3/static/query/queryHistoryBill?_=" + TimeStamp.GetTimeStamp_13() + "&menuid=000100020001";

            // temptimes[i] is the "start&end" range matching request body i.
            string[] temptimes;
            List<string> postBodies = GetZhangDanPostData(out temptimes);

            if (!checkLogin)
                return listZD;

            // Built once instead of once per month.
            Regex billCyclePattern = new Regex(@"""billcycle""(:)("".*?"")");

            for (int month = 0; month < postBodies.Count; month++) {
                string postData = postBodies[month];
                string retS = hhn.HttpPost(infoUrl, postData, HttpForType.联通);
                CollectJsonLog(infoUrl, postData, 1, retS);

                string billCycle = billCyclePattern.Match(retS).ToString();
                TXZhangDanModel bill;

                if (!string.IsNullOrEmpty(billCycle)) {
                    DateTime start1, end1;
                    decimal d1;
                    // NOTE(review): both dates are cut from the same billcycle match using an
                    // empty delimiter; the result depends on how QuMiddle treats "" - confirm.
                    DateTime.TryParse(Utilities.QuMiddle(billCycle, @"billcycle"":""", ""), out start1);
                    DateTime.TryParse(Utilities.QuMiddle(billCycle, "", @""""), out end1);
                    decimal.TryParse(Utilities.QuMiddle(retS, @"nowFee"":""", @""""), out d1);

                    bill = new TXZhangDanModel() {
                        billStartDate = start1,
                        billEndDate = end1,
                        billFee = d1
                    };
                } else {
                    decimal d2;
                    decimal.TryParse(Utilities.QuMiddle(retS, @"allfee"":""", @""""), out d2);
                    string[] period = temptimes[month].Split('&');

                    bill = new TXZhangDanModel() {
                        billStartDate = DateTime.Parse(period[0]),
                        billEndDate = DateTime.Parse(period[1]),
                        billFee = d2
                    };
                }

                listZD.Add(bill);
            }

            return listZD;
        }

      /// <summary>
      /// 获取详单
      /// </summary>
      /// <returns></returns>
        /// <remarks>
        /// For each month range from <c>GetXiangDanPostData</c>: one request (page size 20) reads
        /// <c>totalRecord</c>, then every page from <c>GetAllcurCuror</c> is requested with page
        /// size 50 and its call records are parsed. A page whose field counts do not line up
        /// is skipped. While the first response says the service is temporarily unavailable
        /// it is retried at most five times, 500 ms apart.
        /// </remarks>
        /// <exception cref="InvalidOperationException">The service is still unavailable after all retries.</exception>
        /// <exception cref="Exception">The carrier reports that requests are too frequent.</exception>
        /// <exception cref="FormatException">A record's fee or start time cannot be parsed.</exception>
        public IList<TXXiangDanModel> GetXiangDan() {
            const int maxBusyRetries = 5;
            const string busyMarker = "暂时无法为您提供服务";

            IList<TXXiangDanModel> listXD = new List<TXXiangDanModel>();
            callListRequestUrl = "http://iservice.10010.com/e3/static/query/callDetail?_=" + TimeStamp.GetTimeStamp_13() + "&menuid=000100030001";

            List<string> monthRanges = GetXiangDanPostData();
            if (!checkLogin)
                return listXD;

            // One pattern per record field, built once instead of once per page.
            Regex callDatePattern = new Regex(@"""calldate""(:)("".*?"")");
            Regex callTimePattern = new Regex(@"""calltime""(:)("".*?"")");
            Regex placePattern = new Regex(@"""homeareaName""(:)("".*?"")");
            Regex modePattern = new Regex(@"""calltypeName""(:)("".*?"")");
            Regex otherNumberPattern = new Regex(@"""othernum""(:)("".*?"")");
            Regex durationPattern = new Regex(@"""calllonghour""(:)("".*?"")");
            Regex typePattern = new Regex(@"""landtype""(:)("".*?"")");
            Regex feePattern = new Regex(@"""totalfee""(:)("".*?"")");

            foreach (var data in monthRanges) {
                // data is "beginDate&endDate".
                string[] range = data.Split('&');
                string rangeQuery = "beginDate=" + range[0] + "&endDate=" + range[1];

                // First request only serves to read the month's total record count.
                string PostdataS = rangeQuery + "&pageNo=1&pageSize=20";
                string retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.联通);
                CollectJsonLog(callListRequestUrl, PostdataS, 1, retS);

                for (int retry = 0; retry < maxBusyRetries && retS.Contains(busyMarker); retry++) {
                    Thread.Sleep(500);
                    retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.联通);
                    CollectJsonLog(callListRequestUrl, PostdataS, 1, retS);
                }
                if (retS.Contains(busyMarker)) {
                    throw new InvalidOperationException("Call detail service still unavailable after " + maxBusyRetries + " retries.");
                }
                if (retS.Contains("系统检测您的访问过于频繁")) {
                    throw new Exception("访问获取详单链接过于频繁!请明天再试");
                }

                string currentMonthTotalNum = Utilities.QuMiddle(retS, @"totalRecord"":", @",""");
                List<String> curCurorlist = GetAllcurCuror(currentMonthTotalNum);

                foreach (var curcuror in curCurorlist) {
                    PostdataS = rangeQuery + "&pageNo=" + curcuror + "&pageSize=50";
                    retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.联通);
                    CollectJsonLog(callListRequestUrl, PostdataS, 1, retS);

                    MatchCollection stlist = callDatePattern.Matches(retS);
                    MatchCollection stlist2 = callTimePattern.Matches(retS);
                    MatchCollection cplist = placePattern.Matches(retS);
                    MatchCollection cmlist = modePattern.Matches(retS);
                    MatchCollection anlist = otherNumberPattern.Matches(retS);
                    MatchCollection ctilist = durationPattern.Matches(retS);
                    MatchCollection ctylist = typePattern.Matches(retS);
                    MatchCollection cflist = feePattern.Matches(retS);

                    // Records are rebuilt by index, so every field must occur equally often.
                    // calltime is included: it is indexed below like the others.
                    int count = stlist.Count;
                    bool aligned = stlist2.Count == count && cplist.Count == count && cmlist.Count == count
                        && anlist.Count == count && ctilist.Count == count && ctylist.Count == count
                        && cflist.Count == count;
                    if (!aligned)
                        continue;

                    for (int i = 0; i < count; i++) {
                        TXXiangDanModel temp = new TXXiangDanModel() {
                            anotherNm = Utilities.QuMiddle(anlist[i].Value, @"othernum"":""", @""""),
                            commFee = decimal.Parse(Utilities.QuMiddle(cflist[i].Value, @"totalfee"":""", @"""")),
                            commMode = Utilities.QuMiddle(cmlist[i].Value, @"calltypeName"":""", @""""),
                            commPlac = Utilities.QuMiddle(cplist[i].Value, @"homeareaName"":""", @""""),
                            commTime = Utilities.QuMiddle(ctilist[i].Value, @"calllonghour"":""", @""""),
                            commType = Utilities.QuMiddle(ctylist[i].Value, @"landtype"":""", @""""),
                            startTime = DateTime.Parse(Utilities.QuMiddle(stlist[i].Value, @"calldate"":""", @"""")
                                + " " + Utilities.QuMiddle(stlist2[i].Value, @"calltime"":""", @""""))
                        };
                        listXD.Add(temp);
                    }
                }
            }
            return listXD;
        }

        /// <summary>
        /// Produces the 1-based page numbers ("1", "2", ...) needed to read a month's call
        /// records at 50 records per page.
        /// </summary>
        /// <param name="totalNum">Total record count as text; text that is not an integer counts as 0.</param>
        /// <returns>The page numbers in ascending order; empty when the total is 0.</returns>
        private List<String> GetAllcurCuror(string totalNum) {
            const int pageSize = 50;
            List<string> pages = new List<string>();

            int total;
            int.TryParse(totalNum, out total);

            if (total != 0) {
                // Full pages plus one more for a remainder, e.g. 201 records -> 5 pages.
                int pageCount = total / pageSize;
                if (total % pageSize != 0) {
                    pageCount++;
                }

                for (int page = 1; page <= pageCount; page++) {
                    pages.Add(page.ToString());
                }
            }

            return pages;
        }

        /// <summary>
        /// Builds the POST bodies for the bill queries of the current month and the four
        /// months before it, newest first.
        /// </summary>
        /// <param name="startendS">
        /// Receives, per month, "start&amp;end" as default-formatted date strings: the first day
        /// of the month and, for the current month, the current time, or for earlier months
        /// the last day of that month.
        /// </param>
        /// <returns>Five bodies of the form <c>billdate=yyyyMM&amp;querycode=0001&amp;querytype=0001</c>.</returns>
        private List<string> GetZhangDanPostData(out string[] startendS) {
            const int monthCount = 5;
            DateTime now = DateTime.Now;

            List<string> bodies = new List<string>();
            startendS = new string[monthCount];

            for (int offset = 0; offset < monthCount; offset++) {
                DateTime month = now.AddMonths(-offset);
                DateTime firstDay = new DateTime(month.Year, month.Month, 1);

                // Current month ends "now"; earlier months end on their last day.
                DateTime periodEnd;
                if (offset == 0) {
                    periodEnd = month;
                } else {
                    periodEnd = firstDay.AddMonths(1).AddDays(-1);
                }

                startendS[offset] = firstDay.ToString() + "&" + periodEnd.ToString();
                bodies.Add("billdate=" + month.ToString("yyyyMM") + "&querycode=0001&querytype=0001");
            }

            return bodies;
        }

        /// <summary>
        /// Builds the date ranges for the call-detail queries of the current month and the
        /// four months before it, newest first.
        /// </summary>
        /// <returns>
        /// Five strings of the form <c>yyyy-MM-dd&amp;yyyy-MM-dd</c>: the current month runs from
        /// its first day to today, earlier months from their first to their last day.
        /// </returns>
        private List<string> GetXiangDanPostData() {
            return BuildXiangDanRanges(DateTime.Now, 5);
        }

        /// <summary>
        /// Computes the month ranges relative to <paramref name="today"/>. Calendar arithmetic
        /// is delegated to <c>DateTime.AddMonths</c>, which handles year boundaries and month
        /// lengths; the previous hand-written version produced month 0 or negative months
        /// from January to April and took month lengths from the wrong year.
        /// </summary>
        private static List<string> BuildXiangDanRanges(DateTime today, int monthCount) {
            // Invariant culture keeps the request dates Gregorian on any server locale.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            List<string> ranges = new List<string>(monthCount);
            DateTime firstOfThisMonth = new DateTime(today.Year, today.Month, 1);

            for (int i = 0; i < monthCount; i++) {
                DateTime start = firstOfThisMonth.AddMonths(-i);
                DateTime end = i == 0 ? today.Date : start.AddMonths(1).AddDays(-1);
                ranges.Add(start.ToString("yyyy-MM-dd", invariant) + "&" + end.ToString("yyyy-MM-dd", invariant));
            }
            return ranges;
        }

        /// <summary>
        /// Appends one HTTP exchange to the shared log list.
        /// </summary>
        /// <param name="url">Requested URL.</param>
        /// <param name="data">Query string or POST body that was sent.</param>
        /// <param name="method1">1 is logged as "Post"; any other value as "Get".</param>
        /// <param name="responseS">Raw response text.</param>
        public static void CollectJsonLog(string url, string data, int method1, string responseS) {
            // The list is only created by the instance constructor; this method is public and
            // static, so make sure it exists before the first crawler has been built.
            if (loglist == null)
                loglist = new List<string>();

            string method = method1 == 1 ? "Post" : "Get";
            loglist.Add(string.Format("【请求url:{0} , 请求数据:{1} , 请求方式:{2}, 返回数据:{3} 】", url, data, method, responseS));
        }

        /// <summary>
        /// Returns the shared HTTP log list itself (not a copy).
        /// </summary>
        /// <returns>The list that <c>CollectJsonLog</c> appends to.</returns>
        public IList<String> GetAllJsonLog()
        {
            IList<String> collected = loglist;
            return collected;
        }
    }
}

 //(原文地址:http://www.cnblogs.com/x-poior/p/5641437.html)

全部solution代码(含抓取UI,数据库脚本)下载:

下载

 

【最新原创】中国移动(中国联通)_通信账单,详单,个人信息抓取爬虫代码

标签:

原文地址:http://www.cnblogs.com/x-poior/p/5641437.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!