码迷,mamicode.com
首页 > Web开发 > 详细

利用HttpWebRequest抓取网页数据

时间:2015-07-16 16:20:48      阅读:202      评论:0      收藏:0      [点我收藏+]

标签:

根据运单号来获取运单状态 以EMS快递为例:

 

第一步首先分析要抓取网站的入口及参数 http://www.ems.com.cn

第二步利用 HttpWebRequest 模拟浏览器请求来获取网页内容

技术分享

上图可以看到 获取验证码请求

http://www.ems.com.cn/ems/rand

查询

技术分享

可以看到 发送的 post请求 http://www.ems.com.cn/ems/order/singleQuery_t

技术分享
 /// <summary>
 /// Downloads the EMS captcha image, stores the session cookie fragments sent with it in
 /// <c>_cookieConE</c> / <c>_cookieConE2</c>, and writes the recognized digits to <c>result</c>.
 /// </summary>
 /// <remarks>
 /// A failed attempt (network error, undecodable image, or fewer than six recognized digits)
 /// is retried up to three times. After that the method gives up so the caller can move on to
 /// the next waybill; <c>result</c> then holds fewer than six characters and must be checked
 /// by the caller. The retry counter <c>n</c> is reset to 0 before returning.
 /// </remarks>
 public void getcode()
        {
            const int maxRetries = 3;   // retries allowed for one waybill before giving up
            const int codeLength = 6;   // number of digits expected in the captcha

            // Never leave a code from a previous call behind when every attempt fails.
            result = "";

            // Iterative retry: the original recursed, which kept running after the recursive
            // call returned and could recurse without bound on recognition failures.
            while (true)
            {
                try
                {
                    result = FetchAndRecognizeCaptcha(codeLength);
                    if (result.Length >= codeLength)
                    {
                        break;
                    }
                }
                catch (WebException)
                {
                    // Timeout or HTTP error: counted as a failed attempt below.
                }
                catch (ArgumentException)
                {
                    // The response body could not be decoded/cropped as an image
                    // (e.g. an error page): counted as a failed attempt below.
                }

                n++;
                if (n > maxRetries)
                {
                    break;
                }
            }

            n = 0;
        }

        // Performs ONE download + recognition attempt and returns the recognized digits,
        // which may be fewer than expectedDigits when some glyphs match no template.
        private string FetchAndRecognizeCaptcha(int expectedDigits)
        {
            HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create("http://www.ems.com.cn/ems/rand");
            hwr.Timeout = 30000;

            using (HttpWebResponse res = (HttpWebResponse)hwr.GetResponse())
            {
                // The header may be absent, and may contain fewer than three ';' separated parts.
                string setCookie = res.Headers["Set-Cookie"];
                if (!string.IsNullOrEmpty(setCookie))
                {
                    string[] s = setCookie.Split(';');
                    _cookieConE = s[0];
                    if (s.Length > 2)
                    {
                        _cookieConE2 = s[2];
                    }
                }

                // The stream must stay open for as long as the Bitmap created from it is used.
                using (System.IO.Stream body = res.GetResponseStream())
                using (Bitmap bmp = new Bitmap(body))
                using (Bitmap bit2 = Crop(bmp, 5, 0, 78, 20))
                {
                    Base.YZ y = new Base.YZ(bit2);   // YZ works on its own copy of bit2
                    try
                    {
                        y.GrayByPixels();                           // gray-scale conversion
                        y.GetPicValidByValue(128, expectedDigits);  // locate the effective area
                        Bitmap[] pics = y.GetSplitPics(expectedDigits, 1);   // one tile per digit

                        // CheckNumber ignores index 0, so the glyph codes start at index 1
                        // (the original produced the same layout via a leading '%' separator).
                        string[] arry = new string[pics.Length + 1];
                        arry[0] = "";
                        for (int i = 0; i < pics.Length; i++)
                        {
                            arry[i + 1] = y.GetSingleBmpCode(pics[i], 128);
                            pics[i].Dispose();
                        }
                        return y.CheckNumber(arry);
                    }
                    finally
                    {
                        y.bmpobj.Dispose();
                    }
                }
            }
        }
getcode获取验证码
技术分享
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;

namespace zsy.Base
{
    public class YZ
    {
        /// <summary>Working image; initialized as a copy of the constructor argument.</summary>
        public Bitmap bmpobj;

        /// <summary>
        /// Creates a recognizer that works on its own copy of <paramref name="pic"/>.
        /// </summary>
        /// <param name="pic">Image to analyse.</param>
        public YZ(Bitmap pic)
        {
            // Copying through the Bitmap(Image) constructor; per the original author's note
            // this also converts the picture to a 32bpp pixel format.
            Bitmap workingCopy = new Bitmap(pic);
            this.bmpobj = workingCopy;
        }
        const string _num1 = "00000000000000000000000000000000000000000000111000000001111100000000111110000000000011000000000001100000000000110000000000011000000000001100000000000110000000000011000000000001100000000111111110000011111111000000000000000000000000000000000000000000000000000000";
        const string _num0 = "00000000000000000000000000000000000000000000111000000001111111000000110001100000110000011000011000001100001100000110000110000011000011000001100001100000110000110000011000001100011000000111111100000000111000000000000000000000000000000000000000000000000000000000";
        const string _num9 = "00000000000000000000000000000000000000000001111000000001111111000001110001100000110000011000011000001100001110001110000011111111000000111101100000000000110000000000110000001000011000000111111000000001111000000000000000000000000000000000000000000000000000000000";
        const string _num3 = "00000000000000000000000000000000000000000011111000000011111111000001000001100000000000110000000000110000000111110000000011111100000000000111000000000001100000000000110000010000111000001111111000000011111000000000000000000000000000000000000000000000000000000000";
        const string _num7 = "00000000000000000000000000000000000000000011111111000001111111100000000000110000000000010000000000011000000000011000000000001000000000001100000000000100000000000110000000000011000000000011000000000001100000000000000000000000000000000000000000000000000000000000";
        const string _num5 = "00000000000000000000000000000000000000000111111110000011111111000001100000000000110000000000011000000000001111100000000111111100000000000111000000000001100000000000110000010000111000001111111000000011111000000000000000000000000000000000000000000000000000000000";
        const string _num2 = "00000000000000000000000000000000000000000011111000000011111110000001000001100000000000110000000000011000000000011000000000011000000000011000000000011000000000011000000000011000000000001111111100000111111110000000000000000000000000000000000000000000000000000000";
        const string _num6 = "00000000000000000000000000000000000000000000111100000000111111000000110000100000011000000000011000000000001101111000000111111110000011100011100001100000110000110000011000001100011100000111111100000000111100000000000000000000000000000000000000000000000000000000";
        const string _num4 = "00000000000000000000000000000000000000000000001100000000001110000000000111000000000111100000000110110000000011011000000011001100000001100110000001111111110000111111111000000000110000000000011000000000001100000000000000000000000000000000000000000000000000000000";
        const string _num8 = "00000000000000000000000000000000000000000001111100000001111111000000110001100000011000110000001110010000000011111000000001111100000001100111000001100000110000110000011000011100011100000111111100000001111100000000000000000000000000000000000000000000000000000000";
        // Digit templates indexed by the digit they represent (index 0 -> '0' ... index 9 -> '9').
        private static readonly string[] _digitTemplates =
        {
            _num0, _num1, _num2, _num3, _num4, _num5, _num6, _num7, _num8, _num9
        };

        /// <summary>
        /// Translates dot-matrix strings into the digits they depict by exact comparison
        /// with the built-in templates.
        /// </summary>
        /// <param name="number">
        /// Dot-matrix strings, one per glyph. Index 0 is ignored: processing starts at index 1.
        /// </param>
        /// <returns>
        /// The recognized digits in input order. Entries that match no template (including
        /// null entries) contribute nothing, so the result can be shorter than the input.
        /// </returns>
        public string CheckNumber(string[] number)
        {
            System.Text.StringBuilder digits = new System.Text.StringBuilder();
            for (int i = 1; i < number.Length; i++)
            {
                // Ordinal equality against every template; -1 when nothing matches.
                int digit = Array.IndexOf(_digitTemplates, number[i]);
                if (digit >= 0)
                {
                    digits.Append((char)('0' + digit));
                }
            }
            return digits.ToString();
        }

        /// <summary>
        /// Computes a gray level from the red, green and blue channels of a color.
        /// </summary>
        /// <param name="posClr">Color to convert.</param>
        /// <returns>The weighted channel sum shifted right by 16 bits, as an integer.</returns>
        private int GetGrayNumColor(System.Drawing.Color posClr)
        {
            // Fixed-point channel weights: 19595 + 38469 + 7472 = 65536, hence the shift by 16.
            int weightedRed = 19595 * posClr.R;
            int weightedGreen = 38469 * posClr.G;
            int weightedBlue = 7472 * posClr.B;

            int weightedSum = weightedRed + weightedGreen + weightedBlue;
            return weightedSum >> 16;
        }

        /// <summary>
        /// Converts the working image to gray scale, one pixel at a time.
        /// </summary>
        public void GrayByPixels()
        {
            for (int y = 0; y < bmpobj.Height; y++)
            {
                for (int x = 0; x < bmpobj.Width; x++)
                {
                    Color source = bmpobj.GetPixel(x, y);
                    int gray = GetGrayNumColor(source);

                    // Same level on all three channels yields a gray pixel.
                    Color replacement = Color.FromArgb(gray, gray, gray);
                    bmpobj.SetPixel(x, y, replacement);
                }
            }
        }

        /// <summary>
        /// Paints a frame of the given thickness along all four edges of the working image white.
        /// </summary>
        /// <param name="borderWidth">Thickness of the frame, in pixels.</param>
        public void ClearPicBorder(int borderWidth)
        {
            Color white = Color.FromArgb(255, 255, 255);

            for (int y = 0; y < bmpobj.Height; y++)
            {
                // A row belongs to the interior when it is at least borderWidth away from both edges.
                bool rowInside = y >= borderWidth && y <= bmpobj.Height - 1 - borderWidth;

                for (int x = 0; x < bmpobj.Width; x++)
                {
                    bool columnInside = x >= borderWidth && x <= bmpobj.Width - 1 - borderWidth;
                    if (rowInside && columnInside)
                    {
                        continue;   // interior pixel: leave untouched
                    }

                    bmpobj.SetPixel(x, y, white);
                }
            }
        }

        /// <summary>
        /// Converts the working image to gray scale through direct access to the locked
        /// bitmap memory, processing one scan line at a time.
        /// </summary>
        /// <remarks>
        /// The pixels are locked as 32bpp ARGB so that each pixel is exactly one <c>int</c>,
        /// regardless of the bitmap's own pixel format. Every pixel becomes opaque gray, the
        /// same result <see cref="GrayByPixels"/> produces.
        /// NOTE(review): writing back through a 32bpp lock is assumed to be supported for the
        /// bitmap's format; the constructor creates a 32bpp copy — confirm if bmpobj is ever
        /// replaced with an indexed-format bitmap.
        /// </remarks>
        public void GrayByLine()
        {
            int width = bmpobj.Width;
            int height = bmpobj.Height;
            Rectangle rec = new Rectangle(0, 0, width, height);
            BitmapData bmpData = bmpobj.LockBits(rec, ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
            try
            {
                int[] rowPixels = new int[width];
                for (int y = 0; y < height; y++)
                {
                    // Address each scan line through Stride, which may exceed width * 4
                    // or be negative for bottom-up bitmaps.
                    IntPtr rowStart = new IntPtr(bmpData.Scan0.ToInt64() + (long)y * bmpData.Stride);
                    Marshal.Copy(rowStart, rowPixels, 0, width);

                    for (int x = 0; x < width; x++)
                    {
                        int gray = GetGrayNumColor(Color.FromArgb(rowPixels[x]));
                        rowPixels[x] = Color.FromArgb(gray, gray, gray).ToArgb();
                    }

                    // Write the converted line back; without this the bitmap stays unchanged.
                    Marshal.Copy(rowPixels, 0, rowStart, width);
                }
            }
            finally
            {
                bmpobj.UnlockBits(bmpData);
            }
        }

        /// <summary>
        /// Locates the bounding box of the foreground pixels of the working image and widens it
        /// horizontally so that its width is divisible by the number of characters.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the crop step that would apply the computed box is disabled in the
        /// original code, so this method currently leaves <c>bmpobj</c> unchanged.
        /// </remarks>
        /// <param name="dgGrayValue">Threshold: a pixel whose red channel is below it counts as foreground.</param>
        /// <param name="CharsCount">Number of characters the width should be divisible by.</param>
        public void GetPicValidByValue(int dgGrayValue, int CharsCount)
        {
            int left = bmpobj.Width;
            int top = bmpobj.Height;
            int right = 0;
            int bottom = 0;

            for (int y = 0; y < bmpobj.Height; y++)
            {
                // The scan starts at column 1; column 0 is not examined.
                for (int x = 1; x < bmpobj.Width; x++)
                {
                    if (bmpobj.GetPixel(x, y).R >= dgGrayValue)
                    {
                        continue;   // background pixel
                    }

                    left = Math.Min(left, x);
                    top = Math.Min(top, y);
                    right = Math.Max(right, x);
                    bottom = Math.Max(bottom, y);
                }
            }

            // Number of columns missing for the width to divide evenly by CharsCount.
            int padding = CharsCount - (right - left + 1) % CharsCount;
            if (padding < CharsCount)
            {
                // Left side gets the smaller half; for an odd padding the right side gets one more.
                int leftPadding = padding / 2;
                if (left > leftPadding)
                {
                    left -= leftPadding;
                }
                if (right + padding - leftPadding < bmpobj.Width)
                {
                    right += padding - leftPadding;
                }
            }
        }

        /// <summary>
        /// Crops the working image to the bounding box of its foreground pixels.
        /// </summary>
        /// <remarks>
        /// When no pixel is below the threshold there is nothing to crop to and the working
        /// image is left unchanged (previously this produced an invalid rectangle and
        /// <c>Clone</c> threw).
        /// </remarks>
        /// <param name="dgGrayValue">Threshold: a pixel whose red channel is below it counts as foreground.</param>
        public void GetPicValidByValue(int dgGrayValue)
        {
            int posx1 = bmpobj.Width; int posy1 = bmpobj.Height;
            int posx2 = 0; int posy2 = 0;
            bool foundForeground = false;

            for (int i = 0; i < bmpobj.Height; i++)      // scan for the effective area
            {
                for (int j = 0; j < bmpobj.Width; j++)
                {
                    if (bmpobj.GetPixel(j, i).R < dgGrayValue)
                    {
                        foundForeground = true;
                        if (posx1 > j) posx1 = j;
                        if (posy1 > i) posy1 = i;

                        if (posx2 < j) posx2 = j;
                        if (posy2 < i) posy2 = i;
                    }
                }
            }

            if (!foundForeground)
            {
                return;   // blank image: keep bmpobj as it is
            }

            // Replace the working image with the cropped copy.
            Rectangle cloneRect = new Rectangle(posx1, posy1, posx2 - posx1 + 1, posy2 - posy1 + 1);
            bmpobj = bmpobj.Clone(cloneRect, bmpobj.PixelFormat);
        }

        /// <summary>
        /// Returns a copy of the given image cropped to the bounding box of its foreground pixels.
        /// </summary>
        /// <remarks>
        /// When no pixel is below the threshold, an uncropped copy of the whole image is
        /// returned (previously this produced an invalid rectangle and <c>Clone</c> threw).
        /// </remarks>
        /// <param name="singlepic">Image to crop; it is not modified.</param>
        /// <param name="dgGrayValue">Threshold: a pixel whose red channel is below it counts as foreground.</param>
        /// <returns>A new bitmap holding the cropped (or, for a blank image, the full) picture.</returns>
        public Bitmap GetPicValidByValue(Bitmap singlepic, int dgGrayValue)
        {
            int posx1 = singlepic.Width; int posy1 = singlepic.Height;
            int posx2 = 0; int posy2 = 0;
            bool foundForeground = false;

            for (int i = 0; i < singlepic.Height; i++)      // scan for the effective area
            {
                for (int j = 0; j < singlepic.Width; j++)
                {
                    if (singlepic.GetPixel(j, i).R < dgGrayValue)
                    {
                        foundForeground = true;
                        if (posx1 > j) posx1 = j;
                        if (posy1 > i) posy1 = i;

                        if (posx2 < j) posx2 = j;
                        if (posy2 < i) posy2 = i;
                    }
                }
            }

            // Blank image: fall back to the full picture so the rectangle is always valid.
            Rectangle cloneRect = foundForeground
                ? new Rectangle(posx1, posy1, posx2 - posx1 + 1, posy2 - posy1 + 1)
                : new Rectangle(0, 0, singlepic.Width, singlepic.Height);
            return singlepic.Clone(cloneRect, singlepic.PixelFormat);
        }

        /// <summary>
        /// Cuts the working image into a grid of equally sized tiles.
        /// </summary>
        /// <param name="RowNum">Number of tiles across the width of the image.</param>
        /// <param name="ColNum">Number of tiles down the height of the image.</param>
        /// <returns>
        /// The tiles ordered left to right, then top to bottom; <c>null</c> when either count is 0.
        /// </returns>
        public Bitmap[] GetSplitPics(int RowNum, int ColNum)
        {
            bool nothingToSplit = RowNum == 0 || ColNum == 0;
            if (nothingToSplit)
            {
                return null;
            }

            // Integer division: leftover pixels at the right/bottom edge are not covered.
            int tileWidth = bmpobj.Width / RowNum;
            int tileHeight = bmpobj.Height / ColNum;
            Bitmap[] tiles = new Bitmap[RowNum * ColNum];

            for (int band = 0; band < ColNum; band++)
            {
                int top = band * tileHeight;
                for (int slot = 0; slot < RowNum; slot++)
                {
                    Rectangle area = new Rectangle(slot * tileWidth, top, tileWidth, tileHeight);
                    tiles[band * RowNum + slot] = bmpobj.Clone(area, bmpobj.PixelFormat);   // copy one tile
                }
            }

            return tiles;
        }

        /// <summary>
        /// Describes an image as a dot-matrix string: '1' for a foreground pixel, '0' for background.
        /// </summary>
        /// <param name="singlepic">Image to describe; pixels are read row by row, left to right.</param>
        /// <param name="dgGrayValue">Threshold: a pixel whose red channel is below it counts as foreground.</param>
        /// <returns>A string of Width * Height characters, each '0' or '1'.</returns>
        public string GetSingleBmpCode(Bitmap singlepic, int dgGrayValue)
        {
            // Pre-sized builder instead of per-pixel string concatenation.
            System.Text.StringBuilder code = new System.Text.StringBuilder(singlepic.Width * singlepic.Height);
            for (int posy = 0; posy < singlepic.Height; posy++)
            {
                for (int posx = 0; posx < singlepic.Width; posx++)
                {
                    Color pixel = singlepic.GetPixel(posx, posy);
                    code.Append(pixel.R < dgGrayValue ? '1' : '0');
                }
            }
            return code.ToString();
        }
    }
}
获取图片里的验证码
技术分享
 HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create(url);
                Encoding encoding = Encoding.UTF8;   // encoding used to decode the response body

                // Form values are URL-encoded so that characters such as '&', '=', '+' or spaces
                // cannot corrupt the request body. ("" + x keeps the original null-tolerant
                // concatenation semantics.)
                string param = "muMailNum=" + Uri.EscapeDataString("" + mailCode)
                             + "&checkCode=" + Uri.EscapeDataString("" + code);
                byte[] bs = Encoding.ASCII.GetBytes(param);   // escaped text is pure ASCII

                req.Method = "POST";
                req.Timeout = 30000;
                req.ContentType = "application/x-www-form-urlencoded";
                req.ContentLength = bs.Length;
                req.CookieContainer = _cookieCon;

                // The using block closes the request stream; no explicit Close() is needed.
                using (Stream reqStream = req.GetRequestStream())
                {
                    reqStream.Write(bs, 0, bs.Length);
                }

                // Read the whole response page into responseData.
                using (HttpWebResponse response = (HttpWebResponse)req.GetResponse())
                using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
                {
                    responseData = reader.ReadToEnd();
                }
获取整个网页内容

然后利用正则过滤出想要获得的信息

利用HttpWebRequest抓取网页数据

标签:

原文地址:http://www.cnblogs.com/xiaoshitou/p/4651247.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!