标签:
目录:信息采集入门系列目录
下面记录的是我自己整理的C#请求页面核心类,主要包含如下几个方法:
1.HttpWebRequest Get请求获得页面html
2.HttpWebRequest Post请求获得页面html
3.模拟登录获得cookie内容
4.模拟登录获得cookie字符串
5.代理的设置
6.利用webbrowser 获取js生成的页面
7.为webbrowser设置cookie,模拟登录
8.每个方法的使用demo+demo下载
HttpWebRequest Get请求获得页面html
/// <summary>
/// Issues an HTTP GET request and returns the response body as text.
/// </summary>
/// <param name="url">The URL to fetch.</param>
/// <param name="proxy">
/// Proxy assigned to the request. Pass <c>null</c> when no proxy is wanted;
/// per the original author's note, leaving proxy lookup enabled makes every request slow.
/// </param>
/// <param name="cookie">Cookie container assigned to the request (the cookies the site requires).</param>
/// <param name="timeout">Value assigned to <see cref="HttpWebRequest.Timeout"/> (milliseconds).</param>
/// <returns>The response body decoded as UTF-8.</returns>
public static string Crawl(string url, WebProxy proxy, CookieContainer cookie, int timeout = 10000)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    try
    {
        request.Proxy = proxy;
        request.Timeout = timeout;
        request.AllowAutoRedirect = true;
        request.CookieContainer = cookie;

        // using blocks dispose the reader first and the response second, on every path.
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
    finally
    {
        // Kept from the original: abort the request once we are done with it.
        // No catch block: exceptions propagate with their original stack trace
        // (the previous "throw ex;" reset it).
        request.Abort();
    }
}
HttpWebRequest Post请求获得页面html
/// <summary>
/// Issues an HTTP POST request with a form-encoded body and returns the response body as text.
/// </summary>
/// <param name="url">The URL to post to.</param>
/// <param name="postdata">The request body as a string, e.g. <c>id=1&amp;name=test</c>.</param>
/// <param name="proxy">Proxy assigned to the request; may be <c>null</c>.</param>
/// <param name="cookie">Cookie container assigned to the request.</param>
/// <param name="timeout">Value assigned to <see cref="HttpWebRequest.Timeout"/> (milliseconds).</param>
/// <returns>The response body decoded as UTF-8.</returns>
public static string Crawl(string url, string postdata, WebProxy proxy, CookieContainer cookie, int timeout = 10000)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    try
    {
        request.Proxy = proxy;
        request.Timeout = timeout;
        request.AllowAutoRedirect = true;
        request.CookieContainer = cookie;

        // UTF-8 produces the same bytes as ASCII for ASCII input, but does not
        // replace non-ASCII characters with '?' the way Encoding.ASCII did.
        byte[] body = Encoding.UTF8.GetBytes(postdata);

        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        request.ContentLength = body.Length;

        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(body, 0, body.Length);
        }

        // using blocks dispose the reader first and the response second, on every path.
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
    finally
    {
        // Kept from the original: abort the request once we are done with it.
        // No catch block: exceptions propagate with their original stack trace
        // (the previous "throw ex;" reset it).
        request.Abort();
    }
}
模拟登录获得cookie内容
/// <summary>
/// Requests a page and returns the cookie container that was attached to the request.
/// </summary>
/// <param name="url">The URL to request (e.g. the login page being simulated).</param>
/// <param name="proxy">Proxy assigned to the request; may be <c>null</c>.</param>
/// <param name="timeout">Value assigned to <see cref="HttpWebRequest.Timeout"/> (milliseconds).</param>
/// <returns>The <see cref="CookieContainer"/> that was assigned to the request before it was sent.</returns>
public static CookieContainer GetCookie(string url, WebProxy proxy, int timeout = 10000)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    try
    {
        CookieContainer cookies = new CookieContainer();
        request.Proxy = proxy;
        request.Timeout = timeout;
        request.AllowAutoRedirect = true;
        request.CookieContainer = cookies;

        // Only the cookies matter; the response body is not read.
        // The original also assigned response.Cookies just before closing the
        // response, which had no observable effect and has been dropped.
        using (WebResponse response = request.GetResponse())
        {
        }

        return cookies;
    }
    finally
    {
        // Kept from the original: abort the request once we are done with it.
        // No catch block: exceptions propagate with their original stack trace
        // (the previous "throw ex;" reset it).
        request.Abort();
    }
}
模拟登录获得cookie字符串
/// <summary>
/// Requests a page and returns its cookies as a single header string
/// (per the original author's note, usable with a WebBrowser control).
/// </summary>
/// <param name="url">The URL to request.</param>
/// <param name="proxy">Proxy assigned to the request; may be <c>null</c>.</param>
/// <param name="timeout">Value assigned to <see cref="HttpWebRequest.Timeout"/> (milliseconds).</param>
/// <returns>
/// The result of <see cref="CookieContainer.GetCookieHeader(Uri)"/> for the request's
/// <see cref="WebRequest.RequestUri"/>.
/// </returns>
public static string GetCookieString(string url, WebProxy proxy, int timeout = 10000)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    try
    {
        CookieContainer cookies = new CookieContainer();
        request.Proxy = proxy;
        request.Timeout = timeout;
        request.AllowAutoRedirect = true;
        request.CookieContainer = cookies;

        // Only the cookies matter; the response body is not read.
        // The original also assigned response.Cookies just before closing the
        // response, which had no observable effect and has been dropped.
        using (WebResponse response = request.GetResponse())
        {
        }

        return cookies.GetCookieHeader(request.RequestUri);
    }
    finally
    {
        // Kept from the original: abort the request once we are done with it.
        // No catch block: exceptions propagate with their original stack trace
        // (the previous "throw ex;" reset it).
        request.Abort();
    }
}
代理的设置
/// <summary>
/// Builds a <see cref="WebProxy"/> with an address and credentials.
/// </summary>
/// <param name="port">
/// Despite its name, this value is passed to <c>new Uri(...)</c> and becomes the proxy
/// address, so it must be a string that <see cref="Uri"/> can parse.
/// </param>
/// <param name="user">User name for the proxy credentials.</param>
/// <param name="password">Password for the proxy credentials.</param>
/// <returns>A new <see cref="WebProxy"/> with <c>Address</c> and <c>Credentials</c> set.</returns>
public static WebProxy CreatePorxy(string port, string user, string password)
{
    return new WebProxy
    {
        Address = new Uri(port),
        Credentials = new NetworkCredential(user, password)
    };
}
利用webbrowser 获取js生成的页面
为webbrowser设置cookie,模拟登录
标签:
原文地址:http://www.cnblogs.com/xiaoshuai1992/p/webcrawl.html