using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace _2015._5._23通过WebClient类发起请求并下载html
{
    /// <summary>
    /// Console demo that downloads a web page with <see cref="WebClient"/> and
    /// pulls pieces of it out with regular expressions.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the lagou.com home page as UTF-8 text, prints capture group 1
        /// of every match of the "jobs" anchor pattern, prints the number of
        /// matches, and then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            #region Example: scrape e-mail addresses from a web page
            //string url = "http://zhidao.baidu.com/link?url=cvF0de2o9gkmk3zW2jY23TLEUs6wX-79E1DQVZG7qaBhEVT_xlh6TO7p0W4qwuAZ_InLymC_-mJBBcpdbzTeq_";
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //string str = wc.DownloadString(url);
            //MatchCollection matchs= Regex.Matches(str,@"\w+@([-\w])+([\.\w])+",RegexOptions.ECMAScript);
            //foreach (Match item in matchs)
            //{
            //    Console.WriteLine(item.Value);
            //}
            //Console.WriteLine(matchs.Count);
            #endregion

            #region Example: scrape images from a web page
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //// Download the HTML source of the page
            //string html = wc.DownloadString("http://dongxi.douban.com/?dcs=top-nav&dcm=douban");
            //MatchCollection matches= Regex.Matches(html,"<img.*src=\"(.+?)\".*>");
            //foreach (Match item in matches)
            //{
            //    // Download the image to the target directory
            //    wc.DownloadFile(item.Groups[1].Value,@"c:\mv\"+Path.GetFileName(item.Groups[1].Value));
            //}
            //Console.WriteLine(matches.Count);
            #endregion

            // Scrape some information from the page.
            string html;

            // WebClient is IDisposable; the using block releases it as soon as
            // the download has finished.
            using (WebClient wc = new WebClient())
            {
                // Decode the response body as UTF-8.
                wc.Encoding = Encoding.UTF8;
                html = wc.DownloadString("http://www.lagou.com/");
            }

            // Group 1 captures the text between the opening <a ...jobs...> tag and </a>.
            // NOTE(review): both ".*" quantifiers are greedy, so when one line of HTML
            // holds several anchors a single match may span all of them - confirm
            // whether that is intended before relying on the output.
            MatchCollection matches = Regex.Matches(html, "<a.*jobs.*>(.*)</a>");
            foreach (Match item in matches)
            {
                Console.WriteLine(item.Groups[1].Value);
            }

            Console.WriteLine(matches.Count);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
// Original article: http://blog.csdn.net/duoduoluojia/article/details/45934131