码迷,mamicode.com
首页 > Windows程序 > 详细

C#实现简单的网页爬虫

时间:2015-05-23 14:13:16      阅读:329      评论:0      收藏:0      [点我收藏+]

标签:爬虫   c#   

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace _2015._5._23通过WebClient类发起请求并下载html
{
    /// <summary>
    /// Console demo of a minimal web scraper: downloads a page with
    /// <see cref="WebClient"/> and pulls data out of the HTML with regular expressions.
    /// </summary>
    class Program
    {
        // Matches an anchor whose opening tag contains "jobs" and captures its inner text.
        //  - "\b" keeps "<a" from matching other tags that merely start with 'a' (e.g. <abbr>).
        //  - "[^>]*" confines the "jobs" test to the opening tag itself.
        //  - the lazy "(.*?)" stops at the first closing </a>, so several links that share
        //    one line each produce their own match instead of being merged into one.
        // No RegexOptions.Singleline: '.' does not cross line breaks, as in the original pattern.
        private static readonly Regex JobLinkPattern =
            new Regex("<a\\b[^>]*jobs[^>]*>(.*?)</a>");

        /// <summary>
        /// Downloads the lagou.com home page, prints the text of every link whose
        /// opening tag mentions "jobs", prints the number of such links, and then
        /// waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            #region Scrape e-mail addresses from a page (disabled example)
            //string url = "http://zhidao.baidu.com/link?url=cvF0de2o9gkmk3zW2jY23TLEUs6wX-79E1DQVZG7qaBhEVT_xlh6TO7p0W4qwuAZ_InLymC_-mJBBcpdbzTeq_";
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //string str = wc.DownloadString(url);
            //MatchCollection matchs=  Regex.Matches(str,@"\w+@([-\w])+([\.\w])+",RegexOptions.ECMAScript);
            //foreach (Match item in matchs)
            //{
            //    Console.WriteLine(item.Value);
            //}
            //Console.WriteLine(matchs.Count);
            #endregion

            #region Scrape images from a page (disabled example)

            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            ////Download the HTML source of the page
            //string html = wc.DownloadString("http://dongxi.douban.com/?dcs=top-nav&dcm=douban");
            //MatchCollection matches= Regex.Matches(html,"<img.*src=\"(.+?)\".*>");
            //foreach (Match item in matches)
            //{
            //    //Download each image into the target folder
            //    wc.DownloadFile(item.Groups[1].Value,@"c:\mv\"+Path.GetFileName(item.Groups[1].Value));
            //}
            //Console.WriteLine(matches.Count);

            #endregion

            #region Scrape job link text

            string html;
            // WebClient is IDisposable; release it as soon as the download has finished.
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.UTF8;
                html = wc.DownloadString("http://www.lagou.com/");
            }

            MatchCollection matches = JobLinkPattern.Matches(html);
            foreach (Match item in matches)
            {
                // Group 1 is the text between the opening tag and </a>.
                Console.WriteLine(item.Groups[1].Value);
            }
            Console.WriteLine(matches.Count);

            // Keep the console window open until the user presses a key.
            Console.ReadKey();

            #endregion
        }
    }
}

C#实现简单的网页爬虫

标签:爬虫   c#   

原文地址:http://blog.csdn.net/duoduoluojia/article/details/45934131

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!