码迷,mamicode.com
首页 > Web开发 > 详细

通过WebClient类来发起请求并下载html 抓取邮箱 图片

时间:2016-11-02 01:08:55      阅读:163      评论:0      收藏:0      [点我收藏+]

标签:end   代码   reg   stat   arc   hot   html   style   acs   

 using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;

// NOTE: the original namespace name contained spaces, which is not a valid C#
// identifier; the spaces are replaced with underscores so the file compiles.
namespace 通过WebClient类来发起请求并下载html_抓取邮箱_图片
{
    /// <summary>
    /// Console demo that downloads an HTML page with <see cref="WebClient"/> and
    /// extracts data from it with regular expressions. Two earlier demos (e-mail
    /// and image scraping) are kept as commented-out reference code inside
    /// <see cref="Main"/>; only the job-posting demo is active.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Matches a 51job posting anchor and captures its link text in group 1.
        /// Sample input:
        /// &lt;a href="http://search.51job.com/job/46621778,c.html" onclick="zzSearch.acStatRecJob( 1 );" class="jobname" target="_blank"&gt;ERP项目经理&lt;/a&gt;
        /// </summary>
        /// <remarks>
        /// The dots in the URL are escaped so they only match a literal '.'.
        /// The remaining attributes are consumed with <c>[^&gt;]*</c> and the link text
        /// with a lazy <c>.+?</c>, so that several anchors on one line yield one
        /// match each instead of a single greedy match spanning all of them.
        /// </remarks>
        private static readonly Regex JobLinkPattern = new Regex(
            @"<a\s+href=""http://search\.51job\.com/job/[0-9]{8},c\.html""[^>]*>(.+?)</a>");

        /// <summary>
        /// Extracts the link text of every job-posting anchor found in <paramref name="html"/>.
        /// </summary>
        /// <param name="html">HTML source to scan; may be null or empty.</param>
        /// <returns>
        /// The captured titles in document order; an empty list when
        /// <paramref name="html"/> is null, empty, or contains no matching anchor.
        /// </returns>
        internal static List<string> ExtractJobTitles(string html)
        {
            List<string> titles = new List<string>();
            if (string.IsNullOrEmpty(html))
            {
                return titles;
            }

            foreach (Match match in JobLinkPattern.Matches(html))
            {
                titles.Add(match.Groups[1].Value);
            }
            return titles;
        }

        /// <summary>
        /// Entry point: downloads the job-listing page, prints every job title
        /// followed by the total count, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {

            #region Scrape e-mail addresses from a web page
            //string url = "http://192.168.1.100:8080/提取Email.htm";
            ////1. Download the HTML string for the given URL
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //string html = wc.DownloadString("http://192.168.1.100:8080/提取Email.htm");
            ////2. Extract the e-mail addresses from the downloaded string and write them to a text file
            //MatchCollection matches = Regex.Matches(html, @"[-a-zA-Z0-9_.]+@[-a-zA-Z0-9]+(\.[a-zA-Z0-9]+){1,}");

            //using (StreamWriter writer = new StreamWriter("email.txt"))
            //{
            //    //Iterate over the extracted e-mail addresses
            //    foreach (Match item in matches)
            //    {
            //        //Console.WriteLine(item.Value);
            //        writer.WriteLine(item.Value);
            //    }
            //}


            //Console.ReadKey();
            #endregion



            #region Scrape images from a web page
            //WebClient wc = new WebClient();


            ////1. Download the page source
            //string html = wc.DownloadString("http://192.168.1.100:8080/美女图片/美女们.htm");
            ////2. Extract the images in the page, i.e. the <img> tags
            ////<img  src="hotgirls/00_00.jpg" />
            //MatchCollection matches = Regex.Matches(html, @"<img\s+"" src=""(.+)""\s*/>");
            //foreach (Match item in matches)
            //{
            //    string imgPath = "http://192.168.1.100:8080/美女图片/" + item.Groups[1].Value;
            //    //Download the image
            //    wc.DownloadFile(imgPath, @"c:\mv\" + Path.GetFileName(imgPath));
            //}
            //Console.WriteLine("ok");
            //Console.ReadKey();

            #endregion

            #region Scrape job postings
            string html;
            // WebClient is IDisposable; release it as soon as the download is done.
            // NOTE(review): no Encoding is set on the client here (the e-mail demo
            // above sets UTF-8) — confirm the charset the page is served with.
            using (WebClient webClient = new WebClient())
            {
                html = webClient.DownloadString("http://192.168.1.100:8080/【上海,IT-管理,计算机软件招聘,求职】-前程无忧.htm");
            }

            List<string> titles = ExtractJobTitles(html);
            foreach (string title in titles)
            {
                Console.WriteLine(title);
            }
            Console.WriteLine("共{0}个职位信息。", titles.Count);
            Console.ReadKey();

            #endregion
        }
    }
}

 

通过WebClient类来发起请求并下载html 抓取邮箱 图片

标签:end   代码   reg   stat   arc   hot   html   style   acs   

原文地址:http://www.cnblogs.com/blacop/p/6021345.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!