码迷,mamicode.com
首页 > 编程语言 > 详细

.net语言获取网页的源代码

时间:2019-10-26 15:30:24      阅读:104      评论:0      收藏:0      [点我收藏+]

标签:filename   toe   red   htm   logs   获取网页   zh-cn   static   gen   

1.通过HttpWebRequest请求,HttpWebResponse响应获取网页源代码。

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApp1
{
    /// <summary>
    /// Console sample: fetches a web page with HttpWebRequest/HttpWebResponse
    /// and prints its source to the console.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Requests a fixed Baidu Baike URL with browser-like headers, writes the
        /// response body to standard output, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Build the web request
            string url = "https://baike.baidu.com/item/vs/14494077?fr=aladdin";
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.KeepAlive = false;
            request.Timeout = 30 * 1000; // 30 seconds, expressed in milliseconds
            request.Method = "GET";
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3";
            // Host header
            request.Host = "baike.baidu.com";
            // Referer header
            request.Referer = "https://www.baidu.com/link?url=Bu_CrEdTSBhrOMDJ8onbirSI0bsUbWXp7VWNkbcbnqkS4FUeXFs8uDTsRxeQUkL8JSW19X6TRs-0D7bAAh-LQa&wd=&eqid=8d3d33880007de2d000000065db2aa9a";
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36";

            // Server response. The using block guarantees the response (and its
            // connection) is released on every path, including the early return.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Stop if the status code is not 200 OK
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return;
                }

                using (StreamReader sr = new StreamReader(response.GetResponseStream()))
                {
                    Console.WriteLine(sr.ReadToEnd());
                }
            }
            Console.ReadKey();
        }
    }
}

2.通过HttpWebRequest请求,HttpWebResponse响应获取文件(图片,音频,影视)

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApp1
{
    /// <summary>
    /// Console sample: downloads a file (image, audio, video) with
    /// HttpWebRequest/HttpWebResponse and saves it to disk.
    /// </summary>
    class Program
    {
        // Not used by Main; kept so the class's public surface is unchanged.
        public static FileMode Filemode { get; private set; }

        /// <summary>
        /// Requests a fixed image URL with browser-like headers and writes the
        /// response body to "1.jpg" in the current working directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Build the web request
            // URL of the file to fetch; change it to download a different file
            string url = "http://b-ssl.duitang.com/uploads/blog/201312/04/20131204184148_hhXUT.jpeg";
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.KeepAlive = false;
            request.Timeout = 30 * 1000; // 30 seconds, expressed in milliseconds
            request.Method = "GET";
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3";
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36";

            // Server response. The using blocks release the response, its stream
            // and the output file on every path, including the early return.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Stop if the status code is not 200 OK
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return;
                }

                using (Stream body = response.GetResponseStream())
                using (FileStream fs = new FileStream("1.jpg", FileMode.Create))
                {
                    body.CopyTo(fs);
                }
            }
        }
    }
}

  3.封装一个查看网页源码和图片下载的类

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace ImgFormsApplication
{
    /// <summary>
    /// Helper for fetching a web resource over HTTP, either as a stream
    /// (e.g. to read page source) or saved directly to a file (e.g. an image).
    /// </summary>
    public class ImgDownLoadUtil
    {
        // Pool of User-Agent strings; one is picked at random for each request.
        private static readonly String[] UserAgents =
        {
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
            "Opera/8.0 (Windows NT 5.1; U; en)",
            "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
            "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36"
        };

        // One shared generator: a fresh Random per call can be seeded with the
        // same clock value on back-to-back calls and keep returning the same entry.
        private static readonly Random Rng = new Random();

        /// <summary>
        /// Sends a GET request and returns the response body as a stream.
        /// </summary>
        /// <param name="URL">Address of the resource to fetch.</param>
        /// <returns>
        /// The response stream when the status code is 200 OK; otherwise null.
        /// The caller owns the returned stream and must dispose it, which also
        /// releases the underlying connection.
        /// </returns>
        public static Stream DownLoadFile(String URL)
        {
            HttpWebRequest request = CreateRequest(URL);

            // Server response
            HttpWebResponse response = (HttpWebResponse)request.GetResponse();
            // Give up (and release the connection) if the status is not 200 OK
            if (response.StatusCode != HttpStatusCode.OK)
            {
                response.Close();
                return null;
            }

            return response.GetResponseStream();
        }

        /// <summary>
        /// Sends a GET request and saves the response body to a file.
        /// </summary>
        /// <param name="URL">Address of the source file.</param>
        /// <param name="fileName">Path of the file to create (overwritten if it exists).</param>
        /// <param name="referer">Value sent in the Referer header.</param>
        /// <returns>True when the status code was 200 OK and the body was written; otherwise false.</returns>
        public static Boolean DownLoadFile(String URL, String fileName, String referer)
        {
            HttpWebRequest request = CreateRequest(URL);
            // Host header: the third '/'-separated segment of the URL ("scheme://host/...")
            request.Host = URL.Split('/')[2];
            // Referer header
            request.Referer = referer;

            // Server response; the using blocks release the connection on every path
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Report failure if the status is not 200 OK
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return false;
                }

                using (Stream body = response.GetResponseStream())
                using (FileStream fs = new FileStream(fileName, FileMode.Create))
                {
                    body.CopyTo(fs);
                }
            }

            return true;
        }

        /// <summary>
        /// Builds a GET request for the given URL with the settings shared by
        /// both download methods (no keep-alive, 30 s timeout, browser-like
        /// Accept header and a randomly chosen User-Agent).
        /// </summary>
        private static HttpWebRequest CreateRequest(String URL)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
            request.KeepAlive = false;
            request.Timeout = 30 * 1000; // 30 seconds, expressed in milliseconds
            request.Method = "GET";
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3";
            request.UserAgent = GetUA();
            return request;
        }

        /// <summary>
        /// Picks one User-Agent string at random from the built-in pool.
        /// </summary>
        private static String GetUA()
        {
            // Random instances are not thread-safe, so serialize access to the shared one
            lock (Rng)
            {
                return UserAgents[Rng.Next(0, UserAgents.Length)];
            }
        }
    }
}

 测试方法代码1

// Fetch the page source and show it in the ImgInfo control.
Stream stream = ImgDownLoadUtil.DownLoadFile("https://www.cnblogs.com/1906859953Lucas/p/9027165.html");
// DownLoadFile returns null when the response status is not 200 OK;
// passing null to StreamReader would throw, so only read when a stream came back.
if (stream != null)
{
    using (StreamReader sr = new StreamReader(stream))
    {
        ImgInfo.Text = sr.ReadToEnd();
    }
}

 测试方法代码2

// Download an image to "2.jpg", sending an empty Referer value.
string imageUrl = "https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1572006664637&di=6a73de4ab0d0092d2cc3e01bd2ecd93f&imgtype=0&src=http%3A%2F%2Fwx2.sinaimg.cn%2Fcrop.0.0.1797.1009.1000%2F005NLzplly1fvf2rfe838j31jm0s2gv8.jpg";
string targetFile = "2.jpg";
string refererValue = "";
ImgDownLoadUtil.DownLoadFile(imageUrl, targetFile, refererValue);

  

.net语言获取网页的源代码

标签:filename   toe   red   htm   logs   获取网页   zh-cn   static   gen   

原文地址:https://www.cnblogs.com/x-zhoulin/p/11742993.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!