抓取网页文本内容

时间：2015-06-15 23:49:47 阅读：227 评论：0 收藏：0 [点我收藏+]

标签：

本文使用 WebRequest 类抓取网页的文本内容，这里以 http://novel.hongxiu.com/a/1036665/10425842.html 为例。

代码如下：

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace 网页抓取
{
    /// <summary>
    /// Main form of the scraper. Downloads the page whose URL is held in <c>label1</c>,
    /// strips the HTML tags, cuts out the text between two fixed markers, saves that
    /// excerpt to a file and shows it in <c>richTextBox1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        // Literal text that marks where the wanted excerpt begins.
        private const string StartMarker = "红|袖|言|情|小|说";

        // Literal text that marks where the wanted excerpt ends.
        private const string EndMarker = "但是什么？";

        // Number of characters kept counting from the start of the end marker.
        // The marker itself is 5 characters long, so one trailing character is kept too.
        // NOTE(review): presumably a closing punctuation mark on the page - confirm.
        private const int EndMarkerSpan = 6;

        // File the extracted excerpt is written to.
        private const string OutputPath = "E:\\x1.txt";

        // Matches an HTML tag: '<', then one or more characters (newlines included),
        // matched lazily up to the first '>'. Built once instead of on every call.
        private static readonly Regex TagRegex = new Regex("<(.|\n)+?>");

        /// <summary>
        /// Creates the form and initializes its designer-generated components.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Fetches the page at the URL in <c>label1.Text</c>, removes its HTML tags,
        /// extracts the text between the start and end markers, writes it to
        /// <c>E:\x1.txt</c> and displays it in <c>richTextBox1</c>.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the start or end marker cannot be found in the page text.
        /// </exception>
        public void zhuaqu()
        {
            string html;
            WebRequest request = WebRequest.Create(label1.Text);

            // Dispose the response and the reader even when reading throws, so the
            // underlying connection is always released.
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                html = reader.ReadToEnd();
            }

            string text = thtxt(html);

            // Ordinal search: the markers are literal text, not linguistic input.
            int startIndex = text.IndexOf(StartMarker, StringComparison.Ordinal);
            if (startIndex < 0)
            {
                throw new InvalidOperationException(
                    "Start marker \"" + StartMarker + "\" was not found in the page text.");
            }

            // Search from the start marker onwards so the excerpt length can never be negative.
            int endIndex = text.IndexOf(EndMarker, startIndex, StringComparison.Ordinal);
            if (endIndex < 0)
            {
                throw new InvalidOperationException(
                    "End marker \"" + EndMarker + "\" was not found after the start marker.");
            }

            // Clamp to the end of the text in case the end marker is the last thing on the page.
            int excerptEnd = Math.Min(endIndex + EndMarkerSpan, text.Length);
            string subtxt = text.Substring(startIndex, excerptEnd - startIndex);

            // Dispose the writer even when the write fails, so the file handle is released.
            using (StreamWriter writer = new StreamWriter(OutputPath))
            {
                writer.WriteLine(subtxt);
            }

            richTextBox1.Text = subtxt;
        }

        /// <summary>
        /// Click handler for <c>button1</c>: runs the scrape.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            zhuaqu();
        }

        /// <summary>
        /// Removes HTML tags from the given markup.
        /// </summary>
        /// <param name="Html">HTML source text.</param>
        /// <returns>
        /// The input with every tag match removed, and with any remaining '&gt;'
        /// characters removed as well.
        /// </returns>
        private string thtxt(string Html)
        {
            string stripped = TagRegex.Replace(Html, "");

            // NOTE(review): the previous version also replaced "<" with "<", which has no
            // effect; it may have been meant to decode HTML entities - confirm before adding.
            // Remove any '>' left behind by unbalanced or malformed tags.
            return stripped.Replace(">", "");
        }
    }
}

运行效果

技术分享

抓取网页文本内容

标签：

原文地址：http://www.cnblogs.com/happinesshappy/p/4579410.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行