码迷,mamicode.com
首页 > 其他好文 > 详细

【学习】爬糗事百科,可自动翻页。

时间:2018-04-10 13:26:23      阅读:145      评论:0      收藏:0      [点我收藏+]

标签:index   pre   threading   ide   char   first   play   clear   private   

技术分享图片
  1 namespace HTML
  2 {
  3     class Program
  4     {
  5         const string qsbkMainUrl = "http://www.qiushibaike.com";
  6 
  7         private static string GetWBJokeUrl(int pageIndex)
  8         {
  9 
 10             StringBuilder url = new StringBuilder();
 11 
 12             url.Append(qsbkMainUrl);
 13 
 14             url.Append("/textnew/page/");
 15 
 16             url.Append(pageIndex.ToString());
 17 
 18             url.Append("/?s=4869039");
 19 
 20             return url.ToString();
 21 
 22         }
 23 
 24         //根据网页的url获取网页的html源码
 25 
 26 
 27         private static string GetUrlContent(string url)
 28         {
 29             try
 30             {
 31 
 32                 HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
 33 
 34                 request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.8.1000 Chrome/30.0.1599.101 Safari/537.36";
 35 
 36                 request.Method = "GET";
 37 
 38                 request.ContentType = "text/html;charset=UTF-8";
 39 
 40                 HttpWebResponse response = (HttpWebResponse)request.GetResponse();
 41 
 42                 Stream myResponseStream = response.GetResponseStream();
 43 
 44                 StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("utf-8"));//因为知道糗百网页的编码方式为utf-8
 45 
 46                 string retString = myStreamReader.ReadToEnd();
 47 
 48                 myStreamReader.Close();
 49 
 50                 myResponseStream.Close();
 51 
 52                 return retString;
 53 
 54             }
 55 
 56             catch { return null; }
 57 
 58         }
 59         static void Main(string[] args)
 60         {
 61             System.Threading.Timer threadTimer = new System.Threading.Timer(new System.Threading.TimerCallback(Method3), null, 0, 5000);
 62             while (true)
 63             {
 64                 Thread.Sleep(1000);
 65             }
 66         }
 67 
 68        static int first = 1;
 69        static int curNum=1;
 70        static void Method3(Object state)
 71         {
 72             List<JokeItem> a = GetJokeList(first == 1 ? curNum : first);
 73             int i = 1;
 74             Console.Clear();
 75             foreach (JokeItem item in a)
 76             {
 77                 Console.WriteLine("笑话" + i + ":" + item.JokeContent + "\n");
 78                 i++;
 79             }
 80             curNum++;
 81         }
 82         public class JokeItem
 83         {
 84 
 85             private string nickName;
 86 
 87             /// <summary>
 88 
 89             /// 昵称
 90 
 91             /// </summary>
 92 
 93             public string NickName
 94             {
 95 
 96                 get { return nickName; }
 97 
 98                 set { nickName = value; }
 99 
100             }
101 
102 
103 
104             private Image headImage;
105 
106             /// <summary>
107 
108             /// 头像
109 
110             /// </summary>
111 
112             public Image HeadImage
113             {
114 
115                 get { return headImage; }
116 
117                 set { headImage = value; }
118 
119             }
120 
121             private string jokeContent;
122 
123             /// <summary>
124 
125             /// 笑话内容
126 
127             /// </summary>
128 
129             public string JokeContent
130             {
131 
132                 get { return jokeContent; }
133 
134                 set { jokeContent = value; }
135 
136             }
137 
138 
139 
140             private string jokeUrl;
141 
142             /// <summary>
143 
144             /// 笑话地址
145 
146             /// </summary>
147 
148             public string JokeUrl
149             {
150 
151                 get { return jokeUrl; }
152 
153                 set { jokeUrl = value; }
154 
155             }
156 
157         }
158 
159         /// <summary>
160 
161         /// 获取笑话列表
162 
163         /// </summary>
164 
165         /// <param name="htmlContent"></param>
166 
167         public static List<JokeItem> GetJokeList(int pageIndex)
168         {
169 
170             string htmlContent = GetUrlContent(GetWBJokeUrl(pageIndex));
171             List<JokeItem> jokeList = new List<JokeItem>();
172             Regex rg = new Regex("<div class=\"content\">\\s*((.*|<br/>)*)", RegexOptions.IgnoreCase);
173 
174             JokeItem joke;
175 
176             MatchCollection matchResults = rg.Matches(htmlContent);
177 
178 
179 
180             foreach (Match result in matchResults)
181             {
182                 joke = new JokeItem();
183                 joke.JokeContent = result.Groups[0].Value.Replace("<div class=\"content\">", "").Replace("</span>", "").Replace("<span>", "").Replace("<br/>","");
184                 joke.JokeContent = Regex.Replace(joke.JokeContent, @"(\r\n)+|(\r)+", "");//去掉多余的空行
185                 joke.JokeContent = Regex.Replace(joke.JokeContent, @"(\n)+", "\n");
186                 jokeList.Add(joke);
187             }
188 
189             return jokeList;
190 
191         }
192 
193 
194 
195  
196     }
197 }
控制台代码

 

【学习】爬糗事百科,可自动翻页。

标签:index   pre   threading   ide   char   first   play   clear   private   

原文地址:https://www.cnblogs.com/Zhengxue/p/8777981.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!