标签:
采集数据源:http://www.sgcc.com.cn/xwzx/gsyw/
/// <summary>
/// Fetches the resource at the given URL and returns the whole response body as text.
/// </summary>
/// <param name="Url">Address of the resource to request.</param>
/// <param name="type">
/// Name of the character encoding used to decode the response; it is passed to
/// <c>Encoding.GetEncoding</c> (for example "utf-8" or "gb2312").
/// </param>
/// <returns>
/// The decoded response body, or an empty string when the request, the encoding lookup,
/// or the read fails for any reason.
/// </returns>
public static string GetUrltoHtml(string Url, string type)
{
    try
    {
        // Resolve the encoding first so an unknown name fails before any network traffic.
        System.Text.Encoding encoding = System.Text.Encoding.GetEncoding(type);

        System.Net.WebRequest wReq = System.Net.WebRequest.Create(Url);

        // Dispose the response, its stream and the reader deterministically so the
        // underlying connection is released even when reading throws.
        using (System.Net.WebResponse wResp = wReq.GetResponse())
        using (System.IO.Stream respStream = wResp.GetResponseStream())
        using (System.IO.StreamReader reader = new System.IO.StreamReader(respStream, encoding))
        {
            return reader.ReadToEnd();
        }
    }
    catch (System.Exception ex)
    {
        // Deliberately best-effort: callers rely on "" meaning "could not fetch".
        // Leave a trace in debug builds instead of dropping the error silently.
        System.Diagnostics.Debug.WriteLine("GetUrltoHtml failed for '" + Url + "': " + ex.Message);
        return "";
    }
}
/// <summary>
/// Extracts the text located between the first occurrence of a start marker and the
/// next occurrence of an end marker that follows it.
/// </summary>
/// <param name="strSource">Text to search.</param>
/// <param name="strIndexOf">Start marker; the result begins immediately after it.</param>
/// <param name="strLastOf">End marker; the result stops immediately before it.</param>
/// <returns>
/// The text between the two markers, or an empty string when the source is null or empty,
/// the start marker is absent, or the end marker does not occur after the start marker.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="strIndexOf"/> is null, or <paramref name="strLastOf"/> is null while
/// the start marker was found.
/// </exception>
public static string GetSubString(string strSource, string strIndexOf, string strLastOf)
{
    if (string.IsNullOrEmpty(strSource))
    {
        return string.Empty;
    }

    // Ordinal search: markers are markup fragments, not linguistic text, so
    // culture-sensitive matching would only introduce surprises.
    int startMarkerIndex = strSource.IndexOf(strIndexOf, System.StringComparison.Ordinal);
    if (startMarkerIndex < 0)
    {
        return string.Empty;
    }

    int contentStart = startMarkerIndex + strIndexOf.Length;
    int contentEnd = strSource.IndexOf(strLastOf, contentStart, System.StringComparison.Ordinal);
    if (contentEnd < 0)
    {
        // End marker missing: previously this surfaced as an ArgumentOutOfRangeException
        // from Substring(0, -1); report "nothing found" instead.
        return string.Empty;
    }

    return strSource.Substring(contentStart, contentEnd - contentStart);
}
采集网页数据生成到静态模板newslist.html文件中(正则表达式)
标签:
原文地址:http://www.cnblogs.com/500k/p/4824086.html