标签:

采集数据源:http://www.sgcc.com.cn/xwzx/gsyw/
/// <summary>
/// Fetches the resource at the given URL and returns its whole body as text.
/// </summary>
/// <param name="Url">Address of the resource to request.</param>
/// <param name="type">
/// Encoding name handed to <c>Encoding.GetEncoding</c> and used to decode the
/// response body (for example "utf-8" or "gb2312").
/// </param>
/// <returns>
/// The decoded response body, or an empty string if the request, the encoding
/// lookup or the read fails for any reason.
/// </returns>
public static string GetUrltoHtml(string Url, string type)
{
    try
    {
        // Resolve the encoding first so a bad name fails before any network work is done.
        System.Text.Encoding encoding = System.Text.Encoding.GetEncoding(type);
        System.Net.WebRequest wReq = System.Net.WebRequest.Create(Url);

        // Dispose the response and its stream as well as the reader; an undisposed
        // WebResponse keeps the underlying connection open.
        using (System.Net.WebResponse wResp = wReq.GetResponse())
        using (System.IO.Stream respStream = wResp.GetResponseStream())
        using (System.IO.StreamReader reader = new System.IO.StreamReader(respStream, encoding))
        {
            return reader.ReadToEnd();
        }
    }
    catch (System.Exception)
    {
        // Deliberate best-effort: callers receive an empty string instead of an
        // exception when the page cannot be fetched or decoded.
    }
    return "";
}
/// <summary>
/// Extracts the text located between the first occurrence of a start marker
/// and the first occurrence of an end marker that follows it.
/// </summary>
/// <param name="strSource">Text to search.</param>
/// <param name="strIndexOf">Start marker; the result begins immediately after it.</param>
/// <param name="strLastOf">End marker; the result stops immediately before it.</param>
/// <returns>
/// The text between the two markers, or an empty string when any argument is
/// null or when either marker cannot be found.
/// </returns>
public static string GetSubString(string strSource, string strIndexOf, string strLastOf)
{
    if (strSource == null || strIndexOf == null || strLastOf == null)
    {
        return string.Empty;
    }

    // Ordinal search: the markers are literal markup fragments, and a
    // culture-sensitive match may have a different length than the marker itself.
    int startIndex = strSource.IndexOf(strIndexOf, System.StringComparison.Ordinal);
    if (startIndex < 0)
    {
        return string.Empty;
    }

    int contentStart = startIndex + strIndexOf.Length;
    int endIndex = strSource.IndexOf(strLastOf, contentStart, System.StringComparison.Ordinal);
    if (endIndex < 0)
    {
        // End marker missing: report "nothing found" instead of letting
        // Substring throw on a negative length.
        return string.Empty;
    }

    return strSource.Substring(contentStart, endIndex - contentStart);
}
采集网页数据生成到静态模板newslist.html文件中(正则表达式)
标签:
原文地址:http://www.cnblogs.com/500k/p/4824086.html