// Tags:
/// <summary>
/// Small HtmlAgilityPack helpers: parse markup, select nodes by XPath and
/// filter a node collection by the value of one attribute.
/// </summary>
class h
{
    /// <summary>
    /// Reads attribute <paramref name="proname"/> from <paramref name="node"/>,
    /// passing "" as the fallback value to <c>GetAttributeValue</c>.
    /// </summary>
    /// <param name="node">Node to read from.</param>
    /// <param name="proname">Attribute name.</param>
    /// <returns>The value returned by <c>GetAttributeValue(proname, "")</c>.</returns>
    public static string getPro(HtmlNode node, string proname)
    {
        return node.GetAttributeValue(proname, "");
    }

    /// <summary>
    /// Loads <paramref name="htm"/> into a new document and evaluates
    /// <paramref name="xpath"/> against its root node.
    /// </summary>
    /// <param name="htm">HTML markup to parse.</param>
    /// <param name="xpath">XPath expression to evaluate.</param>
    /// <returns>
    /// Whatever <c>SelectNodes</c> returns. HtmlAgilityPack documents that this is
    /// <c>null</c> (not an empty collection) when no node matches, so callers must
    /// check for null; <see cref="Contains"/> and <see cref="Equals(HtmlNodeCollection, string, string)"/>
    /// accept a null collection for that reason.
    /// </returns>
    public static HtmlNodeCollection Parse(string htm, string xpath)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(htm);
        return doc.DocumentNode.SelectNodes(xpath);
    }

    /// <summary>
    /// Returns the nodes whose attribute <paramref name="key"/> contains
    /// <paramref name="value"/> as a substring.
    /// </summary>
    /// <param name="collection">Nodes to filter; null is treated as empty.</param>
    /// <param name="key">Attribute name to inspect (a missing attribute is read as "").</param>
    /// <param name="value">Substring to look for.</param>
    /// <returns>Matching nodes in their original order; never null.</returns>
    public static HtmlNode[] Contains(HtmlNodeCollection collection, string key, string value)
    {
        return FilterByAttribute(collection, key, attr => attr.Contains(value));
    }

    /// <summary>
    /// Returns the nodes whose attribute <paramref name="key"/> is exactly
    /// <paramref name="value"/>.
    /// </summary>
    /// <param name="collection">Nodes to filter; null is treated as empty.</param>
    /// <param name="key">Attribute name to inspect (a missing attribute is read as "").</param>
    /// <param name="value">Value the attribute must equal.</param>
    /// <returns>Matching nodes in their original order; never null.</returns>
    public static HtmlNode[] Equals(HtmlNodeCollection collection, string key, string value)
    {
        return FilterByAttribute(collection, key, attr => attr == value);
    }

    /// <summary>
    /// Shared filtering loop: keeps every node whose attribute value satisfies
    /// <paramref name="accept"/>.
    /// </summary>
    private static HtmlNode[] FilterByAttribute(HtmlNodeCollection collection, string key, Predicate<string> accept)
    {
        var matches = new List<HtmlNode>();

        // Parse() hands back null when the XPath selects nothing; treat that as
        // "no nodes" instead of throwing NullReferenceException in the foreach.
        if (collection == null)
        {
            return matches.ToArray();
        }

        foreach (var node in collection)
        {
            if (accept(node.GetAttributeValue(key, "")))
            {
                matches.Add(node);
            }
        }
        return matches.ToArray();
    }
}
/// <summary>
/// Regex-based text utilities: extract the text between two delimiters and
/// strip HTML markup from a string.
/// </summary>
public class u
{
    // Options shared by both "text between delimiters" helpers.
    private const RegexOptions BetweenOptions = RegexOptions.Multiline | RegexOptions.Singleline;

    // Entity patterns and their replacements, applied in this order by noHtml.
    private static readonly string[][] EntityReplacements = new string[][]
    {
        new string[] { @"&(quot|#34);", "\"" },
        new string[] { @"&(amp|#38);", "&" },
        new string[] { @"&(lt|#60);", "<" },
        new string[] { @"&(gt|#62);", ">" },
        new string[] { @"&(nbsp|#160);", " " },
        new string[] { @"&(iexcl|#161);", "\xa1" },
        new string[] { @"&(cent|#162);", "\xa2" },
        new string[] { @"&(pound|#163);", "\xa3" },
        new string[] { @"&(copy|#169);", "\xa9" },
    };

    /// <summary>
    /// Builds the lazy "everything between begin and end" pattern used by
    /// <see cref="getMidValue"/> and <see cref="getvalue"/>.
    /// </summary>
    private static Regex BuildBetweenRegex(string begin, string end)
    {
        // begin/end are spliced in unescaped, i.e. they are regex fragments, not
        // literal text. Callers with literal delimiters containing metacharacters
        // must pass them through Regex.Escape first.
        return new Regex("(?<=(" + begin + "))[.\\s\\S]*?(?=(" + end + "))", BetweenOptions);
    }

    /// <summary>
    /// Finds every shortest run of text in <paramref name="html"/> that is preceded
    /// by <paramref name="begin"/> and followed by <paramref name="end"/>.
    /// </summary>
    /// <param name="begin">Regex fragment that must precede each match (not included in it).</param>
    /// <param name="end">Regex fragment that must follow each match (not included in it).</param>
    /// <param name="html">Text to search.</param>
    /// <returns>All matches, in order of appearance.</returns>
    public static MatchCollection getMidValue(string begin, string end, string html)
    {
        return BuildBetweenRegex(begin, end).Matches(html);
    }

    /// <summary>
    /// Returns the first shortest run of text in <paramref name="str"/> that is
    /// preceded by <paramref name="start"/> and followed by <paramref name="end"/>.
    /// </summary>
    /// <param name="str">Text to search.</param>
    /// <param name="start">Regex fragment that must precede the match (not included in it).</param>
    /// <param name="end">Regex fragment that must follow the match (not included in it).</param>
    /// <returns>The matched text, or "" when there is no match.</returns>
    public static string getvalue(string str, string start, string end)
    {
        return BuildBetweenRegex(start, end).Match(str).Value;
    }

    /// <summary>
    /// Strips HTML markup from a string: script blocks, tags, comment remnants,
    /// a fixed set of character entities and any leftover angle brackets.
    /// </summary>
    /// <param name="Htmlstring">HTML text; null or empty yields "".</param>
    /// <returns>The text with markup removed.</returns>
    public static string noHtml(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        // Remove script blocks. Singleline lets '.' cross line breaks so that
        // multi-line scripts are removed together with their bodies.
        Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Remove tags.
        Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

        // Remove a line break together with the whitespace that follows it.
        Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

        // Remove comment remnants that survived tag removal.
        Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

        // Decode the known entities one pattern at a time, in table order.
        foreach (string[] entity in EntityReplacements)
        {
            Htmlstring = Regex.Replace(Htmlstring, entity[0], entity[1], RegexOptions.IgnoreCase);
        }

        // Drop any numeric entity that was not decoded above.
        Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

        // Strings are immutable: the result of Replace must be assigned back,
        // otherwise these three steps do nothing.
        Htmlstring = Htmlstring.Replace("<", "");
        Htmlstring = Htmlstring.Replace(">", "");
        Htmlstring = Htmlstring.Replace("\r\n", "");
        return Htmlstring;
    }
}
/// <summary>
/// Blocking HTTP helpers that fetch a page as text via GET or POST.
/// Failures are rethrown as <see cref="Exception"/> with a stage-specific
/// message and the original error as the inner exception.
/// </summary>
public class w
{
    /// <summary>Fetches <paramref name="urlString"/> with GET and decodes the body as UTF-8.</summary>
    /// <param name="urlString">URL to request.</param>
    /// <returns>The response body as text.</returns>
    public static string get(string urlString)
    {
        return getutf8(urlString);
    }

    /// <summary>Fetches <paramref name="urlString"/> with GET and decodes the body as UTF-8.</summary>
    /// <param name="urlString">URL to request.</param>
    /// <returns>The response body as text.</returns>
    public static string getutf8(string urlString)
    {
        return get(urlString, Encoding.UTF8);
    }

    /// <summary>Fetches <paramref name="urlString"/> with GET and decodes the body as "gbk".</summary>
    /// <param name="urlString">URL to request.</param>
    /// <returns>The response body as text.</returns>
    public static string getgbk(string urlString)
    {
        return get(urlString, Encoding.GetEncoding("gbk"));
    }

    /// <summary>Fetches <paramref name="urlString"/> with GET and decodes the body as "gb2312".</summary>
    /// <param name="urlString">URL to request.</param>
    /// <returns>The response body as text.</returns>
    public static string getgb2312(string urlString)
    {
        return get(urlString, Encoding.GetEncoding("gb2312"));
    }

    /// <summary>
    /// Fetches a page with GET and decodes the body with the given encoding.
    /// </summary>
    /// <param name="urlString">URL to request.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <returns>The response body as text.</returns>
    /// <exception cref="Exception">
    /// Creating the request, receiving the response or reading the body failed;
    /// the original error is the inner exception.
    /// </exception>
    public static string get(string urlString, Encoding encoding)
    {
        HttpWebRequest request = CreateHttpRequest(urlString);
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; Maxthon 2.0)";

        // Obtain the server's response.
        HttpWebResponse response = null;
        Stream stream;
        try
        {
            response = (HttpWebResponse)request.GetResponse();
            stream = response.GetResponseStream();
        }
        catch (Exception ex)
        {
            // Do not leak the response if only the stream lookup failed.
            if (response != null)
            {
                response.Close();
            }
            throw new Exception("接受服务器返回页面时发生错误!", ex);
        }

        // using guarantees response, stream and reader are released even when
        // reading fails part-way through.
        using (response)
        using (stream)
        using (StreamReader reader = new StreamReader(stream, encoding))
        {
            try
            {
                return reader.ReadToEnd();
            }
            catch (Exception ex)
            {
                throw new Exception("读取页面数据时发生错误!", ex);
            }
        }
    }

    /// <summary>
    /// Fetches a page by sending form data with POST.
    /// </summary>
    /// <param name="urlString">URL to request.</param>
    /// <param name="encoding">Encoding used for both the POST data and the response body.</param>
    /// <param name="postDataString">POST data, sent as application/x-www-form-urlencoded.</param>
    /// <returns>The response body as text.</returns>
    /// <exception cref="Exception">
    /// Creating the request, sending the POST data or receiving the response failed;
    /// the original error is the inner exception.
    /// </exception>
    public static string post(string urlString, Encoding encoding, string postDataString)
    {
        // Encode the POST data.
        byte[] postDataByte = encoding.GetBytes(postDataString);

        HttpWebRequest request = CreateHttpRequest(urlString);

        // Configure the request.
        request.Method = "POST";
        request.KeepAlive = false;
        request.ContentType = "application/x-www-form-urlencoded";
        // The container is local to this call and discarded on return, so the
        // former loop that copied response cookies into it had no observable
        // effect and has been removed.
        request.CookieContainer = new CookieContainer();
        request.ContentLength = postDataByte.Length;

        // Send the data. The using block replaces a finally that called Close()
        // on a still-null stream when GetRequestStream() itself failed, which
        // masked the real error with a NullReferenceException.
        try
        {
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(postDataByte, 0, postDataByte.Length);
            }
        }
        catch (Exception ex)
        {
            throw new Exception("发送POST数据时发生错误!", ex);
        }

        // Receive and read the response; all three resources are disposed on
        // every path, including failures.
        try
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, encoding))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            throw new Exception("接受服务器返回页面时发生错误!", ex);
        }
    }

    /// <summary>
    /// Creates the HTTP request for <paramref name="urlString"/>, reporting any
    /// failure with the "request creation" message.
    /// </summary>
    private static HttpWebRequest CreateHttpRequest(string urlString)
    {
        HttpWebRequest request;
        try
        {
            request = WebRequest.Create(urlString) as HttpWebRequest;
        }
        catch (Exception ex)
        {
            throw new Exception("建立页面请求时发生错误!", ex);
        }

        // WebRequest.Create yields a non-HTTP request type for schemes such as
        // file:// or ftp://, in which case the 'as' cast produces null. Fail
        // here with a clear cause instead of a later NullReferenceException.
        if (request == null)
        {
            throw new Exception(
                "建立页面请求时发生错误!",
                new NotSupportedException("Only HTTP/HTTPS URLs are supported: " + urlString));
        }
        return request;
    }
}
// Tags:
// Original article: http://my.oschina.net/yangxiaoguang/blog/373901