标签:
// Scrape the goods listing from the fast-view page into a DataTable with the
// columns ServerName, GoodsName, Price, Qty and Id (all stored as strings).
string url = "http://www.123.com/fast_view?a=1&gameId=25&areaId=0&serverId=0";
string reffer = "http://www.123.com/";

// On failure GetHTML returns a string starting with "Error:" instead of throwing;
// such a string contains no <table>, so dt simply ends up with zero rows.
string html = GetHTML(url, reffer, 10 * 1000);

DataTable dt = new DataTable();
dt.Columns.Add("ServerName", typeof(System.String));
dt.Columns.Add("GoodsName", typeof(System.String));
dt.Columns.Add("Price", typeof(System.String));
dt.Columns.Add("Qty", typeof(System.String));
dt.Columns.Add("Id", typeof(System.String));

// Locate the first <table> and the first </table> that follows it. The closing
// tag is searched from the opening tag onward so that a stray </table> earlier
// in the page cannot disable parsing. Tag matching is case-insensitive to agree
// with the IgnoreCase regexes used below.
const string tableCloseTag = "</table>";
int tableStart = html.IndexOf("<table", StringComparison.OrdinalIgnoreCase);
int tableEnd = tableStart == -1
    ? -1
    : html.IndexOf(tableCloseTag, tableStart, StringComparison.OrdinalIgnoreCase);

if (tableStart != -1 && tableEnd != -1)
{
    string tableHtml = html.Substring(tableStart, tableEnd - tableStart + tableCloseTag.Length);

    // Build every pattern once instead of re-parsing it for each row.
    System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.Singleline |
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;
    System.Text.RegularExpressions.Regex rowRegex =
        new System.Text.RegularExpressions.Regex("<tr[^>]*>(.*?)</tr>", options);
    System.Text.RegularExpressions.Regex cellRegex =
        new System.Text.RegularExpressions.Regex("<td[^>]*>(.*?)</td>", options);
    // NOTE(review): the original patterns required the typographic quote ‘, which
    // looks like a copy/paste artifact of an ASCII apostrophe. Both characters are
    // accepted here; confirm against the live page markup.
    System.Text.RegularExpressions.Regex quotedArgRegex =
        new System.Text.RegularExpressions.Regex(@"SelfTextCut2\([‘']([^‘']*)[‘']", options);
    System.Text.RegularExpressions.Regex priceRegex =
        new System.Text.RegularExpressions.Regex(@"parseFloat\(([^\)]*)\)", options);
    System.Text.RegularExpressions.Regex tagRegex =
        new System.Text.RegularExpressions.Regex("<[^>]*>", options);
    System.Text.RegularExpressions.Regex idRegex =
        new System.Text.RegularExpressions.Regex(@"dl\([‘'](\d+)[‘']\)", options);

    // Only rows inside the located table are considered (the original computed
    // tableHtml but then matched against the whole page).
    foreach (System.Text.RegularExpressions.Match row in rowRegex.Matches(tableHtml))
    {
        System.Text.RegularExpressions.MatchCollection tds = cellRegex.Matches(row.Value);

        // Rows with fewer than eight cells (e.g. headers) carry no goods data.
        if (tds.Count < 8)
        {
            continue;
        }

        DataRow dr = dt.NewRow();
        // A failed Match yields an empty Groups[1].Value, so a missing field
        // becomes an empty string rather than an exception.
        dr["ServerName"] = quotedArgRegex.Match(tds[0].Value).Groups[1].Value;
        dr["GoodsName"] = quotedArgRegex.Match(tds[2].Value).Groups[1].Value;
        dr["Price"] = priceRegex.Match(tds[5].Value).Groups[1].Value;
        // Qty is the cell with all markup stripped.
        dr["Qty"] = tagRegex.Replace(tds[6].Value, "");
        dr["Id"] = idRegex.Match(tds[7].Value).Groups[1].Value;
        dt.Rows.Add(dr);
    }
}

// ================================================================================
// Bonus: the small GetHTML helper used by the snippet above.

/// <summary>
/// Downloads a page with an HTTP GET and returns its body decoded as gb2312.
/// </summary>
/// <param name="strUrl">Absolute URL to fetch.</param>
/// <param name="Reffer">Value sent in the Referer request header.</param>
/// <param name="Timeout">Request timeout in milliseconds.</param>
/// <returns>
/// The response body, or the text "Error:" followed by the exception details
/// when the request fails. Exceptions are not propagated to the caller.
/// </returns>
public static string GetHTML(string strUrl, string Reffer, int Timeout)
{
    try
    {
        // Build the request.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
        request.Method = "GET";
        request.ServicePoint.Expect100Continue = false;

        // Request headers.
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36";
        request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        request.Headers.Add("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
        // Let the framework negotiate and decode compressed responses. The
        // original advertised "gzip, deflate" by hand but only decoded gzip, so
        // a deflate-encoded reply would have been read as garbage.
        request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
        request.KeepAlive = false;
        request.Referer = Reffer;
        request.Timeout = Timeout;

        // The using blocks release the response, stream and reader even when
        // reading throws; the original only closed the response on success.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.GetEncoding("gb2312")))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception err)
    {
        // Kept for backward compatibility: callers receive the error as text.
        return "Error:" + err.ToString();
    }
}
标签:
原文地址:http://www.cnblogs.com/ghelement/p/4512012.html