参考文档:https://www.cnblogs.com/asxinyu/p/CSharp_HtmlAgilityPack_XPath_Weather_Data.html#_label0
HtmlAgilityPack是一个开源的解析HTML元素的类库,最大的特点是可以通过XPath来解析HTML
下载地址如下:http://htmlagilitypack.codeplex.com/
XPath教程:http://www.w3school.com.cn/xpath/index.asp
XPath获取方法:
官方API:http://html-agility-pack.net/parser
/// <summary>
/// Scrapes the Taizhou weather-history page for December 2017 and passes the
/// parsed rows to the view.
/// </summary>
/// <returns>
/// A view whose model is the list of parsed <c>WeatherReport</c> rows. The list
/// is empty when the expected table or its rows are not found in the page.
/// </returns>
public ActionResult Index()
{
    // Scrape the weather forecast history page.
    HtmlWeb htmlWeb = new HtmlWeb();
    string url = "http://www.tianqihoubao.com/lishi/taizhou/month/201712.html";

    // Force gb2312 decoding; without it the Chinese text comes back garbled.
    // NOTE(review): on .NET Core / .NET 5+ this encoding presumably needs the
    // code-pages encoding provider registered first - confirm the target runtime.
    htmlWeb.OverrideEncoding = Encoding.GetEncoding("gb2312");
    HtmlAgilityPack.HtmlDocument document = htmlWeb.Load(url);

    List<WeatherReport> weatherReports = new List<WeatherReport>();

    // XPath string literals must be delimited by ASCII quotes; the previous
    // typographic quotes made the expression invalid.
    var table = document.DocumentNode.SelectSingleNode(@"//*[@id='content']/table");
    if (table == null)
    {
        return View(weatherReports);
    }

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var rows = table.SelectNodes(@"tr");
    if (rows == null)
    {
        return View(weatherReports);
    }

    // Start at index 1: the first row is the table header.
    for (int i = 1; i < rows.Count; i++)
    {
        // Expected columns: date, weather state, temperature, wind.
        var cells = rows[i].SelectNodes(@"td");
        if (cells == null || cells.Count != 4)
        {
            continue;
        }

        var date1 = CleanCellText(cells[0].InnerText);
        var tq = CleanCellText(cells[1].InnerText);
        var qw = CleanCellText(cells[2].InnerText);
        var fx = CleanCellText(cells[3].InnerText);

        // Diagnostic output of the parsed row.
        Console.WriteLine("{0}:{1},{2},{3}", date1, tq, qw, fx);

        weatherReports.Add(new WeatherReport
        {
            Date = date1,
            State = tq,
            Temperature = qw,
            Wind = fx
        });
    }

    return View(weatherReports);
}

/// <summary>
/// Removes CRLF line breaks and spaces from a table cell's text, then trims
/// any remaining leading/trailing whitespace.
/// </summary>
/// <param name="text">Raw inner text of a table cell.</param>
/// <returns>The cell text with line breaks and spaces removed.</returns>
private static string CleanCellText(string text)
{
    return text.Replace("\r\n", "").Replace(" ", "").Trim();
}