标签:table repo 数据读取 body nbsp datarow node column 文档
// Reads the rows of the HTML table with id "report1" from a local HTML file
// into a DataTable, one DataRow per <tr> and one column per <td>.
HtmlAgilityPack.HtmlWeb hw = new HtmlAgilityPack.HtmlWeb();
HtmlAgilityPack.HtmlDocument doc = hw.Load("C:\\Users\\u1\\Desktop\\test html.html");
HtmlAgilityPack.HtmlNode rootNode = doc.DocumentNode; // root node of the document

// XPath string literals must use ASCII quotes; the typographic quotes that
// were here before are not valid XPath and made the query fail.
// NOTE(review): browsers insert <tbody> when copying an XPath from dev tools;
// confirm the raw HTML file really contains a <tbody> element.
string xpath = "//*[@id='report1']/tbody";
HtmlAgilityPack.HtmlNode node = rootNode.SelectSingleNode(xpath); // the table body
if (node == null)
{
    // SelectSingleNode returns null when nothing matches; fail with a clear message
    // instead of a NullReferenceException further down.
    throw new System.InvalidOperationException("No element matched XPath: " + xpath);
}

// Strip <script> and <style> elements so only table markup remains.
// ToArray() snapshots the matches because Remove() mutates the tree being enumerated.
foreach (var script in node.Descendants("script").ToArray())
{
    script.Remove();
}
foreach (var style in node.Descendants("style").ToArray())
{
    style.Remove();
}

// Start with 100 columns (as before) and grow on demand so wider rows do not overflow.
const int initialColumnCount = 100;
DataTable dt = new DataTable();
for (int i = 0; i < initialColumnCount; i++)
{
    dt.Columns.Add(new DataColumn());
}

// SelectNodes returns null (not an empty collection) when there are no matches.
var trNodes = node.SelectNodes("tr");
if (trNodes != null)
{
    foreach (var trNode in trNodes) // iterate rows
    {
        var tdNodes = trNode.SelectNodes("td");
        int cellCount = tdNodes == null ? 0 : tdNodes.Count;

        // Add any missing columns before creating the row.
        while (dt.Columns.Count < cellCount)
        {
            dt.Columns.Add(new DataColumn());
        }

        // A row without <td> cells (e.g. a header row of <th>) is kept as an empty
        // row so DataTable row indexes stay aligned with the <tr> order.
        DataRow dr = dt.NewRow();
        for (int i = 0; i < cellCount; i++) // iterate cells
        {
            dr[i] = tdNodes[i].InnerText;
        }
        dt.Rows.Add(dr);
    }
}
// All table data has now been read into dt.
使用HtmlAgilityPack将HtmlTable填入DataTable
标签:table repo 数据读取 body nbsp datarow node column 文档
原文地址:https://www.cnblogs.com/kqw/p/10029966.html