码迷,mamicode.com
首页 > 其他好文 > 详细

爬一下国家统计局行政区划代码

时间:2019-04-14 17:48:17      阅读:1237      评论:0      收藏:0      [点我收藏+]

标签:rgs   lag   分类   style   ==   add   return   nta   mozilla   

目前国家统计局(NBS)网站上提供 2015—2018 四个年度的区划代码信息。

写一个控制台程序搞下来一个县的代码:

技术图片

主方法

  /// <summary>
  /// Console tool that prints the township- and village-level administrative
  /// division codes of one county, scraped from the stats.gov.cn code pages.
  /// </summary>
  class Program
  {
      /// <summary>
      /// Asks for a data year, opens that year's start page in the browser, then
      /// reads a county page URL from the console and prints every township
      /// followed by its villages as tab-separated "name, class, code" lines.
      /// </summary>
      static void Main(string[] args)
      {
          Console.ForegroundColor = ConsoleColor.Magenta;
          Console.WriteLine("\r\n----获取县级行政区乡、村二级区划代码");
          Console.WriteLine("----数据年份有:");
          Console.ResetColor();
          Cursor.WriteAt("A、2018", 2, 0);
          Cursor.WriteAt("B、2017", 12, 0);
          Cursor.WriteAt("C、2016", 2, 1);
          Cursor.WriteAt("D、2015", 12, 1);

          string year = ReadYear();
          if (year == null)
          {
              // Console input was closed; nothing more can be asked.
              return;
          }

          System.Diagnostics.Process.Start($"http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/{year}");
          Console.ForegroundColor = ConsoleColor.Magenta;
          Console.WriteLine("浏览器已加载区划代码起始页,请进入县级行政单位页面,复制url,粘贴到下面(回车提交):");
          Console.ResetColor();

          string cityurl = ReadUrl();
          if (cityurl == null)
          {
              return;
          }
          if (!HasExpectedUrlLength(cityurl))
          {
              Console.ForegroundColor = ConsoleColor.Magenta;
              Console.WriteLine("url有误,请确认是县级行政单位页面,重新复制链接,粘贴到下面:");
              Console.ResetColor();
              // As before, the second entry is accepted without another length check.
              cityurl = ReadUrl();
              if (cityurl == null)
              {
                  return;
              }
          }

          Console.ForegroundColor = ConsoleColor.Magenta;
          List<VillageInfo> villageInfos = new List<VillageInfo>();
          foreach (TownInfo town in GetTownInfos(cityurl))
          {
              // Put the township itself into the village list so it is printed as a heading row.
              villageInfos.Add(new VillageInfo() { code = town.code, cls = "", name = town.name });
              villageInfos.AddRange(GetVillageInfos(town.href));
          }
          foreach (VillageInfo village in villageInfos)
          {
              Console.WriteLine($"{village.name.Trim()}\t{village.cls.Trim()}\t{village.code.Trim()}");
          }
          Console.ReadKey();
      }

      /// <summary>
      /// Prompts until the first character of the entered line is A-D (case-insensitive).
      /// </summary>
      /// <returns>The selected year as text, or null when console input has ended.</returns>
      static string ReadYear()
      {
          while (true)
          {
              Console.ForegroundColor = ConsoleColor.Magenta;
              Console.WriteLine();
              Console.WriteLine("----请输入一个年份代码(回车提交):");
              Console.ResetColor();

              string line = Console.ReadLine();
              if (line == null)
              {
                  return null;
              }
              if (line.Length == 0)
              {
                  // An empty line used to crash on line[0]; just ask again.
                  continue;
              }
              switch (char.ToLowerInvariant(line[0]))
              {
                  case 'a':
                      return "2018";
                  case 'b':
                      return "2017";
                  case 'c':
                      return "2016";
                  case 'd':
                      return "2015";
              }
          }
      }

      /// <summary>Reads one line and trims the whitespace a paste may add; null when input has ended.</summary>
      static string ReadUrl()
      {
          string line = Console.ReadLine();
          return line == null ? null : line.Trim();
      }

      /// <summary>
      /// Length heuristic carried over from the original check: only URLs of 66 or 71
      /// characters are accepted. NOTE(review): presumably these are the two county-page
      /// URL formats of the site - confirm.
      /// </summary>
      static bool HasExpectedUrlLength(string url)
      {
          return url.Length == 66 || url.Length == 71;
      }

      /// <summary>
      /// Downloads a county page and extracts its township rows.
      /// </summary>
      /// <param name="cityurl">URL of the county page; also used as the base for the township links.</param>
      /// <returns>One entry per usable "towntr" row; empty when the page has none.</returns>
      static List<TownInfo> GetTownInfos(string cityurl)
      {
          List<TownInfo> townInfos = new List<TownInfo>();
          HttpGetHelper httpGetHelper = new HttpGetHelper() { Url = cityurl, ContentType = "text/html; charset=gb2312", Encode = Encoding.GetEncoding(20936) };
          // The HTML is parsed with the HtmlAgilityPack library.
          HtmlDocument document = new HtmlDocument();
          document.LoadHtml(httpGetHelper.GetHtml());
          // XPath: "//" matches descendants at any depth starting from the document root,
          // "/" matches direct children only, and a leading "./" starts at the current node.
          HtmlNodeCollection rows = document.DocumentNode.SelectNodes("//tr[@class='towntr']");
          if (rows == null)
          {
              // SelectNodes returns null, not an empty collection, when nothing matches.
              return townInfos;
          }

          // Township links are relative, so resolve them against the county page's folder.
          string baseUrl = cityurl.Substring(0, cityurl.LastIndexOf('/') + 1);
          foreach (HtmlNode row in rows)
          {
              HtmlNodeCollection cells = row.SelectNodes("./td");
              if (cells == null || cells.Count < 2)
              {
                  continue;
              }
              HtmlNode link = row.SelectSingleNode(".//a[@href]");
              TownInfo townInfo = new TownInfo();
              townInfo.code = cells[0].InnerText;
              townInfo.name = cells[1].InnerText;
              // A row without a link still lists the township; it just has no village page.
              townInfo.href = link == null ? string.Empty : baseUrl + link.Attributes["href"].Value;
              townInfos.Add(townInfo);
          }
          return townInfos;
      }

      /// <summary>
      /// Downloads a township page and extracts its village rows.
      /// </summary>
      /// <param name="townurl">Absolute URL of the township page; null or empty yields an empty list.</param>
      /// <returns>One entry per usable "villagetr" row; empty when the page has none.</returns>
      static List<VillageInfo> GetVillageInfos(string townurl)
      {
          List<VillageInfo> villageInfos = new List<VillageInfo>();
          if (string.IsNullOrEmpty(townurl))
          {
              return villageInfos;
          }
          HttpGetHelper httpGetHelper = new HttpGetHelper() { Url = townurl, ContentType = "text/html; charset=gb2312", Encode = Encoding.GetEncoding(20936) };
          HtmlDocument document = new HtmlDocument();
          document.LoadHtml(httpGetHelper.GetHtml());
          HtmlNodeCollection rows = document.DocumentNode.SelectNodes("//tr[@class='villagetr']");
          if (rows == null)
          {
              // SelectNodes returns null, not an empty collection, when nothing matches.
              return villageInfos;
          }
          foreach (HtmlNode row in rows)
          {
              HtmlNodeCollection cells = row.SelectNodes(".//td");
              if (cells == null || cells.Count < 3)
              {
                  continue;
              }
              VillageInfo villageInfo = new VillageInfo();
              villageInfo.code = cells[0].InnerText;
              villageInfo.cls = cells[1].InnerText;
              villageInfo.name = cells[2].InnerText;
              villageInfos.Add(villageInfo);
          }
          return villageInfos;
      }
  }

辅助类/结构

/// <summary>
/// Writes text at console positions given relative to a fixed origin
/// (row 3, column 0 of the console buffer).
/// </summary>
internal class Cursor
{
    const int BaseRow = 3;
    const int BaseColumn = 0;

    /// <summary>Moves the console cursor to the offset position and writes the text there.</summary>
    /// <param name="s">Text to write.</param>
    /// <param name="c">Column offset from the origin column.</param>
    /// <param name="r">Row offset from the origin row.</param>
    public static void WriteAt(string s, int c, int r)
    {
        int left = BaseColumn + c;
        int top = BaseRow + r;
        Console.SetCursorPosition(left, top);
        Console.Write(s);
    }
}
    // Township record: division code, name, and hyperlink.
    struct TownInfo
    {
        public string code;
        public string name;
        public string href;
    }
    // Village record: division code, urban-rural classification, and name.
    struct VillageInfo
    {
        public string code;
        public string cls;
        public string name;
    }

 

Get请求,获取HTML

 /// <summary>
 /// Minimal helper that issues one HTTP GET request and returns the response
 /// body as text. Configure it through the write-only properties, then call
 /// <see cref="GetHtml"/>.
 /// </summary>
 public class HttpGetHelper
 {
     string url = string.Empty;

     /// <summary>Address to request. Null or empty makes <see cref="GetHtml"/> return an empty string.</summary>
     public string Url
     {
         set { url = value; }
     }

     int timeOut = 10 * 1000;

     /// <summary>Request timeout in milliseconds (default 10 seconds).</summary>
     public int Timeout
     {
         set { timeOut = value; }
     }

     string contentType = "text/html;charset=utf-8";

     /// <summary>Value sent in the request's Content-Type header.</summary>
     public string ContentType
     {
         set { contentType = value; }
     }

     // The spacing in the original literal was garbled ("Mozilla / 5.0(Windows ...");
     // this is the well-formed Firefox 66 User-Agent string.
     string userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0";

     /// <summary>Value sent in the request's User-Agent header.</summary>
     public string UserAgent
     {
         set { userAgent = value; }
     }

     Encoding encode = Encoding.UTF8;

     /// <summary>Encoding used to decode the response body (default UTF-8).</summary>
     public Encoding Encode
     {
         set { encode = value; }
     }

     /// <summary>
     /// Performs the GET request and reads the whole response body.
     /// </summary>
     /// <returns>
     /// The decoded body, or an empty string when no URL is set or the
     /// response status is not 200 OK.
     /// </returns>
     /// <exception cref="Exception">
     /// The request failed; the original error is available as InnerException.
     /// </exception>
     public string GetHtml()
     {
         // A null URL is treated like an empty one instead of failing inside WebRequest.Create.
         if (string.IsNullOrEmpty(url))
         {
             return string.Empty;
         }

         string html = string.Empty;
         try
         {
             // Direct casts: a non-HTTP URL fails here with a clear InvalidCastException
             // instead of a NullReferenceException on the next line.
             HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
             request.Timeout = this.timeOut;
             request.ContentType = this.contentType;
             request.UserAgent = this.userAgent;
             using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
             {
                 if (response.StatusCode == HttpStatusCode.OK)
                 {
                     using (StreamReader streamReader = new StreamReader(response.GetResponseStream(), encode))
                     {
                         html = streamReader.ReadToEnd();
                     }
                 }
             }
         }
         catch (Exception ex)
         {
             // Keep the message and exception type callers already see, but attach the cause.
             throw new Exception($"GetHtml失败,url:{url}", ex);
         }
         return html;
     }
 }

 

爬一下国家统计局行政区划代码

标签:rgs   lag   分类   style   ==   add   return   nta   mozilla   

原文地址:https://www.cnblogs.com/yzhyingcool/p/10705889.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!