码迷,mamicode.com
首页 > 其他好文 > 详细

图片抓取,根据关键字爬取淘宝或百度前4张图片

时间:2018-01-30 19:37:07      阅读:199      评论:0      收藏:0      [点我收藏+]

标签:arc   ++   start   encode   rod   filename   转码   post   uri   

通过关键字查询淘宝网或百度图片,并将搜索结果中的前四张图片下载到本地。

 

 /// <summary>
 /// Searches Baidu Images for <paramref name="productTitle"/> and saves up to the first
 /// four thumbnails that <c>Get_img</c> manages to download.
 /// </summary>
 /// <param name="productTitle">Search keyword; it is URL-encoded before being put into the query string.</param>
 /// <returns>
 /// The saved file paths joined with ';' (an empty string when no thumbnail could be saved),
 /// or <c>null</c> when the result page contains no thumbnail URL at all.
 /// </returns>
 /// <remarks>
 /// Failures of the search request itself are not caught here and propagate to the caller
 /// with their original stack trace.
 /// </remarks>
 private new string Capture(string productTitle)
 {
     // Upper bound on how many images are downloaded per keyword.
     const int maxImages = 4;

     // URL-encode the keyword so that Chinese characters survive in the query string.
     string keyword = HttpUtility.UrlEncode(productTitle);

     // Baidu image search.
     string url = string.Format(@"https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word={0}&oq={0}&rsp=-1", keyword);
     // Taobao search alternative:
     //url = string.Format("https://s.taobao.com/search?q={0}&search_type=item&sourceId=tb.index", keyword);

     HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
     request.Method = "GET";

     // using blocks release the connection and reader even when reading fails.
     string responseContent;
     using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
     using (StreamReader reader = new StreamReader(response.GetResponseStream()))
     {
         responseContent = reader.ReadToEnd();
     }

     // Baidu: capture whatever sits between "thumbURL":" and ","replaceUrl".
     string pattern = "(?is)(?<=\"thumbURL\":\").*?(?=\",\"replaceUrl\")";
     // Taobao alternative pattern:
     //string pattern = "(?is)(?<=\"pic_url\":\"//).*?(?=\",\"detail_url\")";

     MatchCollection matches = Regex.Matches(responseContent, pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
     if (matches.Count == 0)
     {
         return null;
     }

     // Walk the matches in page order; URLs that fail to download are skipped
     // and do not count towards the limit.
     List<string> savedPaths = new List<string>();
     foreach (Match match in matches)
     {
         string savedPath = Get_img(match.Value);
         if (string.IsNullOrEmpty(savedPath))
         {
             continue;
         }

         savedPaths.Add(savedPath);
         if (savedPaths.Count >= maxImages)
         {
             break;
         }
     }

     return string.Join(";", savedPaths.ToArray());
 }
56         private string Get_img(string url)
57         {
58             Bitmap img = null;
59             HttpWebRequest req;
60             HttpWebResponse res = null;
61             string result = string.Empty;
62             try
63             {
64                 //System.Uri httpUrl = new System.Uri("http://" + url);//淘宝图片
65                 System.Uri httpUrl = new System.Uri(url);
66                 req = (HttpWebRequest)(WebRequest.Create(httpUrl));
67                 req.Timeout = 180000; //设置超时值10秒
68                 res = (HttpWebResponse)(req.GetResponse());
69                 img = new Bitmap(res.GetResponseStream());//获取图片流  
70                 string type = url.Substring(url.Length - 4, 4);//获取图片类型
71                 string fileName = DateTime.Now.ToString("yyyyMMddHHmmssfffffff") + type;
72                 //测试用路径
73                 result = path + fileName;
74                 img.Save(result);//随机名
75             }
76 
77             catch (Exception)
78             {
79                 return null;
80             }
81             finally
82             {
83                 if (res != null)
84                 {
85                     res.Close();
86                 }
87             }
88             return result;
89         }

 

图片抓取,根据关键字爬取淘宝或百度前4张图片

标签:arc   ++   start   encode   rod   filename   转码   post   uri   

原文地址:https://www.cnblogs.com/guanwanli/p/8385836.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!