// 标签 (tags): 商品信息 (product information)
using Ivony.Html.Parser;
using Ivony.Html;
using OpenQA.Selenium;
using OpenQA.Selenium.Firefox;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Skay.WebBot;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json;
using System.Data.SqlClient;
namespace taobao
{
/// <summary>
/// Main window of the scraper. Clicking the button starts a worker thread that walks
/// ten JD.com phone listing pages, collects details, price and review counts for every
/// listed product, and stores one row per product in the <c>jdfirst</c> table.
/// </summary>
public partial class Form1 : Form
{
    // NOTE(review): credentials are hard-coded in source; move them to configuration.
    private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

    // Columns of the jdfirst table, in the order they are written by InsertProduct.
    private static readonly string[] ProductColumns =
    {
        "name", "contents", "vender", "screen", "resolution_ratio", "camer_fore", "camer_back",
        "AllCots", "imgCots", "GoodCots", "MidCots", "lowCots", "weight", "Nametitle", "goodsId",
        "onselfTime", "From1", "hotPoint", "system1", "color", "priceEnd"
    };

    // Maps the label of a "#parameter2 li" entry on the detail page to the column it fills.
    private static readonly Dictionary<string, string> SpecColumns = new Dictionary<string, string>
    {
        { "商品毛重", "weight" },
        { "商品名称", "Nametitle" },
        { "商品编号", "goodsId" },
        { "上架时间", "onselfTime" },
        { "商品产地", "From1" },
        { "热点", "hotPoint" },
        { "系统", "system1" },
        { "机身颜色", "color" }
    };

    // "label:value" entries are split on the first colon. Both the ASCII and the
    // full-width colon are accepted because the page may use either form.
    private static readonly char[] LabelSeparators = { ':', '\uFF1A' };

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>Worker thread running the most recently started scrape.</summary>
    public static Thread th;

    /// <summary>Starts the scrape on a new thread so the UI thread is not blocked.</summary>
    private void button1_Click(object sender, EventArgs e)
    {
        th = new Thread(new ThreadStart(JDData));
        th.Start();
    }

    /// <summary>
    /// Scrapes listing pages 1 to 10 and inserts one database row per product.
    /// Items whose basic listing fields cannot be read are skipped; optional detail
    /// fields that cannot be read are stored as empty strings.
    /// </summary>
    void JDData()
    {
        for (int page = 1; page <= 10; page++)
        {
            string listUrl = "http://list.jd.com/list.html?cat=9987,653,655&page=" + page + "&ext=502864::1943^^&go=0&JL=6_0_0";
            HttpUtility listHttp = new HttpUtility();
            string listHtml = listHttp.GetHtmlText(listUrl);
            var listDocument = new JumonyParser().Parse(listHtml);

            foreach (var item in listDocument.Find(".gl-item"))
            {
                Dictionary<string, string> row = NewEmptyRow();
                string detailUrl;

                // Fields taken from the listing entry itself. Without them there is no
                // detail page to follow, so the item is skipped instead of letting the
                // exception terminate the worker thread.
                try
                {
                    row["name"] = item.FindFirst(".p-name em").InnerText();                           // product title
                    row["contents"] = item.FindFirst(".p-commit strong a").InnerText();               // number of reviewers
                    row["vender"] = item.FindFirst(".p-shop").Attribute("data-shop_name").Value();    // seller
                    detailUrl = item.FindFirst(".p-name a").Attribute("href").Value();
                }
                catch (Exception ex)
                {
                    LogFailure("listing item on page " + page, ex);
                    continue;
                }

                if (string.IsNullOrEmpty(detailUrl))
                {
                    continue;
                }

                HttpUtility detailHttp = new HttpUtility();
                string detailHtml = detailHttp.GetHtmlText(detailUrl, "gbk", "text/html; charset=gbk");
                var detailDocument = new JumonyParser().Parse(detailHtml);

                // Each field is read independently so one missing element does not
                // blank out the fields that follow it.
                row["screen"] = ReadOrEmpty(() => ValueAfterLabel(detailDocument.FindFirst(".detail p").InnerText()));                   // screen size
                row["resolution_ratio"] = ReadOrEmpty(() => ValueAfterLabel(detailDocument.FindLast(".fore0 .detail p").InnerText()));   // resolution
                row["camer_fore"] = ReadOrEmpty(() => detailDocument.FindFirst(".fore1 .detail p").Attribute("title").Value());          // front camera
                row["camer_back"] = ReadOrEmpty(() => detailDocument.FindLast(".fore1 .detail p").Attribute("title").Value());           // rear camera

                foreach (var spec in detailDocument.Find("#parameter2 li"))
                {
                    string specText = ReadOrEmpty(() => spec.InnerText());
                    string[] parts = specText.Split(LabelSeparators, 2);
                    if (parts.Length < 2)
                    {
                        continue;
                    }

                    string column;
                    if (SpecColumns.TryGetValue(parts[0].Trim(), out column))
                    {
                        row[column] = parts[1];
                    }
                }

                string goodsId = row["goodsId"];

                // Price comes from a JSONP endpoint; the wrapper is stripped to get the JSON object.
                string priceUrl = "http://p.3.cn/prices/get?type=1&area=1_72_4137&pdtk=&pduid=795687743&pdpin=&pdbp=0&skuid=J_" + goodsId + "&callback=cnp";
                string priceResponse = detailHttp.GetHtmlText(priceUrl, "gbk", "text/html;charset=gbk", "");
                try
                {
                    string priceJson = priceResponse.Replace("cnp", "").Replace("(", "").Replace(")", "").Replace("[", "").Replace("]", "").Replace(";", "");
                    JObject price = (JObject)JsonConvert.DeserializeObject(priceJson);
                    row["priceEnd"] = price["p"].ToString(); // price
                }
                catch (Exception ex)
                {
                    LogFailure("price for goodsId '" + goodsId + "'", ex);
                    MessageBox.Show("捕获异常");
                }

                // Review counters: the first element of the array in the response is used.
                string countsUrl = "http://club.jd.com/clubservice.aspx?method=GetCommentsCount&referenceIds=" + goodsId;
                string countsResponse = detailHttp.GetHtmlText(countsUrl, "gbk", "text/html;charset=gbk", "");
                try
                {
                    JObject counts = (JObject)JsonConvert.DeserializeObject(countsResponse.Split('[')[1].Replace("]}", ""));
                    row["AllCots"] = counts["CommentCount"].ToString();
                    // NOTE(review): the image-review count is a hard-coded placeholder, not scraped.
                    row["imgCots"] = "500";
                    row["GoodCots"] = counts["GoodCount"].ToString();
                    row["MidCots"] = counts["GeneralCount"].ToString();
                    row["lowCots"] = counts["PoorCount"].ToString();
                }
                catch (Exception ex)
                {
                    LogFailure("comment counts for goodsId '" + goodsId + "'", ex);
                    MessageBox.Show("异常");
                }

                InsertProduct(row);
            }
        }
    }

    /// <summary>Creates a row holding an empty string for every column of <c>jdfirst</c>.</summary>
    private static Dictionary<string, string> NewEmptyRow()
    {
        Dictionary<string, string> row = new Dictionary<string, string>();
        foreach (string column in ProductColumns)
        {
            row[column] = string.Empty;
        }
        return row;
    }

    /// <summary>
    /// Returns the text after the first colon of a "label:value" string,
    /// or <c>null</c> when the text is null or contains no colon.
    /// </summary>
    private static string ValueAfterLabel(string text)
    {
        if (text == null)
        {
            return null;
        }

        string[] parts = text.Split(LabelSeparators, 2);
        return parts.Length == 2 ? parts[1] : null;
    }

    /// <summary>
    /// Runs a best-effort read and returns an empty string when the read throws
    /// or yields <c>null</c>; the failure is traced instead of silently dropped.
    /// </summary>
    private static string ReadOrEmpty(Func<string> reader)
    {
        try
        {
            return reader() ?? string.Empty;
        }
        catch (Exception ex)
        {
            LogFailure("optional field", ex);
            return string.Empty;
        }
    }

    /// <summary>Writes a scrape failure to the trace listeners.</summary>
    private static void LogFailure(string what, Exception ex)
    {
        System.Diagnostics.Trace.WriteLine("JDData: failed to read " + what + ": " + ex.Message);
    }

    /// <summary>
    /// Inserts one product row into <c>jdfirst</c>. Values are sent as parameters, so
    /// quotes or braces in scraped text cannot break or alter the statement, and the
    /// connection is disposed after every insert.
    /// </summary>
    private static void InsertProduct(IDictionary<string, string> row)
    {
        string[] parameterNames = new string[ProductColumns.Length];
        for (int i = 0; i < ProductColumns.Length; i++)
        {
            parameterNames[i] = "@p" + i;
        }

        // Column names come from the fixed ProductColumns list, never from scraped data.
        string sql = "insert into jdfirst (" + string.Join(", ", ProductColumns) + ") values (" + string.Join(", ", parameterNames) + ")";

        using (SqlConnection connection = new SqlConnection(ConnectionString))
        using (SqlCommand command = new SqlCommand(sql, connection))
        {
            for (int i = 0; i < ProductColumns.Length; i++)
            {
                command.Parameters.AddWithValue(parameterNames[i], row[ProductColumns[i]] ?? string.Empty);
            }

            connection.Open();
            command.ExecuteNonQuery();
        }
    }
}
}
// 标签:
// 原文地址 (original article): http://www.cnblogs.com/Tinamei/p/5170045.html