# Tags: pass script mysql apt val window .com lines title
"""Scrape Taobao shop listing pages with Selenium and record products in MySQL.

For every shop listed in ``1.csv`` (first column: brand, third column: URL of
the shop's listing page) the script walks the paginated listing, inserts
products whose id is not yet in the ``商品id`` table, visits each new product's
item page to store its category / sizes / colours / SKU count and a full-page
screenshot, and writes the listed sales figure into the 1-, 7- or 30-day sales
column for products that were put on sale exactly that many days ago.

The source this was recovered from had lost all line breaks and had its quote
characters replaced by typographic ones; the block structure below was rebuilt
from the statement order.
"""
import os
import random
import re
import time
from urllib import parse

import pymysql
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.common.by import By

LOGIN_URL = (
    'https://login.taobao.com/member/login.jhtml'
    '?spm=a21bo.50862.754894437.1.5dcec6f76Oq9Wh&f=top'
    '&redirectURL=https%3A%2F%2Fwww.taobao.com%2F%3Fspm%3Da1z10.1-c-s.1581860521.1.559a715a3EnsHq'
)
ITEM_URL_PREFIX = 'http://item.taobao.com/item.htm?id='

# One <dl> per product on a shop listing page.
PRODUCT_ROWS_XPATH = '//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'
# href of the last link in the last <div> of the result container; the script
# treats a non-empty value as "there is a next page".
NEXT_PAGE_XPATH = '//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href'

# Products whose listing date is exactly 1, 7 or 30 days ago, each tagged (in
# the 日期 field) with the name of the sales column that is due to be filled.
DUE_SALES_SQL = '''
SELECT `商品id`.id, `上架时间`, '1天销量' AS 日期 FROM `商品id`
WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 1
UNION
SELECT `商品id`.id, `上架时间`, '7天销量' AS 日期 FROM `商品id`
WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 7
UNION
SELECT `商品id`.id, `上架时间`, '30天销量' AS 日期 FROM `商品id`
WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 30
'''

INSERT_PRODUCT_SQL = (
    "INSERT INTO 商品id (`品牌`, `id`,图片链接,价格,标题,商品地址) "
    "VALUES (%s,%s,%s,%s,%s,%s)"
)

# Column names cannot be bound as query parameters, so every column that is
# spliced into an UPDATE statement must come from this fixed set.
_UPDATABLE_COLUMNS = frozenset(
    ('类目', '尺码', '颜色', 'sku量', '1天销量', '7天销量', '30天销量')
)

# Scrolls the page to the bottom in 100px steps, scrolls back to the top and
# then appends "scroll-done" to the document title as a completion marker.
SCROLL_JS = """
    (function () {
        var y = 0;
        var step = 100;
        window.scroll(0, 0);
        function f() {
            if (y < document.body.scrollHeight) {
                y += step;
                window.scroll(0, y);
                setTimeout(f, 50);
            } else {
                window.scroll(0, 0);
                document.title += "scroll-done";
            }
        }
        setTimeout(f, 1000);
    })();
"""


class spider(object):
    """Pagination helper; the script instantiates it but never calls it."""

    def chul3(self, dates):
        """Return ``'https:'`` plus the href at a fixed pagination position.

        ``dates`` is the HTML text of a listing page.  When the link is not
        found the result is the bare string ``'https:'``.
        """
        a = Selector(text=dates)
        next_url = a.xpath(
            '//*[@id="J_ShopSearchResult"]/div/div[2]/div[10]/a[11]/@href'
        ).extract_first("")
        return 'https:' + next_url


def capture(webder, save_fn="capture.png"):
    """Scroll the current page top to bottom, then save a screenshot.

    The injected script marks completion by appending ``"scroll-done"`` to the
    page title; this function polls the title once per second for at most 30
    seconds and takes the screenshot regardless of whether the marker showed.

    :param webder: Selenium WebDriver showing the page to capture.
    :param save_fn: path of the image file to write.
    """
    webder.execute_script(SCROLL_JS)
    for _ in range(30):
        if "scroll-done" in webder.title:
            break
        time.sleep(1)
    webder.save_screenshot(save_fn)


def _set_product_field(column, value, product_id):
    """Set one whitelisted column of the ``商品id`` row with the given id."""
    if column not in _UPDATABLE_COLUMNS:
        raise ValueError('unexpected column name: %r' % (column,))
    cursor.execute(
        'UPDATE `商品id` SET `' + column + '` = %s WHERE id = %s',
        (value, product_id),
    )
    conection.commit()


def _record_new_product(brand, bd_id, bd_pig, bd_much, bd_name):
    """Insert a newly seen product, then enrich it from its item page.

    Stores brand, id, image link, price, title and item URL, then loads the
    item page to record category, size list, colour list and SKU count, and
    saves a screenshot under ``<path>/<brand>/``.
    """
    item_url = ITEM_URL_PREFIX + bd_id
    cursor.execute(
        INSERT_PRODUCT_SQL,
        (brand, bd_id, bd_pig, bd_much, bd_name, item_url),
    )
    conection.commit()

    driver.get(item_url)
    # The result is unused: with the implicit wait set, this lookup blocks
    # until the price element exists, i.e. the page has rendered.
    driver.find_element(By.CLASS_NAME, 'tb-price-spec')
    time.sleep(random.randrange(2, 6))
    detail = Selector(text=driver.page_source)

    category = detail.xpath('//*[@id="J_TMySize"]/@data-value').extract_first("")
    _set_product_field('类目', category, bd_id)

    # Default to 1 so a product lacking one of the two properties still gets
    # a SKU count equal to the size of the other.
    color_count = 1
    size_count = 1
    for prop in detail.xpath('//*[@id="J_isku"]/div/dl'):
        label = prop.xpath('./dt/text()').extract_first("")
        options = prop.xpath('./dd/ul/li/a/span/text()').extract()
        if '尺码' in label:
            size_count = len(options)
            _set_product_field('尺码', ' '.join(options), bd_id)
        if '颜色' in label:
            color_count = len(options)
            _set_product_field('颜色', ' '.join(options), bd_id)
    _set_product_field('sku量', color_count * size_count, bd_id)

    # The file name is the item id followed by the page title stripped of
    # non-word characters.  The original derived the id part from a loop
    # variable that the property loop above had already overwritten.
    shot_dir = os.path.join(path, brand)
    os.makedirs(shot_dir, exist_ok=True)
    shot_name = bd_id + re.sub(r"\W", "", driver.title) + '.jpg'
    # NOTE(review): Selenium documents save_screenshot as writing PNG data;
    # the '.jpg' suffix is kept from the original - confirm it is intended.
    capture(driver, os.path.join(shot_dir, shot_name))


def _process_listing_row(row, brand):
    """Handle one product ``<dl>`` from a listing page."""
    # Image URL up to its last underscore; '' when there is no match, so a
    # plain string (never a list) is bound to the SQL parameter.
    bd_pig = row.xpath("./dt/a/img/@src").re_first(r'(.*)_', default='')
    # Title reduced to its CJK characters.
    bd_name = ''.join(re.findall(
        '[\u4e00-\u9fa5]', row.xpath('./dd[1]/a/text()').extract_first('')))
    # NOTE(review): joins every digit in the href, so this assumes the link
    # carries no digits other than the item id - confirm against real pages.
    bd_id = ''.join(re.findall(
        r'\d', row.xpath('./dd[1]/a/@href').extract_first('')))
    bd_much = row.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
    bd_liang = row.xpath(
        './dd[1]/div/div[last()]/span[last()]/text()').extract_first('')

    if not bd_id:
        # Without an id there is no item page to visit and no row to key on.
        return

    if bd_id not in shop_oldid:
        _record_new_product(brand, bd_id, bd_pig, bd_much, bd_name)
        # Remember it so the same product on a later page is not re-inserted.
        shop_oldid.add(bd_id)

    for column in _sales_columns_by_id.get(bd_id, ()):
        _set_product_field(column, bd_liang, bd_id)


def lll(url, brand=None):
    """Crawl a shop listing starting at ``url`` and follow it page by page.

    :param url: URL of the first listing page.
    :param brand: value stored in the ``品牌`` column and used as screenshot
        sub-directory.  When omitted, the first comma-separated field of the
        module-level ``shop`` line is used, as the original code did.
    """
    if brand is None:
        brand = shop.split(",")[0]
    # Iterative instead of one recursive call per page, so long paginations
    # cannot hit the recursion limit.
    while url:
        driver.get(url)
        # Blocks (implicit wait) until the pagination bar has rendered.
        driver.find_element(By.CLASS_NAME, 'pagination')
        page_html = driver.page_source
        time.sleep(random.randrange(2, 6))
        listing = Selector(text=page_html)
        for row in listing.xpath(PRODUCT_ROWS_XPATH):
            _process_listing_row(row, brand)
        next_href = listing.xpath(NEXT_PAGE_XPATH).extract_first("")
        url = 'https:' + next_href if next_href else None


chuli = spider()
path = os.getcwd()

# NOTE(review): connection credentials are hard-coded.
conection = pymysql.connect(
    host='localhost',
    user='root',
    password='123',
    db='7.24测试',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
try:
    with conection.cursor() as cursor:
        cursor.execute("SELECT id FROM 商品id")
        # Ids scraped from pages are strings; normalise the stored ids to str
        # so membership tests work whatever the column type is.
        shop_oldid = {str(row['id']) for row in cursor.fetchall()}
        cursor.execute(DUE_SALES_SQL)
        shop_olxx = list(cursor.fetchall())
    conection.commit()

    # Product id -> names of the sales columns due to be written today.
    _sales_columns_by_id = {}
    for due in shop_olxx:
        _sales_columns_by_id.setdefault(str(due['id']), []).append(due['日期'])

    # Long-lived cursor shared by the helper functions above.
    cursor = conection.cursor()
    driver = webdriver.Ie()
    try:
        # Element lookups wait up to 50 s; the script uses them as page-load
        # barriers.
        driver.implicitly_wait(50)
        driver.get(LOGIN_URL)
        # NOTE(review): presumably leaves time to log in by hand - confirm.
        time.sleep(20)

        with open(os.path.join(os.getcwd(), '1.csv'), 'r') as c:
            for shop in c:
                if not shop.strip():
                    continue  # tolerate blank / trailing empty lines
                fields = shop.rstrip('\r\n').split(",")
                # Strip so a URL in the last column does not keep whitespace.
                lll(fields[2].strip(), brand=fields[0])
    finally:
        driver.quit()
finally:
    conection.close()
# Tags: pass script mysql apt val window .com lines title
# Original article: http://www.cnblogs.com/gao-xiang/p/7228194.html