码迷,mamicode.com
首页 > 其他好文 > 详细

111111111111111

时间:2019-08-07 10:53:02      阅读:171      评论:0      收藏:0      [点我收藏+]

标签:ini   content   users   tree   password   etl   requests   mount   efault   

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests

from lxml import etree

from selenium import webdriver
from pymongo import MongoClient
from set_order_date00 import set_data
from add_order_to_mssql01 import order_to_mssql
from add_sales_data02 import deal_eb_vendor

# Connect to MongoDB; scraped orders are stored in the "gw_scrapy" database.
client = MongoClient(host='192.168.6.232')
db = client.gw_scrapy

# Login page opened in the browser to obtain session cookies.
LOGIN_URI = 'https://login.esgcc.com.cn/mallLogin'

# HTTP request headers sent with every `requests` call.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) "
        "AppleWebKit/525.18.1 (KHTML, like Gecko) "
        "Version/3.1.1 Mobile/5F137 Safari/525.20"
    ),
}


def get_login_cookies():
    """Log in through a Chrome session and return the resulting cookies.

    Chrome is started with a zh_CN locale, a SOCKS5 proxy and a fixed mobile
    user agent, the login form at ``LOGIN_URI`` is filled in and submitted,
    and the browser's cookies are collected once the login is confirmed.

    Returns:
        dict: cookie name -> cookie value, suitable for ``requests``'
        ``cookies=`` argument.

    Raises:
        Exception: if, after submitting the form, the page does not contain
            exactly one element with id ``hiddenUserIdInTopBar``.
    """
    options = webdriver.ChromeOptions()
    # NOTE: image loading is left enabled; the original code built a
    # 'profile.default_content_setting_values' pref to disable images but
    # never applied it.
    options.add_argument('lang=zh_CN.UTF-8')
    options.add_argument("--proxy-server=socks5://192.168.6.168:1024")
    options.add_argument('user-agent="Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"')
    chrome_driver = webdriver.Chrome(executable_path=r"C:\Users\Administrator\Desktop\国网订单抓取\chromedriver", chrome_options=options)
    try:
        chrome_driver.get(LOGIN_URI)
        # Implicit wait of 60 seconds for the element lookups below.
        chrome_driver.implicitly_wait(60)
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration or environment variables.
        chrome_driver.find_element_by_id('username').send_keys('HLJ_DSCX')
        chrome_driver.find_element_by_id('password').send_keys('1234qwer')
        chrome_driver.find_element_by_class_name('login_btn').click()

        # The login is treated as successful only when this element is
        # present exactly once.
        userid_els = chrome_driver.find_elements_by_id('hiddenUserIdInTopBar')
        if len(userid_els) != 1:
            raise Exception('登陆失败')

        cookies_list = chrome_driver.get_cookies()
    finally:
        # Always shut the browser down, including when the login fails, so
        # repeated retries do not pile up orphaned Chrome processes.
        chrome_driver.quit()

    return {cookie["name"]: cookie["value"] for cookie in cookies_list}


# URL template of the paginated "latest orders" listing.
_ORDERS_URL = "http://b.esgcc.com.cn/showIndex/getLastestOrdersInner.htm?pgn={}"

# Seconds before an unanswered HTTP request is abandoned, so a stalled proxy
# cannot hang the scraper forever.
_REQUEST_TIMEOUT = 60


def _fetch_order_page(page, cookies):
    """Download one page of the order listing and return it parsed by lxml."""
    ret = requests.get(
        _ORDERS_URL.format(page),
        cookies=cookies,
        headers=headers,
        proxies={'http': '192.168.6.168:1025'},
        timeout=_REQUEST_TIMEOUT,
    )
    return etree.HTML(ret.text)


def _store_new_orders(tree):
    """Insert every order row of a parsed page that MongoDB does not hold yet."""
    rows = tree.xpath('//table[@class="info_open_table"]/tbody/tr')
    if not rows:
        raise Exception('页面未发现数据元素')

    for row in rows:
        # Text of the row's own cells; evaluated relative to the row so the
        # lookup stays correct even if several tables match.
        cells = row.xpath('td/text()')
        if len(cells) < 7:
            # Not enough cells to read all fields; skip this row instead of
            # aborting the whole crawl.
            print(f"跳过不完整的数据行: {cells}")
            continue

        # Cell 0 is not used; cells 1-6 carry the order fields.
        order_id, amount, user_name, customer, goods_name, supplier = cells[1:7]
        if db.order.find_one({'order_id': order_id}) is None:
            db.order.insert_one({
                'order_id': order_id,
                'date': '',
                'user_name': user_name,
                'customer': customer,
                'supplier': supplier,
                'goods_name': goods_name,
                'amount': amount,
            })
            print("存储成功")


def get_data(cookies):
    """Crawl every page of the latest-orders listing and store new orders.

    The page count is read from the pagination bar of page 1 (the text of the
    second-to-last link). Each page's table rows are then saved into
    ``db.order`` unless a document with the same ``order_id`` already exists.

    Args:
        cookies: cookie name -> value mapping of a logged-in session.

    Raises:
        Exception: if page 1 has no usable pagination bar, or if a page
            contains no order rows.
    """
    first_tree = _fetch_order_page(1, cookies)
    page_links = first_tree.xpath('//div[@class="page_wrap"]//a/text()')
    if len(page_links) < 2:
        raise Exception('页面未发现分页元素')
    page_num = page_links[-2]
    print(page_num)

    for page in range(1, int(page_num) + 1):
        # Page 1 was already downloaded to read the page count; reuse it.
        tree = first_tree if page == 1 else _fetch_order_page(page, cookies)
        print(_ORDERS_URL.format(page))
        _store_new_orders(tree)


if __name__ == "__main__":
    import time

    # Seconds to pause after a failed cycle, so a persistent failure does not
    # turn the retry loop into a busy loop.
    retry_delay = 5

    # Run forever: log in, scrape the orders, then run the post-processing
    # steps. Any error is printed and the cycle starts again.
    while True:
        try:
            print("开始")
            cookies = get_login_cookies()
            print(cookies)
            get_data(cookies)
            # Post-processing of the scraped data.
            set_data()
            order_to_mssql()
            deal_eb_vendor()
            print("结束")
        except Exception as e:
            print(e)
            time.sleep(retry_delay)

111111111111111

标签:ini   content   users   tree   password   etl   requests   mount   efault   

原文地址:https://www.cnblogs.com/JinMuBaoBao/p/11313754.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!