码迷,mamicode.com
首页 > 编程语言 > 详细

python爬虫:爬取京东商品信息

时间:2019-07-03 19:51:35      阅读:299      评论:0      收藏:0      [点我收藏+]

标签:close   odi   selenium   css   common   enc   send   format   windows   

'''
初级版
'''

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Beginner version: search JD for one keyword and append every product found
# on the first results page to jd.txt.
driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
num = 1  # 1-based index of the product record being written
try:
    # Implicit wait: element lookups keep retrying for up to 10 seconds.
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the search box and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    # Give the results page time to load.
    time.sleep(5)

    good_list = driver.find_elements_by_class_name('gl-item')
    # Open the output file once instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            print(good_name)

            # Product link
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            print(good_url)

            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            print(good_price)

            # Product review count
            good_commit = good.find_element_by_class_name('p-commit').text

            # The indentation inside this literal is part of the written
            # record, so it is kept exactly as in the original output format.
            good_content = '''
        num={}
        商品名称:{}
        商品链接:{}
        商品价格:{}
        商品的评价条数:{}
        \n
        '''.format(num, good_name, good_url, good_price, good_commit)
            print(good_content)
            f.write(good_content)
            # Advance the record index; without this every record is num=1.
            num += 1

    print('商品信息写入成功!')
finally:
    # quit() ends the whole WebDriver session (browser and chromedriver),
    # whereas close() only closes the current window.
    driver.quit()

'''
终极版
'''

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# "Ultimate" version: like the beginner script, but scrolls down first so the
# lazily loaded products are rendered, then opens the next results page.
driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
num = 1  # 1-based index of the product record being written
try:
    # Implicit wait: element lookups keep retrying for up to 10 seconds.
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the keyword into the search box and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(5)

    # Scroll down 5000px. The browser global is `window`; the original
    # `windows` raised a JavaScript ReferenceError.
    js_code = '''
    window.scrollTo(0,5000)
        '''
    driver.execute_script(js_code)

    # Wait 5s for the remaining product data to load.
    time.sleep(5)

    good_list = driver.find_elements_by_class_name('gl-item')
    # Open the output file once instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            print(good_name)

            # Product link
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            print(good_url)

            # Product price
            good_price = good.find_element_by_class_name('p-price').text
            print(good_price)

            # Product review count
            good_commit = good.find_element_by_class_name('p-commit').text

            # The indentation inside this literal is part of the written
            # record, so it is kept exactly as in the original output format.
            good_content = '''
        num={}
        商品名称:{}
        商品链接:{}
        商品价格:{}
        商品的评价条数:{}
        \n
        '''.format(num, good_name, good_url, good_price, good_commit)
            print(good_content)
            f.write(good_content)
            num += 1

    print('商品信息写入成功!')

    # Open the next results page. click must be *called*; the bare attribute
    # access `next_tag.click` in the original did nothing.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()

    time.sleep(10)

finally:
    # quit() ends the whole WebDriver session (browser and chromedriver),
    # whereas close() only closes the current window.
    driver.quit()

'''
狂暴版
'''
from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # 键盘按键操作
import time

#
def get_good(driver, max_pages=None, wait=5, out_path='京东商品信息爬取.txt'):
    """Scrape JD search results page by page and append them to a text file.

    Starting from the results page currently shown by ``driver``, each page
    is scrolled down, every ``gl-item`` product is read (name, link, price,
    review text) and appended to ``out_path``, then the "next page" button
    is clicked. Records are numbered continuously across pages.

    The function iterates instead of recursing, and it never closes the
    driver: the caller created the driver and is responsible for disposing
    of it.

    Args:
        driver: Selenium WebDriver already showing a search-results page.
        max_pages: Maximum number of result pages to scrape; ``None`` means
            continue until no usable "next page" button is found.
        wait: Seconds to sleep before and after scrolling each page so the
            page and its lazily loaded products can render.
        out_path: Path of the UTF-8 text file records are appended to.
    """
    # Scroll down 5000px.
    js_code = '''
                   window.scrollTo(0,5000)
                   '''
    num = 1  # 1-based record index, shared by all pages
    pages_done = 0

    while max_pages is None or pages_done < max_pages:
        time.sleep(wait)  # let the (new) results page load
        driver.execute_script(js_code)
        time.sleep(wait)  # let the product data load after scrolling

        good_list = driver.find_elements_by_class_name('gl-item')
        # Open the output file once per page instead of once per product.
        with open(out_path, 'a', encoding='utf-8') as f:
            for good in good_list:
                # Product name
                good_name = good.find_element_by_css_selector('.p-name em').text
                # Product link
                good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
                # Product price
                good_price = good.find_element_by_class_name('p-price').text
                # Product review text
                good_commit = good.find_element_by_class_name('p-commit').text

                # The indentation inside this literal is part of the written
                # record, so it is kept exactly as in the original format.
                good_content = '''
                       num:{}
                       商品名称:{}
                       商品链接:{}
                       商品价格:{}
                       商品评论:{}
                       \n
                       '''.format(num, good_name, good_url, good_price, good_commit)
                print(good_content)
                f.write(good_content)
                num += 1

        pages_done += 1
        if pages_done == max_pages:
            break

        # find_elements_* returns an empty list rather than raising when
        # nothing matches, so a missing button ends the crawl cleanly.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): presumably JD marks the button on the last page with
        # an extra "disabled" class -- confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or ''):
            break
        next_tag.click()

# Script entry point: open JD, search for the keyword, then let get_good()
# page through the results and save them.
if __name__ == '__main__':
    driver = webdriver.Chrome(r'C:\Users\Auraro\Desktop/chromedriver.exe')
    try:
        # Implicit wait: element lookups keep retrying for up to 10 seconds.
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com/')

        # Named input_tag (as in the other scripts) so the builtin input()
        # is not shadowed.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('人间失格')
        input_tag.send_keys(Keys.ENTER)
        get_good(driver)
        print('商品信息写入完成')
    finally:
        # quit() ends the whole WebDriver session (browser and chromedriver),
        # whereas close() only closes the current window.
        driver.quit()

  

  

python爬虫:爬取京东商品信息

标签:close   odi   selenium   css   common   enc   send   format   windows   

原文地址:https://www.cnblogs.com/Auraro997/p/11128158.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!