码迷,mamicode.com
首页 > 编程语言 > 详细

python例子-PyQuery抓取信息.

时间:2015-10-20 13:46:27      阅读:155      评论:0      收藏:0      [点我收藏+]

标签:

#!/usr/bin/python
#coding:utf-8

from __future__ import print_function

import re

from pyquery import PyQuery

# 抓取:http://www.stylebop.com/cn/product_details.php?id=606526&special=sale
# 获得   产品名 品牌 价格 size  图片(大图)
def _large_image_url(href, pattern, base_url="http://www.stylebop.com"):
    """Return the large-image URL embedded in *href*, or None if absent.

    *pattern* is a compiled regex with two groups; the second group of the
    first match is taken as the large image path.  A path that does not
    already start with "http" is prefixed with *base_url*.
    """
    if not href:
        return None
    matches = pattern.findall(href)
    if not matches:
        return None
    large = matches[0][1]
    if not large.startswith("http"):
        large = base_url + large
    return large


def main():
    """Scrape one stylebop.com product page and print its details.

    Fetches the hard-coded product URL with PyQuery and prints, in order:
    the large image URLs, the brand, the price, the size options and the
    product name.
    """
    pqhtml = PyQuery(
        url="http://www.stylebop.com/cn/product_details.php?id=606526&special=sale"
    )

    # Product images: each matching <li> is expected to hold a link at
    # li -> first child -> first child whose href carries quoted .jpg
    # paths; the second captured path is used as the large image.
    # NOTE(review): the quotes in this pattern were restored as ASCII
    # apostrophes from a garbled listing -- confirm against the live markup.
    pattern_img = re.compile(r".*?'(.*?jpg)'.*?'.*?'.*?'.*?'.*?'(.*?jpg)'.*?")
    img_list = []
    for li in pqhtml("li").filter(".image_click_rotator"):
        try:
            href = li[0][0].get("href")
        except IndexError:
            # Markup without the expected nested link: skip this entry
            # instead of aborting the whole scrape.
            continue
        img_large = _large_image_url(href, pattern_img)
        if img_large is not None:
            img_list.append(img_large)
    print("产品图片:", img_list)

    # Brand: text of the first link inside the .productInfo div.
    brand = pqhtml("div").filter(".productInfo")("a:first").text()
    print("品牌:%s" % brand)

    # Price: the div with id "product_price".
    price_div = pqhtml("div").filter("#product_price")
    if price_div("span:first").hasClass("old_price"):
        # First span is the old price, so the current price is read from
        # the second and fourth spans.
        # NOTE(review): presumably the same price in two currencies -- confirm.
        new_price = (
            price_div("span:eq(1)").text() + " / " + price_div("span:eq(3)").text()
        )
    else:
        new_price = price_div.text() + " / " + price_div("span:first").text()
    # Printed as a separate argument rather than with %-formatting: the
    # original author noted that the formatted form failed with
    # "'ascii' codec can't encode character u'\u20ac'".
    print("价格:", new_price)

    # Sizes: text of every <option> in the select with class "newInput2".
    size_list = [
        option.text for option in pqhtml("select").filter(".newInput2")("option")
    ]
    print("size:", size_list)

    # Product name: text of the first span inside the .productInfo div.
    pname = pqhtml("div").filter(".productInfo")("span:first").text()
    print("产品名:%s" % pname)

# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()

 

python例子-PyQuery抓取信息.

标签:

原文地址:http://www.cnblogs.com/xccnblogs/p/4894405.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!