标签:
操作系统:Windows 7
Python版本:2.7.9
# -*- coding: utf-8 -*- import sys reload(sys) sys.setdefaultencoding(‘utf-8‘) ‘‘‘ 作者:昨夜星辰 ‘‘‘ import re import os import requests from bs4 import BeautifulSoup # 创建一个用于存放头像的文件夹 folder = u‘头像‘ os.mkdir(folder) # 用于存放用户的cid cidList = [] # 用于头像计数 picCnt = 1 url = ‘http://1.163.com/list.html‘ bs = BeautifulSoup(requests.get(url).text, ‘lxml‘) for div in bs(‘div‘, ‘w-goods w-goods-l w-goods-ing‘): gid = div[‘data-gid‘] period = div[‘data-period‘] url = ‘http://1.163.com/record/getDuobaoRecord.do?pageNum=1&pageSize=50&totalCnt=0&gid=%s&period=%s‘ % (gid, period) json = requests.get(url).json() totalCnt = json[‘result‘][‘totalCnt‘] for pageNum in range(1, totalCnt / 50 + 1): url = ‘http://1.163.com/record/getDuobaoRecord.do?pageNum=%d&pageSize=50&totalCnt=%d&gid=%s&period=%s‘ % (pageNum, totalCnt, gid, period) json = requests.get(url).json() for _list in json[‘result‘][‘list‘]: cid = _list[‘user‘][‘cid‘] if cid in cidList: continue cidList.append(cid) avatarPrefix = _list[‘user‘][‘avatarPrefix‘] if avatarPrefix: url = avatarPrefix + ‘160.jpeg‘ filename = folder + ‘\\‘ + re.search(r‘.*/(.*)‘, url).group(1) print u‘正在保存第%d个头像...‘ % picCnt picCnt += 1 with open(filename, ‘wb‘) as f: f.write(requests.get(url).content)
爬虫实例——爬取1元夺宝用户头像(借助谷歌浏览器开发者工具)
标签:
原文地址:http://www.cnblogs.com/yestreenstars/p/5565245.html