# 标签 (tags): [1] name python append item from end app src
#coding=utf-8
from selenium import webdriver
def crawMovie(output_path="e:\\movie_list.txt"):
    """Collect the movie cards shown on the Douban movie page and save them to a text file.

    The function opens https://movie.douban.com/ in a PhantomJS-driven
    browser, clicks the first ``a.more-link`` element, then repeatedly reads
    every ``a.item`` element that has not been processed yet and clicks the
    ``a.more`` link until that link carries a non-empty ``style`` attribute.
    For each item the cover URL (``img@src``), title (``img@alt``) and rating
    (text of ``.//p/strong``) are recorded.

    Each movie is written to ``output_path`` as one line of the form
    ``{key:value,key:value,...}``.

    Args:
        output_path: Destination text file. Defaults to the path the original
            script hard-coded, so existing callers are unaffected.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if one of the
            expected elements is missing from the page.
    """
    # Local import keeps this block self-contained; io.open accepts
    # ``encoding`` on both Python 2 and Python 3.
    import io

    driver = webdriver.PhantomJS()
    movie_list = []
    try:
        driver.get("https://movie.douban.com/")
        more_btn = driver.find_element_by_xpath('(//a[@class="more-link"])[1]')
        more_btn.click()
        while True:
            # Only look at items beyond the ones already collected.
            start_index = len(movie_list)
            xpath_str = '//a[@class="item"][position()>%d]' % start_index
            item_tags = driver.find_elements_by_xpath(xpath_str)
            print("start_index: %s" % start_index)
            print(item_tags)
            print("number: %s" % len(item_tags))
            for item_tag in item_tags:
                img_tag = item_tag.find_element_by_tag_name('img')
                cover = img_tag.get_attribute("src")
                title = img_tag.get_attribute("alt")
                rating = item_tag.find_element_by_xpath(".//p/strong").text
                movie = {
                    'cover': cover,
                    'title': title,
                    'rating': rating,
                }
                movie_list.append(movie)
            print("--" * 20)
            load_more_btn = driver.find_element_by_xpath('//a[@class="more"]')
            # A non-empty style attribute ends the loop.
            # NOTE(review): presumably the page hides the link via an inline
            # style once everything is loaded - confirm against the live site.
            if load_more_btn.get_attribute("style"):
                break
            load_more_btn.click()
    finally:
        # Always shut the headless browser down, even if scraping failed,
        # so no PhantomJS process is left behind.
        driver.quit()

    # Explicit UTF-8 so non-ASCII titles are written regardless of the
    # platform's default encoding.
    with io.open(output_path, "w", encoding="utf-8") as fp:
        for movie in movie_list:
            # join() avoids the trailing-comma bookkeeping and never strips
            # commas that belong to the data itself.
            fields = u",".join(u"%s:%s" % (key, movie[key]) for key in movie)
            fp.write(u"{" + fields + u"}\n")
if __name__ == "__main__":
    # Run the crawler only when this file is executed as a script,
    # not when it is imported as a module.
    crawMovie()
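# selenium+PhantomJS小案例—爬豆瓣网所有电影代码python
# (Small selenium + PhantomJS example: crawling all movies from Douban, Python code)
# 标签 (tags): [1] name python append item from end app src
# 原文地址 (original article): https://www.cnblogs.com/reyinever/p/9250467.html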