# Tags: attr int requests print stc tty lis curl content
from lxml import etree
import requests
def getHtml(html, timeout=10):
    """Download a page and parse it into an lxml HTML element tree.

    Args:
        html: URL of the page to download. Despite its name, this value is
            handed to ``requests.get`` as the address to fetch; it is not
            HTML markup.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled server would block the caller
            indefinitely.

    Returns:
        The result of ``etree.HTML`` applied to the raw response body.

    Raises:
        requests.exceptions.RequestException: if the request fails or the
            timeout expires.
    """
    # ``.content`` is the undecoded response body; it is passed to lxml as-is.
    novelcontent = requests.get(html, timeout=timeout).content
    return etree.HTML(novelcontent)
# Crawl script: fetch the start page, follow every link found inside an
# <li class="airline"> element, and print the ``src`` of each
# <img class="plane"> on the linked page.
source = getHtml("http://www.cabintu.com")
urllist = source.xpath('//li[@class="airline"]/a')
for i in urllist:
    # Use .get() so an anchor without an href is skipped instead of raising
    # KeyError and aborting the whole crawl.
    url = i.get('href')
    if not url:
        continue
    # NOTE(review): the href is fetched exactly as written in the page. If the
    # site emits relative links they would need resolving against the base
    # URL first -- confirm against the live markup.
    sources = getHtml(url)
    picurl = sources.xpath('//img[@class="plane"]')
    for j in picurl:
        urls = j.get('src')
        if urls:
            # Parenthesised single-argument form works on Python 2 and 3.
            print(urls)

# Disabled alternative queries. In the original they were wrapped in a
# triple-quoted string so they never executed; kept here for reference.
#
# listclassify = source.xpath('//ul[@class="sg_menu"]/li/a')
# listtype = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]/ul[@class="subnav_a"]/li[@class="airline"]/a')
# fname = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]/a/text()')
# for a in fname:
#     print(a)
# typelist = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]//ul[@class="subnav_a"]/li[@class="airline"]/a/text()')
# for b in typelist:
#     print(b)
# Tags: attr int requests print stc tty lis curl content
# Original article: http://www.cnblogs.com/cutepython/p/6103489.html