标签:yield list spider end spi lis cookies turn 调用
# -*- coding: utf-8 -*-
from urllib.parse import urlencode

import scrapy
from scrapy.dupefilters import RFPDupeFilter
from scrapy.http import Request
from scrapy.http.cookies import CookieJar

from xdb.items import XdbItem


class ChoutiSpider(scrapy.Spider):
    """Spider named ``chouti`` whose start requests target chouti.com."""

    name = 'chouti'
    allowed_domains = ['chouti.com']
    start_urls = ['http://chouti.com/']
    # start_urls = ['http://127.0.0.1:80/app01/login/']

    # How the Scrapy engine obtains the start URLs from a spider:
    #   1. It calls start_requests() and takes the return value.
    #   2. v = iter(return value)
    #   3. req1 = v.__next__()
    #      req2 = v.__next__()
    #      req3 = v.__next__()
    # Since the result is only ever passed through iter(), start_requests()
    # may be written either as a generator or as a function returning a list.

    def start_requests(self):
        """Yield one ``Request`` per entry in ``start_urls``.

        Each request is created with ``self.parse`` as its callback.

        NOTE(review): no ``parse`` override is visible in this part of the
        file; confirm that one is defined, because the callback is looked up
        on ``self`` when this generator is consumed.
        """
        # Approach 1: generator -- requests are produced lazily, one per URL.
        for url in self.start_urls:
            yield Request(url=url, callback=self.parse)

        # Approach 2 (alternative -- use it INSTEAD of approach 1, never
        # together with it): build the full list and return it.
        #
        #     return [Request(url=url) for url in self.start_urls]
        #
        # Keeping both in one body is a bug: any `yield` turns the function
        # into a generator, so a trailing `return req_list` only sets the
        # StopIteration value and the listed requests are never scheduled.
标签:yield list spider end spi lis cookies turn 调用
原文地址:https://www.cnblogs.com/xiongfanyong/p/13122317.html