#!/usr/bin/python
# coding:utf8
"""Download a batch of URLs concurrently using pycurl's CurlMulti interface."""
from __future__ import division
from __future__ import print_function

import pycurl
import simplejson as json

# Ignore SIGPIPE so a write to a connection closed by the peer does not
# terminate the process. The import fails on platforms where the signal
# module (or SIGPIPE itself) is unavailable, in which case nothing is done.
try:
    import signal
    from signal import SIGPIPE, SIG_IGN
    signal.signal(signal.SIGPIPE, signal.SIG_IGN)
except ImportError:
    pass


def curlmulti_tsdb(urls):
    """Fetch every URL in *urls* concurrently and print each response.

    Each response body is written to ``url_NNN.dat`` in the current working
    directory (NNN is the 1-based position of the URL in *urls*). When a
    transfer succeeds, the file is read back and its lines are printed; when
    it fails, a ``Failed:`` line with the URL, error number and error message
    is printed instead.

    At most 20 transfers run at the same time, and never more handles are
    created than there are URLs.

    Args:
        urls: iterable of URL strings; surrounding whitespace is stripped.

    Returns:
        None.
    """
    num_conn = 20

    # Pair each URL with the file its response body is written to.
    queue = []
    for url in urls:
        url = url.strip()
        filename = "url_%03d.dat" % (len(queue) + 1)
        queue.append((url, filename))

    # Count from the queue so that iterables without len() also work.
    num_urls = len(queue)
    # Cap the pool at the number of URLs. The previous code passed the queue
    # list itself to min(), which never reduced the pool on Python 2 and
    # raises TypeError on Python 3.
    num_conn = min(num_conn, num_urls)

    m = pycurl.CurlMulti()
    m.handles = []
    try:
        # Pre-allocate a pool of reusable easy handles.
        for _ in range(num_conn):
            c = pycurl.Curl()
            c.fp = None
            c.setopt(pycurl.FOLLOWLOCATION, 1)
            c.setopt(pycurl.MAXREDIRS, 5)
            c.setopt(pycurl.CONNECTTIMEOUT, 5)
            c.setopt(pycurl.TIMEOUT, 100)
            c.setopt(pycurl.NOSIGNAL, 1)
            m.handles.append(c)

        # Main loop
        freelist = m.handles[:]
        num_processed = 0
        while num_processed < num_urls:
            # Hand queued URLs to idle handles and attach them to the multi.
            while queue and freelist:
                url, filename = queue.pop()
                c = freelist.pop()
                c.fp = open(filename, "wb")
                c.setopt(pycurl.URL, url)
                c.setopt(pycurl.WRITEDATA, c.fp)
                m.add_handle(c)
                # Remember what this handle is fetching for later reporting.
                c.filename = filename
                c.url = url

            # Drive the transfers until libcurl no longer asks to be called
            # again immediately.
            while 1:
                ret, num_handles = m.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            # Collect finished transfers and return their handles to the pool.
            while 1:
                num_q, ok_list, err_list = m.info_read()
                for c in ok_list:
                    c.fp.close()
                    c.fp = None
                    with open(c.filename, "rb") as f:
                        print(f.readlines())
                    m.remove_handle(c)
                    freelist.append(c)
                for c, errno, errmsg in err_list:
                    c.fp.close()
                    c.fp = None
                    m.remove_handle(c)
                    print("Failed:", c.url, errno, errmsg)
                    freelist.append(c)
                num_processed = num_processed + len(ok_list) + len(err_list)
                if num_q == 0:
                    break

            # Wait (up to one second) for socket activity before looping.
            m.select(1.0)
    finally:
        # Always release files and curl handles, even if a transfer or a
        # file operation raised part-way through.
        for c in m.handles:
            if c.fp is not None:
                c.fp.close()
                c.fp = None
            c.close()
        m.close()
# Original article: http://curran.blog.51cto.com/2788306/1682034