码迷,mamicode.com
首页 > 编程语言 > 详细

Python superbwallpapers 动漫分类 下载

时间:2015-01-15 21:55:13      阅读:281      评论:0      收藏:0      [点我收藏+]

标签:

动漫分类壁纸多线程下载,有Bug

# -*- coding: utf-8 -*-
import os,urllib2,re,urllib
from bs4 import BeautifulSoup
import socket
# Give every socket a default timeout so a stalled download cannot block forever.
# Background: http://outofmemory.cn/code-snippet/16848/python-through-urllib-urlretrieve-file-setting-method
# Another way: urllib.request.socket.setdefaulttimeout(20)
socket.setdefaulttimeout(25)

# Relative link to a wallpaper detail page: "/anime/", up to 50 characters,
# a dash and five digits.
re_link = re.compile(r'/anime/.{0,50}-\d{5}')
# Presumably the text shown on the site's "not found" page; it is not used by
# any code visible in this file.
re_404 = re.compile(r'Page not found - Please try some of the popular items below')

# Parallel lists filled by one_page(); entry i of each list describes the same
# wallpaper.
pic_page = []        # URL of the wallpaper's detail page
pic_name = []        # wallpaper name (the link with the "/anime/" prefix removed)
pic_url = []         # direct URL of the 1920x1080 image
pic_url_number = []  # index of the listing page the wallpaper was found on

# Listing pages are numbered 1..end_page on the site.
end_page = 40
main_url = [
    "http://www.superbwallpapers.com/anime/" + str(page_number) + ".html"
    for page_number in range(1, end_page + 1)
]

print(main_url)

#how_many = 0        
def one_page(main_url):
    """Scrape one listing page and record the wallpapers linked from it.

    Downloads the page at ``main_url``, collects the ``href`` of every ``<a>``
    tag that matches ``re_link`` and, for every second match, appends one
    entry to each of the module-level lists ``pic_page``, ``pic_name``,
    ``pic_url`` and ``pic_url_number``.

    NOTE: the value appended to ``pic_url_number`` is the module-level name
    ``x`` (the caller's loop variable), so this function only works when it is
    called from a module-level loop that binds ``x``.

    :param main_url: URL of the listing page to scrape.
    """
    response = urllib2.urlopen(main_url)
    try:
        main_page_html = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    soup = BeautifulSoup(main_page_html, from_encoding="gb18030")
    match_pic = []
    for link in soup.find_all('a'):
        # A missing href becomes the string "None", which never matches.
        match = re_link.match(str(link.get('href')))
        if match:
            match_pic.append(match.group())

    # Only the even-indexed matches are used -- presumably each wallpaper is
    # linked twice on the page (TODO confirm). The "- 1" in the bound is kept
    # from the original; verify it does not drop the last wallpaper.
    for i in range((len(match_pic) - 1) // 2):
        link_path = match_pic[i * 2]
        pic_page.append('http://www.superbwallpapers.com' + link_path)
        # Drop the leading "/anime/" (7 characters) to get the bare name.
        pic_name.append(link_path[7:])
        pic_url.append("http://cdn.superbwallpapers.com/wallpapers" + link_path + "-1920x1080.jpg")
        # x is the page index bound by the caller's module-level loop.
        pic_url_number.append(x)
        
# Scrape every listing page and make sure its download directory exists.
# The loop variable must be named x: one_page() reads it as a global to tag
# each picture with the index of the page it came from.
for x in range(end_page):
    one_page(main_url[x])
    title = "K://PIC/" + str(x)
    if not os.path.isdir(title):
        # makedirs also creates the parent directory when it is missing.
        os.makedirs(title)

# Dump the collected image URLs; the with-block closes the file even if the
# write fails.
with open('K://PIC/url.txt', 'w+') as output:
    output.write(str(pic_url))

# State shared by the download threads.
pic_number = 0   # index of the next picture to hand to a worker; guarded by lock
url_fail = []    # for each failed download: its URL followed by its page index
import threading
how_many = 0     # not used by the code visible in this file
lock = threading.Lock()
class myThread (threading.Thread):
    """Worker thread that downloads one wallpaper.

    Each worker claims the next unclaimed index from the module-level counter
    ``pic_number`` (under ``lock``) and downloads the matching picture into
    ``K://PIC/<page index>/<name>.jpg``. A failed download is retried once;
    if it fails again, the URL and its page index are appended to the
    module-level list ``url_fail``.
    """

    def __init__(self, pic_url):
        """Store the list of image URLs that the worker indexes into."""
        threading.Thread.__init__(self)
        self.pic_url = pic_url

    def run(self):
        """Claim one picture index and download that picture."""
        global pic_number
        thread_name = threading.currentThread().getName()

        print('%s acquire lock...' % thread_name)
        with lock:
            print('%s get the lock.' % thread_name)
            o1 = pic_number
            pic_number += 1
        print('%s release lock...' % thread_name)

        # More workers may have been started than there are pictures; a
        # surplus worker has nothing to do.
        if o1 >= len(self.pic_url):
            return

        url = self.pic_url[o1]
        target = "K://PIC/" + str(pic_url_number[o1]) + "/" + str(pic_name[o1]) + ".jpg"

        # Try the download twice before giving up.
        # Details: http://www.nowamagic.net/academy/detail/1302861
        for _attempt in range(2):
            try:
                urllib.urlretrieve(url, target)
            except Exception:  # e.g. socket.timeout; keep the worker best-effort
                continue
            print("Picture " + str(pic_name[o1]) + " Downloaded")
            return

        # Both attempts failed: record the URL and its page index together.
        with lock:
            url_fail.append(url)
            url_fail.append(pic_url_number[o1])
        print("-----socket timout-----,record...")
        
def start_new_thread():
    """Create a myThread worker for the module-level pic_url list and start it."""
    myThread(pic_url).start()
import time

# Start exactly one worker per collected picture. Counting started workers
# here (instead of polling pic_number, which the workers update
# asynchronously) ensures no surplus thread is launched.
for _ in pic_url:
    # activeCount() includes the main thread, so this allows 6 workers at once.
    while threading.activeCount() >= 7:
        time.sleep(0.1)  # wait for a free slot without spinning the CPU
    start_new_thread()

 技术分享

Python superbwallpapers 动漫分类 下载

标签:

原文地址:http://www.cnblogs.com/mioakiyama/p/4227273.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!