码迷,mamicode.com
首页 > 其他好文 > 详细

通过设置Referer反"反盗链"

时间:2016-01-28 21:07:00      阅读:403      评论:0      收藏:0      [点我收藏+]

标签:

package cn.searchphoto.util;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;
/**
 * Downloads images from remote web sites, setting the Referer header so that
 * servers with hotlink protection still serve the file.
 *
 * @author JAVA世纪网(java2000.net, laozizhu.com)
 */
public class ImageDownloader {
    /**
     * Downloads a remote file to the given location.
     *
     * @param imgurl the URL to download
     * @param f the target file
     * @return the file on success, or {@code null} if anything failed
     *         (the exception's stack trace is printed)
     */
    public static File download(String imgurl, File f) {
        try {
            URL url = new URL(imgurl);
            URLConnection con = url.openConnection();

            // Take the host from the parsed URL instead of slicing the string at
            // a fixed offset, which was only correct for "http://" URLs.
            // NOTE(review): some JDK HTTP implementations treat Host as a
            // restricted header and fill it in themselves - confirm whether
            // setting it here has any effect.
            String host = url.getHost();
            if (host != null && host.length() > 0) {
                int port = url.getPort();
                con.setRequestProperty("Host", port == -1 ? host : host + ":" + port);
            }
            // The image's own URL is sent as the Referer.
            con.setRequestProperty("Referer", imgurl);

            InputStream is = con.getInputStream();
            try {
                // Transparently decompress gzip-encoded responses.
                if ("gzip".equalsIgnoreCase(con.getContentEncoding())) {
                    is = new GZIPInputStream(is);
                }
                OutputStream os = new FileOutputStream(f);
                try {
                    byte[] bs = new byte[1024];
                    int len;
                    while ((len = is.read(bs)) != -1) {
                        os.write(bs, 0, len);
                    }
                } finally {
                    // Best-effort close, as in the original.
                    try {
                        os.close();
                    } catch (Exception ex) {
                    }
                }
            } finally {
                // Runs even when opening the output file or the gzip wrapper
                // fails, so the connection's stream is never leaked.
                try {
                    is.close();
                } catch (Exception ex) {
                }
            }
            return f;
        } catch (Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }
}

 

# 1. Cookie handling (Python 2: urllib2 / cookielib)
import urllib2
import cookielib
# Build an opener whose cookie processor is backed by a CookieJar, then
# install it globally so that plain urllib2.urlopen() calls go through it.
cookie_support = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
urllib2.install_opener(opener)
# 'http://XXXX' is a placeholder; the URL must be a quoted string literal.
content = urllib2.urlopen('http://XXXX').read()
 
# 2. Using a proxy together with cookies
# proxy_support was referenced but never defined in this listing. The address
# below is a placeholder - replace it with a real proxy before running.
proxy_support = urllib2.ProxyHandler({'http': 'http://XX.XX.XX.XX:XXXX'})
# cookie_support is the HTTPCookieProcessor created in the cookie-handling
# snippet earlier in this listing.
opener = urllib2.build_opener(proxy_support, cookie_support, urllib2.HTTPHandler)
 
# 3. Submitting a form
import urllib
# Form fields for the sign-in POST; the XXXXX values are placeholders.
postdata = urllib.urlencode({
    'username': 'XXXXX',
    'password': 'XXXXX',
    'continueURI': 'http://www.verycd.com/',
    # NOTE(review): fk is not defined anywhere in this listing; presumably it
    # is a token taken from the sign-in page - confirm before running.
    'fk': fk,
    # Non-ASCII literal: under Python 2 the source file needs a PEP 263
    # coding declaration (e.g. utf-8) for this to parse.
    'login_submit': '登录',
})

# Supplying data makes urllib2 send the request as a POST.
req = urllib2.Request(
    url='http://secure.verycd.com/signin/*/http://www.verycd.com/',
    data=postdata,
)
result = urllib2.urlopen(req).read()
 
# 4. Pretending to be a browser
# Send a browser-like User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
}
# postdata is the urlencoded form body built in the form-handling snippet
# earlier in this listing.
req = urllib2.Request(
    url='http://secure.verycd.com/signin/*/http://www.verycd.com/',
    data=postdata,
    headers=headers,
)
 
# 5. Getting around hotlink protection
# Send a Referer header naming a page on the target site.
headers = {
    'Referer': 'http://www.cnbeta.com/articles',
}



 

#6 多线程并发抓取 
from threading import Thread
from Queue import Queue
from time import sleep
# Shared state for the worker-pool example:
#   JOBS - how many tasks get queued
#   NUM  - total number of concurrent worker threads
#   q    - the task queue the workers consume from
JOBS = 10
NUM = 2
q = Queue()
# Handler for a single task.
def do_somthing_using(arguments):
    """Process one task; in this example that just means printing it.

    Args:
        arguments: the task payload taken from the queue.
    """
    # Call form of print: with a single parenthesised argument this prints the
    # same text under the Python 2 print statement and the Python 3 function.
    print(arguments)
# Worker loop: keeps pulling tasks off the queue and processing them.
def working():
    """Run forever as a worker thread, handling one queued task at a time.

    Each task is taken from the module-level queue ``q``, passed to
    ``do_somthing_using`` and followed by a one-second pause. The task is
    always marked done, even when the handler raises, so ``q.join()`` cannot
    hang on a failed task.
    """
    import traceback  # local import keeps this snippet self-contained

    while True:
        arguments = q.get()
        try:
            do_somthing_using(arguments)
            sleep(1)
        except Exception:
            # Report the failure but keep the worker alive for later tasks.
            traceback.print_exc()
        finally:
            q.task_done()
# Start NUM daemon worker threads; each blocks waiting on the queue
for i in range(NUM):
    worker = Thread(target=working)
    worker.daemon = True
    worker.start()
# Queue up JOBS tasks (the integers produced by range(JOBS))
for i in range(JOBS):
    q.put(i)
# Block until every queued task has been marked done
q.join()

 

通过设置Referer反"反盗链"

标签:

原文地址:http://www.cnblogs.com/timdes/p/5167304.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!