码迷,mamicode.com
首页 > Web开发 > 详细

jsoup抓取豆瓣美女

时间:2015-07-26 15:51:47      阅读:203      评论:0      收藏:0      [点我收藏+]

标签:url   jsoup   爬虫   抓取   java   

package com.huowolf;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small jsoup crawler: requests a fixed number of listing pages, collects the
 * {@code src} of every {@code img} element on each page and saves the
 * referenced files into a local directory.
 *
 * <p>The crawl is best-effort: a page or an image that fails is reported on
 * standard error and skipped, and the crawl continues with the next one.
 */
public class Demo4 {

	/** Listing page URL prefix; the zero-based page index is appended to it. */
	private static final String PAGE_URL = "http://www.dbmeinv.com/?p=";
	/** Directory the downloaded images are written to. */
	private static final String PIC_DIR = "d:/picTest";
	// NOTE(review): the closing ')' of this user-agent string is missing. It is
	// kept byte-for-byte because the server's reaction to a changed value is unknown.
	private static final String USER_AGENT = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0";
	/** Number of listing pages requested by {@link #main(String[])}. */
	private static final int PAGE_COUNT = 50;
	/** Size in bytes of the copy buffer used when saving an image. */
	private static final int BUFFER_SIZE = 8192;

	/**
	 * Requests each listing page and downloads every image found on it.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		System.out.println("下载完的图片位于" + PIC_DIR);
		for (int i = 0; i < PAGE_COUNT; i++) {
			try {
				Document doc = Jsoup.connect(PAGE_URL + i)
						.userAgent(USER_AGENT)
						.timeout(3000)
						.data("pager_offset", String.valueOf(i + 1))
						.post();
				Elements img = doc.select("img");
				for (Element ele : img) {
					// absUrl yields "" when no absolute URL can be built;
					// getImage skips such values.
					getImage(ele.absUrl("src"));
				}
			} catch (IOException e) {
				// One unreachable page must not abort the remaining pages.
				e.printStackTrace();
			}
		}

		System.out.println("这些足够多了!");
		System.out.println("图片下载完成!");
	}

	/**
	 * Downloads one image into {@link #PIC_DIR}, naming the file after the
	 * last path segment of the URL. URLs without a usable file name are
	 * skipped silently; I/O failures are printed and otherwise ignored.
	 *
	 * @param src absolute URL of the image; may be null or empty
	 */
	private static void getImage(String src) {
		String name = fileNameOf(src);
		if (name == null) {
			return;
		}

		File dir = new File(PIC_DIR);
		if (!dir.isDirectory() && !dir.mkdirs()) {
			System.err.println("Cannot create directory: " + dir);
			return;
		}

		InputStream in = null;
		OutputStream out = null;
		try {
			in = new URL(src).openStream();
			out = new BufferedOutputStream(new FileOutputStream(new File(dir, name)));
			// Copy in chunks instead of issuing one read call per byte.
			byte[] buffer = new byte[BUFFER_SIZE];
			for (int n; (n = in.read(buffer)) != -1;) {
				out.write(buffer, 0, n);
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Closed independently so that a stream that was never opened, or a
			// failure while closing one stream, cannot prevent closing the other.
			closeAndReport(out);
			closeAndReport(in);
		}
	}

	/**
	 * Extracts the file name (last path segment) from an image URL, dropping
	 * any fragment or query string because characters such as '?' are not
	 * valid in file names on every platform.
	 *
	 * @param src image URL; may be null or empty
	 * @return the file name without a leading slash, or null when the URL has
	 *         no usable last path segment
	 */
	private static String fileNameOf(String src) {
		if (src == null || src.length() == 0) {
			return null;
		}
		String path = src;
		int fragment = path.indexOf('#');
		if (fragment >= 0) {
			path = path.substring(0, fragment);
		}
		int query = path.indexOf('?');
		if (query >= 0) {
			path = path.substring(0, query);
		}
		int slash = path.lastIndexOf('/');
		if (slash < 0 || slash == path.length() - 1) {
			return null;
		}
		return path.substring(slash + 1);
	}

	/**
	 * Closes a resource if it was opened, printing (not propagating) any
	 * failure.
	 *
	 * @param resource resource to close; may be null
	 */
	private static void closeAndReport(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

程序已打包,拿去双击即可运行,下载地址:http://pan.baidu.com/s/1mgKiQiG

运行结果图:

技术分享

版权声明:本文为博主原创文章,未经博主允许不得转载。

jsoup抓取豆瓣美女

标签:url   jsoup   爬虫   抓取   java   

原文地址:http://blog.csdn.net/huolang_vip/article/details/47067719

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!