码迷,mamicode.com
首页 > Web开发 > 详细

jsoup爬取网站图片

时间:2018-04-05 19:16:42      阅读:199      评论:0      收藏:0      [点我收藏+]

标签:get   static   nec   public   file   nts   .com   str   ima   



package
com.ij34.JsoupTest; import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLEncoder; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Random; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class JsoupTest { public static void downImages(String filePath,String imgUrl) throws Exception { //获取网址 String beforeUrl = imgUrl.substring(0,imgUrl.lastIndexOf("/")+1); //图片url后面的图片名字 String fileName = imgUrl.substring(imgUrl.lastIndexOf("/")+1); String newFileName = URLEncoder.encode(fileName, "UTF-8"); //"+"替换为UTF-8中的空格 newFileName = newFileName.replaceAll("\\+", "\\%20"); //编码之后的url imgUrl = beforeUrl + newFileName; //创建文件目录 File files = new File(filePath); if (!files.exists()) { files.mkdirs(); } URL url = new URL(imgUrl); HttpURLConnection connection = (HttpURLConnection)url.openConnection(); InputStream is = connection.getInputStream(); Date day=new Date(); SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmmss"); Random ra=new Random(); int Num=ra.nextInt(11)+100; String fn=df.format(day)+Num; //去图片的格式例如.jpg .jpeg int lastIndex=fileName.lastIndexOf("."); String result=fileName.substring(lastIndex); File file = new File(filePath +fn+ result); FileOutputStream out = new FileOutputStream(file); int i = 0; while((i = is.read()) != -1){ out.write(i); } } public static void main(String[] args) throws Exception { //int[] a=new int[]{}; //for(int i=a.length-1;i>=0;i--){ //爬取的网址 String url = "http://www.ivsky.com/tupian/laohu_v45527";//+a[i]; String savePath = "D://webmagic//"; Document document = Jsoup.connect(url).get(); Elements elements = document.getElementsByTag("img"); for(Element element : elements){ //图片的绝对路径 String imgSrc = element.attr("abs:src"); //取jpg格式 if(imgSrc.contains(".jpg")){ downImages(savePath, imgSrc); System.out.println(url+":"+imgSrc); } } // } } }

 

 

技术分享图片

技术分享图片

 

jsoup爬取网站图片

标签:get   static   nec   public   file   nts   .com   str   ima   

原文地址:https://www.cnblogs.com/tk55/p/8723757.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!