标签:
1 package javaNet.Instance.ImageDownload; 2 3 import java.io.BufferedReader; 4 import java.io.File; 5 import java.io.FileOutputStream; 6 import java.io.IOException; 7 import java.io.InputStream; 8 import java.io.InputStreamReader; 9 import java.net.MalformedURLException; 10 import java.net.URL; 11 import java.util.ArrayList; 12 import java.util.regex.Matcher; 13 import java.util.regex.Pattern; 14 15 public class DownloadImgs { 16 17 private String url=null; 18 19 public DownloadImgs(String url) { 20 this.url=url; 21 } 22 23 //----------------------------------gethtml start----------------------------- 24 /** 25 * visit the baidu.img page to get the html 26 * @return inputStream 27 * @throws IOException 28 * @throws MalformedURLException 29 */ 30 public InputStream GetBaiduImgHtml_Stream() throws IOException,MalformedURLException { 31 URL img_Url=new URL(url); 32 return img_Url.openStream(); 33 } 34 35 /** 36 * convert the stream to the string 37 * @param inStrm 38 * @return string of the page 39 */ 40 public String InputStreamToString(InputStream inStrm){ 41 BufferedReader reader=new BufferedReader(new InputStreamReader(inStrm)); 42 StringBuilder sb=new StringBuilder(); 43 44 String line=null; 45 46 try { 47 while((line=reader.readLine())!=null){ 48 sb.append(line+‘\n‘); 49 } 50 } 51 catch (IOException e) 52 { 53 e.printStackTrace(); 54 } 55 finally 56 { 57 try 58 { 59 inStrm.close(); 60 } 61 catch (IOException e) 62 { 63 e.printStackTrace(); 64 } 65 } 66 return sb.toString(); 67 } 68 69 /** 70 * get the origin page of baidu.img 71 * @return 72 * @throws MalformedURLException 73 * @throws IOException 74 */ 75 public String GetBaiduImgHtml_Page() throws MalformedURLException, IOException 76 { 77 return this.InputStreamToString(this.GetBaiduImgHtml_Stream()); 78 } 79 /** 80 * test whether url have been visited the image page,and get the page. 81 * @param page 82 */ 83 public void Display_HtmlPage(String page) 84 { 85 System.out.println(page); 86 } 87 //-------------------------gethtml end---------------- 88 //-------------------------paretoimgurllist start----- 89 public ArrayList<String> ParsePageToImgList(String page,String imgPa) 90 { 91 ArrayList<String> imgList=new ArrayList<String>(); 92 Pattern pattern=Pattern.compile(imgPa); 93 Matcher matcher=pattern.matcher(page); 94 while(matcher.find()) 95 { 96 imgList.add(matcher.group(1)); 97 } 98 return imgList; 99 } 100 //------------------------paretoimgurllist end--------- 101 //------------------------DownloadFile start---------- 102 public boolean DownloadFile(String imgUrl,int index,String path) 103 { 104 try 105 { 106 File f=new File(path+"\\"+index+".jpg"); 107 System.out.println("下载:"+imgUrl); 108 URL url=new URL(imgUrl); 109 InputStream ins=url.openStream(); 110 FileOutputStream fout=new FileOutputStream(f); 111 byte[] buffer=new byte[2048]; 112 int bytes_number; 113 while((bytes_number=ins.read(buffer))!=-1) 114 { 115 fout.write(buffer,0,bytes_number); 116 fout.flush(); 117 } 118 ins.close(); 119 fout.close(); 120 } 121 catch(Exception e) 122 { 123 System.out.println("下载失败!"); 124 e.printStackTrace(); 125 return false; 126 } 127 System.out.println("下载完成..."); 128 return true; 129 } 130 //------------------------DownloadFile end---------- 131 132 //------------------------mkDir start---------- 133 /** 134 * make a direction for download the images in the native disk. 135 * @param path the native path 136 * @return is success 137 */ 138 public void MkDir(String path) 139 { 140 File dir=new File(path); 141 if(!dir.exists()) 142 { 143 dir.mkdirs(); 144 } 145 } 146 //------------------------mkDir end------------ 147 148 public void Display_ArrayList(ArrayList<String> list) 149 { 150 for(String temp:list) 151 { 152 System.out.println(temp); 153 } 154 } 155 156 public static void main(String[] args) throws MalformedURLException, IOException 157 { 158 String imgPa="\"objURL\":\"(.*?)\""; 159 String path="F:\\photos"; 160 int index=0; 161 DownloadImgs downloadimgs=new DownloadImgs("http://image.baidu.com/search/index?" 162 + "tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&sf=1" 163 + "&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0" 164 + "&istype=2&ie=utf-8&word=%E5%8A%A8%E6%BC%AB&oq=%E5%8A%A8%E6%BC%AB&rsp=-1"); 165 //downloadimgs.Display_HtmlPage(downloadimgs.GetBaiduImgHtml_Page()); 166 String htmlPage=downloadimgs.GetBaiduImgHtml_Page(); 167 ArrayList<String> imgList=downloadimgs.ParsePageToImgList(htmlPage, imgPa); 168 //downloadimgs.Display_ArrayList(imgList); 169 downloadimgs.MkDir(path); 170 for(String imgUrl:imgList) 171 downloadimgs.DownloadFile(imgUrl, (index++)+1, path); 172 173 System.out.println("一共下载了"+index+"个图片。"); 174 } 175 }
标签:
原文地址:http://www.cnblogs.com/593213556wuyubao/p/4976799.html