标签:compile client end count 编码 img buffered line nbsp
class Solution { public String InitDocument(String link,String code,int time,int charter) { if(time-1==charter)return null; System.out.println("第"+time+"章:"); FileWriter file; BufferedWriter bw; Document doc; URLConnection src; BufferedReader buff; StringBuilder html=new StringBuilder(); String line; String reg="div#content"; Elements ele; URL url; String linkreg="a"; String filename="斗罗大陆.txt"; Elements elx; String NexthtmlLink=null; int count=1; Matcher m; try { url=new URL(link); src = url.openConnection(); buff=new BufferedReader(new InputStreamReader(src.getInputStream(),code)); while((line=buff.readLine())!=null) { html.append(line+"\n"); } doc=Jsoup.parse(html.toString()); ele=doc.select(reg); elx=doc.select(linkreg); file=new FileWriter(new File(filename),true); bw=new BufferedWriter(file); for(Element elem:ele) { bw.write(elem.wholeText()+"\n"); } bw.close(); file.close(); for(Element elem:elx) { if((m=Pattern.compile("/shu/518/\\d+\\.html").matcher(elem.attr("href"))).find()&&count<2) { NexthtmlLink=elem.attr("href"); System.out.println("上一章链接:"+"https://www.qb5.tw"+NexthtmlLink); count++; } else if((m=Pattern.compile("/shu/518/\\d+\\.html").matcher(elem.attr("href"))).find()&&count==2) { NexthtmlLink=elem.attr("href"); System.out.println("下一章链接:"+"https://www.qb5.tw"+NexthtmlLink); return "https://www.qb5.tw"+NexthtmlLink; } } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null; } } public class Main { private static String link; private static String code; private static Solution space=new Solution(); private static Scanner iner=new Scanner(System.in); private static String line; private static int cout=1; private static int charter=0; public static void main(String[] args) { System.out.print("请输入网址:"); link=iner.nextLine(); System.out.print("请输入网页的编码方式:"); code=iner.nextLine(); System.out.print("请输入要下载的章数:"); charter=iner.nextInt(); line=space.InitDocument(link, code, cout,charter); while(line!=null) { link=line; System.out.println(link); line=space.InitDocument(link, code, ++cout,charter); } } } public class Main { public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException { final WebClient webClient=new WebClient(BrowserVersion.CHROME); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setThrowExceptionOnScriptError(false); final HtmlPage page=webClient.getPage("http://www.4399dmw.com/mh/bailangxi/320654.html"); DomNodeList<DomElement> div=page.getElementsByTagName("img"); div.forEach(item->{ String line=item.getAttribute("src").toString(); if((line.startsWith("http")||line.startsWith("https"))&&(Pattern.compile("\\d+.jpg").matcher(line)).find()) { try { URL url=new URL(item.getAttribute("src")); BufferedInputStream in=new BufferedInputStream(url.openStream()); FileOutputStream file=new FileOutputStream(new File(line.substring(line.lastIndexOf("/")+1))); int bit=0; while((bit=in.read())!=-1) { file.write(bit); } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } System.out.println(item.getAttribute("src")); } }); System.out.println(div.size()); webClient.close(); } } public class Main { public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException { WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.waitForBackgroundJavaScript(30 * 1000); HtmlPage page = webClient.getPage("https://manhua.dmzj.com/dccyd/571.shtml#@page=20"); String pageXml = page.asXml(); Document doc = Jsoup.parse(pageXml); Elements ele=doc.select("img"); System.out.println(ele.size()); for(Element elem:ele) { String line=elem.attr("src"); if((Pattern.compile("\\d+\\.(png|jpg)").matcher(elem.attr("src")).find())) { URL url=null; if(!(line.startsWith("https:")||line.startsWith("http:"))) { System.out.println("https:"+line); url=new URL("https:"+line); } else { System.out.println(line); url=new URL(line); } BufferedInputStream in=new BufferedInputStream(url.openStream()); FileOutputStream file=new FileOutputStream(new File(line.substring(line.lastIndexOf("/")+1))); int bit=0; while((bit=in.read())!=-1) { file.write(bit); } System.out.println("写入成功!"); } } } } public class Main { public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException { WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.waitForBackgroundJavaScript(30 * 1000); HtmlPage page = webClient.getPage("https://manhua.dmzj.com/dccyd/571.shtml#@page=20"); String pageXml = page.asXml(); Document doc = Jsoup.parse(pageXml); Elements ele=doc.select("img"); System.out.println(ele.size()); for(Element elem:ele) { String line=elem.attr("src"); if((Pattern.compile("\\d+\\.(png|jpg)").matcher(elem.attr("src")).find())) { URL url=null; if(!(line.startsWith("https:")||line.startsWith("http:"))) { System.out.println("https:"+line); url=new URL("https:"+line); } else { System.out.println(line); url=new URL(line); } BufferedInputStream in=new BufferedInputStream(url.openStream()); FileOutputStream file=new FileOutputStream(new File(line.substring(line.lastIndexOf("/")+1))); int bit=0; while((bit=in.read())!=-1) { file.write(bit); } System.out.println("写入成功!"); } } } }
标签:compile client end count 编码 img buffered line nbsp
原文地址:https://www.cnblogs.com/z2529827226/p/11831086.html