标签:
import java.io.IOException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class Snippet { public static void main(String[] args) throws IOException{ String baseUrl = "http://www.wwenglish.com"; Document listDoc = Jsoup.connect("http://www.wwenglish.com/en/club/cnntv/").get(); Elements dd = listDoc.select("td.category td.category_main a.list"); for(Element element: dd){ String mp3PageUrl = baseUrl + element.attr("href"); //System.out.println(mp3PageUrl); Document mp3Doc = Jsoup.connect(mp3PageUrl).get(); Elements mp3Elements = mp3Doc.select("a.wwmp3"); if(null != mp3Elements && mp3Elements.size()>0){ String mp3Url = baseUrl + mp3Elements.get(0).attr("href"); String fileName = mp3Url.substring(mp3Url.lastIndexOf("/")); //System.out.println(fileName); DownLoad thread = new DownLoad(mp3Url, "F:/wwenglish"+fileName); thread.start(); } } } }
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;

/**
 * Thread that copies the content behind a URL into a local file.
 *
 * <p>The transfer is performed in {@link #run()}, so it executes on this
 * thread once {@link #start()} is called. Constructing the object performs no
 * I/O.
 */
public class DownLoad extends Thread {

    /** Size of the chunk used when copying from the URL stream to the file. */
    private static final int BUFFER_SIZE = 8192;

    private final String mp3Url;
    private final String filepath;

    /**
     * Creates a download task; call {@link #start()} to run it.
     *
     * @param mp3Url   URL of the resource to fetch
     * @param filepath path of the local file to write
     */
    public DownLoad(String mp3Url, String filepath) {
        this.mp3Url = mp3Url;
        this.filepath = filepath;
    }

    /**
     * Opens the URL and streams its bytes into the target file. Both streams
     * are closed even when the copy fails part-way. An {@link IOException} is
     * reported on standard error and ends this thread without propagating.
     */
    @Override
    public void run() {
        try {
            URL url = new URL(mp3Url);
            try (InputStream in = url.openStream();
                 OutputStream out = new BufferedOutputStream(new FileOutputStream(filepath))) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int read;
                // Copy in chunks instead of one byte per read() call.
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
        } catch (IOException e) {
            System.err.println("Download failed: " + mp3Url + " -> " + filepath);
            e.printStackTrace();
        }
    }
}
使用 httpclient 和 jsoup 编写的一个简单程序,用于提取(下载)旺旺英语网站上的 MP3 音频。
库依赖如下:
<?xml version="1.0" encoding="UTF-8"?> <classpath> <classpathentry kind="src" path="src"/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/> <classpathentry kind="lib" path="H:/MavenRepository/org/jsoup/1.8.1/jsoup-1.8.1.jar"/> <classpathentry kind="lib" path="H:/MavenRepository/org/apache/httpcomponents/httpclient/4.0/httpclient-4.0.jar"/> <classpathentry kind="lib" path="H:/MavenRepository/org/apache/httpcomponents/httpcore/4.3.2/httpcore-4.3.2.jar"/> <classpathentry kind="output" path="bin"/> </classpath>
标签:
原文地址:http://blog.csdn.net/kanglecjr/article/details/43857549