码迷,mamicode.com
首页 > 其他好文 > 详细

2016

时间:2016-06-06 00:42:18      阅读:134      评论:0      收藏:0      [点我收藏+]

标签:

package Demo;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A small breadth-first web crawler.
 *
 * <p>Starting from one URL, it prints each page address the first time it is
 * visited, downloads the page, and queues every double-quoted
 * {@code http:}/{@code https:} link found in the page text.
 */
public class WebCrawler {

    /**
     * The crawl loop keeps running while the number of visited URLs is
     * {@code <=} this value, so at most {@code VISITED_THRESHOLD + 1} pages
     * are visited in one crawl.
     */
    private static final int VISITED_THRESHOLD = 10;

    /**
     * Matches an opening double quote followed by an http/https URL (group 1).
     * The closing quote is only looked ahead at, not consumed, so the next
     * search resumes at that quote.
     */
    private static final Pattern QUOTED_LINK = Pattern.compile("\"(https?:[^\"]*)(?=\")");

    /**
     * Prompts for a start URL on standard input and crawls from it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Scanner in = new Scanner(System.in);
        System.out.println("Please enter a url");
        if (!in.hasNextLine()) {
            // Empty stdin: nothing to crawl, and nextLine() would throw.
            System.err.println("No url entered");
            return;
        }
        crawler(in.nextLine().trim());
    }

    /**
     * Crawls breadth-first from {@code url}, printing each URL the first time
     * it is visited. Stops when no URLs are pending or once more than
     * {@link #VISITED_THRESHOLD} URLs have been visited.
     *
     * @param url the start URL; a {@code null} or empty value is reported on
     *            standard error and nothing is crawled
     */
    public static void crawler(String url) {
        if (url == null || url.isEmpty()) {
            System.err.println("No start url given");
            return;
        }

        Deque<String> pending = new ArrayDeque<>();
        Set<String> visited = new HashSet<>();
        pending.add(url);

        while (!pending.isEmpty() && visited.size() <= VISITED_THRESHOLD) {
            String current = pending.remove();
            if (!visited.add(current)) {
                // Already visited: do not print or download the page again.
                continue;
            }
            System.out.println(current);
            for (String link : getSubstring(current)) {
                if (!visited.contains(link)) {
                    pending.add(link);
                }
            }
        }
    }

    /**
     * Downloads the resource at {@code url} and returns every double-quoted
     * {@code http:} or {@code https:} link found in it, in order of
     * appearance. A link must open and close on the same line.
     *
     * <p>This is best-effort: if the URL is {@code null}, malformed, or cannot
     * be opened, the problem is reported on standard error and the links
     * collected so far (possibly none) are returned.
     *
     * @param url address of the page to scan
     * @return the links found, without the surrounding quotes; never
     *         {@code null}
     */
    public static ArrayList<String> getSubstring(String url) {
        ArrayList<String> links = new ArrayList<>();
        if (url == null) {
            return links;
        }
        // try-with-resources closes the Scanner and the stream it wraps.
        try (Scanner input = new Scanner(new URL(url).openStream(), "UTF-8")) {
            while (input.hasNextLine()) {
                Matcher matcher = QUOTED_LINK.matcher(input.nextLine());
                while (matcher.find()) {
                    links.add(matcher.group(1));
                }
            }
        } catch (IOException | RuntimeException ex) {
            // Links scraped from arbitrary pages may be malformed or
            // unreachable; skip the page instead of aborting the crawl.
            System.err.println("Could not read " + url + ": " + ex);
        }
        return links;
    }

}

2016

标签:

原文地址:http://www.cnblogs.com/laigaoxiaode/p/5562410.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!