标签:
用 Java 写的一个简单爬虫,用来抓取网页中的邮箱地址。邮箱的正则表达式只是随手写的,还需要优化,仅供娱乐。
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal command-line crawler: downloads a single page and prints every
 * substring that matches a (deliberately loose) e-mail pattern, one per line.
 */
public class spider {

    /**
     * Placeholder page address (the text means "URL to crawl for e-mail
     * addresses"). It is not a valid URL; replace it, or pass a real URL as
     * the first command-line argument / to {@link #getMail(String)}.
     */
    private static final String DEFAULT_PAGE_URL = "需要爬邮箱的网址";

    /**
     * E-mail pattern: 6-12 alphanumerics, '@', a 2-8 letter label, then one
     * or two dot-separated 2-3 letter suffixes. Compiled once because it is
     * reused for every line of the page.
     */
    private static final Pattern MAIL_PATTERN =
            Pattern.compile("[a-zA-Z0-9]{6,12}@[a-zA-Z]{2,8}(\\.[a-zA-Z]{2,3}){1,2}");

    /**
     * Program entry point. Crawls {@code args[0]} when an argument is given,
     * otherwise the built-in placeholder address. Any failure is reported by
     * printing its stack trace.
     *
     * @param args optional; the first element is used as the page URL
     */
    public static void main(String[] args) {
        try {
            if (args != null && args.length > 0) {
                getMail(args[0]);
            } else {
                getMail();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Crawls the built-in placeholder address.
     *
     * @throws Exception if the address is malformed or the page cannot be read
     */
    public static void getMail() throws Exception {
        getMail(DEFAULT_PAGE_URL);
    }

    /**
     * Reads the page at {@code pageUrl} line by line and prints each match of
     * the e-mail pattern to standard output, in order of appearance.
     *
     * <p>NOTE(review): the response is decoded with the platform default
     * charset, as before; confirm whether the target pages need an explicit
     * encoding.
     *
     * @param pageUrl absolute URL of the page to scan
     * @throws Exception if {@code pageUrl} is malformed or an I/O error occurs
     */
    public static void getMail(String pageUrl) throws Exception {
        URL url = new URL(pageUrl);
        URLConnection conn = url.openConnection();
        // try-with-resources closes the reader (and the underlying stream)
        // even when reading fails part-way through.
        try (BufferedReader in =
                new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                Matcher m = MAIL_PATTERN.matcher(line);
                while (m.find()) {
                    System.out.println(m.group());
                }
            }
        }
    }
}
标签:
原文地址:http://www.cnblogs.com/vipwolf/p/4403322.html