Java 爬取网页上的 QQ 号、邮箱号等

时间：2018-06-27 22:31:07 阅读：548 评论：0 收藏：0 [点我收藏+]

标签：爬取 except mail main htm puts 汉字 cep .com

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads a web page and prints every substring of it that matches a
 * regular expression (runs of Chinese characters by default).
 */
public class GetMail {

    /** The page that is fetched and scanned. */
    private static final String PAGE_URL =
            "https://wenku.baidu.com/view/ce81b0a1ddccda38366baf61.html";

    /*
     * Extraction rule applied to every line of the page. To extract something
     * else, swap in one of these alternatives:
     *   e-mail addresses:   "\\w+@\\w+(\\.\\w+)+"
     *   Chinese characters: "[\u4E00-\u9FA5]+"
     *   QQ numbers:         "[1-9][0-9]{4,14}"
     *   QQ mail addresses:  "(.)+@(.)+(\\.[a-z]+){1,}"
     */
    private static final Pattern MATCH_RULE = Pattern.compile("[\u4E00-\u9FA5]+");

    /** Finds the {@code charset} parameter inside a Content-Type header value. */
    private static final Pattern CHARSET_PARAM = Pattern.compile(
            "charset\\s*=\\s*\"?([A-Za-z0-9][A-Za-z0-9._:+\\-]*)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Program entry point; runs the URL-based extraction.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the page cannot be fetched or read
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the original also carried a disabled call to getMails(),
        // a method that is not defined in the visible source.
        getMails_url();
    }

    /**
     * Fetches {@link #PAGE_URL}, reads it line by line and prints each match of
     * {@link #MATCH_RULE} on its own line to standard output.
     *
     * @throws Exception if the URL is malformed, the connection fails, or
     *                   reading the response fails
     */
    public static void getMails_url() throws Exception {
        URLConnection conn = new URL(PAGE_URL).openConnection();
        // Decode with the charset the server declares rather than the platform
        // default; otherwise non-ASCII text (which the default rule targets)
        // is garbled whenever the two differ.
        Charset charset = charsetOf(conn.getContentType());

        // try-with-resources closes the reader and the underlying connection
        // stream even when reading fails part-way through.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher m = MATCH_RULE.matcher(line);
                while (m.find()) {
                    System.out.println(m.group());
                }
            }
        }
    }

    /**
     * Picks the charset named in a Content-Type header value.
     *
     * <p>Package-private so it can be checked without network access.
     *
     * @param contentType the Content-Type header value; may be {@code null}
     * @return the declared charset when one is present and supported by this
     *         JVM, otherwise UTF-8
     */
    static Charset charsetOf(String contentType) {
        if (contentType != null) {
            Matcher m = CHARSET_PARAM.matcher(contentType);
            // The capture group only admits legal charset-name characters, so
            // isSupported cannot throw IllegalCharsetNameException here.
            if (m.find() && Charset.isSupported(m.group(1))) {
                return Charset.forName(m.group(1));
            }
        }
        return StandardCharsets.UTF_8;
    }
}

Java 爬取网页上的 QQ 号、邮箱号等

标签：爬取 except mail main htm puts 汉字 cep .com

原文地址：https://www.cnblogs.com/zxwm/p/9235960.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行