爬网页获取电话号码

时间：2018-08-07 00:31:16 阅读：465 评论：0 收藏：0 [点我收藏+]

标签：int while app match new lse exce htm readline

明天补充

 1 import java.io.BufferedReader;
 2 import java.io.IOException;
 3 import java.io.InputStream;
 4 import java.io.InputStreamReader;
 5 import java.net.URL;
 6 import java.net.URLConnection;
 7 import java.util.regex.Matcher;
 8 import java.util.regex.Pattern;
 9 
10 public class URLTest {
11 
12     private URLConnection connection = null;
13     
14     public String getDoucument(String url) throws IOException {
15         
16         URL newUrl = new URL(url);
17         connection = newUrl.openConnection();
18         InputStream is = connection.getInputStream();
19         InputStreamReader isr = new InputStreamReader(is);
20         BufferedReader br = new BufferedReader(isr);
21         
22         String line = "";
23         StringBuffer sb = new StringBuffer();
24         
25         while( (line =  br.readLine()) != null) {
26             sb.append(line +"\n");
27         }
28         
29         return sb.toString();
30         
31     }
32     
33     public String MyFilter( String string ) {//内容过滤器-获取网页上的电话，没有去重
34         
35         String regex = "18[\\d]{9}";
36         Pattern pattern = Pattern.compile(regex);
37         Matcher matcher = pattern.matcher(string);
38         
39         String result = "";
40         while(matcher.find()) {
41             result += matcher.group()+"\n";
42         }
43         
44         return result;
45     }
46     
47     public static void main(String[] args) throws IOException {
48         
49         URLTest test = new URLTest();
50         String page = test.getDoucument("https://power.baidu.com/question/1511959129473915060.html?qbl=relate_question_1");
51         String result = test.MyFilter(page);
52 //        System.out.println(page);
53         
54         if ( result != null ) {
55             System.out.println("从该网页上找到的号码：\n"+result);
56         }
57         else {
58             System.out.println("该网页上没有电话号码");
59         }
60         
61     }
62 }

程序效果

技术分享图片

爬网页获取电话号码

标签：int while app match new lse exce htm readline

原文地址：https://www.cnblogs.com/ynhwl/p/9434230.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行