码迷,mamicode.com
首页 > 编程语言 > 详细

java读取html文件,截取<body>标签中内容

时间:2017-04-06 01:21:08      阅读:1090      评论:0      收藏:0      [点我收藏+]

标签:pre   bytes   sub   append   nts   htm   buffer   serial   sel   

 1     public String readfile(String filePath){
 2         File file = new File(filePath);  
 3         InputStream input = null;
 4         try {
 5             input = new FileInputStream(file);
 6         } catch (FileNotFoundException e) {
 7             e.printStackTrace();
 8         }  
 9         StringBuffer buffer = new StringBuffer();  
10         byte[] bytes = new byte[1024];
11         try {
12             for(int n ; (n = input.read(bytes))!=-1 ; ){  
13                 buffer.append(new String(bytes,0,n,"GBK"));  
14             }
15         } catch (IOException e) {
16             e.printStackTrace();
17         }
18 //        System.out.println(buffer);
19         return buffer.toString();  
20     }
21     
22      public String getBody(String val) {
23           String start = "<body>";
24           String end = "</body>";
25           int s = val.indexOf(start) + start.length();
26           int e = val.indexOf(end);
27         return val.substring(s, e);
28     }
29     
 1     public static void main(String [] args){
 2         OaDao m = new OaDao();
 3 //        String sql = "SELECT sth,xdh FROM TK_ST_0331 where sth=‘022012050101131000100‘ and rownum <=10";
 4         String sql = "select t.sth , t.stgjz ,t.stly, x.mc from TK_ST_0331 t ,TK_STK_ST_0331 k,TK_TX X  where t.sth = k.sth AND X.BH = t.tx and rownum <10 ";
 5         List<OaVo> datalist= m.findAll(sql);
 6         for(OaVo vo : datalist){
 7             System.out.println(vo.getVal1()+"///"+vo.getVal2());
 8             
 9 //            String sth = "022012010100000100100";
10             String sth = vo.getVal1();
11             String kmh = sth.substring(0, 2);    //科目号
12             String nf = sth.substring(2, 6);    //年份
13             String yf = sth.substring(6,10);    //月份
14             String serialno = sth.substring(10, 16);    //序列号
15             String stxl = sth.substring(16, 19);    //题型
16             String path ="/"+kmh+"/"+nf+"/"+yf+"/"+serialno+"/"+stxl+"/";
17             
18             String tm_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_tm.htm";
19             String da_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_da.htm";
20             String jx_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_jx.htm";
21             
22     //        String path = "H:/tk_source/02/0101/000001/001/022012010100000100100_da.htm";
23             
24             String tm = m.getBody(m.readfile(tm_path));
25             System.out.println("----------------------题目------------------------------");
26             System.out.println(tm);
27             
28             String da = m.getBody(m.readfile(da_path));
29             System.out.println("----------------------答案------------------------------");
30             System.out.println(da);
31             
32             
33             String jx = m.getBody(m.readfile(da_path));
34             System.out.println("----------------------解析------------------------------");
35             System.out.println(jx);
36         }
37     }

 

java读取html文件,截取<body>标签中内容

标签:pre   bytes   sub   append   nts   htm   buffer   serial   sel   

原文地址:http://www.cnblogs.com/huanglibin/p/6671202.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!