标签:java 正则表达式 匹配 中文 数字 txt文本读取
java正则表达式打印出匹配的数据
相关知识点
java正则匹配中文
java正则匹配数字
java读取text文件
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demo of extracting "name + id" entries with a regular expression, first from an
 * in-memory sample string and then from a GBK-encoded text file.
 *
 * <p>An entry has the shape {@code <CJK name><one or more of @ or #>/Sort-0-<digits>.htm},
 * where the name consists of characters in the range U+4E00..U+9FA5.
 */
public class PraseSort0htm {

    /**
     * Matches one entry. Group 1 is the CJK name, group 2 is the numeric id.
     * The dot before "htm" is escaped so that it only matches a literal dot.
     */
    private static final Pattern ENTRY_PATTERN =
            Pattern.compile("([\u4E00-\u9FA5]+)[@#]+/Sort-0-([0-9]+)\\.htm");

    /** Charset name used to decode the input file. */
    private static final String FILE_ENCODING = "gbk";

    /**
     * Prints every entry found in a built-in sample string, then a separator line,
     * then processes the file {@code d:/xxxx.txt} via {@link #readTxtFile(String)}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "奇异果@#/Sort-0-012345.htm,橙子@#/Sort-0-012345.htm,苹果@#/Sort-0-012345.htm,热带水果@#/Sort-0-012345.htm,";
        Matcher mat = ENTRY_PATTERN.matcher(str);
        while (mat.find()) {
            System.out.println(mat.group(0));
        }
        System.out.println("xxxxxxxxxxxxxxxxx");
        String filePath = "d:/xxxx.txt";
        readTxtFile(filePath);
    }

    /**
     * Reads the given text file (decoded as GBK) line by line and prints one
     * {@code id<TAB>name} line to standard output for every entry whose id has not
     * been printed before during this call.
     *
     * <p>If the path does not denote an existing regular file, a "file not found"
     * message is printed instead. Any exception raised while reading is caught; an
     * error message and the stack trace are printed and the method returns normally.
     *
     * @param filePath path of the text file to scan
     */
    public static void readTxtFile(String filePath) {
        try {
            File file = new File(filePath);
            // isFile() is only true for an existing regular file.
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return;
            }

            Set<String> seenIds = new HashSet<String>();
            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), FILE_ENCODING))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    printNewEntries(line, seenIds);
                }
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
    }

    /** Prints {@code id<TAB>name} for each entry in {@code line} whose id is not yet in {@code seenIds}. */
    private static void printNewEntries(String line, Set<String> seenIds) {
        Matcher entry = ENTRY_PATTERN.matcher(line);
        while (entry.find()) {
            String name = entry.group(1);
            String id = entry.group(2);
            // Skip only the duplicate itself; later entries on the same line
            // may still carry ids that have not been printed yet.
            if (!seenIds.add(id)) {
                continue;
            }
            System.out.print(id + "\t" + name + "\n");
        }
    }
}
本文出自 “梦里不知身是客” 博客,谢绝转载!
标签:java 正则表达式 匹配 中文 数字 txt文本读取
原文地址:http://tenfee.blog.51cto.com/6353835/1875555