标签:java 正则表达式 匹配 中文 数字 txt文本读取
java正则表达式打印出匹配的数据
相关知识点
java正则匹配中文
java正则匹配数字
Java读取txt文本文件
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates matching Chinese text and digits with a regular expression and
 * reading a text file line by line.
 *
 * <p>An "entry" is a run of CJK ideographs (U+4E00 to U+9FA5), followed by one
 * or more {@code @}/{@code #} characters, followed by
 * {@code /Sort-0-<digits>.htm}. The ideograph run is treated as the entry name
 * and the digit run as the entry id.
 */
public class PraseSort0htm {

    /** File read by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_FILE_PATH = "d:/xxxx.txt";

    /** Name of the charset used to decode the input file. */
    private static final String FILE_ENCODING = "gbk";

    /**
     * Matches one entry. Group 1 is the name, group 2 is the numeric id.
     * The dot before "htm" is escaped so that it only matches a literal dot.
     */
    private static final Pattern ENTRY_PATTERN =
            Pattern.compile("([\u4E00-\u9FA5]+)[@#]+/Sort-0-([0-9]+)\\.htm");

    /**
     * Prints every entry found in a built-in sample string, prints a separator
     * line, then processes a text file with {@link #readTxtFile(String)}.
     *
     * @param args optional; {@code args[0]} is the file to read. When absent,
     *             the default path {@code d:/xxxx.txt} is used.
     */
    public static void main(String[] args) {
        String str = "奇异果@#/Sort-0-012345.htm,橙子@#/Sort-0-012345.htm,苹果@#/Sort-0-012345.htm,热带水果@#/Sort-0-012345.htm,";
        Matcher sampleMatcher = ENTRY_PATTERN.matcher(str);
        while (sampleMatcher.find()) {
            System.out.println(sampleMatcher.group());
        }
        System.out.println("xxxxxxxxxxxxxxxxx");

        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
        readTxtFile(filePath);
    }

    /**
     * Reads the given GBK-encoded text file and prints one line per distinct
     * entry id, in the form {@code <id> TAB <name> NEWLINE}. Only the first
     * entry seen for a given id is printed.
     *
     * <p>This method does not throw: if the path is not an existing regular
     * file a "file not found" message is printed, and any failure while
     * reading is reported on standard output together with a stack trace.
     *
     * @param filePath path of the text file to scan
     */
    public static void readTxtFile(String filePath) {
        try {
            File file = new File(filePath);
            // isFile() is already false for a path that does not exist.
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return;
            }

            Set<String> seenIds = new HashSet<>();
            // try-with-resources closes the streams even when reading fails.
            try (FileInputStream in = new FileInputStream(file);
                    BufferedReader reader =
                            new BufferedReader(new InputStreamReader(in, FILE_ENCODING))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    printNewEntries(line, seenIds);
                }
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on this method never throwing.
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
    }

    /**
     * Prints every entry of {@code line} whose id has not been seen before and
     * records the id in {@code seenIds}.
     */
    private static void printNewEntries(String line, Set<String> seenIds) {
        Matcher entry = ENTRY_PATTERN.matcher(line);
        while (entry.find()) {
            String name = entry.group(1);
            String id = entry.group(2);
            // Skip only the duplicate; later entries on the same line still count.
            if (!seenIds.add(id)) {
                continue;
            }
            System.out.print(id);
            System.out.print("\t");
            System.out.print(name);
            System.out.print("\n");
        }
    }
}
// Originally published on the "梦里不知身是客" blog; the author asks that it not be reposted.
标签:java 正则表达式 匹配 中文 数字 txt文本读取
原文地址:http://tenfee.blog.51cto.com/6353835/1875555