标签:
本人最近整理了一段从文本中提取信息并将其保存到新文本中的代码,其中用到的是正则表达式,希望大家一起学习.
文件操作类:fileOperation.java
package dyx_13;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
/**
 * Text-file helpers: create, read, overwrite, append, and truncate.
 *
 * <p>Character sets are not uniform across the helpers: {@link #writeTxtFile} encodes the
 * content as GBK, while the reader/writer based helpers use the platform default charset.
 */
public class fileOperation {

    /**
     * Creates the file if it does not exist yet.
     *
     * @param fileName file to create
     * @return {@code true} only when this call created the file; {@code false} when the file
     *         already existed, {@code fileName} is {@code null}, or creation failed
     */
    public boolean crateFile(File fileName) {
        if (fileName == null) {
            return false;
        }
        try {
            // createNewFile() reports whether the file was actually created by this call.
            return fileName.createNewFile();
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Reads a whole text file, normalising every line terminator to {@code "\n"}.
     *
     * @param fileName file to read
     * @return the file content with a trailing {@code "\n"} after every line, or {@code null}
     *         when {@code fileName} is {@code null} or the file cannot be read
     */
    public static String readTxtFile(File fileName) {
        if (fileName == null) {
            return null;
        }
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader on every path, including read failures.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append("\n");
            }
            return text.toString();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Overwrites the file with {@code content} encoded as GBK.
     *
     * @param content  text to write
     * @param fileName destination file; any previous content is replaced
     * @return {@code true} when the content was written; {@code false} when an argument is
     *         {@code null} or the write failed
     */
    public boolean writeTxtFile(String content, File fileName) {
        if (content == null || fileName == null) {
            return false;
        }
        try {
            // Encode before opening the stream so an encoding failure cannot truncate the file.
            byte[] bytes = content.getBytes("GBK");
            try (FileOutputStream out = new FileOutputStream(fileName)) {
                out.write(bytes);
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Appends {@code content} followed by {@code "\r\n"} to the file, creating it if needed.
     *
     * @param filePath path of the file to append to
     * @param content  text to append as one line
     * @throws IOException if the file cannot be opened or written
     */
    public static void write(String filePath, String content) throws IOException {
        File target = new File(filePath);
        // The second FileOutputStream argument selects append mode.
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(target, true)))) {
            out.write(content + "\r\n");
        }
    }

    /**
     * Truncates the file to zero length, creating it if it does not exist.
     *
     * @param path path of the file to empty
     * @throws IOException if the file cannot be opened or written
     */
    public static void clear(String path) throws IOException {
        try (FileWriter writer = new FileWriter(new File(path))) {
            writer.write("");
        }
    }

    /**
     * Makes sure both files exist, reads the source file, and then truncates the target file.
     *
     * <p>Nothing is written to the target: the step that would transform the source text and
     * write the result is not implemented here. Failures are printed, never thrown.
     *
     * @param filePath1 path of the source file (created empty when missing)
     * @param filePath2 path of the target file (created when missing, emptied otherwise)
     */
    public static void contentToTxt(String filePath1, String filePath2) {
        try {
            File source = new File(filePath1);
            if (source.exists()) {
                System.out.println("源文件存在");
            } else {
                System.out.println("源文件不存在");
                source.createNewFile();
            }

            File target = new File(filePath2);
            if (target.exists()) {
                System.out.println("目标文件存在");
            } else {
                System.out.println("目标文件不存在");
                // Create the target itself; the source was already handled above.
                target.createNewFile();
            }

            // Read the source first so that an unreadable source aborts before the target
            // is truncated below.
            StringBuilder sourceText = new StringBuilder();
            try (BufferedReader input = new BufferedReader(new FileReader(source))) {
                String line;
                while ((line = input.readLine()) != null) {
                    sourceText.append(line).append("\n");
                }
            }

            // Opening the writer truncates the target; no content is written.
            try (BufferedWriter output = new BufferedWriter(new FileWriter(target))) {
                output.flush();
            }
        } catch (IOException | RuntimeException e) {
            // This method reports problems on stderr instead of propagating them.
            e.printStackTrace();
        }
    }
}
正则表达式处理:regExp.java
package dyx_13;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class regExp {
public static String email(String filePath,String content) throws IOException {
String regex = "\\w+@\\w+(\\.\\w+)+";
String email = null;
Matcher m=Pattern.compile(regex).matcher(content);
while(m.find()){
email = m.group();
fileOperation.write(filePath, email);
System.out.println(email);
}
return email;
}
// 电话号码
public static String tele(String filePath,String content) throws IOException {
String telephone = null;
String regex = "0?(13[0-9]|15[012356789]|18[0236789]|14[57])[0-9]{8}";
Matcher m=Pattern.compile(regex).matcher(content);
while(m.find()){
telephone = m.group();
fileOperation.write(filePath, telephone);
System.out.println(telephone);
}
return telephone;
}
// 网址
public static String netAddress(String filePath,String content) throws IOException {
String netAdd = null;
String regex = "(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]" ;
Matcher m=Pattern.compile(regex).matcher(content);
while(m.find()){
netAdd = m.group();
fileOperation.write(filePath, netAdd);
System.out.println(netAdd);
}
return netAdd;
}
// 身份证
public static String idCard(String filePath,String content) throws IOException {
String id = null;
String regex = "(\\d{14}\\w)|\\d{17}\\w" ;
Matcher m=Pattern.compile(regex).matcher(content);
while(m.find()){
id = m.group();
fileOperation.write(filePath, id);
System.out.println(id);
}
return id;
}
}
运行主程序zhu.java
package dyx_13;
import java.io.File;
import java.io.IOException;
/**
 * Entry point: reads a text file, extracts e-mail addresses, phone numbers, URLs and ID-card
 * numbers from it, and writes every match on its own line to an output file.
 */
public class zhu {

    /** Input file used when no command-line argument is given. */
    private static final String DEFAULT_INPUT =
            "D:\\java workspace\\dyx\\JAVA class\\dyx_2\\content\\file.txt";

    /** Output file used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT =
            "D:\\java workspace\\dyx\\JAVA class\\dyx_2\\content\\outfile.txt";

    /**
     * Runs the extraction.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
     *             path; the built-in defaults are used for whichever is missing
     * @throws IOException if the output file cannot be emptied or written
     */
    public static void main(String[] args) throws IOException {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        // Start from an empty output file because the extractors append to it.
        fileOperation.clear(outputPath);

        String content = fileOperation.readTxtFile(new File(inputPath));
        if (content == null) {
            // readTxtFile returns null when the file cannot be read; stop before the extractors.
            System.err.println("Cannot read input file: " + inputPath);
            return;
        }
        System.out.println(content);

        regExp.email(outputPath, content);
        regExp.tele(outputPath, content);
        regExp.netAddress(outputPath, content);
        regExp.idCard(outputPath, content);
    }
}
利用正则表达式读取txt文件中的邮箱,手机号,url地址,身份证号,将数据一行一个保存到一个新的文件中去
标签:
原文地址:http://blog.csdn.net/demo_deng/article/details/45480779