标签:
package io.charstream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
 * Demonstrates cutting a byte range out of a text file that uses a double-byte
 * encoding (the original author targets GBK) without splitting a two-byte
 * character in half.
 *
 * <p>Classification rule used throughout: a non-negative byte is either a
 * single-byte character or the trail byte of a pair; a negative byte is either a
 * lead byte or a trail byte. Which one it is depends on the parity of the run of
 * consecutive negative bytes that ends at (or just before) the byte in question.
 */
public class Text_Code {

    /**
     * Prints every byte-range substring {@code [i, j)} of {@code temp\split.txt}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File file = new File("temp\\split.txt");
        // The file is not modified while iterating, so its length is queried once.
        long length = file.length();
        for (int i = 0; i < length; i++) {
            for (int j = i + 1; j <= length; j++) {
                try {
                    // Start index is inclusive, end index is exclusive.
                    String str = splitString(file, i, j);
                    System.out.println(str);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns the text stored in bytes {@code [i, j)} of {@code file}, adjusted so
     * that no double-byte character is cut in half.
     *
     * <p>If {@code i} falls on a trail byte the range is extended one byte to the
     * left so the whole character is included. If {@code j - 1} falls on a lead
     * byte the range is shortened by one byte so the incomplete character is
     * dropped.
     *
     * <p>The whole file is read, so files larger than 1024 bytes are supported.
     * The bytes are decoded with the platform default charset.
     * NOTE(review): the lead/trail heuristic only makes sense when that charset is
     * a double-byte encoding such as GBK; confirm the runtime default.
     *
     * @param file file to read
     * @param i    index of the first byte, inclusive
     * @param j    index of the last byte, exclusive
     * @return the decoded text of the adjusted byte range
     * @throws IOException if the file cannot be read
     */
    public static String splitString(File file, int i, int j) throws IOException {
        byte[] buffer = readAllBytes(file);
        int pos = i;
        int len = j - i;
        if (position(buffer, i) < 0) {
            // Start is on a trail byte: step back to its lead byte.
            pos = i - 1;
            len++;
        }
        if (position(buffer, j - 1) > 0) {
            // Last included byte is a lead byte without its trail byte: drop it.
            len--;
        }
        return new String(buffer, pos, len);
    }

    /** Reads the complete content of {@code file}, always closing the stream. */
    private static byte[] readAllBytes(File file) throws IOException {
        long size = file.length();
        if (size > Integer.MAX_VALUE) {
            throw new IOException("File too large to load into memory: " + file);
        }
        byte[] data = new byte[(int) size];
        FileInputStream fis = new FileInputStream(file);
        try {
            int filled = 0;
            // A single read() call may return fewer bytes than requested.
            while (filled < data.length) {
                int n = fis.read(data, filled, data.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        } finally {
            fis.close();
        }
        return data;
    }

    /**
     * Classifies the byte at index {@code i}.
     *
     * @param buffer encoded bytes
     * @param i      index of the byte to classify
     * @return {@code 0} for a single-byte character, {@code 1} for a lead byte,
     *         {@code -1} for a trail byte
     */
    public static int position(byte[] buffer, int i) {
        int point = buffer[i];
        if (point >= 0) {
            // Non-negative (including 0x00): single byte unless an odd run of
            // negative bytes directly precedes it, in which case it is a trail byte.
            return negCount(buffer, i - 1) % 2 == 0 ? 0 : -1;
        }
        // Negative: even position inside the run (counting itself) means trail byte.
        return negCount(buffer, i) % 2 == 0 ? -1 : 1;
    }

    /**
     * Counts the consecutive negative bytes ending at index {@code i} (inclusive),
     * scanning towards the start of the array.
     *
     * @param buffer encoded bytes
     * @param i      index to start scanning from; a negative index yields {@code 0}
     * @return length of the run of negative bytes ending at {@code i}
     */
    public static int negCount(byte[] buffer, int i) {
        int sum = 0;
        while (i >= 0 && buffer[i] < 0) {
            sum++;
            i--;
        }
        return sum;
    }
}
/**
 * Reads {@code temp\char.txt} through a {@link FileReader} using a 1024-char
 * buffer and returns everything that was read.
 *
 * <p>A read failure is reported on standard output and the text read so far
 * (possibly empty) is returned.
 *
 * @return the characters read from the file
 * @throws RuntimeException if the reader cannot be closed; the underlying
 *         {@link IOException} is attached as the cause
 */
public static String readFile() {
    StringBuilder str = new StringBuilder();
    char[] buffer = new char[1024];
    FileReader fr = null;
    try {
        fr = new FileReader("temp\\char.txt");
        int len;
        while ((len = fr.read(buffer)) != -1) {
            str.append(buffer, 0, len);
        }
    } catch (IOException e) {
        System.out.println("读取文件失败!");
        e.printStackTrace();
    } finally {
        // Release the file handle whether or not reading succeeded.
        if (fr != null) {
            try {
                fr.close();
            } catch (IOException e) {
                System.out.println("释放文件失败!");
                e.printStackTrace();
                // Keep the original failure attached so callers can diagnose it.
                throw new RuntimeException(e);
            }
        }
    }
    return str.toString();
}
/**
 * Writes {@code str} to {@code temp\char_out.txt} through a {@link FileWriter},
 * replacing any existing content.
 *
 * <p>A write failure is reported on standard output and otherwise ignored.
 *
 * @param str text to write
 * @throws RuntimeException if the writer cannot be closed; the underlying
 *         {@link IOException} is attached as the cause
 */
public static void writeFile(String str) {
    FileWriter fw = null;
    try {
        fw = new FileWriter("temp\\char_out.txt");
        fw.write(str);
    } catch (IOException e) {
        System.out.println("写入文件失败!");
        e.printStackTrace();
    } finally {
        // Release the file handle whether or not writing succeeded.
        if (fw != null) {
            try {
                fw.close();
            } catch (IOException e) {
                System.out.println("释放文件失败!");
                e.printStackTrace();
                // Keep the original failure attached so callers can diagnose it.
                throw new RuntimeException(e);
            }
        }
    }
}
/**
 * Writes {@code str} to {@code temp\char_utf8.txt} encoded as UTF-8.
 *
 * <p>Failures while opening or writing the file are printed as stack traces and
 * otherwise ignored.
 *
 * @param str text to write
 * @throws RuntimeException if the writer cannot be closed; the underlying
 *         {@link IOException} is attached as the cause
 */
public static void writeFile_UTF8(String str) {
    OutputStreamWriter osw = null;
    try {
        osw = new OutputStreamWriter(new FileOutputStream("temp\\char_utf8.txt"), "UTF-8");
        osw.write(str);
    } catch (IOException e) {
        // Covers unsupported encoding, file not found and write failures:
        // all three are IOException subtypes and were handled identically.
        e.printStackTrace();
    } finally {
        if (osw != null) {
            try {
                osw.close();
            } catch (IOException e) {
                e.printStackTrace();
                // Keep the original failure attached so callers can diagnose it.
                throw new RuntimeException("文件关闭失败!", e);
            }
        }
    }
}
package io.charstream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
/**
 * Demonstrates line-oriented reading and copying of a text file with
 * {@link BufferedReader} and {@link BufferedWriter}.
 */
public class CharBufferIODemo {

    /**
     * Prints {@code temp\char.txt} line by line, then copies it.
     *
     * @param args unused
     * @throws IOException if the file cannot be read or the copy cannot be written
     */
    public static void main(String[] args) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader("temp\\char.txt"));
        try {
            String str;
            while ((str = br.readLine()) != null) {
                System.out.println(str);
            }
        } finally {
            br.close();
        }
        copyFileUseBuffer("temp\\char.txt");
    }

    /**
     * Copies the text file at {@code string} line by line to a sibling file whose
     * name has {@code _copy} inserted before the extension (or appended when the
     * file name has no extension).
     *
     * <p>Every line of the copy ends with the platform line separator, so line
     * endings may differ from the source and a final separator is always written.
     *
     * @param string path of the file to copy
     * @throws IOException if the source cannot be read or the copy cannot be written
     */
    public static void copyFileUseBuffer(String string) throws IOException {
        String target = copyName(string);
        BufferedReader br = new BufferedReader(new FileReader(string));
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(target));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    bw.write(line);
                    bw.newLine();
                }
            } finally {
                bw.close();
            }
        } finally {
            br.close();
        }
    }

    /** Builds the copy's path: {@code _copy} goes before the file name's last dot, if any. */
    private static String copyName(String path) {
        int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        int dot = path.lastIndexOf('.');
        if (dot <= separator) {
            // No dot in the file name itself (a dot in a directory name does not count).
            return path + "_copy";
        }
        return new StringBuilder(path).insert(dot, "_copy").toString();
    }
}
package tools;
/**
 * Template-method base class that measures the wall-clock time and heap growth
 * of one call to {@link #run()}.
 *
 * <p>Subclasses supply the work in {@link #run()} and may override
 * {@link #isPrinted()} to suppress the printed report.
 */
public abstract class GetComplexity {
    private static final Runtime RUNTIME = Runtime.getRuntime();

    /** Label printed in the report; {@code null} means "use the class name". */
    private final String name;

    /** Creates an unnamed measurement; the report is labelled with the class name. */
    public GetComplexity() {
        this(null);
    }

    /**
     * Creates a named measurement.
     *
     * @param name label printed in the report
     */
    public GetComplexity(String name) {
        super();
        this.name = name;
    }

    /**
     * Runs {@link #run()} once and measures it.
     *
     * <p>Garbage collection is requested before the measurement starts and the
     * clock is read only afterwards, so the reported time covers {@code run()}
     * alone rather than the GC requests.
     *
     * @return a two-element array: elapsed nanoseconds, then the change in used
     *         heap bytes (which can be negative if memory was reclaimed meanwhile)
     */
    public final long[] start() {
        runGC();
        long startMemory = usedMemory();
        long startTime = System.nanoTime();
        run();
        long elapsed = System.nanoTime() - startTime;
        long[] use = {elapsed, usedMemory() - startMemory};
        if (isPrinted()) {
            if (name == null) {
                System.out.println(this.getClass().getName() + " :");
            } else {
                System.out.println(this.name + " :");
            }
            System.out.printf("Estimated Time is %.3f ms.\n", (double) use[0] / 1000000);
            System.out.printf("Used Memory is %.3f KB.\n", (double) use[1] / 1000);
        }
        return use;
    }

    /** Returns the heap currently in use: total memory minus free memory. */
    private static long usedMemory() {
        return RUNTIME.totalMemory() - RUNTIME.freeMemory();
    }

    /** Requests garbage collection four times to steady the memory baseline. */
    private void runGC() {
        for (int i = 0; i < 4; i++) {
            System.gc();
        }
    }

    /** The work to be measured. */
    public abstract void run();

    /**
     * Hook that decides whether {@link #start()} prints its report.
     *
     * @return {@code true} by default
     */
    public boolean isPrinted() {
        return true;
    }
}
package io.charstream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
/**
 * Five ways of copying a text file with character streams. Every method closes
 * both streams even when reading or writing fails.
 */
public class CharFileCopy {

    /**
     * Copies character by character with an unbuffered FileReader/FileWriter.
     *
     * @param old     file to read
     * @param newFile file to create or overwrite
     * @throws IOException if reading or writing fails
     */
    public static void copyCharFile(File old, File newFile) throws IOException {
        FileReader fr = new FileReader(old);
        try {
            FileWriter fw = new FileWriter(newFile);
            try {
                int ch;
                while ((ch = fr.read()) != -1) {
                    fw.write(ch);
                }
            } finally {
                fw.close();
            }
        } finally {
            fr.close();
        }
    }

    /**
     * Copies with FileReader/FileWriter through a 1024-char array.
     *
     * @param old     file to read
     * @param newFile file to create or overwrite
     * @throws IOException if reading or writing fails
     */
    public static void copyCharFile_UseArr(File old, File newFile) throws IOException {
        FileReader fr = new FileReader(old);
        try {
            FileWriter fw = new FileWriter(newFile);
            try {
                char[] buf = new char[1024];
                int len;
                while ((len = fr.read(buf)) != -1) {
                    fw.write(buf, 0, len);
                }
            } finally {
                fw.close();
            }
        } finally {
            fr.close();
        }
    }

    /**
     * Copies character by character with BufferedReader/BufferedWriter.
     *
     * @param old     file to read
     * @param newFile file to create or overwrite
     * @throws IOException if reading or writing fails
     */
    public static void bufCopyCharFile(File old, File newFile) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(old));
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
            try {
                int ch;
                while ((ch = br.read()) != -1) {
                    bw.write(ch);
                }
            } finally {
                bw.close();
            }
        } finally {
            br.close();
        }
    }

    /**
     * Copies line by line with BufferedReader/BufferedWriter. Each copied line is
     * terminated with the platform line separator, so the copy may differ from
     * the source in its line endings and always ends with a separator.
     *
     * @param old     file to read
     * @param newFile file to create or overwrite
     * @throws IOException if reading or writing fails
     */
    public static void bufCopyCharFile_UseLine(File old, File newFile) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(old));
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    bw.write(line);
                    bw.newLine();
                }
            } finally {
                bw.close();
            }
        } finally {
            br.close();
        }
    }

    /**
     * Copies with BufferedReader/BufferedWriter through a 1024-char array.
     *
     * @param old     file to read
     * @param newFile file to create or overwrite
     * @throws IOException if reading or writing fails
     */
    public static void bufCopyCharFile_UseArr(File old, File newFile) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(old));
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
            try {
                char[] buf = new char[1024];
                int len;
                while ((len = br.read(buf)) != -1) {
                    bw.write(buf, 0, len);
                }
            } finally {
                bw.close();
            }
        } finally {
            br.close();
        }
    }
}
package io.charstream;
import java.io.File;
import java.io.IOException;
import tools.GetComplexity;
/**
 * Runs each of the five copy strategies once against the same source file and
 * lets the timing wrapper report on every run.
 */
public class CharFileCopyDemo {

    /**
     * Copies {@code temp\singlelinechar.txt} into {@code temp\CopyCharFile} five
     * times, once per strategy, in a fixed order.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File src = new File("temp\\singlelinechar.txt");

        // 1. Unbuffered, one character at a time.
        CopyCharFile plain = new CopyCharFile(
                "使用FileReader/Writer复制文本", src, new File("temp\\CopyCharFile\\char_copy.txt"));
        plain.start();

        // 2. Unbuffered streams with a hand-rolled char array.
        CopyCharFile_UseArr plainArray = new CopyCharFile_UseArr(
                "使用自定义缓冲区的FileReader/Writer复制文本", src, new File("temp\\CopyCharFile\\char_copy_arr.txt"));
        plainArray.start();

        // 3. Buffered streams, one character at a time.
        BufCopyCharFile buffered = new BufCopyCharFile(
                "使用BufferedReader/Writer复制文本", src, new File("temp\\CopyCharFile\\char_bufcopy.txt"));
        buffered.start();

        // 4. Buffered streams, one line at a time.
        BufCopyCharFile_UseLine bufferedLines = new BufCopyCharFile_UseLine(
                "使用BufferedReader/Writer按行复制文本", src, new File("temp\\CopyCharFile\\char_bufcopy_line.txt"));
        bufferedLines.start();

        // 5. Buffered streams with a char array.
        BufCopyCharFile_UseArr bufferedArray = new BufCopyCharFile_UseArr(
                "使用BufferedReader/Writer按数组复制文本", src, new File("temp\\CopyCharFile\\char_bufcopy_arr.txt"));
        bufferedArray.start();
    }
}
/** Measured task: copy a file via {@code CharFileCopy.copyCharFile}. */
class CopyCharFile extends GetComplexity {
    private final File source;
    private final File target;

    /**
     * @param name label passed to the measuring base class
     * @param src  file to copy from
     * @param dest file to copy to
     */
    public CopyCharFile(String name, File src, File dest) {
        super(name);
        source = src;
        target = dest;
    }

    /** Performs the copy; an I/O failure is printed, not propagated. */
    @Override
    public void run() {
        try {
            CharFileCopy.copyCharFile(source, target);
        } catch (IOException copyFailure) {
            copyFailure.printStackTrace();
        }
    }
}
/** Measured task: copy a file via {@code CharFileCopy.copyCharFile_UseArr}. */
class CopyCharFile_UseArr extends GetComplexity {
    private final File source;
    private final File target;

    /**
     * @param name label passed to the measuring base class
     * @param src  file to copy from
     * @param dest file to copy to
     */
    public CopyCharFile_UseArr(String name, File src, File dest) {
        super(name);
        source = src;
        target = dest;
    }

    /** Performs the copy; an I/O failure is printed, not propagated. */
    @Override
    public void run() {
        try {
            CharFileCopy.copyCharFile_UseArr(source, target);
        } catch (IOException copyFailure) {
            copyFailure.printStackTrace();
        }
    }
}
/** Measured task: copy a file via {@code CharFileCopy.bufCopyCharFile}. */
class BufCopyCharFile extends GetComplexity {
    private final File source;
    private final File target;

    /**
     * @param name label passed to the measuring base class
     * @param src  file to copy from
     * @param dest file to copy to
     */
    public BufCopyCharFile(String name, File src, File dest) {
        super(name);
        source = src;
        target = dest;
    }

    /** Performs the copy; an I/O failure is printed, not propagated. */
    @Override
    public void run() {
        try {
            CharFileCopy.bufCopyCharFile(source, target);
        } catch (IOException copyFailure) {
            copyFailure.printStackTrace();
        }
    }
}
/** Measured task: copy a file via {@code CharFileCopy.bufCopyCharFile_UseLine}. */
class BufCopyCharFile_UseLine extends GetComplexity {
    private final File source;
    private final File target;

    /**
     * @param name label passed to the measuring base class
     * @param src  file to copy from
     * @param dest file to copy to
     */
    public BufCopyCharFile_UseLine(String name, File src, File dest) {
        super(name);
        source = src;
        target = dest;
    }

    /** Performs the copy; an I/O failure is printed, not propagated. */
    @Override
    public void run() {
        try {
            CharFileCopy.bufCopyCharFile_UseLine(source, target);
        } catch (IOException copyFailure) {
            copyFailure.printStackTrace();
        }
    }
}
/** Measured task: copy a file via {@code CharFileCopy.bufCopyCharFile_UseArr}. */
class BufCopyCharFile_UseArr extends GetComplexity {
    private final File source;
    private final File target;

    /**
     * @param name label passed to the measuring base class
     * @param src  file to copy from
     * @param dest file to copy to
     */
    public BufCopyCharFile_UseArr(String name, File src, File dest) {
        super(name);
        source = src;
        target = dest;
    }

    /** Performs the copy; an I/O failure is printed, not propagated. */
    @Override
    public void run() {
        try {
            CharFileCopy.bufCopyCharFile_UseArr(source, target);
        } catch (IOException copyFailure) {
            copyFailure.printStackTrace();
        }
    }
}
package io.charstream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import tools.GetComplexity;
import tools.StringTools;
/** Entry point that runs both substring-counting strategies, one after the other. */
public class Text_GetCount {

    /**
     * Runs the fixed-buffer search first, then the line-by-line search.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        GetCountByBuffer bufferSearch = new GetCountByBuffer("按自定义缓冲区查找字符串");
        bufferSearch.start();

        GetCountByLine lineSearch = new GetCountByLine("按行查找字符串");
        lineSearch.start();
    }
}
/**
* 通过带缓冲字符流按行读取,获取文件中某字段出现次数
* 继承自GetComplexity,使得可以获取该算法所消耗的时间和内存
*/
class GetCountByLine extends GetComplexity {
public GetCountByLine() {
super();
}
public GetCountByLine(String name) {
super(name);
}
public void run() {
try {
File file = new File( "temp\\singlelinechar.txt");
int count = getCountByLine( file, "个人文档" );
System.out.println( "“个人文档”出现次数:" + count);
} catch (IOException e) {
e.printStackTrace();
}
}
public int getCountByLine(File file, String target) throws IOException {
BufferedReader br = new BufferedReader( new FileReader(file));
String line = null;
int sum = 0;
// 使用字符流循环获取每一行的字符串,并获取其中出现特定字段的次数,并累加
// 由于可能出现文件中某行内容太多,所以该方法可能会出现占用内存过大的问题
while (( line = br.readLine()) != null) {
sum += StringTools.getCount(line , target );
}
br.close();
return sum;
}
}
/**
* 通过字符流以及自定义缓冲区,获取文件中某字段出现次数
* 由于该方法缓冲区大小是固定的,所以避免了某一行过长照成消耗内存过大的情况
* 继承自GetComplexity,使得可以获取该算法所消耗的时间和内存
*/
class GetCountByBuffer extends GetComplexity {
public GetCountByBuffer() {
super();
}
public GetCountByBuffer(String name) {
super(name);
}
// 自定义缓冲区大小为1024*2字节
private final int BUFFER_SIZE = 1024;
public void run() {
File file = new File( "temp\\singlelinechar.txt");
try {
int count = getCount( file, "个人文档" );
System.out.println( "“个人文档”出现次数:" + count);
} catch (IOException e) {
e.printStackTrace();
}
}
public int getCount(File file, String target) throws IOException {
FileReader fr = new FileReader( file);
char[] buf = new char[ BUFFER_SIZE];
int len = -1;
int sum = 0;
// 首先判断文件是否为空,是则返回,否则继续
if ((len = fr.read(buf)) == -1) {
fr.close();
return 0;
}
// 第一次读取,将文件内容读取到缓冲区中,并累加特定字段出现的次数
sum += StringTools.getCount(new String(buf, 0, len), target);
// 由于特定字段不止一个字符,可能出现特定字段被缓冲区截断的情况,以至于可能会漏记次数
// 所以这里将最后可能被截断的字符串保留(长度最长为target.length() - 1),复制到缓冲区最前面
System.arraycopy(buf , BUFFER_SIZE - target .length() + 1, buf, 0, target .length() - 1);
// 下一次读取将覆盖缓冲区剩余空间(长度为BUFFER_SIZE - target.length() + 1),并循环判断累加次数。
while (( len = fr.read( buf, target.length() - 1, BUFFER_SIZE - target .length() + 1)) != -1) {
sum += StringTools.getCount(new String(buf, 0, len + target.length() - 1), target );
System.arraycopy(buf , BUFFER_SIZE - target .length() + 1, buf, 0, target .length() - 1);
}
fr.close();
return sum;
}
}
package io.charstream;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
/**
 * Minimal hand-written buffered reader: wraps a {@link Reader} and serves
 * characters from an internal 1024-char buffer that is refilled on demand.
 */
class MyBufferedReader {
    private static final int SIZE = 1024;

    private final Reader r;
    /** Index of the next character to hand out from {@link #buffer}. */
    private int pos = 0;
    /** Number of valid characters in {@link #buffer}; {@code -1} after end of stream. */
    private int count = 0;
    private final char[] buffer = new char[SIZE];

    /**
     * @param r underlying reader to pull characters from
     */
    public MyBufferedReader(Reader r) {
        this.r = r;
    }

    /**
     * Refills the buffer from the underlying reader and resets the read position.
     *
     * @throws IOException if the underlying read fails
     */
    private void fill() throws IOException {
        count = r.read(buffer);
        pos = 0;
    }

    /**
     * Reads a single character.
     *
     * @return the character as an int, or {@code -1} at end of stream
     * @throws IOException if the underlying read fails
     */
    public int read() throws IOException {
        // Loop rather than a single check so that a refill delivering zero
        // characters is retried instead of handing out stale buffer content.
        while (pos >= count) {
            fill();
            if (count < 0) {
                return -1;
            }
        }
        return buffer[pos++];
    }

    /**
     * Reads one line terminated by {@code '\n'}. Every {@code '\r'} encountered is
     * discarded, so {@code "\r\n"} endings work; a lone {@code '\r'} inside a line
     * is dropped as well.
     *
     * @return the line without its terminator, or {@code null} when the end of the
     *         stream is reached with no pending characters
     * @throws IOException if the underlying read fails
     */
    public String readLine() throws IOException {
        StringBuilder sb = new StringBuilder();
        int ch;
        while ((ch = read()) != -1) {
            if (ch == '\r') {
                continue;
            }
            if (ch == '\n') {
                return sb.toString();
            }
            sb.append((char) ch);
        }
        // End of stream: return a final unterminated line if there is one.
        return sb.length() != 0 ? sb.toString() : null;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if closing fails
     */
    public void close() throws IOException {
        r.close();
    }
}
/** Prints {@code temp\char.txt} line by line using {@code MyBufferedReader}. */
public class CharMyBufferedDemo {

    /**
     * Reads the file and echoes every line to standard output. I/O failures are
     * printed as stack traces, and the reader is closed even when reading fails.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        MyBufferedReader mbr = null;
        try {
            mbr = new MyBufferedReader(new FileReader("temp\\char.txt"));
            String str;
            while ((str = mbr.readLine()) != null) {
                System.out.println(str);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close here rather than inside the try so a failed read cannot leak the file.
            if (mbr != null) {
                try {
                    mbr.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
标签:
原文地址:http://blog.csdn.net/u010388781/article/details/51167749