标签:
环境:Eclipse, JDK 1.6, 没有使用第三方的包,用到的类都是JDK自带的。
注意,项目源文件我都使用的是UTF-8的编码格式,如果不是,代码里面的汉字注释会显示乱码。
设置UTF-8:windows->Preferences->General->Workspace 页面上Text file encoding,选择Other UTF-8
项目结构:

1.字典文件
dic.txt 下载地址:http://download.csdn.net/detail/wssiqi/5056993
这里只摘录一部分内容,里面共收录了20902个汉字
-
19968,一,一,1,1,GGLL,A,yi1,yī
-
19969,丁,一,2,12,SGH,AI,ding1,dīng,zheng1,zhēng
-
19970,丂,一,2,15,GNV,AZVV,kao3,kǎo,qiao3,qiǎo,yu2,yú
-
19971,七,一,2,15,AGN,HD,qi1,qī
-
19972,丄,一,2,21,HGD,IAVV,shang4,shàng
-
19973,丅,一,2,12,GHK,AIAA,xia4,xià
-
19974,丆,一,2,13,DGT,GDAA,han3,hǎn
-
19975,万,一,3,153,DNV,,wan4,wàn,mo4,mò
-
19976,丈,一,3,134,DYI,AOS,zhang4,zhàng
-
19977,三,一,3,111,DGGG,CD,san1,sān
-
19978,上,一,3,211,HHGG,IDA,shang3,shǎng,shang4,shàng
-
19979,下,一,3,124,GHI,AID,xia4,xià
-
19980,丌,一,3,132,GJK,AND,ji1,jī,qi2,qí
-
19981,不,一,4,1324,GII,GI,fou3,fǒu,bu4,bù
-
19982,与,一,3,151,GNGD,AZA,yu4,yù,yu3,yǔ,yu2,yú
-
19983,丏,一,4,1255,GHNN,AIZY,mian3,miǎn
-
19984,丐,一,4,1215,GHNV,AIZ,gai4,gài
-
19985,丑,一,4,5211,NFD,XED,chou3,chǒu
-
19986,丒,一,4,5341,VYGF,YDSA,chou3,chǒu
2.Dic.java
-
package com.siqi.dict;
-
-
import java.io.BufferedReader;
-
import java.io.ByteArrayInputStream;
-
import java.io.File;
-
import java.io.FileInputStream;
-
import java.io.InputStreamReader;
-
import java.nio.charset.Charset;
-
-
-
-
-
-
-
-
-
-
/**
 * Chinese character dictionary backed by the comma-separated text file
 * {@link #DIC_FILENAME}, which is read from the working directory when this
 * class is first used.
 *
 * <p>Each line of the file describes one character. The fields, in order, are
 * addressed by the {@code INDEX_*} values declared below: decimal code point,
 * the character itself, radical, stroke count, stroke order, Wubi code,
 * Zhengma code, pinyin with a tone digit and pinyin with a tone mark. A
 * character with several readings has further pinyin pairs appended; only the
 * first pair is exposed by this class.
 *
 * <p>The file is always decoded as UTF-8 ({@link #DEFAULT_CHARSET}),
 * independent of the platform default encoding.
 */
public class Dic {

    /** When true, the static initializer prints a load summary to stdout. */
    private static final boolean DEBUG = true;

    /** Encoding of the dictionary file. */
    public static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");

    /** Lowest code point accepted by {@link #isChineseChar(Character)}. */
    public static final int CN_U16_CODE_MIN = 0x4e00;

    /** Highest code point accepted by {@link #isChineseChar(Character)}. */
    public static final int CN_U16_CODE_MAX = 0x9fa5;

    /** Name of the dictionary file, resolved against the working directory. */
    public static final String DIC_FILENAME = "dic.txt";

    /** Raw content of the dictionary file; empty when loading failed. */
    public static byte[] bytes = new byte[0];

    /** Number of lines found in the dictionary file. */
    public static int count = 0;

    /** Field index: decimal code point. */
    public static int INDEX_UNICODE = 0;

    /** Field index: the character itself. */
    public static int INDEX_CHARACTER = 1;

    /** Field index: radical. */
    public static int INDEX_BUSHOU = 2;

    /** Field index: number of strokes. */
    public static int INDEX_BIHUA = 3;

    /** Field index: stroke order. */
    public static int INDEX_BISHUN = 4;

    /** Field index: Wubi input code. */
    public static int INDEX_WUBI = 5;

    /** Field index: Zhengma input code. */
    public static int INDEX_ZHENGMA = 6;

    /** Field index: pinyin with a trailing tone digit, e.g. {@code yi1}. */
    public static int INDEX_PINYIN_EN = 7;

    /** Field index: pinyin with a tone mark. */
    public static int INDEX_PINYIN_CN = 8;

    // Loads the dictionary once. A failure is reported on stdout/stderr but is
    // deliberately not rethrown, so the class stays usable (lookups then find
    // no record) instead of failing with ExceptionInInitializerError.
    static {
        long time = System.currentTimeMillis();
        try {
            LoadDictionary();
            count = count();
            if (DEBUG) {
                System.out.println("成功载入字典" + dictionaryPath() + " ,用时:"
                        + (System.currentTimeMillis() - time) + "毫秒,载入字符数" + count);
            }
        } catch (Exception e) {
            System.out.println("载入字典失败" + dictionaryPath() + "\r\n");
            e.printStackTrace();
        }
    }

    /**
     * Returns the decimal code point stored for a character.
     *
     * @param ch the character to look up
     * @return the first field of the character's record
     * @throws Exception if {@code ch} is outside the supported range
     */
    public static String GetUnicode(Character ch) throws Exception {
        return GetCharInfo(ch, INDEX_UNICODE);
    }

    /**
     * Returns the pinyin of a character with a tone digit.
     *
     * @param ch the character to look up
     * @return the pinyin field, e.g. {@code yi1}
     * @throws Exception if {@code ch} is outside the supported range or its
     *         record lacks the field
     */
    public static String GetPinyinEn(Character ch) throws Exception {
        return GetCharInfo(ch, INDEX_PINYIN_EN);
    }

    /**
     * Converts every supported Chinese character of a string to pinyin with a
     * tone digit followed by one space; all other characters are copied
     * unchanged. The result is trimmed.
     *
     * @param str the text to convert
     * @return the converted text
     * @throws Exception if a record of a contained character is incomplete
     */
    public static String GetPinyinEn(String str) throws Exception {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (isChineseChar(ch)) {
                sb.append(GetPinyinEn(ch)).append(' ');
            } else {
                sb.append(ch);
            }
        }
        return sb.toString().trim();
    }

    /**
     * Returns the pinyin of a character with a tone mark.
     *
     * @param ch the character to look up
     * @return the tone-marked pinyin field
     * @throws Exception if {@code ch} is outside the supported range or its
     *         record lacks the field
     */
    public static String GetPinyinCn(Character ch) throws Exception {
        return GetCharInfo(ch, INDEX_PINYIN_CN);
    }

    /**
     * Converts every supported Chinese character of a string to tone-marked
     * pinyin followed by one space; all other characters are copied unchanged.
     * The result is trimmed.
     *
     * @param str the text to convert
     * @return the converted text
     * @throws Exception if a record of a contained character is incomplete
     */
    public static String GetPinyinCn(String str) throws Exception {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (isChineseChar(ch)) {
                sb.append(GetPinyinCn(ch)).append(' ');
            } else {
                sb.append(ch);
            }
        }
        return sb.toString().trim();
    }

    /**
     * Returns the first letter of a character's pinyin.
     *
     * @param ch the character to look up
     * @return the first pinyin letter, or an empty string when {@code ch} is
     *         not a supported Chinese character or its pinyin field is empty
     * @throws Exception if the character's record lacks the pinyin field
     */
    public static String GetFirstLetter(Character ch) throws Exception {
        if (!isChineseChar(ch)) {
            return "";
        }
        String pinyin = GetPinyinEn(ch);
        // An empty pinyin field must not turn into StringIndexOutOfBoundsException.
        return pinyin.length() == 0 ? "" : pinyin.substring(0, 1);
    }

    /**
     * Concatenates the first pinyin letters of all supported Chinese
     * characters of a string; other characters are skipped.
     *
     * @param str the text to abbreviate
     * @return the pinyin initials
     * @throws Exception if a record of a contained character is incomplete
     */
    public static String GetFirstLetter(String str) throws Exception {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (isChineseChar(ch)) {
                sb.append(GetFirstLetter(ch));
            }
        }
        return sb.toString().trim();
    }

    /**
     * Returns the radical of a character.
     *
     * @param ch the character to look up
     * @return the radical field
     * @throws Exception if {@code ch} is outside the supported range or its
     *         record lacks the field
     */
    public static String GetBushou(Character ch) throws Exception {
        return GetCharInfo(ch, INDEX_BUSHOU);
    }

    /**
     * Returns the stroke count of a character.
     *
     * @param ch the character to look up
     * @return the stroke-count field
     * @throws Exception if {@code ch} is outside the supported range or its
     *         record lacks the field
     */
    public static String GetBihua(Character ch) throws Exception {
        return GetCharInfo(ch, INDEX_BIHUA);
    }

    /**
     * Returns the stroke order of a character.
     *
     * @param ch the character to look up
     * @return the stroke-order field
     * @throws Exception if {@code ch} is outside the supported range or its
     *         record lacks the field
     */
    public static String GetBishun(Character ch) throws Exception {
        return GetCharInfo(ch, INDEX_BISHUN);
    }

    /**
     * Returns the Wubi input code of a character.
     *
     * @param ch the character to look up
     * @return the Wubi field
     * @throws Exception if {@code ch} is outside the supported range or its
     *         record lacks the field
     */
    public static String GetWubi(Character ch) throws Exception {
        return GetCharInfo(ch, INDEX_WUBI);
    }

    /**
     * Returns the Zhengma input code of a character.
     *
     * @param ch the character to look up
     * @return the Zhengma field (may be empty)
     * @throws Exception if {@code ch} is outside the supported range or its
     *         record lacks the field
     */
    public static String GetZhengma(Character ch) throws Exception {
        return GetCharInfo(ch, INDEX_ZHENGMA);
    }

    /**
     * Returns the complete dictionary record of a character.
     *
     * @param ch the character to look up
     * @return the record line, or an empty string when the dictionary holds
     *         no record for {@code ch}
     * @throws Exception if {@code ch} is outside the supported range
     */
    public static String GetCharInfo(Character ch) throws Exception {
        if (!isChineseChar(ch)) {
            throw new Exception("‘" + ch + "‘ 不是一个汉字!");
        }

        // Match the whole first field, separator included, so that a code
        // point can never match a longer number sharing the same prefix.
        String key = String.valueOf((int) ch.charValue()) + ",";

        BufferedReader br = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(bytes), DEFAULT_CHARSET));
        try {
            String line;
            boolean first = true;
            while ((line = br.readLine()) != null) {
                if (first) {
                    first = false;
                    // Tolerate a UTF-8 byte order mark in front of the first record.
                    if (line.length() > 0 && line.charAt(0) == '\uFEFF') {
                        line = line.substring(1);
                    }
                }
                if (line.startsWith(key)) {
                    return line;
                }
            }
        } finally {
            br.close();
        }
        return "";
    }

    /**
     * Returns one field of a character's dictionary record.
     *
     * @param ch the character to look up
     * @param index position of the wanted field, one of the {@code INDEX_*} values
     * @return the field content
     * @throws Exception if {@code ch} is outside the supported range or the
     *         record has no field at {@code index}
     */
    private static String GetCharInfo(Character ch, int index) throws Exception {
        // The single-argument overload rejects unsupported characters.
        String[] fields = GetCharInfo(ch).split(",");
        if (index < 0 || index >= fields.length) {
            throw new Exception("请查看字典中" + ch + "汉字记录是否正确!");
        }
        return fields[index];
    }

    /**
     * Reads the whole dictionary file into {@link #bytes}.
     *
     * @throws Exception if the file cannot be opened or read completely
     */
    private static void LoadDictionary() throws Exception {
        File file = new File(DIC_FILENAME);
        byte[] buffer = new byte[(int) file.length()];
        FileInputStream fis = new FileInputStream(file);
        try {
            // A single read() call may return fewer bytes than requested.
            int filled = 0;
            while (filled < buffer.length) {
                int n = fis.read(buffer, filled, buffer.length - filled);
                if (n < 0) {
                    throw new java.io.IOException(
                            "Unexpected end of file while reading " + DIC_FILENAME);
                }
                filled += n;
            }
        } finally {
            fis.close();
        }
        bytes = buffer;
    }

    /**
     * Tells whether a character lies between {@link #CN_U16_CODE_MIN} and
     * {@link #CN_U16_CODE_MAX}, inclusive.
     *
     * @param ch the character to test; {@code null} yields {@code false}
     * @return {@code true} when the character is in the supported range
     */
    public static boolean isChineseChar(Character ch) {
        if (ch == null) {
            return false;
        }
        int code = ch.charValue();
        return code >= CN_U16_CODE_MIN && code <= CN_U16_CODE_MAX;
    }

    /**
     * Counts the lines of the loaded dictionary.
     *
     * @return the number of lines in {@link #bytes}
     * @throws Exception if the in-memory data cannot be read
     */
    private static int count() throws Exception {
        int cnt = 0;
        BufferedReader br = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(bytes), DEFAULT_CHARSET));
        try {
            while (br.readLine() != null) {
                cnt++;
            }
        } finally {
            br.close();
        }
        return cnt;
    }

    /** Returns a printable location of the dictionary file for log messages. */
    private static String dictionaryPath() {
        File file = new File(DIC_FILENAME);
        try {
            return file.getCanonicalPath();
        } catch (java.io.IOException e) {
            // The canonical form is only cosmetic; fall back to the absolute path.
            return file.getAbsolutePath();
        }
    }
}
3.Sample.java
如何使用字典
-
package com.siqi.dict;
-
-
-
-
-
-
-
public class Sample {
-
-
-
-
-
-
-
public static void main(String[] args) {
-
try {
-
long time = System.currentTimeMillis();
-
-
char ch = ‘打‘;
-
-
System.out.println("====打字信息开始====");
-
System.out.println("首字母:"+Dic.GetFirstLetter(ch));
-
System.out.println("拼音(中):"+Dic.GetPinyinCn(ch));
-
System.out.println("拼音(英):"+Dic.GetPinyinEn(ch));
-
System.out.println("部首:"+Dic.GetBushou(ch));
-
System.out.println("笔画数目:"+Dic.GetBihua(ch));
-
System.out.println("笔画:"+Dic.GetBishun(ch));
-
System.out.println("五笔:"+Dic.GetWubi(ch));
-
System.out.println("====打字信息结束====");
-
-
-
System.out.println("\r\n====汉字字符串====");
-
System.out.println(Dic.GetPinyinEn("返回汉字字符串的拼音。"));
-
System.out.println(Dic.GetPinyinCn("返回汉字字符串的拼音。"));
-
System.out.println(Dic.GetFirstLetter("返回汉字字符串的拼音。"));
-
System.out.println("====汉字字符串====\r\n");
-
-
System.out.println("用时:"+(System.currentTimeMillis()-time)+"毫秒");
-
-
} catch (Exception e) {
-
e.printStackTrace();
-
}
-
-
}
-
}
4.结果
-
====打字信息开始====
-
成功载入字典C:\workspaces\01_java\DictLocal\dic.txt ,用时:15毫秒,载入字符数20902
-
首字母:d
-
拼音(中):dǎ
-
拼音(英):da3
-
部首:扌
-
笔画数目:5
-
笔画:12112
-
五笔:RSH
-
====打字信息结束====
-
-
====汉字字符串====
-
fan3 hui2 han4 zi4 zi4 fu2 chuan4 di2 pin1 yin1 。
-
fǎn huí hàn zì zì fú chuàn dí pīn yīn 。
-
fhhzzfcdpy
-
====汉字字符串====
-
-
Memory(Used/Total) : 1539/15872 KB
-
用时:218毫秒
稍后再补充如何获取字典文件:我是通过抓取 http://www.zdic.net/zd/ 的网页来获取这些信息的
=============补充,如何获取汉字的信息================
=============所有的信息都是从汉典网站上获取的=========
目录结构为:

环境:Eclipse, JDK 1.6, 没有使用第三方的包,用到的类都是JDK自带的。
注意,项目源文件我都使用的是UTF-8的编码格式,如果不是,代码里面的汉字注释会显示乱码。
设置UTF-8:windows->Preferences->General->Workspace 页面上Text file encoding,选择Other UTF-8
包说明:
com.siqi.http
Httpclient.java是我写的一个简单的获取网页的类,用来获取网页内容;
com.siqi.dict
DictMain.java用来下载汉字网页,从中获取汉字的拼音信息,并保存到data.txt中(之后需要手动改名为data.dat)
DownloadThread.java用来下载网页(多线程)
com.siqi.pinyin
PinYin.java在执行过DictMain.java后,会生成一个data.txt,把这个文件改名为data.dat并拷贝到com.siqi.pinyin包下面,就可以调用PinYin.java里面的函数得到汉字的拼音了
PinYinEle.java一个汉字->拼音->Unicode的模型
源码:
Httpclient.java 可以用来获取网页,可以得到网页内容、网页编码和网页的header,是一个简化版的实现
-
package com.siqi.http;
-
-
import java.io.IOException;
-
import java.io.InputStream;
-
import java.net.Socket;
-
import java.net.URLEncoder;
-
import java.util.regex.Matcher;
-
import java.util.regex.Pattern;
-
-
-
-
-
-
-
-
/**
 * Minimal HTTP/1.0 client on top of a plain {@link Socket}.
 *
 * <p>{@link #processUrl(String, int)} sends one GET or body-less POST request
 * and stores the response. Afterwards {@link #getHeader()} returns the status
 * line and headers, {@link #getContent()} the body and {@link #getCharset()}
 * the encoding used to decode both.
 *
 * <p>Limitations visible in the code: no TLS (an {@code https} URL is
 * contacted in plain text on port 80 unless the URL names a port), no
 * redirects, no chunked transfer decoding. An instance keeps the state of the
 * last request and is not meant to be shared between threads.
 */
public class Httpclient {

    /** Request method selector: GET. */
    public static final int METHOD_GET = 0;

    /** Request method selector: POST without a body. */
    public static final int METHOD_POST = 1;

    /** Request template for GET; arguments are the path and the host. */
    public static final String HEADER_GET = "GET %s HTTP/1.0\r\nHOST: %s\r\n\r\n";

    /** Request template for POST; arguments are the path and the host. */
    public static final String HEADER_POST = "POST %s HTTP/1.0\r\nHOST: %s\r\nContent-Length: 0\r\n\r\n";

    /** Blank line that separates the response headers from the body. */
    public static final String CONTENT_SEPARATOR = "\r\n\r\n";

    /** Raw bytes of the last response. */
    private byte[] bytes = new byte[0];

    /** Status line and headers of the last response. */
    private String header = "";

    /** Body of the last response. */
    private String content = "";

    /** Encoding used when the page does not declare a usable one. */
    public static final String CHARSET_DEFAULT = "UTF-8";

    /** Encoding used to decode the last response. */
    private String charset = CHARSET_DEFAULT;

    /** Port used when the URL does not name one. */
    private static final int DEFAULT_PORT = 80;

    /** Socket read timeout in milliseconds. */
    private static final int READ_TIMEOUT_MILLIS = 3000;

    /** Number of leading response bytes searched for a charset declaration. */
    private static final int SNIFF_LIMIT = 4096;

    /** Single-byte encoding that keeps ASCII intact while sniffing. */
    private static final String SNIFF_CHARSET = "ISO-8859-1";

    /** Scheme plus authority of an http(s) URL. */
    private static final Pattern URL_PREFIX = Pattern.compile("https?://[^/]+");

    /** Authority of an http(s) URL. */
    private static final Pattern URL_HOST = Pattern.compile("(?<=https?://)[^/]+");

    /** Charset declaration in a meta tag, with or without a quote. */
    private static final Pattern META_CHARSET = Pattern.compile(
            "<meta.{0,100}?charset=[\"']?([a-z0-9_-]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Demonstration: fetches two pages and prints header, body and charset.
     *
     * @param args ignored
     * @throws Exception if a request fails
     */
    public static void main(String[] args) throws Exception {
        Httpclient httpclient = new Httpclient();

        httpclient.processUrl("http://m.baidu.com/");
        System.out.println("获取网页http://m.baidu.com/");
        System.out.println("报头为:\r\n" + httpclient.getHeader());
        System.out.println("内容为:\r\n" + httpclient.getContent());
        System.out.println("编码为:\r\n" + httpclient.getCharset());
        System.out.println("************************************");

        String url = String.format("http://m.baidu.com/s?word=%s",
                URLEncoder.encode("中国", CHARSET_DEFAULT));
        httpclient.processUrl(url, METHOD_POST);
        System.out.println("获取网页http://m.baidu.com/s?word=中国");
        System.out.println("报头为:\r\n" + httpclient.getHeader());
        System.out.println("内容为:\r\n" + httpclient.getContent());
        System.out.println("编码为:\r\n" + httpclient.getCharset());
    }

    /** Clears the state left behind by a previous request. */
    private void init() {
        this.bytes = new byte[0];
        this.charset = CHARSET_DEFAULT;
        this.header = "";
        this.content = "";
    }

    /**
     * @return status line and headers of the last response, without the
     *         terminating blank line
     */
    public String getHeader() {
        return header;
    }

    /**
     * @return body of the last response
     */
    public String getContent() {
        return content;
    }

    /**
     * @return name of the encoding used to decode the last response
     */
    public String getCharset() {
        return charset;
    }

    /**
     * Fetches a URL with GET.
     *
     * @param url absolute http URL
     * @throws Exception if the URL has no host or the request fails
     */
    public void processUrl(String url) throws Exception {
        processUrl(url, METHOD_GET);
    }

    /**
     * Fetches a URL and stores header, body and charset of the response.
     *
     * @param url absolute http URL; a port may follow the host
     * @param method {@link #METHOD_POST} for POST, any other value for GET
     * @throws Exception if the URL has no host or the request fails
     */
    public void processUrl(String url, int method) throws Exception {
        init();

        // A URL made of scheme and host only still needs a request path.
        if (URL_PREFIX.matcher(url).matches()) {
            url += "/";
        }

        String authority = getHostUrl(url);
        if (authority.length() == 0) {
            throw new IllegalArgumentException("Not an http(s) URL: " + url);
        }

        // Split "host:port"; without a numeric port the default is used.
        String host = authority;
        int port = DEFAULT_PORT;
        int colon = authority.lastIndexOf(':');
        if (colon >= 0) {
            try {
                port = Integer.parseInt(authority.substring(colon + 1));
                host = authority.substring(0, colon);
            } catch (NumberFormatException ignored) {
                // No numeric port after the colon: keep the authority as host name.
            }
        }

        String template = (method == METHOD_POST) ? HEADER_POST : HEADER_GET;
        String request = String.format(template, getSubUrl(url), authority);

        Socket socket = new Socket(host, port);
        try {
            socket.setSoTimeout(READ_TIMEOUT_MILLIS);
            socket.getOutputStream().write(request.getBytes(CHARSET_DEFAULT));
            socket.getOutputStream().flush();
            // HTTP/1.0: the server closes the connection at the end of the response.
            this.bytes = InputStream2ByteArray(socket.getInputStream());
        } finally {
            socket.close();
        }

        // Look for a charset declaration in the leading part of the response.
        String head = new String(this.bytes, 0,
                Math.min(this.bytes.length, SNIFF_LIMIT), SNIFF_CHARSET);
        this.charset = sniffCharset(head);

        String text = new String(this.bytes, this.charset);
        int headerEnd = text.indexOf(CONTENT_SEPARATOR);
        if (headerEnd < 0) {
            // No blank line: treat the whole response as header.
            this.header = text;
            this.content = "";
        } else {
            this.header = text.substring(0, headerEnd);
            this.content = text.substring(headerEnd + CONTENT_SEPARATOR.length());
        }
    }

    /**
     * Extracts the charset declared in a meta tag.
     *
     * @param head leading part of the response
     * @return the declared charset when this JVM supports it, otherwise
     *         {@link #CHARSET_DEFAULT}
     */
    private static String sniffCharset(String head) {
        Matcher mat = META_CHARSET.matcher(head);
        if (mat.find()) {
            String name = mat.group(1);
            try {
                if (java.nio.charset.Charset.isSupported(name)) {
                    return name;
                }
            } catch (IllegalArgumentException ignored) {
                // Syntactically illegal charset name: use the default.
            }
        }
        return CHARSET_DEFAULT;
    }

    /**
     * Returns the authority of an http(s) URL, i.e. the text between the
     * scheme and the first following slash, including a port if present.
     *
     * @param url the URL to inspect
     * @return the authority, or an empty string when {@code url} is not an
     *         http(s) URL
     */
    public static String getHostUrl(String url) {
        Matcher mat = URL_HOST.matcher(url);
        return mat.find() ? mat.group() : "";
    }

    /**
     * Returns the part of an http(s) URL that follows the authority.
     *
     * @param url the URL to inspect
     * @return path and query, {@code "/"} when the URL ends after the host,
     *         or an empty string when {@code url} is not an http(s) URL
     */
    public static String getSubUrl(String url) {
        Matcher mat = URL_PREFIX.matcher(url);
        if (!mat.find()) {
            return "";
        }
        // Cut at the end of the match, which need not start at index 0.
        String rest = url.substring(mat.end());
        return rest.length() == 0 ? "/" : rest;
    }

    /**
     * Concatenates two byte arrays.
     *
     * @param b1 first array
     * @param b2 second array
     * @return a new array holding {@code b1} followed by {@code b2}
     */
    public static byte[] ByteArrayCat(byte[] b1, byte[] b2) {
        byte[] b = new byte[b1.length + b2.length];
        System.arraycopy(b1, 0, b, 0, b1.length);
        System.arraycopy(b2, 0, b, b1.length, b2.length);
        return b;
    }

    /**
     * Reads a stream until its end.
     *
     * @param is the stream to drain; it is not closed
     * @return all bytes read
     * @throws IOException if reading fails
     */
    public static byte[] InputStream2ByteArray(InputStream is)
            throws IOException {
        // Growing buffer instead of re-allocating the whole result per chunk.
        java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int len;
        while ((len = is.read(chunk)) != -1) {
            out.write(chunk, 0, len);
        }
        return out.toByteArray();
    }
}
DictMain.java
-
package com.siqi.dict;
-
-
import java.io.File;
-
import java.io.FileReader;
-
import java.io.FileWriter;
-
import java.io.IOException;
-
import java.util.regex.Matcher;
-
import java.util.regex.Pattern;
-
-
-
-
-
-
-
public class DictMain {
-
-
-
-
public static final String SAVEPATH = "dict/pages/";
-
-
-
-
public static final String FILEPATH = SAVEPATH + "%s.html";
-
-
-
-
-
public static final String DATA_FILENAME = "data.txt";
-
-
-
-
-
public static final int UNICODE_MIN = 0x4E00;
-
-
-
-
-
public static final int UNICODE_MAX = 0x9FFF;
-
-
-
-
-
-
-
-
-
-
static{
-
-
for (int i = UNICODE_MIN; i <= UNICODE_MAX; i++) {
-
-
String filePath = String.format(FILEPATH, i);
-
File file = new File(filePath);
-
if (!file.exists()) {
-
new DownloadThread(i).start();
-
}
-
}
-
-
-
StringBuffer sb = new StringBuffer();
-
for (int i = UNICODE_MIN; i <= UNICODE_MAX; i++) {
-
String word = new String(Character.toChars(i));
-
String pinyin = getPinYinFromWebpageFile(String.format(FILEPATH, i));
-
String str = String.format("%s,%s,%s\r\n", i,word,pinyin);
-
System.out.print(str);
-
sb.append(str);
-
}
-
-
-
try {
-
FileWriter fw = new FileWriter(DATA_FILENAME);
-
fw.write(sb.toString());
-
fw.close();
-
} catch (IOException e) {
-
e.printStackTrace();
-
}
-
-
}
-
-
public static void main(String[] args){
-
-
System.out.println("All prepared!");
-
}
-
-
-
-
-
-
-
private static String getPinYinFromWebpageFile(String file) {
-
try {
-
-
char[] buff = new char[(int) new File(file).length()];
-
-
FileReader reader = new FileReader(file);
-
reader.read(buff);
-
reader.close();
-
-
String content = new String(buff);
-
-
Matcher mat = Pattern.compile("(?<=spf\\(\")[a-z1-4]{0,100}",
-
Pattern.CASE_INSENSITIVE).matcher(content);
-
if (mat.find()) {
-
return mat.group();
-
}
-
-
mat = Pattern.compile("(?<=class=\"dicpy\">)[a-z1-4]{0,100}",
-
Pattern.CASE_INSENSITIVE).matcher(content);
-
if (mat.find()) {
-
return mat.group();
-
}
-
} catch (Exception e) {
-
e.printStackTrace();
-
}
-
-
return "";
-
-
}
-
}
DownloadThread.java
-
package com.siqi.dict;
-
-
import java.io.File;
-
import java.io.FileWriter;
-
import java.net.URLEncoder;
-
import java.util.regex.Matcher;
-
import java.util.regex.Pattern;
-
-
import com.siqi.http.Httpclient;
-
-
-
-
-
-
-
-
public class DownloadThread extends Thread{
-
-
-
-
-
public static int THREAD_MAX = 10;
-
-
-
-
-
public static int RETRY_MAX = 5;
-
-
-
-
-
public static String SEARCH_URL = "http://www.zdic.net/search/?q=%s";
-
-
-
-
-
private static int threadCnt = 0;
-
-
-
-
-
private int unicode = 0;
-
-
-
-
-
static{
-
try {
-
File file = new File(DictMain.SAVEPATH);
-
if (!file.exists()) {
-
file.mkdirs();
-
}
-
} catch (Exception e) {
-
-
}
-
}
-
-
-
-
-
-
-
public static synchronized int threadCnt(int i){
-
threadCnt += i;
-
return threadCnt;
-
}
-
-
-
-
-
-
public DownloadThread(int unicode){
-
-
while(threadCnt(0)>THREAD_MAX){
-
try {
-
Thread.sleep(500);
-
} catch (InterruptedException e) {
-
}
-
}
-
-
threadCnt(1);
-
this.unicode = unicode;
-
}
-
-
@Override
-
public void run() {
-
long t1 = System.currentTimeMillis();
-
-
String filePath = String.format(DictMain.FILEPATH, unicode);
-
-
String word = new String(Character.toChars(unicode));
-
-
boolean downloaded = false;
-
int retryCnt = 0;
-
while (!downloaded && retryCnt < RETRY_MAX) {
-
try {
-
String content = DownloadPage(word);
-
SaveToFile(filePath, content);
-
downloaded = true;
-
-
threadCnt(-1);
-
System.out.println(String.format("%s, %s, 下载成功!线程数目:%s 用时:%s",
-
unicode, word, threadCnt(0), System.currentTimeMillis()
-
- t1));
-
return;
-
} catch (Exception e) {
-
retryCnt++;
-
}
-
}
-
-
threadCnt(-1);
-
System.err.println(String.format("%s, %s, 下载失败!线程数目:%s 用时:%s", unicode,
-
word, threadCnt(0), System.currentTimeMillis() - t1));
-
}
-
-
-
-
-
-
-
-
public String DownloadPage(String word) throws Exception{
-
-
Httpclient httpclient = new Httpclient();
-
String url = String.format(SEARCH_URL, URLEncoder.encode(word, "UTF-8"));
-
httpclient.processUrl(url, Httpclient.METHOD_POST);
-
-
-
-
Matcher mat = Pattern.compile("(?<=HREF=\")[^\"]+").matcher(httpclient.getContent());
-
if(mat.find()){
-
httpclient.processUrl(mat.group());
-
}
-
-
return httpclient.getContent();
-
}
-
-
-
-
-
-
-
public void SaveToFile(String file, String content){
-
try {
-
FileWriter fw = new FileWriter(file);
-
fw.write(content);
-
fw.close();
-
} catch (Exception e) {
-
e.printStackTrace();
-
}
-
}
-
}
PinYin.java
PinYinEle.java
-
package com.siqi.pinyin;
-
-
/**
 * One record of the pinyin data file: code point, character and pinyin.
 */
public class PinYinEle {

    /** Decimal code point of the character; 0 when unknown. */
    private int unicode;

    /** The character as a string. */
    private String ch;

    /** Pinyin of the character; may be empty. */
    private String pinyin;

    /** Creates an empty record. */
    public PinYinEle() {}

    /**
     * Parses a record of the form {@code codepoint,character,pinyin}.
     *
     * <p>A {@code null} argument or a line that does not consist of exactly
     * three fields leaves the record empty. An empty pinyin field is accepted.
     * A code point that is not a number leaves {@link #getUnicode()} at 0
     * while the other two fields are still taken over.
     *
     * @param str the line to parse, may be {@code null}
     */
    public PinYinEle(String str) {
        if (str == null) {
            return;
        }

        // Limit -1 keeps a trailing empty field, so "40870,X," still has three fields.
        String[] strs = str.split(",", -1);
        if (strs.length != 3) {
            return;
        }

        try {
            this.unicode = Integer.parseInt(strs[0].trim());
        } catch (NumberFormatException ignored) {
            // Deliberately tolerated: the code point stays 0.
        }
        this.ch = strs[1];
        this.pinyin = strs[2];
    }

    /** @return the decimal code point, or 0 when unknown */
    public int getUnicode() {
        return unicode;
    }

    /** @param unicode the decimal code point */
    public void setUnicode(int unicode) {
        this.unicode = unicode;
    }

    /** @return the character, or {@code null} when not set */
    public String getCh() {
        return ch;
    }

    /** @param ch the character */
    public void setCh(String ch) {
        this.ch = ch;
    }

    /** @return the pinyin, or {@code null} when not set */
    public String getPinyin() {
        return pinyin;
    }

    /** @param pinyin the pinyin */
    public void setPinyin(String pinyin) {
        this.pinyin = pinyin;
    }
}
生成的data.dat里面内容(部分)为:
-
19968,一,yi1
-
19969,丁,ding1
-
19970,丂,kao3
-
19971,七,qi1
-
19972,丄,shang4
-
19973,丅,xia4
-
19974,丆,han3
-
19975,万,wan4
-
19976,丈,zhang4
-
19977,三,san1
-
19978,上,shang4
-
19979,下,xia4
-
19980,丌,qi2
-
19981,不,bu4
运行DictMain.java结果

执行时间可能会有几十分钟到几小时不等,总共会下载200+M的网页(20000+个网页),每次运行都会先判断以前下载过没有,所以结束掉程序不会有影响

显示All prepared!表示已经准备好了,刷新项目文件夹,可以看到网页保存在dict/pages下面,不建议在Eclipse中打开那个文件夹,因为里面有2万多个文件,会卡死Eclipse,
还可以看到生成了data.txt文件,改为data.dat并复制到pinyin文件夹下面
运行PinYin.java
可以看到"大家haome12345"的拼音:
-
包含声调:da4jia1haome12345
-
不包含声调:dajiahaome12345
上面只是显示了如何获取拼音,获取笔画等的方法类似,在这里就不演示了。
java实现汉字字典
标签:
原文地址:http://blog.csdn.net/u012868901/article/details/51546719