# Tags:
#!/usr/bin/env python3
# coding: utf-8
# 2015-11-04 21:23:17.230000
"""Convert Chinese characters to pinyin or Wubi codes via a lookup table.

Adapted from "A Python library for converting Chinese characters to Hanyu
Pinyin" (jb51.net): http://www.jb51.net/article/65496.htm

The dictionary table (the file must be converted to UTF-8) was downloaded
from "Chinese character code table" (CSDN download channel):
http://download.csdn.net/detail/slowwind9999/291213

Results can be checked with the online pinyin/Wubi converter (with tones):
http://tool.lu/py5bconvert/

To convert characters to yet another encoding, model a new method on
``hanzi2pinyin`` or ``hanzi2wubi``: add one more dictionary attribute and
fill it in ``load_word``.

This version runs on Python 3. The Python 2 ``reload(sys)`` /
``sys.setdefaultencoding`` hack is gone because the table is decoded
explicitly as UTF-8 when it is read.
"""

import itertools
import logging
import os.path
import sys

__version__ = '0.9'
__all__ = ["Hanzi2code"]

logger = logging.getLogger(__name__)


class Hanzi2code(object):
    """Look up per-character codes (pinyin, Wubi) loaded from a table file.

    Each data line of the table is split on whitespace: field 0 is the
    character, field 1 goes into ``word_dict`` (used by ``hanzi2pinyin``)
    and field 2 goes into ``wubi_dict`` (used by ``hanzi2wubi``).
    """

    # Number of leading lines of the table file that are skipped as header.
    HEADER_LINES = 6

    def __init__(self, dict_file='code.txt'):
        """Remember the table path and load it immediately.

        Args:
            dict_file: path of the UTF-8 encoded table file.

        Raises:
            IOError: if ``dict_file`` does not exist.
        """
        self.word_dict = {}
        self.wubi_dict = {}
        self.dict_file = dict_file
        # The table is loaded as soon as the instance is created.
        self.load_word()

    def load_word(self):
        """Fill ``word_dict`` and ``wubi_dict`` from ``self.dict_file``.

        The first ``HEADER_LINES`` lines are skipped and blank lines are
        ignored. A line with fewer than three fields is reported through
        ``logging`` and whatever fields it does have are still recorded.

        Raises:
            IOError: if ``self.dict_file`` does not exist.
        """
        if not os.path.exists(self.dict_file):
            raise IOError("NotFoundFile: %s" % self.dict_file)

        first_data_line = self.HEADER_LINES + 1
        with open(self.dict_file, encoding="utf-8") as f_obj:
            # Stream the file instead of materialising every line up front.
            data_lines = itertools.islice(f_obj, self.HEADER_LINES, None)
            for line_no, f_line in enumerate(data_lines, first_data_line):
                fields = f_line.split()
                if not fields:
                    continue
                if len(fields) < 3:
                    logger.warning(
                        "%s:%d: expected at least 3 fields, got %d",
                        self.dict_file, line_no, len(fields))
                if len(fields) >= 2:
                    self.word_dict[fields[0]] = fields[1]
                if len(fields) >= 3:
                    self.wubi_dict[fields[0]] = fields[2]

    def hanzi2pinyin_split(self, string="", split=""):
        """Return the pinyin of ``string``, optionally joined by ``split``.

        Returns:
            The list from ``hanzi2pinyin`` when ``split`` is the empty
            string, otherwise that list joined into one ``str`` with
            ``split`` as the separator.
        """
        result = self.hanzi2pinyin(string=string)
        if split == "":
            return result
        return split.join(result)

    def hanzi2code(self, string='', dic=None):
        """Map every character of ``string`` through ``dic``.

        Args:
            string: text to convert; ``bytes`` input is decoded as UTF-8.
            dic: mapping from a single character to its code. ``None``
                (the default) behaves like an empty mapping.

        Returns:
            A list with one entry per character: the first
            whitespace-separated token of the mapped code, lower-cased.
            A character missing from ``dic`` is passed through
            (lower-cased); a whitespace character is passed through
            unchanged.
        """
        if dic is None:
            dic = {}
        if isinstance(string, bytes):
            string = string.decode("utf-8")

        result = []
        for char in string:
            code = dic.get(char, char)
            tokens = code.split()
            # A whitespace-only value has no tokens; keep it as it is
            # instead of failing on tokens[0].
            result.append(tokens[0].lower() if tokens else code)
        return result

    def hanzi2wubi(self, string=''):
        """Return the list of Wubi codes for the characters of ``string``."""
        return self.hanzi2code(string, self.wubi_dict)

    def hanzi2pinyin(self, string=''):
        """Return the list of pinyin codes for the characters of ``string``."""
        return self.hanzi2code(string, self.word_dict)


def main():
    """Run a small demo using the default ``code.txt`` table."""
    test = Hanzi2code()
    string = "钓鱼岛是中国的"
    print("in: %s" % string)
    print("out: %s" % str(test.hanzi2pinyin(string=string)))
    print("out: %s" % test.hanzi2pinyin_split(string=string, split="-"))
    print("out: %s" % str(test.hanzi2wubi(string=string)))
    return 0


if __name__ == "__main__":
    sys.exit(main())
# Tags:
# Original article: http://www.cnblogs.com/QIAOXINGXING001/p/5023117.html