标签:
剧情提要:[工程师阿伟]正在和[机器小伟]一起研究[始制文字 世界上的语系及语言]。
下面看一下音节:
<span style="font-size:18px;">import struct; import math; ''' import os from os.path import join, getsize import voice.tian, voice.di, voice.xuan, voice.huang; ''' #辅音音节 class Voice(): def idleWave(self): return [b'\x00']*200; def consonant(self): terms = [ 'b', 'c', 'ch', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 'sh', 't', 'v', 'w', 'x', 'y', 'z' ]; return terms; def vowel(self): terms = [ 'a', 'ai', 'an', 'ang', 'ao', 'e', 'ei', 'en', 'eng', 'er', 'i', 'ian', 'iang', 'ie', 'in', 'ing', 'iong', 'iu' 'o', 'oi', 'on', 'ong', 'ou', 'u', 'ua', 'uan', 'uang', 'ue', 'uei', 'uen', 'ueng', 'üe', 'ün' ]; return terms; def pingYin(self): terms = [#410项 'a', 'ai', 'an', 'ang', 'ao', 'ba', 'bai', 'ban', 'bang', 'bao', 'bei', 'ben', 'beng', 'bi', 'bian', 'biao', 'bie', 'bin', 'bing', 'bo', 'bu', 'ca', 'cai', 'can', 'cang', 'cao', 'ce', 'cen', 'ceng', 'cha', 'chai', 'chan', 'chang', 'chao', 'che', 'chen', 'cheng', 'chi', 'chong', 'chou', 'chu', 'chua', 'chuai', 'chuan', 'chuang', 'chui', 'chun', 'chuo', 'ci', 'cong', 'cou', 'cu', 'cuan', 'cui', 'cun', 'cuo', 'da', 'dai', 'dan', 'dang', 'dao', 'de', 'den', 'dei', 'deng', 'di', 'dia', 'dian', 'diao', 'die', 'ding', 'diu', 'dong', 'dou', 'du', 'duan', 'dui', 'dun', 'duo', 'e', 'ei', 'en', 'eng', 'er', 'fa', 'fan', 'fang', 'fei', 'fen', 'feng', 'fo', 'fou', 'fu', 'ga', 'gai', 'gan', 'gang', 'gao', 'ge', 'gei', 'gen', 'geng', 'gong', 'gou', 'gu', 'gua', 'guai', 'guan', 'guang', 'gui', 'gun', 'guo', 'ha', 'hai', 'han', 'hang', 'hao', 'he', 'hei', 'hen', 'heng', 'hong', 'hou', 'hu', 'hua', 'huai', 'huan', 'huang', 'hui', 'hun', 'huo', 'ji', 'jia', 'jian', 'jiang', 'jiao', 'jie', 'jin', 'jing', 'jiong', 'jiu', 'ju', 'juan', 'jue', 'jun', 'ka', 'kai', 'kan', 'kang', 'kao', 'ke', 'ken', 'keng', 'kong', 'kou', 'ku', 'kua', 'kuai', 'kuan', 'kuang', 'kui', 'kun', 'kuo', 'la', 'lai', 'lan', 'lang', 'lao', 'le', 'lei', 'leng', 'li', 'lia', 'lian', 'liang', 'liao', 'lie', 'lin', 'ling', 'liu', 'long', 'lou', 'lu', 'lü', 'luan', 'lue', 'lüe', 'lun', 'luo', 'm', 'ma', 'mai', 'man', 'mang', 'mao', 'me', 'mei', 'men', 'meng', 'mi', 'mian', 'miao', 'mie', 'min', 'ming', 'miu', 'mo', 'mou', 'mu', 'na', 'nai', 'nan', 'nang', 'nao', 'ne', 'nei', 'nen', 'neng', 'ni', 'nian', 'niang', 'niao', 'nie', 'nin', 'ning', 'niu', 'nong', 'nou', 'nu', 'nü', 'nuan', 'nüe', 'nuo', 'nun', 'o', 'ou', 'pa', 'pai', 'pan', 'pang', 'pao', 'pei', 'pen', 'peng', 'pi', 'pian', 'piao', 'pie', 'pin', 'ping', 'po', 'pou', 'pu', 'qi', 'qia', 'qian', 'qiang', 'qiao', 'qie', 'qin', 'qing', 'qiong', 'qiu', 'qu', 'quan', 'que', 'qun', 'ran', 'rang', 'rao', 're', 'ren', 'reng', 'ri', 'rong', 'rou', 'ru', 'ruan', 'rui', 'run', 'ruo', 'sa', 'sai', 'san', 'sang', 'sao', 'se', 'sen', 'seng', 'sha', 'shai', 'shan', 'shang', 'shao', 'she', 'shei', 'shen', 'sheng', 'shi', 'shou', 'shu', 'shua', 'shuai', 'shuan', 'shuang', 'shui', 'shun', 'shuo', 'si', 'song', 'sou', 'su', 'suan', 'sui', 'sun', 'suo', 'ta', 'tai', 'tan', 'tang', 'tao', 'te', 'teng', 'ti', 'tian', 'tiao', 'tie', 'ting', 'tong', 'tou', 'tu', 'tuan', 'tui', 'tun', 'tuo', 'wa', 'wai', 'wan', 'wang', 'wei', 'wen', 'weng', 'wo', 'wu', 'xi', 'xia', 'xian', 'xiang', 'xiao', 'xie', 'xin', 'xing', 'xiong', 'xiu', 'xu', 'xuan', 'xue', 'xun', 'ya', 'yan', 'yang', 'yao', 'ye', 'yi', 'yin', 'ying', 'yo', 'yong', 'you', 'yu', 'yuan', 'yue', 'yun', 'za', 'zai', 'zan', 'zang', 'zao', 'ze', 'zei', 'zen', 'zeng', 'zha', 'zhai', 'zhan', 'zhang', 'zhao', 'zhe', 'zhei', 'zhen', 'zheng', 'zhi', 'zhong', 'zhou', 'zhu', 'zhua', 'zhuai', 'zhuan', 'zhuang', 'zhui', 'zhun', 'zhuo', 'zi', 'zong', 'zou', 'zu', 'zuan', 'zui', 'zun', 'zuo', ]; return terms; def combineYinJie(self): v = self.vowel(); c = self.consonant(); com_ = []; for i in range(len(v)): if (v[i][0] == 'i' or v[i][0] == 'u'): continue; else: com_.append(v[i]); for i in range(len(c)): for j in range(len(v)): com_.append(c[i]+v[j]); return com_; #从音节的录音中(.wav)处理出音节,把数据写入文件 def yinJieProcess(self, sFile): fileSize = getFileSize(sFile); if (fileSize < 44): return; c = []; #每次读取1000个字节 perRead = 100; with open(sFile, 'rb') as f: #这是标准.wav的文件头长度 a = f.read(44); index = 44; start = 0; end = 0; spareCount = 0; while (index < fileSize): a = f.read(perRead); b = list(a); count = countZero(b[1:-1:2], 64); if (count > perRead / 2* 17/20): #认为这是空白 index += perRead; if (start == 0): #前端空白 continue; else: #后端空白 end = index; if (end -start < 5000): continue; else: index = fileSize+1; break; else: if (start == 0): start = index; #正部开始 index += perRead; validLength = end-start; f.seek(start, 0);#第二个参数whence = 0表示从开头,=2表示从结尾倒着数 data = list(f.read(validLength)); f.close(); print('有效部分从{0}字节开始,到{1}字节结束,共有{2}个字节。即{3}秒到{4}秒' .format(start ,end, validLength, round(start/22050, 3), round(end/22050, 3))); dataArray = []; for i in range(len(data)): byte = struct.pack('B', data[i]); dataArray.append(byte); print('文件写入开始。请至tone.py查收>>>'); fout = open('tone.py', 'w'); fout.write('tone_X = ['); size_1 = len(dataArray); for n in range(size_1): fout.write(str(dataArray[n])); if (n < size_1-1): fout.write(', '); fout.write('];'); fout.write('\r\n'); fout.close(); print('文件写入完毕。'); #把十进制数按照小尾字节序切割 def littleEndian(self, number, byte = 4): result = [0]*byte; for i in range(byte): result[i] = number%256; number//=256; return result; #把UltraEdit中的值字串转化为hex序列组 def hexExpr(self, string): resultString = ''; size = len(string); for i in range(size): if (i == 0 ): resultString += '0x'+string[i]; elif (string[i] == ' '): resultString += ', 0x'; else: resultString += string[i]; print(resultString); ### # @usage 写.wav文件,能把声波数据阵列用二进制写成.wav。 # @author mw # @date 2016年04月28日 星期四 14:31:34 # @param # @return # ### def writeWav(self, filename = 'soundwave'): byteArray = []; dataArray = []; #样本数据阵列 sampleArray = []; idleWave = self.idleWave(); sampleArray = self.waveDataGen(); #样本数据点数 N = len(sampleArray); times = 1; dataSize = N*times; fileSize = dataSize+44; #44为格式头部分所用字节数 #RIFF WAVE CHUNK RIFF_ID = [0x52, 0x49, 0x46, 0x46]; #'RIFF' RIFF_Size = self.littleEndian(fileSize-8, 4); #文件总字节数减去8 RIFF_Type = [0x57, 0x41,0x56, 0x45, 0x66, 0x6D, 0x74, 0x20]; #'WAVEfat ' #Format Chunk Format_10_17 = [0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00];#过滤4+格式2+声道2=8个字节 Format_18_1B = [0x11, 0x2B, 0x00, 0x00]; #采样频率0x2B11 = 11025 Format_1C_1F = [0x22, 0x56, 0x00, 0x00]; #比持率 = 频率*通道*样本位 = 22050 Format_20_23 = [0x02, 0x00, 0x10, 0x00]; #块对齐 = 通道数* 样本位数 = 1*2 = 2 #Fact Chunk(optional) #Data Chunk Data_24_27 = [0x64, 0x61, 0x74, 0x61]; #'DATA'标记 Data_Size = self.littleEndian(fileSize-44, 4); #下面的Data部分的字节数,文件总字节数-44 #RIFF WAVE CHUNK ''' for i in range(4): byte = struct.pack('B', RIFF_ID[i]); byteArray.append(byte); ''' RIFF = [b'R', b'I', b'F', b'F']; for i in range(4): byteArray.append(RIFF[i]); for i in range(4): byte = struct.pack('B', RIFF_Size[i]); byteArray.append(byte); ''' for i in range(8): byte = struct.pack('B', RIFF_Type[i]); byteArray.append(byte); #Format Chunk for i in range(8): byte = struct.pack('B', Format_10_17[i]); byteArray.append(byte); for i in range(4): byte = struct.pack('B', Format_18_1B[i]); byteArray.append(byte); for i in range(4): byte = struct.pack('B', Format_1C_1F[i]); byteArray.append(byte); for i in range(4): byte = struct.pack('B', Format_20_23[i]); byteArray.append(byte); #Data Chunk for i in range(4): byte = struct.pack('B', Data_24_27[i]); byteArray.append(byte); ''' #Format_28_2B是数据块大小,formatHead是它前面的所有部分 formatHead = [b'W', b'A', b'V', b'E', b'f', b'm', b't', b' ', b'\x10', b'\x00', b'\x00', b'\x00', b'\x01', b'\x00', b'\x01', b'\x00', b'\x11', b'+', b'\x00', b'\x00', b'"', b'V', b'\x00', b'\x00', b'\x02', b'\x00', b'\x10', b'\x00', b'd', b'a', b't', b'a']; for i in range(len(formatHead)): byteArray.append(formatHead[i]); for i in range(4): byte = struct.pack('B', Data_Size[i]); byteArray.append(byte); #写出到文件 print('文件写入开始。>>>'); s = ''; s = filename+'.wav'; fout= open(s, 'wb'); size = len(byteArray); for i in range(size): fout.write(byteArray[i]); size = len(sampleArray); sizeIdle = len(idleWave); for j in range(times): for i in range(size): fout.write(sampleArray[i]); fout.close(); print('文件写入完毕。'); #声音数据的生成 def waveDataGen(self): tian1 = voice.tian.tone_tian1; di1 = voice.di.tone_di1; xuan1 = voice.xuan.tone_xuan1; huang1 = voice.huang.tone_huang1; idle = self.idleWave(); data = []; sArray = [ 'tian1', 'di1', 'xuan1', 'huang1' ,'di1', 'xuan1', 'huang1', 'tian1']*2; for i in range(len(sArray)): data += eval(sArray[i]); data += idle*30; return data; def read(): fin = open('../input.txt', 'r'); array = []; for line in fin.readlines(): if (line[0]!= '拼' and line[0].isalpha()): array.append(line[:-1]); #print(len(array)); fin.close(); for i in range(len(array)): print('\''+array[i]+'\', ', end = ''); if (i%10 == 9): print(''); #print(array); def getFileSize(sFile): size = getsize(sFile); #print('文件{0}的大小是{1}字节'.format(sFile, size)); return size; def countZero(array, judgeValue): count = 0; for i in range(len(array)): if (abs(array[i]) < judgeValue): count+=1; return count; def tmp(): if (0): pron = Voice(); terms = pron.combineYinJie(); print('音节数共有{0}个'.format(len(terms))); read(); ''' index = 2; pron = Voice(); if (index == 1): #把录音的音节去除空白部分 pron.yinJieProcess('huang1.wav'); elif (index == 2): #写声音文件 pron.writeWav(); ''' if __name__ == '__main__': tmp(); </span>
本节到此结束,欲知后事如何,请看下回分解。
标签:
原文地址:http://blog.csdn.net/mwsister/article/details/51814093