码迷,mamicode.com
首页 > 编程语言 > 详细

[从头读历史] 第281节 始制文字 世界上的语系及语言

时间:2016-07-03 11:56:23      阅读:235      评论:0      收藏:0      [点我收藏+]

标签:

剧情提要:
春秋时期,中华混沌帝国一片兴旺发达的景象,但这在厄尔斯星球上并不是孤例。
那么在那个时期,星球各地的人们用什么语言在交流呢?他们的文字又是怎样的呢?

正剧开始:
星历2016年07月03日 09:36:51, 银河系厄尔斯星球中华帝国江南行省。

[工程师阿伟]正在和[机器小伟]一起研究[始制文字 世界上的语系及语言]。


技术分享


技术分享


技术分享


技术分享


技术分享


技术分享


技术分享


下面看一下音节:


技术分享


<span style="font-size:18px;">import struct;
import math;
'''
import os  
from os.path import join, getsize
import voice.tian,  voice.di, voice.xuan, voice.huang;
'''

#辅音音节
class Voice():
    def idleWave(self):
        return [b'\x00']*200;
    
    def consonant(self):
        terms = [
            'b', 'c', 'ch', 'd', 'f', 'g',
            'h', 'j', 'k', 'l', 'm', 'n',
            'p', 'q', 'r', 's', 'sh', 't',
            'v', 'w', 'x', 'y', 'z'
        ];
        return terms;

    def vowel(self):
        terms = [
            'a', 'ai', 'an', 'ang', 'ao',
            'e', 'ei', 'en', 'eng', 'er',
            'i', 'ian', 'iang', 'ie', 'in', 'ing', 'iong', 'iu'
            'o', 'oi', 'on', 'ong', 'ou', 
            'u', 'ua', 'uan', 'uang', 'ue', 'uei', 'uen', 'ueng', 
            'üe', 'ün'
        ];
        return terms;

    def pingYin(self):
        terms = [#410项
'a', 'ai', 'an', 'ang', 'ao', 'ba', 'bai', 'ban', 'bang', 'bao', 
'bei', 'ben', 'beng', 'bi', 'bian', 'biao', 'bie', 'bin', 'bing', 'bo', 
'bu', 'ca', 'cai', 'can', 'cang', 'cao', 'ce', 'cen', 'ceng', 'cha', 
'chai', 'chan', 'chang', 'chao', 'che', 'chen', 'cheng', 'chi', 'chong', 'chou', 
'chu', 'chua', 'chuai', 'chuan', 'chuang', 'chui', 'chun', 'chuo', 'ci', 'cong', 
'cou', 'cu', 'cuan', 'cui', 'cun', 'cuo', 'da', 'dai', 'dan', 'dang', 
'dao', 'de', 'den', 'dei', 'deng', 'di', 'dia', 'dian', 'diao', 'die', 
'ding', 'diu', 'dong', 'dou', 'du', 'duan', 'dui', 'dun', 'duo', 'e', 
'ei', 'en', 'eng', 'er', 'fa', 'fan', 'fang', 'fei', 'fen', 'feng', 
'fo', 'fou', 'fu', 'ga', 'gai', 'gan', 'gang', 'gao', 'ge', 'gei', 
'gen', 'geng', 'gong', 'gou', 'gu', 'gua', 'guai', 'guan', 'guang', 'gui', 
'gun', 'guo', 'ha', 'hai', 'han', 'hang', 'hao', 'he', 'hei', 'hen', 
'heng', 'hong', 'hou', 'hu', 'hua', 'huai', 'huan', 'huang', 'hui', 'hun', 
'huo', 'ji', 'jia', 'jian', 'jiang', 'jiao', 'jie', 'jin', 'jing', 'jiong', 
'jiu', 'ju', 'juan', 'jue', 'jun', 'ka', 'kai', 'kan', 'kang', 'kao', 
'ke', 'ken', 'keng', 'kong', 'kou', 'ku', 'kua', 'kuai', 'kuan', 'kuang', 
'kui', 'kun', 'kuo', 'la', 'lai', 'lan', 'lang', 'lao', 'le', 'lei', 
'leng', 'li', 'lia', 'lian', 'liang', 'liao', 'lie', 'lin', 'ling', 'liu', 
'long', 'lou', 'lu', 'lü', 'luan', 'lue', 'lüe', 'lun', 'luo', 'm', 
'ma', 'mai', 'man', 'mang', 'mao', 'me', 'mei', 'men', 'meng', 'mi', 
'mian', 'miao', 'mie', 'min', 'ming', 'miu', 'mo', 'mou', 'mu', 'na', 
'nai', 'nan', 'nang', 'nao', 'ne', 'nei', 'nen', 'neng', 'ni', 'nian', 
'niang', 'niao', 'nie', 'nin', 'ning', 'niu', 'nong', 'nou', 'nu', 'nü', 
'nuan', 'nüe', 'nuo', 'nun', 'o', 'ou', 'pa', 'pai', 'pan', 'pang', 
'pao', 'pei', 'pen', 'peng', 'pi', 'pian', 'piao', 'pie', 'pin', 'ping', 
'po', 'pou', 'pu', 'qi', 'qia', 'qian', 'qiang', 'qiao', 'qie', 'qin', 
'qing', 'qiong', 'qiu', 'qu', 'quan', 'que', 'qun', 'ran', 'rang', 'rao', 
're', 'ren', 'reng', 'ri', 'rong', 'rou', 'ru', 'ruan', 'rui', 'run', 
'ruo', 'sa', 'sai', 'san', 'sang', 'sao', 'se', 'sen', 'seng', 'sha', 
'shai', 'shan', 'shang', 'shao', 'she', 'shei', 'shen', 'sheng', 'shi', 'shou', 
'shu', 'shua', 'shuai', 'shuan', 'shuang', 'shui', 'shun', 'shuo', 'si', 'song', 
'sou', 'su', 'suan', 'sui', 'sun', 'suo', 'ta', 'tai', 'tan', 'tang', 
'tao', 'te', 'teng', 'ti', 'tian', 'tiao', 'tie', 'ting', 'tong', 'tou', 
'tu', 'tuan', 'tui', 'tun', 'tuo', 'wa', 'wai', 'wan', 'wang', 'wei', 
'wen', 'weng', 'wo', 'wu', 'xi', 'xia', 'xian', 'xiang', 'xiao', 'xie', 
'xin', 'xing', 'xiong', 'xiu', 'xu', 'xuan', 'xue', 'xun', 'ya', 'yan', 
'yang', 'yao', 'ye', 'yi', 'yin', 'ying', 'yo', 'yong', 'you', 'yu', 
'yuan', 'yue', 'yun', 'za', 'zai', 'zan', 'zang', 'zao', 'ze', 'zei', 
'zen', 'zeng', 'zha', 'zhai', 'zhan', 'zhang', 'zhao', 'zhe', 'zhei', 'zhen', 
'zheng', 'zhi', 'zhong', 'zhou', 'zhu', 'zhua', 'zhuai', 'zhuan', 'zhuang', 'zhui', 
'zhun', 'zhuo', 'zi', 'zong', 'zou', 'zu', 'zuan', 'zui', 'zun', 'zuo',
            ];
        return terms;

    def combineYinJie(self):
        v = self.vowel();
        c = self.consonant();
        com_ = [];

        for i in range(len(v)):
            if (v[i][0] == 'i' or v[i][0] == 'u'):
                continue;
            else:
                com_.append(v[i]);

        for i in range(len(c)):
            for j in range(len(v)):
                com_.append(c[i]+v[j]);


        return com_;

    #从音节的录音中(.wav)处理出音节,把数据写入文件
    def yinJieProcess(self, sFile):
        fileSize = getFileSize(sFile);
        if (fileSize < 44):
            return;
        
        c = [];
        #每次读取1000个字节
        perRead = 100;
        
        with open(sFile, 'rb') as f:
            #这是标准.wav的文件头长度
            a = f.read(44);
            index = 44;
            
            start = 0;
            end = 0;
            spareCount = 0;
            
            while (index < fileSize):            
                a = f.read(perRead);                
                b = list(a);                
                count = countZero(b[1:-1:2], 64);
               
                if (count > perRead / 2* 17/20): #认为这是空白
                    index += perRead;
                    if (start == 0): #前端空白
                        continue;
                    else: #后端空白                
                        end = index;
                        if (end -start < 5000):
                            continue;
                        else:
                            index = fileSize+1;
                            break;
                else:
                    if (start == 0):
                        start = index; #正部开始
                    index += perRead;
                        


            validLength = end-start;

            f.seek(start, 0);#第二个参数whence = 0表示从开头,=2表示从结尾倒着数
            data = list(f.read(validLength));
                
            f.close();
        print('有效部分从{0}字节开始,到{1}字节结束,共有{2}个字节。即{3}秒到{4}秒'
              .format(start ,end, validLength, round(start/22050, 3), round(end/22050, 3)));

            
        dataArray = [];
        for i in range(len(data)):
            byte = struct.pack('B', data[i]);
            dataArray.append(byte);    

        print('文件写入开始。请至tone.py查收>>>');
        fout =  open('tone.py', 'w');
        fout.write('tone_X = [');
        
        size_1 = len(dataArray);
        for n in range(size_1):
            fout.write(str(dataArray[n]));
            if (n < size_1-1):
                fout.write(', ');

        fout.write('];');
            
        fout.write('\r\n');
        fout.close();
        print('文件写入完毕。');

    #把十进制数按照小尾字节序切割
    def littleEndian(self, number, byte = 4):
        result = [0]*byte;

        for i in range(byte):
            result[i] = number%256;
            number//=256;
        return result;

    #把UltraEdit中的值字串转化为hex序列组
    def hexExpr(self, string):
        resultString = '';
        size = len(string);
        for i in range(size):
            if (i == 0 ):
                resultString += '0x'+string[i];
            elif (string[i] == ' '):
                resultString += ', 0x';
            else:
                resultString += string[i];
        print(resultString);


    ###
    # @usage   写.wav文件,能把声波数据阵列用二进制写成.wav。
    # @author  mw
    # @date    2016年04月28日  星期四  14:31:34 
    # @param
    # @return
    #
    ###
    def writeWav(self, filename = 'soundwave'):        
        byteArray = [];
        dataArray = [];
        
        #样本数据阵列
        sampleArray = [];
        idleWave = self.idleWave();


        sampleArray = self.waveDataGen();


        #样本数据点数
        N = len(sampleArray);
        times = 1;
        dataSize = N*times;
        
        fileSize = dataSize+44; #44为格式头部分所用字节数
        
        #RIFF WAVE CHUNK
        RIFF_ID = [0x52, 0x49, 0x46, 0x46];  #'RIFF'
        RIFF_Size = self.littleEndian(fileSize-8, 4); #文件总字节数减去8
        RIFF_Type = [0x57, 0x41,0x56, 0x45, 0x66, 0x6D, 0x74, 0x20]; #'WAVEfat '

        #Format Chunk
        Format_10_17 = [0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00];#过滤4+格式2+声道2=8个字节
        Format_18_1B = [0x11, 0x2B, 0x00, 0x00]; #采样频率0x2B11 = 11025
        Format_1C_1F = [0x22, 0x56, 0x00, 0x00]; #比持率 = 频率*通道*样本位 = 22050
        Format_20_23 = [0x02, 0x00, 0x10, 0x00]; #块对齐 = 通道数* 样本位数 = 1*2 = 2    

        #Fact Chunk(optional)

        #Data Chunk
        Data_24_27 = [0x64, 0x61, 0x74, 0x61]; #'DATA'标记
        Data_Size = self.littleEndian(fileSize-44, 4); #下面的Data部分的字节数,文件总字节数-44


        #RIFF WAVE CHUNK
        '''
        for i in range(4):
            byte = struct.pack('B', RIFF_ID[i]);
            byteArray.append(byte);
        '''

        RIFF = [b'R', b'I', b'F', b'F'];
        for i in range(4):
            byteArray.append(RIFF[i]);

        for i in range(4):
            byte = struct.pack('B', RIFF_Size[i]);
            byteArray.append(byte);

        '''
        for i in range(8):
            byte = struct.pack('B', RIFF_Type[i]);
            byteArray.append(byte);

        #Format Chunk
        for i in range(8):
            byte = struct.pack('B', Format_10_17[i]);
            byteArray.append(byte);

        for i in range(4):
            byte = struct.pack('B', Format_18_1B[i]);
            byteArray.append(byte);

        for i in range(4):
            byte = struct.pack('B', Format_1C_1F[i]);
            byteArray.append(byte);

        for i in range(4):
            byte = struct.pack('B', Format_20_23[i]);
            byteArray.append(byte);

        
        #Data Chunk
        for i in range(4):
            byte = struct.pack('B', Data_24_27[i]);
            byteArray.append(byte);
        '''

        #Format_28_2B是数据块大小,formatHead是它前面的所有部分
        formatHead = [b'W', b'A', b'V', b'E', b'f', b'm', b't', b' ',                       b'\x10', b'\x00', b'\x00', b'\x00', b'\x01', b'\x00', b'\x01', b'\x00',                       b'\x11', b'+', b'\x00', b'\x00',                       b'"', b'V', b'\x00', b'\x00',                       b'\x02', b'\x00', b'\x10', b'\x00',                       b'd', b'a', b't', b'a'];

        for i in range(len(formatHead)):
            byteArray.append(formatHead[i]);

        for i in range(4):
            byte = struct.pack('B', Data_Size[i]);
            byteArray.append(byte);


        #写出到文件
        print('文件写入开始。>>>');

        s = '';

        s = filename+'.wav';

            
        fout= open(s, 'wb');
        size = len(byteArray);
        
        for i in range(size):
            fout.write(byteArray[i]);       
        
        size = len(sampleArray);
        sizeIdle = len(idleWave);
        

        
        for j in range(times):
            for i in range(size):
                fout.write(sampleArray[i]);

        fout.close();
        print('文件写入完毕。');

    #声音数据的生成
    def waveDataGen(self):
        

        
        tian1 = voice.tian.tone_tian1;
        di1 = voice.di.tone_di1;
        xuan1 = voice.xuan.tone_xuan1;
        huang1 = voice.huang.tone_huang1;

        idle = self.idleWave();

        data = [];

        sArray = [ 'tian1', 'di1', 'xuan1', 'huang1' ,'di1', 'xuan1', 'huang1', 'tian1']*2;

        for i in range(len(sArray)):
            data += eval(sArray[i]);
            data += idle*30;

        return data;
            

def read():
    fin = open('../input.txt', 'r');

    array = [];

    for line in fin.readlines():
        if (line[0]!= '拼' and line[0].isalpha()):
            array.append(line[:-1]);

    #print(len(array));
    fin.close();

    
    for i in range(len(array)):
        print('\''+array[i]+'\', ', end = '');
        if (i%10 == 9):
            print('');
    

    #print(array);

        
def getFileSize(sFile):
    size = getsize(sFile);
    #print('文件{0}的大小是{1}字节'.format(sFile, size));
    return size;

def countZero(array, judgeValue):
    count = 0;
    for i in range(len(array)):
        if (abs(array[i]) < judgeValue):
            count+=1;
    return count;


    
def tmp():

    if (0):
        pron = Voice();

        terms = pron.combineYinJie();

        print('音节数共有{0}个'.format(len(terms)));

    read();

    '''

    index = 2;
    
    pron = Voice();

    if (index == 1):
        #把录音的音节去除空白部分
        pron.yinJieProcess('huang1.wav');
    elif (index == 2):
        #写声音文件
        pron.writeWav();
    '''
 



if __name__ == '__main__':
    tmp();
</span>


本节到此结束,欲知后事如何,请看下回分解。

[从头读历史] 第281节 始制文字 世界上的语系及语言

标签:

原文地址:http://blog.csdn.net/mwsister/article/details/51814093

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!