码迷,mamicode.com
首页 > 其他好文 > 详细

asdfsadfs

时间:2016-03-13 22:28:34      阅读:219      评论:0      收藏:0      [点我收藏+]

标签:

# -*- coding:utf8 -*-
import os
import jieba.posseg as pseg
# -*- coding:utf8 -*-
import os
 
def splitSentence(inputFile, name, outputDir='/home/xdj/target/'):
    """Segment a UTF-8 text file with jieba and write the POS-tagged result.

    Every input line is stripped of surrounding whitespace, decoded as UTF-8
    (undecodable bytes are ignored), segmented with ``pseg.cut`` and written
    as one output line in which each token is rendered as ``word/flag``.
    Tokens are concatenated without any separator between them.

    Args:
        inputFile: Path of the text file to read.
        name: File name of the result, created inside ``outputDir``.
        outputDir: Directory that receives the result file.
    """
    print(name)
    # Binary mode plus explicit decode/encode behaves the same on Python 2
    # and Python 3; the context managers close both files even when
    # segmentation raises part-way through.
    with open(inputFile, 'rb') as fin, \
            open(os.path.join(outputDir, name), 'wb') as fout:
        for eachLine in fin:
            # Strip before decoding so the trailing newline never reaches jieba.
            line = eachLine.strip().decode('utf-8', 'ignore')
            outStr = ''.join(
                word.word + '/' + word.flag for word in pseg.cut(line))
            fout.write(outStr.encode('utf-8'))
            fout.write(b'\n')
 
# Root directory whose files (at any depth) are segmented.
path = '/media/软件/zhuomian/VARandLDA/xuejiesourse'
fns = [os.path.join(root, fn) for root, dirs, files in os.walk(path) for fn in files]

# Each input file is written to an output file named after its 0-based
# position in the walk order ('0', '1', ...).
num = 0
for i, f in enumerate(fns):
    print(f)
    strm = '%d' % i
    splitSentence(f, strm)
    num += 1  # count processed files; the counter was previously never updated
print(num)

 

# -*- coding:utf8 -*-
import os
import jieba.posseg as pseg
# -*- coding:utf8 -*-
import os

def splitSentence(inputFile, name, outputDir='/home/xdj/target/'):
    """Segment a UTF-8 text file with jieba and write the POS-tagged result.

    Every input line is stripped of surrounding whitespace, decoded as UTF-8
    (undecodable bytes are ignored), segmented with ``pseg.cut`` and written
    as one output line in which each token is rendered as ``word/flag``.
    Tokens are concatenated without any separator between them.

    Args:
        inputFile: Path of the text file to read.
        name: File name of the result, created inside ``outputDir``.
        outputDir: Directory that receives the result file.
    """
    print(name)
    # Binary mode plus explicit decode/encode behaves the same on Python 2
    # and Python 3; the context managers close both files even when
    # segmentation raises part-way through.
    with open(inputFile, 'rb') as fin, \
            open(os.path.join(outputDir, name), 'wb') as fout:
        for eachLine in fin:
            # Strip before decoding so the trailing newline never reaches jieba.
            line = eachLine.strip().decode('utf-8', 'ignore')
            outStr = ''.join(
                word.word + '/' + word.flag for word in pseg.cut(line))
            fout.write(outStr.encode('utf-8'))
            fout.write(b'\n')

# Root directory whose files (at any depth) are segmented.
path = '/media/软件/zhuomian/VARandLDA/xuejiesourse'
fns = [os.path.join(root, fn) for root, dirs, files in os.walk(path) for fn in files]

# Each input file is written to an output file named after its 0-based
# position in the walk order ('0', '1', ...).
num = 0
for i, f in enumerate(fns):
    print(f)
    strm = '%d' % i
    splitSentence(f, strm)
    num += 1  # count processed files; the counter was previously never updated
print(num)

 

asdfsadfs

标签:

原文地址:http://www.cnblogs.com/XDJjy/p/5273276.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!