码迷,mamicode.com
首页 > 编程语言 > 详细

python 多线程 基于正则表达式的多线程文本替换功能实现

时间:2014-09-16 10:34:30      阅读:277      评论:0      收藏:0      [点我收藏+]

标签:des   style   blog   color   io   os   ar   for   div   

#!/usr/bin/env python

import os
import os.path
import sys
import re
import shutil
import csv
from multiprocessing import Pool #support muti thread map reduce function


# Characters that carry a special meaning in a regular expression.
specialChars = ['.', '^', '$', '*', '+', '?', '\\', '[', ']', '|', '(', ')']

g_max_thread_pool_size = 30  # upper bound on the number of pool workers


def dealSpecialChars(str):
    """Return *str* rewritten so that it matches itself literally as a regex.

    Characters that are not listed in ``specialChars`` are copied unchanged.
    A listed character is wrapped in a one-character class (``.`` becomes
    ``[.]``), except for the four characters that are still special inside
    brackets (backslash, ``^``, ``[`` and ``]``): wrapping those yields an
    invalid or warning-prone pattern, so they are prefixed with a backslash
    instead.

    :param str: text to neutralise (the name shadows the builtin; it is kept
        so existing callers are unaffected).
    :return: a regular-expression source string matching exactly *str*.
    """
    # Characters that cannot safely sit alone inside "[...]".
    needsBackslash = ('\\', '^', '[', ']')
    pieces = []
    for c in str:
        if c not in specialChars:
            pieces.append(c)
        elif c in needsBackslash:
            pieces.append('\\' + c)
        else:
            pieces.append('[' + c + ']')
    return ''.join(pieces)

def readDictsFromCsv(filePath):
    """Load the pattern -> replacement table from a two-column CSV file.

    Each row is read as ``srcName,newName`` with ``,`` as the delimiter and
    ``|`` as the quote character.  Rows whose pattern is empty or whose
    replacement column is missing are skipped, because an empty pattern
    matches everywhere and a missing replacement cannot be passed to
    ``re.sub``.

    :param filePath: path of the CSV file.
    :return: dict mapping each ``srcName`` to its ``newName``; when a pattern
        appears more than once the last row wins.
    :raises IOError/OSError: if the file cannot be opened.
    """
    # The csv module expects a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(filePath, 'rb')
    else:
        csvfile = open(filePath, 'r', newline='')

    dicts = dict()
    with csvfile:
        dictsReader = csv.DictReader(csvfile,
                                     fieldnames=['srcName', 'newName'],
                                     restkey=None,
                                     delimiter=',',
                                     quotechar='|')
        for d in dictsReader:
            srcName = d['srcName']
            newName = d['newName']
            if not srcName or newName is None:
                continue
            dicts[srcName] = newName
    return dicts

# Extensions of binary file types.  Not referenced by the functions visible
# in this file.
BinaryExtList = ['.bmp', '.avi', '.res', '.xls', '.doc',
                 '.dll', '.lib', '.bpl', '.exe', '.chm']

# Module-level sample mapping of pattern -> replacement.  The functions below
# receive their mapping as a parameter and do not read this global.
replaceDicts = {r"RNC820V400R008C00SPC500": r"93"}

def ApplyReplace(str, keys, replaceDicts):
    """Apply every pattern in *keys* to *str*, one after another.

    Each pattern is substituted with ``replaceDicts[pattern]`` via ``re.sub``;
    the output of one substitution is the input of the next, so the order of
    *keys* matters.  A pattern that cannot be applied (invalid regex, invalid
    replacement template, missing or non-string replacement) is reported on
    stdout and skipped; the remaining patterns are still applied.

    :param str: text to transform (the name shadows the builtin; it is kept
        so existing callers are unaffected).
    :param keys: iterable of regular-expression source strings.
    :param replaceDicts: mapping from each pattern to its replacement.
    :return: the transformed text.
    """
    ret = str
    for pattern in keys:
        try:
            ret = re.sub(pattern, replaceDicts[pattern], ret)
        except (re.error, KeyError, TypeError, IndexError) as exc:
            # Best effort: one bad entry must not abort the whole line.
            print("Unexpected error ApplyReplace(str, keys, replaceDicts): "
                  "%s pattern=%r (%s)" % (str, pattern, exc))
        # Progress trace kept from the original: input and current result.
        print("%s %s" % (str, ret))
    return ret

def NeedReplace(str, keys):
    """Tell whether at least one pattern in *keys* matches somewhere in *str*.

    A pattern that cannot be evaluated (invalid regex, non-string pattern) is
    reported on stdout and treated as "no match"; the remaining patterns are
    still tried.

    :param str: text to search (the name shadows the builtin; it is kept so
        existing callers are unaffected).
    :param keys: iterable of regular-expression source strings.
    :return: True as soon as one pattern matches, otherwise False.
    """
    for pattern in keys:
        try:
            if re.search(pattern, str):
                return True
        except (re.error, TypeError) as exc:
            print("Unexpected error NeedReplace(str, keys): %s : pattern=%r (%s)"
                  % (str, pattern, exc))
    return False
    

defaultExtList = ['.txt', '.xml']  # extensions handled when no filter is given


def findFile(srcDir, filter=None):
    """Recursively collect the files under *srcDir* whose extension is wanted.

    :param srcDir: directory to scan.
    :param filter: collection of lower-case extensions including the dot
        (for example ``['.txt']``); ``None`` selects ``defaultExtList``.  The
        same filter is applied in every sub-directory.
    :return: flat list of matching paths, each built by joining *srcDir* with
        the entry names found below it.  Extension matching ignores case.
    :raises OSError: if a directory cannot be listed.
    """
    if filter is None:
        filter = defaultExtList
    filelist = []
    for name in os.listdir(srcDir):
        fullPath = os.path.join(srcDir, name)
        if os.path.isdir(fullPath):
            # extend, not append: the result must stay a flat list of paths.
            filelist.extend(findFile(fullPath, filter))
        elif os.path.splitext(fullPath)[1].lower() in filter:
            filelist.append(fullPath)
    return filelist
                

def _removeQuietly(path):
    """Delete *path*, ignoring the case where it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass  # nothing to clean up (file never created or already gone)


def ReplaceAllStrInFile(file, dicts, keys, filter=None):
    """Apply the replacements selected by *keys* to every line of *file*.

    The converted text is first written to ``file + 'temp'``.  The original is
    replaced only when at least one line actually changed; otherwise the
    temporary copy is discarded and the original is left untouched.

    :param file: path of the text file to convert.
    :param dicts: mapping from pattern to replacement.
    :param keys: the patterns from *dicts* to apply in this pass, in order.
    :param filter: collection of lower-case extensions (with dot) that may be
        processed; ``None`` selects ``defaultExtList``.
    :return: ``None`` when the extension is not in *filter* (file skipped),
        ``True`` when the file was processed, ``False`` when reading, writing
        or replacing the file failed.
    """
    if filter is None:
        filter = defaultExtList
    print("ReplaceAllStrInFile:file- %s begin!" % file)

    fullPath = file
    if os.path.splitext(fullPath)[1].lower() not in filter:
        return None

    tempPath = fullPath + 'temp'
    needRewrite = False
    try:
        with open(fullPath, 'r') as srcFile:
            with open(tempPath, 'w') as destFile:
                for line in srcFile:
                    if NeedReplace(line, keys):
                        converted = ApplyReplace(line, keys, dicts)
                        if converted != line:
                            needRewrite = True
                        line = converted
                    destFile.write(line)
    except (IOError, OSError, UnicodeError) as exc:
        print("convert file: %s failed! (%s)" % (fullPath, exc))
        _removeQuietly(tempPath)
        return False

    if not needRewrite:
        # Nothing changed: keep the original file exactly as it is.
        _removeQuietly(tempPath)
        print("convert file: %s success!" % fullPath)
        return True

    try:
        # os.rename cannot overwrite an existing file on Windows, hence the
        # explicit remove first.
        os.remove(fullPath)
        os.rename(tempPath, fullPath)
    except OSError as exc:
        if os.path.exists(fullPath):
            # The original is intact, so the temporary copy is disposable.
            _removeQuietly(tempPath)
            print("convert file: %s failed! (%s)" % (fullPath, exc))
        else:
            # The original is already gone: the temporary copy is the only
            # remaining data and must not be deleted.
            print("convert file: %s failed! (%s); converted text kept in %s"
                  % (fullPath, exc, tempPath))
        return False

    print("convert file: %s success!" % fullPath)
    return True

def ReplaceAllStrInFileByRows(srcfile, csvfilePath, maxRow=10):
    """Convert *srcfile* using the CSV table, *maxRow* patterns per pass.

    The patterns are read from *csvfilePath*, ordered longest first, and cut
    into consecutive batches of at most *maxRow* patterns.  Each batch is
    applied to the file by a separate call to ``ReplaceAllStrInFile``, and the
    batch is then printed on one line separated by spaces.

    :param srcfile: path of the file to convert.
    :param csvfilePath: path of the CSV file holding pattern,replacement rows.
    :param maxRow: maximum number of patterns per pass; must be at least 1.
    :return: ``False`` when *maxRow* is smaller than 1 or when any pass
        reported failure, otherwise ``True``.
    """
    if maxRow < 1:
        return False
    dicts = readDictsFromCsv(csvfilePath)
    # Longest patterns first, so a short pattern cannot pre-empt a longer one
    # that contains it.
    keys = sorted(dicts.keys(), key=len, reverse=True)

    allOk = True
    # Stepping by maxRow avoids the empty trailing batch the original produced
    # and works with integer arithmetic on every Python version.
    for start in range(0, len(keys), maxRow):
        batch = keys[start:start + maxRow]
        if ReplaceAllStrInFile(srcfile, dicts, batch) is False:
            allOk = False
        print(' '.join(batch))
    return allOk

def f(x):
    """Pool worker: run the row-batched replacement for one work item.

    :param x: indexable work item; ``x[0]`` is the file to convert and
        ``x[1]`` is the CSV file path.
    :return: whatever ``ReplaceAllStrInFileByRows`` returns for that pair.
    """
    srcfile = x[0]
    csvfilePath = x[1]
    return ReplaceAllStrInFileByRows(srcfile, csvfilePath)

if __name__ == "__main__":
    # Command line: <script> <csv file with pattern,replacement rows> <directory>
    args = sys.argv
    if len(args) != 3:
        # Raw string: the sample Windows paths contain "\t", which would
        # otherwise be printed as a tab.
        print(r'''
        usage: python testcsv.py D:\ss\temp.csv D:\ss\test        ''')
        sys.exit(2)
    csvfilePath = args[1]
    srcPath = args[2]

    filelist = findFile(srcPath)

    # One work item per file: [file to convert, CSV path].
    dataItems = [[path, csvfilePath] for path in filelist]

    if not filelist:
        # Pool(processes=0) raises ValueError, so stop here.
        print("no matching files found under %s" % srcPath)
    else:
        # multiprocessing.Pool starts worker processes (not threads); never
        # start more workers than there are files.
        pool_size = min(g_max_thread_pool_size, len(filelist))
        pool = Pool(processes=pool_size)
        try:
            pool.map(f, dataItems)
        finally:
            pool.close()
            pool.join()

 

 

 

python 多线程 基于正则表达式的多线程文本替换功能实现

标签:des   style   blog   color   io   os   ar   for   div   

原文地址:http://www.cnblogs.com/fangfu/p/3974270.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!