# 标签:split utf-8 关键词 continue line 文件路径 encoding walk 输出
# @Filename: search_document.py
import os,re
import shutil
import pandas as pd
import time
class atom_file(object):
    """Search the files under a directory tree for regular-expression matches.

    Both public methods walk ``path`` recursively, decode each candidate file
    as UTF-8 (undecodable bytes are replaced rather than raising), print
    their findings to stdout and return the paths of the files that matched.
    """

    # Number of lines shown on each side of a matching line.
    _CONTEXT_RADIUS = 2

    @staticmethod
    def _iter_file_paths(path, skipped_suffixes):
        """Yield the path of every file under *path* whose name does not end
        with one of *skipped_suffixes* (a tuple of strings)."""
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                if not file_name.endswith(skipped_suffixes):
                    yield os.path.join(root, file_name)

    def re_doucement(self, path, pattern):
        """Report the first line-level match of *pattern* in each file.

        Files are scanned line by line. For every file containing a matching
        line, a "begin" banner, the file path, the matching line together
        with up to two lines of context on each side, and an "end" banner
        are printed. Scanning of a file stops once the context is complete.

        Args:
            path: Root directory to walk.
            pattern: Regular expression (string or compiled pattern) that is
                searched for in each individual line.

        Returns:
            The paths of the files that contained a match, in walk order.

        Raises:
            re.error: If *pattern* is not a valid regular expression.
            OSError: If a candidate file cannot be opened.
        """
        regex = re.compile(pattern)  # compile once instead of once per line
        radius = self._CONTEXT_RADIUS
        window_size = 2 * radius + 1
        matched_paths = []

        for file_path in self._iter_file_paths(path, ('.py', '.png', '.ipynb')):
            recent = []      # rolling window holding the last `window_size` lines
            trailing = None  # lines still to read after a match; None = no match yet
            with open(file_path, encoding='utf-8', errors='replace') as file_object:
                for line in file_object:
                    recent.append(line)
                    del recent[:-window_size]
                    if regex.search(line):
                        if trailing is None:
                            # Print the header only once per file, even when
                            # several matching lines fall into one window.
                            print('')
                            print('')
                            print('')
                            print('begin----------------------------------------------------')
                            print('输出文件路径---' + file_path)
                        # A further match restarts the countdown so the
                        # window ends two lines after the latest match.
                        trailing = radius
                    elif trailing is not None:
                        trailing -= 1
                    if trailing == 0:
                        break

            # Emit the context after the loop so that a match within the last
            # two lines of a file is still reported (with the lines available).
            if trailing is not None:
                print('匹配上下文---' + ''.join(recent))
                print('end----------------------------------------------------')
                matched_paths.append(file_path)

        return matched_paths

    # Parse several keywords; the keywords are combined with AND.
    def search_more_keys_doucement(self, path, pattern):
        """Print the files whose text matches every comma-separated keyword.

        Args:
            path: Root directory to walk.
            pattern: Comma-separated regular expressions. A file is reported
                only if each one matches somewhere in the file's full text.

        Returns:
            The paths of the files that matched all keywords, in walk order.

        Raises:
            re.error: If one of the keywords is not a valid regular expression.
            OSError: If a candidate file cannot be opened.
        """
        # Split and compile once; the keyword list is the same for every file.
        regexes = [re.compile(keyword) for keyword in pattern.split(',')]
        matched_paths = []

        # NOTE(review): unlike re_doucement, '.ipynb' files are not skipped
        # here; kept as in the original -- confirm whether that is intended.
        for file_path in self._iter_file_paths(path, ('.py', '.png')):
            with open(file_path, encoding='utf-8', errors='replace') as file_object:
                text = file_object.read()
            if all(regex.search(text) for regex in regexes):
                print('输出文件路径---' + file_path)
                matched_paths.append(file_path)

        return matched_paths
# 标签:split utf-8 关键词 continue line 文件路径 encoding walk 输出
# 原文地址:https://www.cnblogs.com/ministep/p/14616979.html