码迷,mamicode.com
首页 > 编程语言 > 详细

python查找相同文件-UNIQ File-wxPython版本

时间:2016-04-17 20:42:54      阅读:305      评论:0      收藏:0      [点我收藏+]

标签:

今天用wxPython做了一个GUI程序,我称之为UNIQ File,实现查找指定目录内的相同文件,主要原理是计算文件的md5值(计算前先找出文件大小相同的文件,然后计算这些文件的md5值,而不是所有文件都计算,大大减少了md5的计算量),加入了多线程功能。

运行该程序需要安装wxPython。

技术分享

界面图

源代码:

UNIQFile-wxPython.py

  1 # -*- coding: gbk -*-
  2 
  3 ‘‘‘
  4 Author:@DoNotSpyOnMe
  5 Blog: http://www.cnblogs.com/aaronhoo
  6 ‘‘‘
  7 
  8 import wx
  9 import hashlib
 10 import os
 11 import threading
 12 
 13 class WorkerThread(threading.Thread):
 14     def __init__(self, frame,dir,operation,msg):
 15         """初始化工作线程: 把主窗口传进来"""
 16         threading.Thread.__init__(self)
 17         self.frame = frame
 18         self.dir=dir
 19         self.operation=operation
 20         self.msg=msg
 21         self.setDaemon(True)#设置子线程随UI主线程结束而结束
 22         self.start() 
 23 
 24     #----------------------------------------------------------------------
 25     def run(self):
 26         """执行工作线程"""
 27         self.frame.SetButtons(operating)
 28         try:
 29             if self.operation==list:
 30                 self.listSameFile(self.dir)
 31                 self.frame.btnList.Enable()
 32             elif self.operation==remove:
 33                 self.removeSameFile(self.dir)
 34                 self.frame.btnRemove.Enable()            
 35         except:
 36             pass
 37         finally:
 38             self.frame.SetButtons(completed)
 39 #         
 40 #     def stop(self):
 41 #         self.keepRunning=False
 42     def appendMsg(self,msg):
 43         if self.frame:
 44             #以下方式可以实现终端式的刷新:自动滚动到最新行
 45             self.frame.txtContent.AppendText(msg+\n)
 46             #废弃的方式
 47 #             currentMsg=self.frame.txtContent.GetValue()
 48 #             currentMsg=currentMsg+‘\n‘+msg
 49 #             self.frame.txtContent.SetValue(currentMsg)
 50             
 51     def getFileSize(self,filePath):
 52         return os.path.getsize(filePath)        
 53     
 54     ‘‘‘ 一般文件的md5计算方法,一次读取文件的全部内容‘‘‘           
 55     def CalcMD5(self,filepath):
 56         with open(filepath,rb) as f:
 57             md5obj = hashlib.md5()
 58             md5obj.update(f.read())
 59             hash = md5obj.hexdigest()
 60             return hash    
 61     ‘‘‘大文件计算md5的方法,分批读取文件内容,防止内存爆掉‘‘‘    
 62     def GetFileMd5(self,filename):
 63         if not os.path.isfile(filename):
 64             return
 65         myhash = hashlib.md5()
 66         f = open(filename,rb)
 67         while True:
 68             b = f.read(8*1024)
 69             if not b :
 70                 break
 71             myhash.update(b)
 72         f.close()
 73         return myhash.hexdigest()
 74     
 75     def GetAllFiles(self,directory):
 76         files=[]
 77         for dirpath, dirnames,filenames in os.walk(directory):
 78             if filenames!=[]:
 79                 for file in filenames:
 80                     files.append(dirpath+\\+file)
 81         return files
 82     
 83     def findSameSizeFiles(self,files):
 84         dicSize={}
 85         for f in files:
 86             size=self.getFileSize(f)        
 87             if not dicSize.has_key(size):
 88                 dicSize[size]=f
 89             else:
 90                 dicSize[size]=dicSize[size]+;+f
 91         dicCopy=dicSize.copy()
 92         for k in dicSize.iterkeys():
 93             if dicSize[k].find(;)==-1:
 94                dicCopy.pop(k) 
 95         del dicSize
 96         return dicCopy
 97             
 98     def findSameMD5Files(self,files):
 99         dicMD5={}
100         for f in files:
101             self.appendMsg(calculating the md5 value of file %s%f)
102             md5=self.GetFileMd5(f)        
103             if not dicMD5.has_key(md5):
104                 dicMD5[md5]=f
105             else:
106                 dicMD5[md5]=dicMD5[md5]+;+f
107         dicCopy=dicMD5.copy()
108         for k in dicMD5.iterkeys():
109             if dicMD5[k].find(;)==-1:
110                dicCopy.pop(k) 
111         del dicMD5
112         return dicCopy
113     
114     def removeSameFile(self,mydir):
115         msg=‘‘
116         msgUniq=Congratulations,no file is removed since they are all uniq.
117         try:
118             existsFlag=False
119             files=self.GetAllFiles(mydir)
120             self.appendMsg(%s files found in directory %s\n%(len(files),mydir))
121             dicFileOfSameSize=self.findSameSizeFiles(files)
122             if dicFileOfSameSize=={}:
123                 self.appendMsg(msgUniq)
124                 return
125             else:
126                 #list the duplicated files first: 
127                 dicFiltered={}
128                 for k in dicFileOfSameSize.iterkeys():
129                     filesOfSameSize=dicFileOfSameSize[k].split(;)
130                     dicSameMD5file=self.findSameMD5Files(filesOfSameSize)
131                     if dicSameMD5file!={}:
132                         existsFlag=True
133                         for k in dicSameMD5file.iterkeys():
134                             msg=msg+md5 %s: %s%(k,dicSameMD5file[k])+\n
135                             dicFiltered[k]=dicSameMD5file[k]
136                 if not existsFlag:
137                     msg=msgUniq
138                     return
139                 else:
140                     msg=Duplicated files:\n+msg+\n   
141                     #then remove the duplicated files:
142                     removeCount=0  
143                     for k in dicFiltered.iterkeys():
144                         sameFiles=dicFiltered[k].split(;)
145                         flagRemove=False
146                         for f in sameFiles:
147                             if not flagRemove:
148                                 flagRemove=True
149                             else:
150                                 msg=msg+Removing file: %s%f+\n
151                                 os.remove(f)
152                                 removeCount=removeCount+1
153                     msg=msg+%s files are removed.\n%removeCount              
154         except Exception,e:
155 #             print e
156             msg=Exception occured.
157         finally:
158             self.appendMsg(msg+\n+Operation finished.)
159             
160             
161     def listSameFile(self,mydir):
162         msg=‘‘
163         msgUniq=Congratulations,all files are uniq.
164         try:
165             existsFlag=False
166             files=self.GetAllFiles(mydir)
167             self.appendMsg(%s files found in directory %s\n%(len(files),mydir))
168             dicFileOfSameSize=self.findSameSizeFiles(files)
169             if dicFileOfSameSize=={}:
170                 self.appendMsg(msgUniq)
171                 return 
172             else:
173                 for k in dicFileOfSameSize.iterkeys():
174                     filesOfSameSize=dicFileOfSameSize[k].split(;)
175                     dicSameMD5file=self.findSameMD5Files(filesOfSameSize)
176                     if dicSameMD5file!={}:
177                         existsFlag=True
178                         for k in dicSameMD5file.iterkeys():
179                             msg=msg+md5 %s: %s%(k,dicSameMD5file[k])+\n
180                 if not existsFlag:
181                     msg=msgUniq
182                 else:
183                     msg=Duplicated files:\n+msg
184         except Exception,e:
185 #             print e
186             msg=Exception occured.
187         finally:
188             self.appendMsg(msg+\n+Operation finished.)
189 
190     
class MyFrame(wx.Frame):
    """Main window: directory picker, action buttons and a log pane."""

    # status -> enabled state of (btnOpen, btnList, btnRemove)
    _BUTTON_STATES = {
        'init': (True, False, False),
        'operating': (False, False, False),
        'completed': (True, True, True),
        'selected': (True, True, True),
    }

    def __init__(self):
        """Build the widgets and lay them out."""
        super(MyFrame, self).__init__(None, title='UNIQ File-wxPython', size=(780, 450))
        pan = wx.Panel(self)
        self.lblDir = wx.StaticText(pan, -1, 'Dir:', style=wx.ALIGN_LEFT)
        self.txtFile = wx.TextCtrl(pan, size=(380, 30))
        self.btnOpen = wx.Button(pan, label='Pick Directory')
        self.btnOpen.Bind(wx.EVT_BUTTON, self.BtnOpenHandler)
        self.btnList = wx.Button(pan, label='Find Same')
        self.btnList.Bind(wx.EVT_BUTTON, self.BtnListHandler)
        self.btnRemove = wx.Button(pan, label='Remove duplicated')
        self.btnRemove.Bind(wx.EVT_BUTTON, self.BtnRemoveHandler)

        # Top row: label, path field and the three buttons.
        hbox = wx.BoxSizer()
        for widget in (self.lblDir, self.txtFile, self.btnOpen,
                       self.btnList, self.btnRemove):
            hbox.Add(widget, proportion=0, flag=wx.LEFT, border=5)

        # Log pane takes all remaining vertical space.
        self.txtContent = wx.TextCtrl(pan, style=wx.TE_MULTILINE | wx.HSCROLL)
        vbox = wx.BoxSizer(wx.VERTICAL)
        vbox.Add(hbox, proportion=0, flag=wx.EXPAND | wx.ALL, border=5)
        vbox.Add(self.txtContent, proportion=1, flag=wx.EXPAND, border=5)
        pan.SetSizer(vbox)

    def BtnOpenHandler(self, event):
        """Let the user pick a directory and show it in the path field."""
        # The title is Chinese for "Select folder"; it is written with escapes
        # so it does not depend on the encoding of the source file.
        dlg = wx.DirDialog(None, u"\u9009\u62e9\u6587\u4ef6\u5939",
                           style=wx.DD_DEFAULT_STYLE)
        try:
            # Read the path before the dialog is destroyed.
            path = dlg.GetPath() if dlg.ShowModal() == wx.ID_OK else ''
        finally:
            dlg.Destroy()
        if not path:
            return
        self.dirSelected = path
        self.txtFile.SetValue(self.dirSelected)
        self.SetButtons('selected')
        self.txtContent.SetValue('Selected dirctory: %s\n' % self.dirSelected)

    def _startWorker(self, operation, banner):
        """Validate the path field and launch a WorkerThread for operation.

        banner is a '%s' template that receives the selected directory.
        """
        path = self.txtFile.GetValue()
        if not path or not os.path.isdir(path):
            wx.MessageBox('please select a valid directory first.', 'Tip Message',
                          wx.YES_DEFAULT | wx.ICON_INFORMATION)
            return
        self.dirSelected = path
        msg = banner % self.dirSelected
        self.txtContent.SetValue(msg)
        # Disable the buttons right away so a second click cannot start a
        # second worker before the first one has updated the UI.
        self.SetButtons('operating')
        WorkerThread(self, self.dirSelected, operation, msg)

    def BtnListHandler(self, event):
        """Start a background scan that lists duplicate files."""
        self._startWorker('list', 'Listing same files in %s\n')

    def BtnRemoveHandler(self, event):
        """Start a background scan that removes duplicate files."""
        self._startWorker('remove', 'Removing duplicated files in %s\n')

    def BtnStopHandler(self, event):
        """Placeholder for a stop button; not bound to any widget."""
        pass

    def SetButtons(self, status):
        """Enable/disable the buttons for status.

        status is one of 'init', 'operating', 'completed' or 'selected';
        any other value leaves the buttons unchanged.
        """
        states = self._BUTTON_STATES.get(status)
        if states is None:
            return
        for button, enabled in zip((self.btnOpen, self.btnList, self.btnRemove),
                                   states):
            button.Enable(enabled)
277                    
# Script entry point: create the application, show the main window and
# hand control to the wx event loop.
if __name__ == "__main__":
    app = wx.App()
    frame = MyFrame()
    frame.Show()
    app.MainLoop()

 

python查找相同文件-UNIQ File-wxPython版本

标签:

原文地址:http://www.cnblogs.com/aaronhoo/p/5401968.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!