首页 > 技术文章 > 重复文件删除

DennyT 2019-10-28 22:22 原文

这段代码使用 Python 3 编写(Python 2 明年就停止维护了),通过比较文件的 MD5 值来判断文件内容是否相同。文件内容只要有一处不同,哪怕只差一个标点符号或一个空格,MD5 值都会不一样,所以正常情况下不用担心删错文件。

import hashlib
import os

try:
    # time.clock was removed in Python 3.8; perf_counter is its replacement.
    from time import perf_counter as now
except ImportError:  # fallback for interpreters that predate perf_counter
    from time import clock as now
def getmd5(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    memory use stays bounded regardless of file size, and the handle is
    closed as soon as hashing finishes.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    chunk_size = 1 << 20  # 1 MiB per read
    digest = hashlib.md5()
    with open(filename, 'rb') as fh:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
def main():
    """Interactively delete duplicate files from a single directory.

    Prompts for a directory path, hashes every regular file directly inside
    it (subdirectories are not descended into) and removes each file whose
    MD5 digest equals that of a file seen earlier in the scan. A summary of
    the file counts and the elapsed time is printed at the end.
    """
    path = input("path: ")
    seen_md5 = set()  # set membership is O(1); a list would be O(n) per file
    total_file = 0
    total_delete = 0
    start = now()
    # Sort the entries so the surviving copy is the first one by name rather
    # than whichever one os.listdir happens to return first.
    for file in sorted(os.listdir(path)):
        real_path = os.path.join(path, file)
        if not os.path.isfile(real_path):
            continue
        # Count regular files only, so subdirectories do not inflate the
        # totals printed below.
        total_file += 1
        filemd5 = getmd5(real_path)
        if filemd5 in seen_md5:
            total_delete += 1
            os.remove(real_path)
            print('删除',file)
        else:
            seen_md5.add(filemd5)
    end = now()
    time_last = end - start
    print('文件总数:',total_file)
    print('删除个数:',total_delete)
    print('当前文件个数:',(total_file-total_delete))
    print('耗时:',time_last,'')
     
# Run the interactive clean-up only when executed as a script, not on import.
if __name__=='__main__': 
    main()

 

推荐阅读