场景:有很多个文件需要分析,如果每次分析都要分别读取多个文件,会带来很多麻烦,所以需要先对源文件进行合并预处理。
支持两种用法:(1)合并某一文件夹下的所有文件(忽略文件夹等非文件条目)
(2)显式地合并多个文件。
import os
import sys

# Command-line help. The output file is always the LAST argument.
USAGE = '''
usage(1): merge_files pathname outfile
          pathname is a directory; every regular file directly inside it
          (sub-directories are ignored) is merged into outfile
usage(2): merge_files file1 file2 [file3 [...]] outfile
          the listed files are merged, in the given order, into outfile
'''

# Maximum number of bytes held in memory per read (256 MiB, a power of two).
FILE_SLIM = 256 * (1024 * 1024)


def merge_files(fileslist, mfname):
    """Concatenate the files in *fileslist*, in order, into *mfname*.

    Args:
        fileslist: Iterable of paths of the input files.
        mfname: Path of the merged output file. It is created, or truncated
            if it already exists.

    Raises:
        OSError: If the output file or any input file cannot be opened,
            read or written.
    """
    target = os.path.normcase(os.path.realpath(mfname))

    # "wb" replaces the invalid "wba" mode; the with-block guarantees the
    # output is flushed and closed even when an input file fails to open.
    with open(mfname, "wb") as p_fp:
        for path in fileslist:
            # Never read the file we are writing: it was just truncated and
            # would otherwise feed its own growing content back into itself.
            if os.path.normcase(os.path.realpath(path)) == target:
                continue
            with open(path, "rb") as c_fp:
                # Copy in bounded chunks so huge inputs never have to fit in
                # memory at once; read() returns b"" at end of file.
                while True:
                    chunk = c_fp.read(FILE_SLIM)
                    if not chunk:
                        break
                    p_fp.write(chunk)


def main():
    """Command-line entry point; see USAGE for the accepted arguments.

    With one source argument it must be a directory whose regular files are
    merged in sorted name order. With two or more source arguments they are
    merged in the order given; arguments that are not regular files are
    skipped with a warning on stderr.
    """
    args = sys.argv[1:]
    if len(args) < 2:
        # Without this guard the last element of sys.argv (possibly the
        # script itself) would be opened for writing.
        sys.exit(USAGE)

    sources = args[:-1]
    mfname = args[-1]

    if len(sources) == 1:
        dir_name = os.path.realpath(sources[0])
        if not os.path.isdir(dir_name):
            sys.exit("merge_files: %s is not a directory\n%s" % (sources[0], USAGE))
        # os.listdir() returns entries in arbitrary order; sort so that the
        # merged output is reproducible.
        candidates = [os.path.join(dir_name, name)
                      for name in sorted(os.listdir(dir_name))]
        fileslist = [path for path in candidates if os.path.isfile(path)]
        print(fileslist)
    else:
        fileslist = []
        for arg in sources:
            path = os.path.realpath(arg)
            if os.path.isfile(path):
                fileslist.append(path)
            else:
                sys.stderr.write(
                    "merge_files: skipping %s (not a regular file)\n" % arg)

    merge_files(fileslist, mfname)


if __name__ == '__main__':
    main()