首页 > 技术文章 > python 合并文件

lonelytree 2013-08-08 11:22 原文

场景:手头有很多个文件需要分析,如果每次分析都要逐个读取这些文件,会带来很多麻烦,所以需要先对源文件进行合并预处理。

支持两种用法:(1)合并某一文件夹下的所有文件(忽略文件夹等非文件条目)

                    (2)显式地合并多个文件。

 1 import sys
 2 import os
 3 
 4 '''
 5     usage(1): merge_files pathname outfile
 6               pathname is a directory; the files directly inside it are merged into outfile
 7     usage(2): merge_files file1 file2 [file3[...]] outfile
 8               the listed files are merged, in the given order, into outfile
 9 '''
10 
11 
# Default upper bound on the number of bytes read into memory at once (256 MiB).
FILE_SLIM = 256 * 1024 * 1024


def merge_files(fileslist, mfname, chunk_size=FILE_SLIM):
    """Concatenate the files in ``fileslist`` into ``mfname``.

    The output file is created (or truncated) and the raw bytes of every
    input file are appended to it in list order.  Data is copied in pieces
    of at most ``chunk_size`` bytes so large inputs are never loaded whole.

    Args:
        fileslist: Iterable of paths of the files to merge, in output order.
        mfname: Path of the merged output file.
        chunk_size: Maximum number of bytes read per call; defaults to
            ``FILE_SLIM``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        IOError/OSError: If a file cannot be opened, read or written.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    target = os.path.realpath(mfname)
    # "wb" is the valid spelling of the intended mode; the context manager
    # guarantees the output is flushed and closed even if a read fails.
    with open(mfname, "wb") as merged:
        for path in fileslist:
            # The output was just truncated, so an input that *is* the
            # output has nothing left to contribute; reading it while
            # appending to it could also loop without end.
            if os.path.realpath(path) == target:
                continue
            with open(path, "rb") as source:
                # read() returns b"" at end of file, which ends the loop.
                for chunk in iter(lambda: source.read(chunk_size), b""):
                    merged.write(chunk)
30 
def main():
    """Command-line entry point: merge files into the last argument.

    Two invocation forms are accepted; the final argument is always the
    output file:

    * ``merge_files pathname outfile`` -- ``pathname`` must be a directory;
      every regular file directly inside it is merged (sub-directories and
      other non-file entries are ignored).  Entries are merged in sorted
      name order so the result is reproducible.
    * ``merge_files file1 file2 [file3 ...] outfile`` -- the listed files
      are merged in the given order; arguments that are not existing
      regular files are skipped.

    Exits with an error message when fewer than two arguments are given or
    when the single source argument is not a directory.
    """
    args = sys.argv[1:]
    if len(args) < 2:
        # Without this guard the output path would fall back to sys.argv[0]
        # (the script itself) or to the only argument, truncating it.
        sys.exit(
            "usage: merge_files pathname outfile\n"
            "       merge_files file1 file2 [file3 ...] outfile"
        )

    sources, output = args[:-1], args[-1]

    if len(sources) == 1:
        dir_name = os.path.realpath(sources[0])
        # Explicit check instead of assert, which is stripped under -O.
        if not os.path.isdir(dir_name):
            sys.exit("merge_files: not a directory: %s" % dir_name)
        # os.listdir() returns entries in arbitrary order; sort for a
        # deterministic merge order.
        candidates = [
            os.path.join(dir_name, name) for name in sorted(os.listdir(dir_name))
        ]
        fileslist = [path for path in candidates if os.path.isfile(path)]
        print(fileslist)
    else:
        resolved = [os.path.realpath(source) for source in sources]
        fileslist = [path for path in resolved if os.path.isfile(path)]

    merge_files(fileslist, output)
44     
45             
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
48 
49     

 

推荐阅读