首页 > 解决方案 > 用 Python 在文件从一个文件夹移动到另一个文件夹的过程中修改多个文件

问题描述

现在我有一个程序将文件从 SOURCE 文件夹中的子目录移动到 DESTINATION 文件夹中的子目录。这些文件包含如下信息: 移动前的文件内容。

现在,我想在文件从 SOURCE 移动到 DESTINATION 的过程中,对被移动的文件做 2 处修改。

因此,在文件从 SOURCE 移动到 DESTINATION 后,它必须如下所示:

移动后的文件内容。

这是我现在用于移动文件的代码,所有移动都很顺利。当我想修改文件的内容时,我只是不知道从哪里开始:

import os, os.path
import time

# Root folders: where the files come from, where a sub-directory's newest
# file is sent, and where every other file is archived.
source = r'c:\data\AS\Desktop\Source'
destination = r'c:\data\AS\Desktop\Destination'
archive = r'c:\data\AS\Desktop\Archive'

# Visit every entry directly under source; anything that is not a directory
# is ignored.
for subdir in os.listdir(source):
    subdir_path = os.path.join(source, subdir)
    if not os.path.isdir(subdir_path):
        continue

    # Full paths of the regular files inside this sub-directory.
    all_file_paths = [
        os.path.join(subdir_path, name) for name in os.listdir(subdir_path)
    ]
    all_file_paths = [path for path in all_file_paths if os.path.isfile(path)]

    # Nothing to move out of an empty sub-directory.
    if not all_file_paths:
        continue

    # The file with the largest getctime() value counts as the newest one.
    newest_file_paths = max(all_file_paths, key=os.path.getctime)

    for file_path in all_file_paths:
        # Only the newest file, and only when its ctime is more than 120
        # seconds in the past, goes to destination; everything else is
        # archived.
        dst_root = (
            destination
            if file_path == newest_file_paths
            and os.path.getctime(newest_file_paths) < time.time() - 120
            else archive
        )

        # Keep the same layout under the chosen root:
        # <root>/<subdir>/<file name>.
        dst_path = os.path.join(dst_root, subdir, os.path.basename(file_path))
        os.rename(file_path, dst_path)

标签: python file file-management

解决方案


如果文件很小,那么您可以不直接移动文件,而是简单地:

  1. 从所有文件中读取信息
  2. 找到要替换的数据
  3. 将带有新数据的文件写入目标目录
  4. 删除旧文件

类似这样:

def move_file(file_path, dst_path):
  with open(file_path, "r") as input_file, open(dst_path, "w") as output_file:
      for line in input_file:
         if <line meets criteria to modify>:
             <modify_line>
         print(line, file=output_file)
      for <data> in <additional_data>:
         print(<data>, file=output_file)

  # remove the old file
  os.remove(file_path)

然后在原始代码中用 move_file 函数调用来代替 os.rename

#Now we are selecting the files which will be moved
#and make a destination path for them.
    for file_path in all_file_paths:
        # Only the newest file, and only when its ctime is more than 120
        # seconds in the past, goes to destination; everything else is
        # archived.
        dst_root = (
            destination
            if file_path == newest_file_paths
            and os.path.getctime(newest_file_paths) < time.time() - 120
            else archive
        )

        # Same layout under the chosen root: <root>/<subdir>/<file name>.
        dst_path = os.path.join(dst_root, subdir, os.path.basename(file_path))
        # Write the modified copy and delete the original instead of renaming.
        move_file(file_path, dst_path)

你可以像这样实现

import os
import time
from datetime import datetime

# Root folder whose sub-directories main() scans for files.
SOURCE = r'c:\data\AS\Desktop\Source'
# Root that process_files() picks for a sub-directory's newest file once its
# ctime is more than 120 seconds old.
DESTINATION = r'c:\data\AS\Desktop\Destination'
# Root that process_files() picks for every other file.
ARCHIVE = r'c:\data\AS\Desktop\Archive'

def get_time_difference(date, time_string, now=None):
    """
    Return the time elapsed since the given date and time as "H:M".

    You may want to modify this logic to change the way the time difference
    is calculated.

    Args:
        date: day in "%d-%m-%Y" form, e.g. "31-12-2020".
        time_string: time of day in "%H:%M" form, e.g. "13:45".
        now: optional datetime to measure against; defaults to
            datetime.now(). Passing it makes the result reproducible.

    Returns:
        Whole hours and the remaining whole minutes joined by ":", without
        zero padding (e.g. "2:5"). When the timestamp lies in the future the
        magnitude is prefixed with "-" (e.g. "-0:1" for one minute ahead).

    Raises:
        ValueError: if date / time_string do not match the expected formats.
    """
    if now is None:
        now = datetime.now()
    stamp = datetime.strptime(f"{date} {time_string}", "%d-%m-%Y %H:%M")
    total_seconds = (now - stamp).total_seconds()

    # Split the magnitude, not the signed value: floor division on a negative
    # number would report one minute ahead as "-1:59".
    whole_minutes = int(abs(total_seconds) // 60)
    hours, minutes = divmod(whole_minutes, 60)
    sign = "-" if total_seconds < 0 and whole_minutes else ""
    return f"{sign}{hours}:{minutes}"

def move_and_transform_file(file_path, dst_path, delimiter="\t"):
    """
    Reads the data from the old file, writes it into the new file and then
    deletes the old file.

    Each input line is expected to hold four whitespace-separated fields:
    line id, item name, line type and value. While copying:

    * the value of a "Power" item is multiplied by 10;
    * once both a "Date" and a "Time" item have been read, one extra
      "TimeDif" line is written immediately before the line that completed
      the pair, reusing that line's id and type;
    * lines that do not split into exactly four fields (blank lines
      included) are copied through unchanged.

    Args:
        file_path: path of the file to read and afterwards delete.
        dst_path: path of the file to write; its parent folder is created
            when it does not exist yet.
        delimiter: separator placed between the four fields on output.

    Raises:
        ValueError: if a "Power" value is not an integer, or if Date/Time do
            not match the format get_time_difference() expects.
        OSError: if a file cannot be opened, written or removed.
    """
    # Make sure the target folder exists so opening dst_path cannot fail just
    # because this sub-directory is being used for the first time.
    dst_dir = os.path.dirname(dst_path)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)

    data = {
        "Date": None,
        "Time": None,
        "Power": None,
    }
    time_difference_seen = False
    with open(file_path, "r") as input_file, open(dst_path, "w") as output_file:
        for line in input_file:
            fields = line.split()
            if len(fields) != 4:
                # Not a data record: keep the text as it is rather than
                # failing on the unpacking below.
                print(line.rstrip("\n"), file=output_file)
                continue

            line_id, item, line_type, value = fields
            if item in data:
                data[item] = value
                if (
                    not time_difference_seen
                    and data["Date"] is not None
                    and data["Time"] is not None
                ):
                    time_difference = get_time_difference(data["Date"], data["Time"])
                    time_difference_seen = True
                    print(
                        delimiter.join([line_id, "TimeDif", line_type, time_difference]),
                        file=output_file,
                    )
                if item == "Power":
                    value = str(int(value) * 10)
            print(delimiter.join((line_id, item, line_type, value)), file=output_file)

    # Reached only when the copy was written without an exception, so a
    # failed run never deletes the source file.
    os.remove(file_path)

def process_files(all_file_paths, newest_file_path, subdir):
    """
    Pick a target root for every file and hand it to move_and_transform_file.

    The file equal to newest_file_path goes under DESTINATION, provided its
    ctime is more than 120 seconds in the past; every other file goes under
    ARCHIVE. The target path is <root>/<subdir>/<file name>.
    """
    for path in all_file_paths:
        goes_to_destination = (
            path == newest_file_path
            and os.path.getctime(newest_file_path) < time.time() - 120
        )
        root = DESTINATION if goes_to_destination else ARCHIVE
        target = os.path.join(root, subdir, os.path.basename(path))
        move_and_transform_file(path, target)

def main():
    """
    Scan each sub-directory of SOURCE and process the files found in it.

    Entries of SOURCE that are not directories are skipped, and so are
    sub-directories that contain no regular files.
    """
    for subdir in os.listdir(SOURCE):
        subdir_path = os.path.join(SOURCE, subdir)
        if not os.path.isdir(subdir_path):
            continue

        # Full paths of the entries, narrowed down to regular files.
        candidates = (
            os.path.join(subdir_path, name) for name in os.listdir(subdir_path)
        )
        all_file_paths = [path for path in candidates if os.path.isfile(path)]
        if not all_file_paths:
            continue

        # The file with the largest getctime() value counts as the newest.
        newest_path = max(all_file_paths, key=os.path.getctime)
        process_files(all_file_paths, newest_path, subdir)

# Run the move only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

推荐阅读