首页 > 解决方案 > 在 python 中从文本文件(也特定于路径)中附加带有特定单词(RegEx)的 CSV

问题描述

# coding=utf-8
# Libreria RegEx de Python.
import re
# Libreria para rutas.
import os
import csv

# function scan folder DiarioOficial
def scan_folder():
    # directory 'path'
    path = '/Users/.../DiarioOficial'
    # contador de ficheros del path
    count = 0
    # variables declaration
    company_name = ''
    company_capital = ''

    # creation csv as csvFile
    with open('aaa.csv', 'a') as csvFile:
        # iterate all paths in the folder DiarioOficial without name
        for (path, dirnames, file_names) in os.walk(path):
            # iterate over all the files in the path (+ file_name)
            for file_name in file_names:
                # Add extension that is required
                if file_name.endswith(".txt"):
                    # summatory count files in DiarioOficial folder
                    count = count + 1
                    # concatenation path + file name
                    file_path=os.path.join(path, file_name)
                    #print(file_path)
                    # open and read the file path
                    mensaje = open(file_path).read()
                    # Replace a newline for a space
                    mensaje = mensaje.replace("\n","")

                    # Company Name
                    keywords_cap = ['SpA', 'SPA', 'LIMITADA', 'LTDA', 'S.A.', 'E.I.R.L.']
                    keywords_cap = map(re.escape, keywords_cap)
                    keywords_cap.sort(key=len, reverse=True)
                    obj = re.compile(r'[:,;.]\s*"?([^:,;.]*?(?<!\w)(?:{}))'.format('|'.join(keywords_cap)))
                    if obj:
                        company_name = obj.search(mensaje)
                    else:
                        company_name = "None"

                    # Social Capital ($)
                    cap = r"\s*(CAPITAL:\s+([^-]*)|Capital social:\s+([^-]*)|Capital:\s+([^-]*))"
                    caps = re.search(cap, mensaje)
                    if caps:
                        company_capital = caps.group()
                    else:
                        company_capital = 'None'
        print (count)

    csvData = [company_name, company_capital]
    writer = csv.writer(csvFile, delimiter='-')
    headers = ['COMPANY NAME', 'COMPANY CAPITAL']
    writer.writerow(headers)  # print the header row
    writer.writerow(csvData)  # print the Data in csv

scan_folder()

我想为信息在路径/Users/.../DiarioOficial 中的公司创建一个带有行的csv。此行包括公司名称和公司资本。但是Python:

错误:

回溯(最后一次调用):文件“/Users/anna/PycharmProjects/extractData/post.py”,第 61 行,在 scan_folder() 文件“/Users/anna/PycharmProjects/extractData/post.py”,第 59 行, in scan_folder writer.writerow(csvData) # print the Data in csv 247 ValueError: I/O operation on closed file

标签: pythonregexcsvpath

解决方案


推荐阅读