首页 > 解决方案 > 如何在 python 中使用文件路径作为输出中的列从 excel 文件中读取数据

问题描述

import pandas as pd
import numpy as np
import glob
import os

all_data = pd.DataFrame() 
rows = 0
for file in glob.glob("Ranking*.xlsx"):
    xls = pd.ExcelFile(file)
    sheets = xls.sheet_names

    for sheet_name in sheets:
        df = pd.read_excel(file, sheet_name='Output',header= 3)

    all_data = all_data.append(df, ignore_index = True)
    output_tab_data = all_data[['Supplier','Tariff','Region']]
    output_tab_data_no_NA = output_tab_data[output_tab_data.Supplier.notnull()]
    output_tab_data_no_NA ['file_source_name'] = os.path.abspath(file)

    print(output_tab_data_no_NA)    

上面的代码目前生成这些列 - 'Supplier','Tariff','Region'

但我希望它包含一个名为 file_source_name 的 EXTRA COLUMN - 这是加载的每个 excel 文件的 excel 文件名路径

标签: pythonexcel

解决方案


import pandas as pd
import numpy as np
import glob
import os

all_data = pd.DataFrame() #creating an empty data frame
rows = 0
for f in glob.glob("../<path where python is>/*.xlsx"): #import every file that ends in .xls
    xls = pd.ExcelFile(file)
    sheets = xls.sheet_names # To get names of all the sheets
    for sheet_name in sheets:
             df = pd.read_excel(file, sheet_name='Output',header= 3) #start copying data from line 4 in each file
             df['file_source_name'] = f  #append individual file name/file path
    all_data = all_data.append(df, ignore_index = True) #put all the copied data together
    output_tab_data = all_data[['Supplier','Tariff','Region','file_source_name']]
    output_tab_data_no_NA = output_tab_data[output_tab_data.Supplier.notnull()]
print(output_tab_data_no_NA)    

推荐阅读