python - KeyError:“存档中没有名为 'dataset.csv' 的项目”
问题描述
在raw_file_processing
函数中,我将原始数据处理为可用格式,然后dataset.csv
在dataset_csv
函数中创建一个。然后我想在zip_files
函数中压缩这个文件。我的代码提出了KeyError( KeyError: "There is no item named 'dataset.csv' in the archive"
。
from zipfile import ZipFile
import pandas as pd
import warnings
import numpy as np
import os
import re
import shutil
class DataProcesser:
def __init__(self, raw_path, col_id='ID', col_class='class', col_classname='class_name', col_set='set',
read_on_init=True, **kwargs):
self.archive = ZipFile('data.zip', 'w')
self.raw_path = raw_path
self.col_id = col_id
self.col_class = col_class
self.col_classname = col_classname
self.col_set = col_set
self.dataset = None
self.dataset_cropped = None
self.id_set = None
self.classes = None
self.train_set = None
self.validation_set = None
self.test_set = None
self.logs = []
self.stats = None
self.flag_subset = False
self.flag_process = False
self.flag_split = False
self.measurement_df = None
if read_on_init:
self.read_archive(**kwargs)
def raw_file_processing(self):
# If the path contains HTAN CODEX data, perform the following processing steps
if os.path.isdir(self.raw_path):
self.col_classname = self.raw_path.iloc[2]
# Dummy-code the classes
self.col_class = pd.get_dummies(self.col_classname)
# Create the ID series by concatenating columns 1-3
self.col_id = self.raw_path.assign(
ID=self.raw_path[['cell_id:cell_id', 'region:region', 'tile_num:tile_num']].apply(
lambda row: '_'.join([str(each) for each in row]), axis=1))
self.col_id = self.raw_path.drop(columns=['cell_id:cell_id', 'region:region', 'tile_num:tile_num'])
# Obtain measurement info
# Normalize data against blank/empty columns
# log-transform the data
for col in self.raw_path[9:]:
if re.findall(r"Blank|Empty", col):
background = col
else:
for index, row in col:
norm_data = row / background
self.measurement_df = np.log2(norm_data)
return self.raw_path, self.col_id, self.col_class, self.measurement_df
def dataset_csv(self):
# If the path contains HTAN CODEX data, perform the following processing steps
if os.path.isdir(self.raw_path):
"""Col 1: ID
Col 2: class
Col 3-n: measurements"""
id_col = self.col_id
self.col_class = self.col_class.to_frame()
frames = [id_col, self.col_class, self.measurement_df]
self.dataset = pd.concat(frames)
data_csv = self.dataset.to_csv("../input_data/data/dataset.csv")
return data_csv
def zip_files(self):
# Create a ZipFile object for dataset.csv, classes.csv, and id_set.csv
self.archive = shutil.make_archive(self.archive_path, "zip", "../input_data/data/")
return self.archive
def read_archive(self, datatable=True, **kwargs):
if datatable:
try:
from datatable import fread
self.dataset = fread(self.archive.open('dataset.csv'), **kwargs).to_pandas()
self.id_set = fread(self.archive.open('id_set.csv'), **kwargs).to_pandas()
self.classes = fread(self.archive.open('classes.csv'), **kwargs).to_pandas()
except ModuleNotFoundError:
warnings.warn('datatable module not found, using pandas instead. To prevent this message from appearing'
' use "datatable = False" when reading the archive.')
self.dataset = pd.read_csv(self.archive.open('dataset.csv'))
self.id_set = pd.read_csv(self.archive.open('id_set.csv'))
self.classes = pd.read_csv(self.archive.open('classes.csv'))
else:
self.dataset = pd.read_csv(self.archive.open('dataset.csv'))
self.id_set = pd.read_csv(self.archive.open('id_set.csv'))
self.classes = pd.read_csv(self.archive.open('classes.csv'))
self.check_datasets()
return None
input_path = "//wsl$/Ubuntu-20.04/home/melissachua/CODEX/input_data"
# Open all the subfolders within path
for root, dirs, files in os.walk(input_path):
for file in files:
with open(os.path.join(root, file), "r") as data:
raw_files = pd.read_csv(data)
input_path = "//wsl$/Ubuntu-20.04/home/melissachua/CODEX/input_data"
# Open all the subfolders within path
for root, dirs, files in os.walk(input_path):
for file in files:
with open(os.path.join(root, file), "r") as data:
data_file = pd.read_csv(data)
data = DataProcesser(data_file, datatable=False)
meas_var = None
start_time = None
end_time = None
# Open all the subfolders within path
for root, dirs, files in os.walk(input_path):
for file in files:
with open(os.path.join(root, file), "r") as data:
raw_files = pd.read_csv(data)
data_file = "data.zip"
# The data object is used to automatically derive some parameters (e.g. number of classes)
data = DataProcesser(data_file, datatable=False)
追溯:
> --------------------------------------------------------------------------- KeyError Traceback (most recent call
> last) /tmp/ipykernel_17522/1589195238.py in <module>
> 1 # The data object is used to automatically derive some parameters (e.g. number of classes)
> ----> 2 data = DataProcesser(raw_files, datatable=False)
> 3
> 4 # Update default for the data
> 5 meas_var = data.detect_groups_times()['groups'] if meas_var is None else meas_var
>
> ~/CODEX/Notebooks/../source/load_data.py in __init__(self, raw_path,
> col_id, col_class, col_classname, col_set, read_on_init, **kwargs)
> 74 self.measurement_df = None
> 75 if read_on_init:
> ---> 76 self.read_archive(**kwargs)
> 77
> 78 def raw_file_processing(self):
>
> ~/CODEX/Notebooks/../source/load_data.py in read_archive(self,
> datatable, **kwargs)
> 194 self.classes = pd.read_csv(self.archive.open('classes.csv'))
> 195 else:
> --> 196 self.dataset = pd.read_csv(self.archive.open('dataset.csv'))
> 197 self.id_set = pd.read_csv(self.archive.open('id_set.csv'))
> 198 self.classes = pd.read_csv(self.archive.open('classes.csv'))
>
> /usr/lib/python3.8/zipfile.py in open(self, name, mode, pwd,
> force_zip64) 1512 else: 1513 # Get info
> object for name
> -> 1514 zinfo = self.getinfo(name) 1515 1516 if mode == 'w':
>
> /usr/lib/python3.8/zipfile.py in getinfo(self, name) 1439
> info = self.NameToInfo.get(name) 1440 if info is None:
> -> 1441 raise KeyError( 1442 'There is no item named %r in the archive' % name) 1443
>
> KeyError: "There is no item named 'dataset.csv' in the archive"
解决方案
推荐阅读
- dataweave - 从 Json 中提取键值 - Mule Dataweave
- java - Kubernetes 上的 Kafka Streams:重新部署后的长期重新平衡
- reactjs - 从父 React Native 调用子函数 - TypeError 无法读取 null 的属性
- ios - 自定义暗模式 iOS 问题
- swift - SwiftUI 使用 'id' 属性为 @Published 属性添加 Codable 一致性
- unity3d - Unity2D - 刚体到达某个位置时不会停止
- python - 仅使用标准库创建 Web 服务器
- swift - 为什么在所有边缘上固定 UIStackView 会导致模棱两可的约束错误?
- javascript - 等待循环完成,然后从中获取数据
- asp.net-mvc - 在其他项目的 mvc 中使用 partials