python - 保存后更改的 .txt 导致 CSV 阅读器看到太多字段
问题描述
我正在JupyterLab
运行AWS SageMaker
. 核心:conda_amazonei_mxnet_p27
找到的字段数:saw 9
每次运行时递增 1。
错误: ParserError: Error tokenizing data. C error: Expected 2 fields in line 50, saw 9
代码:
调用(在此之前运行所有单元格时不会出现错误,但在运行时会出现错误):
train = open('train_textcorrupted.csv', 'a')
val = open('val.csv', 'a')
classes = open('classes.txt', 'a')
uni_label = 'Organisation\tUniversity'
n_pad = 4
for i in range(len(unis)-n_pad):
record = ' '.join(unis[i:(i+n_pad)])
full_record = f'{uni_label}\t{record}\n'
if random.random() > 0.9:
val.write(full_record)
else:
train.write(full_record)
classes.write(uni_label)
classes.close()
val.close()
train.close()
追溯:
---------------------------------------------------------------------------
ParserError Traceback (most recent call last)
<ipython-input-8-89b1728bd5a6> in <module>
7 --gpus 1
8 """.split()
----> 9 run_training(args)
<ipython-input-5-091daf2638a1> in run_training(input)
55 csv_logger = pl.loggers.CSVLogger(save_dir=f'{args.modeldir}/csv_logs')
56 loggers = [logger, csv_logger]
---> 57 dm = OntologyTaggerDataModule.from_argparse_args(args)
58 if args.model_uri:
59 local_model_uri = os.environ.get('SM_CHANNEL_MODEL', '.')
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/core/datamodule.py in from_argparse_args(cls, args, **kwargs)
324 datamodule_kwargs.update(**kwargs)
325
--> 326 return cls(**datamodule_kwargs)
327
328 @classmethod
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/core/datamodule.py in __call__(cls, *args, **kwargs)
47
48 # Get instance of LightningDataModule by mocking its __init__ via __call__
---> 49 obj = type.__call__(cls, *args, **kwargs)
50
51 return obj
<ipython-input-3-66ee2be72e78> in __init__(self, traindir, train_file, validate_file, model_name, labels, batch_size)
30 print('tokenizer', tokenizer)
31 print('labels_file', labels_file)
---> 32 label_mapper = LabelMapper(labels_file)
33 self.batch_size = batch_size
34 self.num_classes = label_mapper.num_classes
<ipython-input-3-66ee2be72e78> in __init__(self, classes_file)
102
103 def __init__(self, classes_file):
--> 104 self._raw_labels = pd.read_csv(classes_file, header=None, sep='\t')
105
106 self._map = []
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pandas/io/parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
686 )
687
--> 688 return _read(filepath_or_buffer, kwds)
689
690
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
458
459 try:
--> 460 data = parser.read(nrows)
461 finally:
462 parser.close()
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pandas/io/parsers.py in read(self, nrows)
1196 def read(self, nrows=None):
1197 nrows = _validate_integer("nrows", nrows)
-> 1198 ret = self._engine.read(nrows)
1199
1200 # May alter columns / col_dict
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pandas/io/parsers.py in read(self, nrows)
2155 def read(self, nrows=None):
2156 try:
-> 2157 data = self._reader.read(nrows)
2158 except StopIteration:
2159 if self._first_chunk:
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.read()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_low_memory()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_rows()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()
pandas/_libs/parsers.pyx in pandas._libs.parsers.raise_parser_error()
ParserError: Error tokenizing data. C error: Expected 2 fields in line 50, saw 9
classes.txt
(制表符分隔)运行前
Activity Event
Actor Person
Agent Person
Album Product
Animal Object
ArchitecturalStructure Location
Artist Person
Athlete Person
AutomobileEngine Product
Award Object
Biomolecule Object
Bird Object
BodyOfWater Location
Building Location
ChemicalSubstance Object
Company Organisation
Competition Event
Device Product
Disease Object
District Location
Eukaryote Object
Event Event
Film Object
Food Object
Language Object
Location Location
MeanOfTransportation Product
MotorsportSeason Event
Municipality Location
MusicalWork Product
Organisation Organisation
Painter Person
PeriodicalLiterature Product
Person Person
PersonFunction Person
Plant Object
Poet Person
Politician Person
River Location
School Organisation
Settlement Location
Software Product
Song Product
Species Object
SportsSeason Event
Station Location
Town Location
Village Location
Writer Person
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
Organisation University
解决方案
推荐阅读
- javascript - guildMemberAdd 和 guildMemberRemove 嵌入不再发送(Discord.js)
- android - 如何真正重置editText以显示修改后的提示
- tensorflow-federated - TFF:如何拆分每个客户端的数据
- python - 如何让tensorflow同时使用cpu和gpu
- java - 如何为相似文本(语义相似性)生成相同的哈希?
- angular - 角,HttpClient;“可观察”类型上不存在属性“.shareReplay”
- r - 如何添加两个单词数据框 Rstudio
- matlab - 在二维空间中旋转弹簧
- java - 尝试捕获分配值问题
- windows - cmd 显示 unicode 字符作为替换