pandas - 读取文件时出现 pandas 错误(ParserError)
问题描述
我是 Jupyter 的新手,上周才开始尝试用它来绘图。我有一个很大的 .csv 格式的 Excel 表格,想要读取其中的数据并绘图。我的做法是先把 .csv 转换成 .dat 格式。
我的代码看起来像这样
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
# Input tables: whitespace-separated text files without a header row.
file1 = 'king2.dat'
file2 = 'iso.dat'

# A regex separator requires the python parser engine; the raw string keeps
# the backslash in r'\s+' from being treated as an (invalid) string escape.
data1 = pd.read_csv(file1, delimiter=r'\s+', header=None, engine='python')

# Column labels for file1, assigned positionally. The list must contain
# exactly one name per column in the file, otherwise pandas raises a
# length-mismatch ValueError on assignment.
# NOTE(review): 'B-V_plt' was split across two lines in the original paste
# ('B-' / 'V_plt'), which is a syntax error; it is rejoined here on the
# assumption that the break was a line-wrap artifact.
# NOTE(review): '(V-I)_onBSs' (lower-case final 's') and the '... _SED' names
# containing a space are kept exactly as written -- confirm they are intended.
data1.columns = [
    # *_plt
    'no_plt', 'Op_RA_plt', 'Op_DE_plt', 'Vmag_plt', 'B-V_plt',
    '(B-V)o_plt', 'no_2_plt', 'FUV_mag_plt', 'FUV_magerr_plt',
    '(FUV-V)_plt', '(V-I)_plt', '(FUV-I)_plt',
    # *_2M
    'no_op_2M', 'Op_RA_2M', 'Op_DE_2M', 'Vmag_2M', '(B-V)o_2M',
    'FUV_mag_2M', 'FUV-V_2M', 'no_2M', 'j_m_2M', 'h_m_2M', 'k_m_2M',
    'j-h_2M', 'h-k_2M', 'j-k_2M', '(V-I)_2M', '(FUV-I)_2M',
    # *_MS
    'no_MS', 'Op_RA_MS', 'Op_DE_MS', 'Vmag_MS', '(B-V)o_MS',
    'FUV_mag_MS', 'FUV-V_MS', '(V-I)_MS', '(FUV-I)_MS',
    # *_508
    'no_508', 'Op_RA_508', 'Op_DE_508', 'Vmag_508', '(B-V)o_508',
    'FUV_mag_508', 'FUV-V_508', '(V-I)_508', '(FUV-I)_508',
    # *_RG
    'no_RG', 'Op_RA_RG', 'Op_DE_RG', 'Vmag_RG', '(B-V)o_RG',
    'FUV_mag_RG', 'FUV-V_RG', '(V-I)_RG', '(FUV-I)_RG',
    # *_RG609
    'no_RG609', 'Op_RA_RG609', 'Op_DE_RG609', 'Vmag_RG609', '(B-V)o_RG609',
    'FUV_mag_RG609', 'FUV-V_RG609', '(V-I)_RG609', '(FUV-I)_RG609',
    # *_TF621
    'no_TF621', 'Op_RA_TF621', 'Op_DE_TF621', 'Vmag_TF621', '(B-V)o_TF621',
    'FUV_mag_TF621', 'FUV-V_TF621', '(V-I)_TF621', '(FUV-I)_TF621',
    # *_onBSS
    'no_onBSS', 'Op_RA_onBSS', 'Op_DE_onBSS', 'Vmag_onBSS', '(B-V)o_onBSS',
    'FUV_mag_onBSS', 'FUV-V_onBSS', '(V-I)_onBSs', '(FUV-I)_onBSS',
    # *_BSSreg
    'no_BSSreg', 'Op_RA_BSSreg', 'Op_DE_BSSreg', 'Vmag_BSSreg',
    '(B-V)o_BSSreg', 'FUV_mag_BSSreg', 'FUV-V_BSSreg', '(V-I)_BSSreg',
    '(FUV-I)_BSSreg',
    # *_BSSreg558
    'no_BSSreg558', 'Op_RA_BSSreg558', 'Op_DE_BSSreg558', 'Vmag_BSSreg558',
    '(B-V)o_BSSreg558', 'FUV_mag_BSSreg558', 'FUV-V_BSSreg558',
    '(V-I)_BSSreg558', '(FUV-I)_BSSreg558',
    # *_aMS
    'no_aMS', 'Op_RA_aMS', 'Op_DE_aMS', 'Vmag_aMS', '(B-V)o_aMS',
    'FUV_mag_aMS', 'FUV-V_aMS', '(V-I)_aMS', '(FUV-I)_aMS',
    # *_bMS
    'no_bMS', 'Op_RA_bMS', 'Op_DE_bMS', 'Vmag_bMS', '(B-V)o_bMS',
    'FUV_mag_bMS', 'FUV-V_bMS', '(V-I)_bMS', '(FUV-I)_bMS',
    # *_SED
    'no _SED', 'Op_RA _SED', 'Op_DE _SED', 'Vmag _SED', '(B-V)o _SED',
    'FUV_mag _SED', 'FUV-V _SED', '(V-I)_SED', '(FUV-I)_SED',
]
# Load the second table. The original read `file1` a second time, which
# left `file2` unused and cannot match the 19 column names assigned below.
# The raw string keeps the backslash in r'\s+' from being treated as an
# (invalid) string escape; a regex separator requires the python engine.
data2 = pd.read_csv(file2, delimiter=r'\s+', header=None, engine='python')

# Column labels for file2, assigned positionally (one name per column).
data2.columns = [
    'age', 'log(Z)', 'mass', 'logl', 'logt', 'logg',
    'FUVCa_14.76', 'NUVB15_14.76', '(FUV-NUV)14.76', 'V14.76', 'B14.76',
    'B-V14.76', '(FUV-V)14.76', '(NUV-V)14.76', 'GA NUV14.76',
    '(Uf-Gn)14.76', 'I14.76', '(V-I)14.76', '(FUV-I)14.76',
]
def fit_data():
    """Scatter-plot two columns from each of the module-level tables.

    Reads the module-level frames ``data1`` and ``data2`` and draws, on a
    single 8x8 figure:

    * ``data1['(B-V)o_plt']`` against ``data1['Vmag_plt']`` as small red dots;
    * ``data2['B-V14.76']`` against ``data2['V14.76']`` as green circles.

    Both axes are inverted, then the figure is shown and closed.
    Returns nothing.
    """
    # The Figure handle was bound to an unused local in the original; the
    # pyplot state machine tracks the current figure, so it is not kept.
    plt.figure(1, figsize=(8, 8))
    plt.subplot(111)

    plt.scatter(data1['(B-V)o_plt'], data1['Vmag_plt'], marker='.', color='r', s=5)
    plt.scatter(data2['B-V14.76'], data2['V14.76'], marker='o', color='g', s=6)

    # NOTE(review): the labels say RA/DEC while the plotted columns are
    # B-V and V -- confirm which is intended before changing them.
    plt.xlabel('RA_F', size=20)
    plt.ylabel('DEC_F', size=20)

    axes = plt.gca()
    axes.invert_xaxis()
    axes.invert_yaxis()

    plt.show()
    plt.close()


fit_data()
当表格只有 4 列时,读取和绘图都没有任何错误。但是如果我增加列数,就会出现下面的错误:
ParserError Traceback (most recent call last)
<ipython-input-5-fe66f2a2aac9> in <module>()
13 data1.columns = ['age','FUVCa','NUVB15','(FUV-NUV)','V','B','B-V','(FUV-V)','(NUV-V)','I','(V-I)','(FUV-I)']
14
---> 15 data2 = pd.read_csv(file2, delimiter='\s+', header=None, engine='python')
16 data2.columns = ['no','Op_RA','Op_DE','Vmag','(B-V)o','FUV_mag','(FUV-V)','(V-I)','(FUV-I)', 'no_MS','Op_RA_MS','Op_DE_MS','Vmag_MS','(B-V)o_MS',
17 'FUV_mag_MS','FUV-V_MS','(V-I)_MS','(FUV-I)_MS','no_508','Op_RA_508','Op_DE_508','Vmag_508',
~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
707 skip_blank_lines=skip_blank_lines)
708
--> 709 return _read(filepath_or_buffer, kwds)
710
711 parser_f.__name__ = name
~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
453
454 try:
--> 455 data = parser.read(nrows)
456 finally:
457 parser.close()
~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in read(self, nrows)
1067 raise ValueError('skipfooter not supported for iteration')
1068
-> 1069 ret = self._engine.read(nrows)
1070
1071 if self.options.get('as_recarray'):
~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in read(self, rows)
2261 content = content[1:]
2262
-> 2263 alldata = self._rows_to_cols(content)
2264 data = self._exclude_implicit_index(alldata)
2265
~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _rows_to_cols(self, content)
2916 msg += '. ' + reason
2917
-> 2918 self._alert_malformed(msg, row_num + 1)
2919
2920 # see gh-13320
~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _alert_malformed(self, msg, row_num)
2683
2684 if self.error_bad_lines:
-> 2685 raise ParserError(msg)
2686 elif self.warn_bad_lines:
2687 base = 'Skipping line {row_num}: '.format(row_num=row_num)
ParserError: Expected 30 fields in line 21, saw 45. Error could possibly be due to quotes being ignored when a multi-char delimiter is used.
我不明白这个错误是什么意思。我不知道自己哪里出错了,还是因为我给出的列太多,导致它无法处理。
第 20 行
508 12.76968 58.18559 18.97 0.96 0.65 1371 22.925 0.343 3.955 508 12.76968 58.18559 18.97 0.65 22.925 3.955 32 16.111 15.777 15.253 0.334 0.524 0.858 508 12.76968 58.18559 18.97 0.65 22.925 3.955 508 12.76968 58.18559 18.97 0.65 22.925 3.955
第 21 行
508 12.76968 58.18559 18.97 0.96 0.65 1371 22.925 0.343 3.955 508 12.76968 58.18559 18.97 0.6522.925 3.955 32 16.111 15.777 15.253 0.334 0.524 0.858 508 12.76968 58.18559 18.97 0.65 22.925 3.955 508 12.76968 58.18559 18.97 0.65 22.925 3.955 508 12.76968 58.18559 18.97 0.65 22.925 3.955
解决方案
推荐阅读
- ios - ViewController 中的所有按钮触发 AirPrint Swift
- javascript - 使用 javascript 数组方法更新 mongoose 中的嵌套数组
- php - 订单编辑页面中的 WooCommerce 自定义字段
- kubernetes - 如何将 shell 连接到 Kubernetes pod
- vb.net - 程序提示更新成功,但数据并未添加到 MS Access 数据库文件
- kotlin - 使用 jooq 和 gradle-jooq-plugin (kotlin) 生成代码
- spotify - Apple Music API 获取当前播放的歌曲
- python - 如何在自动编码器中将编码模型与解码模型分开?
- opencv - 需要编程方法,在图像上发现不规则/异常图案
- c# - 自定义路由,替换 URL,同时保留部分 URL