首页 > 解决方案 > 读取文件时出现 pandas 错误

问题描述

我是 Jupyter 的新手,上周才开始尝试用它绘图。我有一个很大的 Excel 表格(.csv 格式),想读取其中的数据并绘图。我的做法是先把 .csv 转换为 .dat 格式。

我的代码看起来像这样

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import ListedColormap, LinearSegmentedColormap


# Paths of the two whitespace-separated tables read by this script.
file1 = 'king2.dat'
file2 = 'iso.dat'

# Parse the first table. The delimiter is a regular expression, so it is
# written as a raw string to keep the backslash out of Python's own
# escape processing.
data1 = pd.read_csv(file1, delimiter=r'\s+', header=None, engine='python')

# Column labels, laid out one suffix group per paragraph for readability.
# The number of labels must equal the number of parsed columns, otherwise
# the assignment below raises.
# NOTE(review): '(V-I)_onBSs' and the '... _SED' labels containing a space
# look like typos; they are kept verbatim because other code may use them
# as keys -- confirm before renaming.
data1.columns = [
    'no_plt', 'Op_RA_plt', 'Op_DE_plt', 'Vmag_plt', 'B-V_plt',
    '(B-V)o_plt', 'no_2_plt', 'FUV_mag_plt', 'FUV_magerr_plt',
    '(FUV-V)_plt', '(V-I)_plt', '(FUV-I)_plt',

    'no_op_2M', 'Op_RA_2M', 'Op_DE_2M', 'Vmag_2M', '(B-V)o_2M',
    'FUV_mag_2M', 'FUV-V_2M', 'no_2M', 'j_m_2M', 'h_m_2M', 'k_m_2M',
    'j-h_2M', 'h-k_2M', 'j-k_2M', '(V-I)_2M', '(FUV-I)_2M',

    'no_MS', 'Op_RA_MS', 'Op_DE_MS', 'Vmag_MS', '(B-V)o_MS',
    'FUV_mag_MS', 'FUV-V_MS', '(V-I)_MS', '(FUV-I)_MS',

    'no_508', 'Op_RA_508', 'Op_DE_508', 'Vmag_508', '(B-V)o_508',
    'FUV_mag_508', 'FUV-V_508', '(V-I)_508', '(FUV-I)_508',

    'no_RG', 'Op_RA_RG', 'Op_DE_RG', 'Vmag_RG', '(B-V)o_RG',
    'FUV_mag_RG', 'FUV-V_RG', '(V-I)_RG', '(FUV-I)_RG',

    'no_RG609', 'Op_RA_RG609', 'Op_DE_RG609', 'Vmag_RG609',
    '(B-V)o_RG609', 'FUV_mag_RG609', 'FUV-V_RG609', '(V-I)_RG609',
    '(FUV-I)_RG609',

    'no_TF621', 'Op_RA_TF621', 'Op_DE_TF621', 'Vmag_TF621',
    '(B-V)o_TF621', 'FUV_mag_TF621', 'FUV-V_TF621', '(V-I)_TF621',
    '(FUV-I)_TF621',

    'no_onBSS', 'Op_RA_onBSS', 'Op_DE_onBSS', 'Vmag_onBSS',
    '(B-V)o_onBSS', 'FUV_mag_onBSS', 'FUV-V_onBSS', '(V-I)_onBSs',
    '(FUV-I)_onBSS',

    'no_BSSreg', 'Op_RA_BSSreg', 'Op_DE_BSSreg', 'Vmag_BSSreg',
    '(B-V)o_BSSreg', 'FUV_mag_BSSreg', 'FUV-V_BSSreg', '(V-I)_BSSreg',
    '(FUV-I)_BSSreg',

    'no_BSSreg558', 'Op_RA_BSSreg558', 'Op_DE_BSSreg558',
    'Vmag_BSSreg558', '(B-V)o_BSSreg558', 'FUV_mag_BSSreg558',
    'FUV-V_BSSreg558', '(V-I)_BSSreg558', '(FUV-I)_BSSreg558',

    'no_aMS', 'Op_RA_aMS', 'Op_DE_aMS', 'Vmag_aMS', '(B-V)o_aMS',
    'FUV_mag_aMS', 'FUV-V_aMS', '(V-I)_aMS', '(FUV-I)_aMS',

    'no_bMS', 'Op_RA_bMS', 'Op_DE_bMS', 'Vmag_bMS', '(B-V)o_bMS',
    'FUV_mag_bMS', 'FUV-V_bMS', '(V-I)_bMS', '(FUV-I)_bMS',

    'no _SED', 'Op_RA _SED', 'Op_DE _SED', 'Vmag _SED', '(B-V)o _SED',
    'FUV_mag _SED', 'FUV-V _SED', '(V-I)_SED', '(FUV-I)_SED',
]

# Parse the second table from ``file2`` (not ``file1``), so that ``data2``
# holds a different table than ``data1``. The delimiter is a regular
# expression, hence the raw string.
data2 = pd.read_csv(file2, delimiter=r'\s+', header=None, engine='python')

# The number of labels must equal the number of parsed columns, otherwise
# the assignment below raises.
data2.columns = [
    'age', 'log(Z)', 'mass', 'logl', 'logt', 'logg',
    'FUVCa_14.76', 'NUVB15_14.76', '(FUV-NUV)14.76', 'V14.76', 'B14.76',
    'B-V14.76', '(FUV-V)14.76', '(NUV-V)14.76', 'GA NUV14.76',
    '(Uf-Gn)14.76', 'I14.76', '(V-I)14.76', '(FUV-I)14.76',
]


def fit_data():
    """Scatter-plot columns of ``data1`` and ``data2`` on one figure.

    Reads the module-level ``data1`` and ``data2`` DataFrames, draws both
    scatter series on a single 8x8 figure, inverts both axes, shows the
    figure and then closes it. Takes no arguments and returns nothing.
    """
    plt.figure(1, figsize=(8, 8))
    plt.subplot(111)

    # Red dots: first table; green circles: second table.
    plt.scatter(data1['(B-V)o_plt'], data1['Vmag_plt'], marker='.', color='r', s=5)
    plt.scatter(data2['B-V14.76'], data2['V14.76'], marker='o', color='g', s=6)

    # NOTE(review): the axis labels read RA/DEC while the plotted columns
    # are named B-V and Vmag/V -- confirm the labels are intended.
    plt.xlabel('RA_F', size=20)
    plt.ylabel('DEC_F', size=20)

    axes = plt.gca()
    axes.invert_xaxis()
    axes.invert_yaxis()

    plt.show()
    plt.close()


fit_data()

当表格只有 4 列时,读取和绘图都没有任何错误。但是如果我增加列数,就会报如下错误:

ParserError                               Traceback (most recent call last)
<ipython-input-5-fe66f2a2aac9> in <module>()
     13 data1.columns = ['age','FUVCa','NUVB15','(FUV-NUV)','V','B','B-V','(FUV-V)','(NUV-V)','I','(V-I)','(FUV-I)']
     14 
---> 15 data2 = pd.read_csv(file2, delimiter='\s+', header=None, engine='python')
     16 data2.columns = ['no','Op_RA','Op_DE','Vmag','(B-V)o','FUV_mag','(FUV-V)','(V-I)','(FUV-I)', 'no_MS','Op_RA_MS','Op_DE_MS','Vmag_MS','(B-V)o_MS',
     17                'FUV_mag_MS','FUV-V_MS','(V-I)_MS','(FUV-I)_MS','no_508','Op_RA_508','Op_DE_508','Vmag_508',

~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    707                     skip_blank_lines=skip_blank_lines)
    708 
--> 709         return _read(filepath_or_buffer, kwds)
    710 
    711     parser_f.__name__ = name

~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    453 
    454     try:
--> 455         data = parser.read(nrows)
    456     finally:
    457         parser.close()

~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in read(self, nrows)
   1067                 raise ValueError('skipfooter not supported for iteration')
   1068 
-> 1069         ret = self._engine.read(nrows)
   1070 
   1071         if self.options.get('as_recarray'):

~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in read(self, rows)
   2261             content = content[1:]
   2262 
-> 2263         alldata = self._rows_to_cols(content)
   2264         data = self._exclude_implicit_index(alldata)
   2265 

~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _rows_to_cols(self, content)
   2916                     msg += '. ' + reason
   2917 
-> 2918                 self._alert_malformed(msg, row_num + 1)
   2919 
   2920         # see gh-13320

~/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _alert_malformed(self, msg, row_num)
   2683 
   2684         if self.error_bad_lines:
-> 2685             raise ParserError(msg)
   2686         elif self.warn_bad_lines:
   2687             base = 'Skipping line {row_num}: '.format(row_num=row_num)

ParserError: Expected 30 fields in line 21, saw 45. Error could possibly be due to quotes being ignored when a multi-char delimiter is used.

我无法理解这个错误的含义。我不知道自己哪里出错了,还是因为我给出的列太多而无法处理。

第 20 行

508 12.76968    58.18559    18.97   0.96    0.65    1371    22.925  0.343   3.955           508 12.76968    58.18559    18.97   0.65    22.925  3.955   32  16.111  15.777  15.253  0.334   0.524   0.858    508    12.76968    58.18559    18.97   0.65    22.925  3.955 508   12.76968    58.18559    18.97   0.65    22.925  3.955

第 21 行

508 12.76968    58.18559    18.97   0.96    0.65    1371    22.925 0.343    3.955           508 12.76968    58.18559    18.97   0.6522.925  3.955   32  16.111  15.777  15.253  0.334   0.524   0.858   508 12.76968    58.18559    18.97   0.65    22.925  3.955   508 12.76968    58.18559    18.97   0.65    22.925  3.955   508 12.76968    58.18559    18.97   0.65    22.925  3.955

标签: pandas, plot, jupyter-notebook, jupyter

解决方案


推荐阅读