首页 > 解决方案 > 尝试将文本编码为数字时的Python KeyError

问题描述

起初,我将 pandas 导入为 pd,并将我的数据集导入为 dF。起初我得到了 elseif 的一个明显的错误。在玩了之后,我摆脱了那个错误并遇到了下面的错误。我正在使用 pythong Jupyter 笔记本

Null = dF.isnull() .any()

dF = dF.drop([["customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn"]], axis=1)

for column in range(len(list(dF.columns.values))):
    for index, row in dF.iterrows():
        if "No" in row[column] or "Female" in row[column]:
            dF.iloc[index, column] = 0
    
        elif "Yes" in row[column] or "Male" in row[column]:
            dF.iloc[index, column] = 1
        
dF.to_excel('Cleaned.xlsx', index=False)

以下错误

KeyError                                  Traceback (most recent call last)
<ipython-input-94-076be1113e81> in <module>
  1 Null = dF.isnull() .any()
  2 
----> 3 dF = dF.drop([["customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn"]], axis=1)
  4 
  5 for column in range(len(list(dF.columns.values))):

~\Anaconda3\lib\site-packages\pandas\core\frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   4100             level=level,
   4101             inplace=inplace,
-> 4102             errors=errors,
   4103         )
   4104 

~\Anaconda3\lib\site-packages\pandas\core\generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   3912         for axis, labels in axes.items():
   3913             if labels is not None:
-> 3914                 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
   3915 
   3916         if inplace:

~\Anaconda3\lib\site-packages\pandas\core\generic.py in _drop_axis(self, labels, axis, level, errors)
   3944                 new_axis = axis.drop(labels, level=level, errors=errors)
   3945             else:
-> 3946                 new_axis = axis.drop(labels, errors=errors)
   3947             result = self.reindex(**{axis_name: new_axis})
   3948 

~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in drop(self, labels, errors)
   5338         if mask.any():
   5339             if errors != "ignore":
-> 5340                 raise KeyError("{} not found in axis".format(labels[mask]))
   5341             indexer = indexer[~mask]
   5342         return self.delete(indexer)

KeyError: "[('customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn')] not found in axis"

标签: pythonjupyter-notebook

解决方案


我将假设 dF 已经填充。我相信你得到的错误是因为你有双重排列要删除的列。这就是为什么它告诉你整个数组不是 dF 中的关键。因为不是。

代替

dF = dF.drop([["customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn"]], axis=1)

尝试

dF = dF.drop(["customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn"], axis=1)

推荐阅读