首页 > 解决方案 > AttributeError: Can only use .str accessor with string values!(只能对字符串值使用 .str 访问器)(cdQA 包)

问题描述

import os    
import pandas as pd    
from ast import literal_eval    

from cdqa.utils.filters import filter_paragraphs    
from cdqa.pipeline import QAPipeline    

df = pd.read_csv('./data/House_copy.csv', converters={'paragraphs': literal_eval}).astype(str)    
df = filter_paragraphs(df)    
df.head()    

cdqa_pipeline = QAPipeline(reader='./models/bert_qa.joblib')  
cdqa_pipeline.fit_retriever(df=df)    

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-30-cf7aa8b65439> in <module>
      1 cdqa_pipeline = QAPipeline(reader='./models/bert_qa.joblib')
----> 2 cdqa_pipeline.fit_retriever(df=df)

~/cdQA/cdqa/pipeline/cdqa_sklearn.py in fit_retriever(self, df)
    109             )
    110         else:
--> 111             self.metadata = self._expand_paragraphs(df)
    112 
    113         self.retriever.fit(self.metadata)

~/cdQA/cdqa/pipeline/cdqa_sklearn.py in _expand_paragraphs(df)
    230             {
    231                 col: np.repeat(df[col].values, df[lst_col].str.len())
--> 232                 for col in df.columns.drop(lst_col)
    233             }
    234         ).assign(**{lst_col: np.concatenate(df[lst_col].values)})[df.columns]

~/cdQA/cdqa/pipeline/cdqa_sklearn.py in <dictcomp>(.0)
    230             {
    231                 col: np.repeat(df[col].values, df[lst_col].str.len())
--> 232                 for col in df.columns.drop(lst_col)
    233             }
    234         ).assign(**{lst_col: np.concatenate(df[lst_col].values)})[df.columns]

~/opt/anaconda3/envs/sklearn-dev/lib/python3.7/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5174             or name in self._accessors
   5175         ):
-> 5176             return object.__getattribute__(self, name)
   5177         else:
   5178             if self._info_axis._can_hold_identifiers_and_holds_name(name):

~/opt/anaconda3/envs/sklearn-dev/lib/python3.7/site-packages/pandas/core/accessor.py in __get__(self, obj, cls)
    173             # we're accessing the attribute of the class, i.e., Dataset.geo
    174             return self._accessor
--> 175         accessor_obj = self._accessor(obj)
    176         # Replace the property with the accessor object. Inspired by:
    177         # http://www.pydanny.com/cached-property.html

~/opt/anaconda3/envs/sklearn-dev/lib/python3.7/site-packages/pandas/core/strings.py in __init__(self, data)
   1915 
   1916     def __init__(self, data):
-> 1917         self._inferred_dtype = self._validate(data)
   1918         self._is_categorical = is_categorical_dtype(data)
   1919 

~/opt/anaconda3/envs/sklearn-dev/lib/python3.7/site-packages/pandas/core/strings.py in _validate(data)
   1962 
   1963         if inferred_dtype not in allowed_types:
-> 1964             raise AttributeError("Can only use .str accessor with string " "values!")
   1965         return inferred_dtype
   1966 

AttributeError: Can only use .str accessor with string values!

我正在构建一个问答(QA)系统，并在 cdQA 包的现有代码基础上进行修改。我创建了一个格式类似的 CSV 文件，其中包含一些文本。

  1. 执行 df.head() 后，输出的表格中没有任何内容。
  2. 我一直收到 .str 访问器错误，不确定问题是否出在我的数据集上。

有人可以帮忙吗?

谢谢!

标签: python-3.x

解决方案


推荐阅读