首页 > 解决方案 > scikit 管道中的 LeaveOneOut 编码器

问题描述

我正在尝试在 scikit 管道中使用 LeaveOneOut 编码器,但出现此错误:

AttributeError: 'numpy.ndarray' object has no attribute 'fit'

有人知道如何解决吗?

我的代码:

# Stand-alone imputers: one using the median strategy, one the most-frequent value.
imputer_num = SimpleImputer(strategy="median")
imputer_cat = SimpleImputer(strategy="most_frequent")

# Candidate classifiers, each constructed with its default arguments.
XGB = XGBClassifier()
BBC = BalancedBaggingClassifier()
BRC = BalancedRandomForestClassifier()

# Collection of the classifiers built above.
models = [XGB, BBC, BRC]

class MyLEncoder(BaseEstimator, TransformerMixin):
    """Pipeline-compatible wrapper around ``LeaveOneOutEncoder``.

    Inputs are converted to plain arrays before being handed to the wrapped
    encoder, so the wrapper can follow steps (such as an imputer) that emit
    NumPy arrays. Because ``X`` loses its index in that conversion, the
    target is converted to an array as well so that rows are matched by
    position rather than by index label.
    """

    def fit(self, X, y, **fit_params):
        """Fit a fresh ``LeaveOneOutEncoder`` on ``X`` with target ``y``.

        Args:
            X: Array-like of categorical features.
            y: Target values, one per row of ``X``.
            **fit_params: Accepted for API compatibility; not used.

        Returns:
            This instance, with the fitted encoder stored in ``self.enc``.
        """
        target = None if y is None else np.asarray(y)
        self.enc = LeaveOneOutEncoder().fit(np.asarray(X), target)
        return self

    def transform(self, X, **fit_params):
        """Encode unseen data ``X`` with the fitted encoder.

        Args:
            X: Array-like of categorical features.
            **fit_params: Accepted for API compatibility; not used.

        Returns:
            The encoded data as returned by the wrapped encoder.
        """
        return self.enc.transform(np.asarray(X))

    def fit_transform(self, X, y=None, **fit_params):
        """Fit on ``X``/``y`` and return the encoding of the training data.

        Args:
            X: Array-like of categorical features.
            y: Target values, one per row of ``X``.
            **fit_params: Forwarded to ``fit``.

        Returns:
            The encoded training data as returned by the wrapped encoder.
        """
        self.fit(X, y, **fit_params)
        target = None if y is None else np.asarray(y)
        # The target must be supplied when encoding the training rows;
        # without it the encoder cannot exclude each row's own target and
        # falls back to plain per-category means, which leaks the target.
        return self.enc.transform(np.asarray(X), target)

# First branch: median imputation followed by standard scaling.
numeric_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)
    
# Second branch: most-frequent imputation followed by the custom encoder.
categorical_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        # A pipeline step must be an estimator *instance*. Passing the bare
        # class makes ``fit_transform`` run unbound, so the data array is
        # taken as ``self`` and the call fails with
        # "'numpy.ndarray' object has no attribute 'fit'".
        ("encod", MyLEncoder()),
    ]
)
    
# Send the columns listed in ``numericas_all`` through the numeric branch and
# those in ``categoricas_all`` through the categorical branch.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numericas_all),
        ("cat", categorical_transformer, categoricas_all),
    ]
)
    
# Fit and evaluate every candidate classifier behind the shared preprocessor.
for item in models:
    pipe = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", item)])
    model = pipe.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    test_probs = model.predict_proba(X_test)
    print(model)
    print(balanced_accuracy_score(y_test, y_pred))
    # ROC AUC needs a continuous score; hard labels collapse the curve to a
    # single operating point. For a binary problem the score is the
    # probability of the second entry in ``classes_`` (column 1).
    print(roc_auc_score(y_test, test_probs[:, 1]))

完整的错误信息

AttributeError Traceback (most recent call last) in 56 for item in models: 57 pipe = Pipeline(steps=[('preprocessor', preprocessor),('classifier', item)]) ---> 58 model = pipe.fit(X_train, y_train) 59 y_pred = model.predict(X_test) 60 test_probs = model.predict_proba(X_test)

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params) 328 """ 329 fit_params_steps = self._check_fit_params(**fit_params) - -> 330 Xt = self._fit(X, y, **fit_params_steps) 331 with _print_elapsed_time('Pipeline', 332 self._log_message(len(self.steps) - 1)):

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps) 294 message_clsname='Pipeline', 295 message=self._log_message(step_idx), --> 296 **fit_params_steps[name]) 297 # Replace the transformer of the step with the fitted 298 # transformer. This is necessary when loading the transformer

~/opt/anaconda3/lib/python3.7/site-packages/joblib/memory.py in __call__(self, *args, **kwargs) 353 354 def __call__(self, *args, **kwargs): --> 355 return self.func(*args, **kwargs) 356 357 def call_and_shelve(self, *args, **kwargs):

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params) 738 with _print_elapsed_time(message_clsname, message): 739 if hasattr(transformer, 'fit_transform'): --> 740 res = transformer.fit_transform(X, y, **fit_params) 741 else: 742 res = transformer.fit(X, y, **fit_params).transform(X )

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y) 529 self._validate_remainder(X) 530 --> 531 result = self._fit_transform(X, y, _fit_transform_one) 532 533 if not result:

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func, fitted) 465 message=self._log_message(name, idx, len(transformers))) 466 for idx, (name, trans, column, weight) in enumerate( --> 467 self._iter(fitted=fitted, replace_strings=True), 1)) 468 except ValueError as e: 469 if "Expected 2D array, got 1D array instead" in str(e):

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable) 922 self._iterating = self._original_iterator is not None 923 --> 924 while self.dispatch_one_batch(iterator): 925 pass 926

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator) 757 return False 758 else: --> 759 self._dispatch(tasks) 760 return True 761

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch) 714 with self._lock: 715 job_idx = len(self._jobs) --> 716 job = self ._backend.apply_async(batch, callback=cb) 717 # 一个作业完成的速度比它的回调是 718 # 在我们到达这里之前调用,导致 self._jobs

~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback) 180 def apply_async(self, func, callback=None): 181 """调度一个函数待运行""" --> 182 result = ImmediateResult(func) 183 if callback: 184 callback(result)

~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, batch) 547 # Don't delay the application, to avoid keeping the input 548 # arguments in memory --> 549 self.results = batch() 550 551 def get(self):

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self) 223 with parallel_backend(self._backend, n_jobs=self._n_jobs): 224 return [func(*args, **kwargs) --> 225 for func, args, kwargs in self.items] 226 227 def __len__(self):

~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in (.0) 223 with parallel_backend(self._backend, n_jobs=self._n_jobs): 224 return [func(*args, **kwargs) --> 225 for func, args, kwargs in self.items] 226 227 def __len__(self):

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params) 738 with _print_elapsed_time(message_clsname, message): 739 if hasattr(transformer, 'fit_transform'): --> 740 res = transformer.fit_transform(X, y, **fit_params) 741 else: 742 res = transformer.fit(X, y, **fit_params).transform(X )

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params) 374 fit_params_last_step = fit_params_steps[self.steps[-1][0]] 375 if hasattr(last_step, 'fit_transform'): --> 376 return last_step.fit_transform(Xt, y, **fit_params_last_step) 377 else: 378 return last_step.fit(Xt, y,

in fit_transform(self, X, y, **fit_params) 35 36 def fit_transform(self, X,y=None, **fit_params): ---> 37 self.fit(X, y, **fit_params) 38 return self.transform(X) 39

AttributeError: 'numpy.ndarray' object has no attribute 'fit'

标签: python scikit-learn pipeline

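解决方案

错误的原因是:在 categorical_transformer 的 'encod' 步骤中传入的是 MyLEncoder 类本身,而不是它的实例。管道调用 fit_transform 时,numpy 数组被当成了 self,于是出现“对象没有属性 fit”的错误。该步骤应写成 ('encod', MyLEncoder())。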


推荐阅读