python - scikit 管道中的 LeaveOneOut 编码器
问题描述
我正在尝试在 scikit 管道中使用 LeaveOneOut 编码器,但出现此错误:
AttributeError:“numpy.ndarray”对象没有属性“fit”
有人知道如何解决吗?
我的代码:
imputer_num = SimpleImputer(strategy = 'median')
imputer_cat = SimpleImputer(strategy = 'most_frequent')
XGB = XGBClassifier()
BBC = BalancedBaggingClassifier()
BRC = BalancedRandomForestClassifier()
models = [XGB, BBC, BRC]
class MyLEncoder(BaseEstimator, TransformerMixin):
def fit(self, X, y, **fit_params):
enc = LeaveOneOutEncoder()
self.enc = enc.fit(np.asarray(X), y)
return self
def transform(self, X, **fit_params):
enc_data = self.enc.transform(np.asarray(X))
return enc_data
def fit_transform(self, X,y=None, **fit_params):
self.fit(X, y, **fit_params)
return self.transform(X)
numeric_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy = 'median'))
,('scaler', StandardScaler())
])
categorical_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy = 'most_frequent'))
,('encod', MyLEncoder)
])
preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, numericas_all)
,('cat', categorical_transformer, categoricas_all)
])
for item in models:
pipe = Pipeline(steps=[('preprocessor', preprocessor),('classifier', item)])
model = pipe.fit(X_train, y_train)
y_pred = model.predict(X_test)
test_probs = model.predict_proba(X_test)
print(model)
print(balanced_accuracy_score(y_test, y_pred))
print(roc_auc_score(y_test, y_pred))
完全错误
AttributeError Traceback(最近一次调用最后一次)在 56 中用于模型中的项目:57 管道 = 管道(步骤 =[(“预处理器”,预处理器),(“分类器”,项目)])---> 58 模型 = pipe.fit (X_train, y_train) 59 y_pred = model.predict(X_test) 60 test_probs = model.predict_proba(X_test)
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params) 328 """ 329 fit_params_steps = self._check_fit_params(**fit_params) - -> 330 Xt = self._fit(X, y, **fit_params_steps) 331 with _print_elapsed_time('Pipeline', 332 self._log_message(len(self.steps) - 1)):
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps) 294 message_clsname='Pipeline', 295 message=self._log_message(step_idx) , --> 296 **fit_params_steps[name]) 297 # 将步骤的变压器替换为已安装的 298 # 变压器。这在加载变压器时是必要的
~/opt/anaconda3/lib/python3.7/site-packages/joblib/memory.py in call (self, *args, **kwargs) 353 354 def call (self, *args, **kwargs): -- > 355 return self.func(*args, **kwargs) 356 357 def call_and_shelve(self, *args, **kwargs):
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params) 738 with _print_elapsed_time(message_clsname, message): 739 if hasattr(transformer, 'fit_transform'): --> 740 res = transformer.fit_transform(X, y, **fit_params) 741 else: 742 res = transformer.fit(X, y, **fit_params).transform(X )
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y) 529 self._validate_remainder(X) 530 --> 531 结果 = self._fit_transform( X, y, _fit_transform_one) 532 533 如果不是结果:
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self, X, y, func,fitted) 465 message=self._log_message(name, idx, len(transformers) ))) 466 for idx, (name, trans, column, weight) in enumerate(--> 467 self._iter(fitted=fitted, replace_strings=True), 1)) 468 除了 ValueError as e: 469 if "Expected 2D数组,取而代之的是一维数组”在 str(e) 中:
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in call (self, iterable) 922 self._iterating = self._original_iterator is not None 923 --> 924 while self.dispatch_one_batch(iterator ): 925 通过 926
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator) 757 return False 758 else: --> 759 self._dispatch(tasks) 760 return True 761
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch) 714 with self._lock: 715 job_idx = len(self._jobs) --> 716 job = self ._backend.apply_async(batch, callback=cb) 717 # 一个作业完成的速度比它的回调是 718 # 在我们到达这里之前调用,导致 self._jobs
~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback) 180 def apply_async(self, func, callback=None): 181 """调度一个函数待运行""" --> 182 result = ImmediateResult(func) 183 if callback: 184 callback(result)
~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in init (self, batch) 547 # 不要延迟应用程序,避免将输入保留在内存中 548 # 参数 --> 549 self.results = batch() 550 551 def get(self):
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py 在 调用(self) 223 with parallel_backend(self._backend, n_jobs=self._n_jobs): 224 return [func(*args, * *kwargs) --> 225 用于 self.items 中的 func、args、kwargs] 226 227 def len (self):
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in (.0) 223 with parallel_backend(self._backend, n_jobs=self._n_jobs): 224 return [func(*args, * *kwargs) --> 225 用于 self.items 中的 func、args、kwargs] 226 227 def len (self):
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params) 738 with _print_elapsed_time(message_clsname, message): 739 if hasattr(transformer, 'fit_transform'): --> 740 res = transformer.fit_transform(X, y, **fit_params) 741 else: 742 res = transformer.fit(X, y, **fit_params).transform(X )
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params) 374 fit_params_last_step = fit_params_steps[self.steps[-1][0]] 375 if hasattr(last_step, 'fit_transform'): --> 376 return last_step.fit_transform(Xt, y, **fit_params_last_step) 377 else: 378 return last_step.fit(Xt, y,
在 fit_transform(self, X, y, **fit_params) 35 36 def fit_transform(self, X,y=None, **fit_params): ---> 37 self.fit(X, y, **fit_params) 38 返回self.transform(X) 39
AttributeError:“numpy.ndarray”对象没有属性“fit”
解决方案
推荐阅读
- c - 在 Cygwin 中使用 NCurses 扫描 USB 设备的输入
- chart.js - 第二个 X 轴上的条不显示
- haskell - Haskell 中更整洁的二进制文件处理
- powershell - 如何在 PowerShell 中创建私有类成员?
- java - 如何将这段 C# 泛型翻译成 Java
- windows - 如何在 Visual Studio 中将资源管理器作为预构建事件重新启动
- swift - 我正在尝试制作一张在快速滑动时滑动的卡片
- ruby-on-rails - Rails:从 github 下载后,Bundle Install 无法安装必要的 gem
- c# - 尽管共享相同的选定项属性,但从列表框中选择新项不会从另一个列表框中取消选择现有选定项
- php - 如何让服务像 SwiftMailer 一样在 kernel.terminate 上工作?