首页 > 解决方案 > Anaconda Jupyter 单例数组

问题描述

我刚开始接触 Anaconda 和 StratifiedKFold。运行下面的代码时,我总是遇到这个错误;我尝试按照 scikit-learn 官网上的说明操作,但没有成功。

def redwine(X, y, inner_cv, outer_cv, Classifier, parameter_grid):
    """Run nested cross-validation and return one score per outer fold.

    For every outer split, each parameter setting is scored on the inner
    splits of the outer training part; the setting with the highest mean
    inner score is refit on the whole outer training part and scored on
    the outer test part.

    Args:
        X: Indexable sample collection supporting integer-array indexing
            (``X[indices]``), e.g. a NumPy array.
        y: Targets, indexable the same way as ``X``.
        inner_cv: Splitter with a ``split(X, y)`` method used for
            parameter selection.
        outer_cv: Splitter with a ``split(X, y)`` method used for the
            final evaluation.
        Classifier: Callable returning an estimator with ``fit(X, y)`` and
            ``score(X, y)`` when called as ``Classifier(**parameters)``.
        parameter_grid: Iterable of keyword-argument dicts for
            ``Classifier``.

    Returns:
        NumPy array holding the outer test score of each outer split.

    Raises:
        TypeError: If ``X`` is a callable instead of a data collection.
    """
    # A function with the same name as the data variable silently replaces
    # it; passing that function as X only fails deep inside the splitter
    # with an obscure "Singleton array" message, so fail early and clearly.
    if callable(X):
        raise TypeError(
            "X must be a collection of samples, but a callable was passed; "
            "the data variable was probably overwritten by a function of "
            "the same name."
        )

    outer_scores = []
    # for each split of the data in the outer cross-validation
    # (split method returns indices of training and test parts)
    for training_samples, test_samples in outer_cv.split(X, y):
        X_train = X[training_samples]
        y_train = y[training_samples]
        # find best parameter using inner cross-validation;
        # an empty grid falls back to the classifier's defaults
        best_params = {}
        best_score = -np.inf
        # iterate over parameters
        for parameters in parameter_grid:
            # accumulate score over inner splits
            cv_scores = []
            # The inner indices are relative to the outer training subset,
            # so they must index X_train / y_train, never the full X / y
            # (doing so would leak outer test rows into model selection).
            for inner_train, inner_test in inner_cv.split(X_train, y_train):
                # build classifier given parameters and training data
                clf = Classifier(**parameters)
                clf.fit(X_train[inner_train], y_train[inner_train])
                # evaluate on inner test set
                score = clf.score(X_train[inner_test], y_train[inner_test])
                cv_scores.append(score)
            # compute mean score over inner folds
            mean_score = np.mean(cv_scores)
            if mean_score > best_score:
                # if better than so far, remember parameters
                best_score = mean_score
                best_params = parameters
        # build classifier on best parameters using outer training set
        clf = Classifier(**best_params)
        clf.fit(X_train, y_train)
        # evaluate on the held-out outer test part
        outer_scores.append(clf.score(X[test_samples], y[test_samples]))
    return np.array(outer_scores)

这是错误输出的样子:

    TypeError                                 Traceback (most recent call last)
    <ipython-input-28-24a443b27f0f> in <module>
          1 from sklearn.model_selection import ParameterGrid, StratifiedKFold
          2 scores = redwine(redwine, y, StratifiedKFold(5),
    ----> 3                    StratifiedKFold(5), SVC, ParameterGrid(param_grid))
          4 print("Cross-validation scores: {}".format(scores))

    <ipython-input-26-bf7dd21089c9> in redwine(X, y, inner_cv, outer_cv, Classifier, parameter_grid)
          3    # for each split of the data in the outer cross-validation
          4    # (split method returns indices of training and test parts)
    ----> 5    for training_samples, test_samples in outer_cv.split(X, y):
          6        # find best parameter using inner cross-validation
          7        best_parms = {}

    ~\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
        325             The testing set indices for that split.
        326         """
    --> 327         X, y, groups = indexable(X, y, groups)
        328         n_samples = _num_samples(X)
        329         if self.n_splits > n_samples:

    ~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in indexable(*iterables)
        228         else:
        229             result.append(np.array(X))
    --> 230     check_consistent_length(*result)
        231     return result
        232 

    ~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
        199     """
        200 
    --> 201     lengths = [_num_samples(X) for X in arrays if X is not None]
        202     uniques = np.unique(lengths)
        203     if len(uniques) > 1:

    ~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in <listcomp>(.0)
        199     """
        200 
    --> 201     lengths = [_num_samples(X) for X in arrays if X is not None]
        202     uniques = np.unique(lengths)
        203     if len(uniques) > 1:

    ~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in _num_samples(x)
        144         if len(x.shape) == 0:
        145             raise TypeError("Singleton array %r cannot be considered"
    --> 146                             " a valid collection." % x)
        147         # Check that shape is returning an integer or default to len
        148         # Dask dataframes may not return numeric shape[0] value

    TypeError: Singleton array array(<function redwine at 0x00000110E29E82F0>, dtype=object) cannot be considered a valid collection.

有什么建议吗?我是否需要以不同的方式拆分数据?

标签: python, arrays, scikit-learn, jupyter-notebook, anaconda

解决方案


推荐阅读