Getting nan when using a linear regression class

Problem description

I have a linear regression class and I want to check how it works on the load_boston dataset. I computed the mean absolute percentage error (MAPE) and the result is nan.

import numpy as np 
import warnings
from sklearn.base import BaseEstimator
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import pandas as pd
warnings.filterwarnings('ignore')
class LinearRegressionSGD(BaseEstimator):
    def __init__(self, epsilon=1e-4, max_steps=1000, w0=None, alpha=1e-2):
        '''
        epsilon: difference for the rate of change of weights
        max_steps: maximum number of steps in gradient descent
        w0: np.array (d,) - initial weights
        alpha: learning rate (step size)
        '''
        self.epsilon = epsilon
        self.max_steps = max_steps
        self.w0 = w0
        self.alpha = alpha
        self.w = None
        self.w_history = []

    def fit(self, X, y):
        """
        X: np.array (l, d)
        y: np.array (l)
        ---
        output: self
        """

        l, d = X.shape

        if self.w0 is None:
            self.w0 = np.zeros(d)

        self.w = self.w0

        for step in range(self.max_steps):
            self.w_history.append(self.w)

            w_new = self.w - self.alpha * self.calc_gradient(X, y)

            if (np.linalg.norm(w_new - self.w) < self.epsilon):
                break

            self.w = w_new

        return self

    def predict(self, X):
        """
        X: np.array (l, d)
        ---
        output: np.array (l)
        """
        if self.w is None:
            raise Exception('Not trained yet')

        l, d = X.shape

        y_pred = []

        for i in range(l):
            y_pred.append(np.dot(X[i], self.w))

        return np.array(y_pred)

    def calc_gradient(self, X, y):
        """
        X: np.array (l, d)
        y: np.array (l)
        ---
        output: np.array (d)
        """
        l, d = X.shape
        gradient = []

        for j in range(d):
            dQ = 0
            for i in range(l):
                dQ += (2 / l) * X[i][j] * (np.dot(X[i], self.w) - y[i])
            gradient.append(dQ)

        return np.array(gradient)

data = load_boston()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(np.array(X), y, test_size=0.3, random_state=10)

def MAPE(y_true, y_pred):
    """
    y_true: np.array (l)
    y_pred: np.array (l)
    ---
    output: float [0, +inf)
    """

    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Task 2
sgd = LinearRegressionSGD()
sgd.fit(X_train, y_train)
y_pred_sgd = sgd.predict(X_test)
print(MAPE(y_test, y_pred_sgd))

# Task 3
a, b = X_test.shape
w_0 = np.random.uniform(-2, 2, (b))
lr = LinearRegressionSGD(w0=w_0)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
print(MAPE(y_test, y_pred_lr))

But when I create X and y as below, the code works fine and MAPE returns a float value:

n_features = 2
n_objects = 300
num_steps = 100

np.random.seed(1)
w_true = np.random.normal(0, 0.1, size=(n_features, ))
w_0 = np.random.uniform(-2, 2, (n_features))

X = np.random.uniform(-5, 5, (n_objects, n_features))
y = np.dot(X, w_true) + np.random.normal(0, 1, (n_objects))

What is wrong with my code, and how can I fix it to get a float value? (Sorry for my bad English, it is not my native language.)

Tags: python, machine-learning, linear-regression, gradient-descent

Solution
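
The nan does not come from the MAPE function itself: the Boston house prices are all positive, so the division in MAPE is safe. It comes from gradient descent diverging. The load_boston features are on very different scales (CRIM is of order 1 while TAX reaches several hundred), whereas the synthetic features are uniform on [-5, 5]. With alpha=1e-2 on the raw features every update overshoots, the weights grow without bound, overflow to inf, and from then on the predictions, and therefore MAPE, are nan. A quick way to see this on the sgd object from Task 2 (a diagnostic sketch, assuming the script above has already run):

# Watch the weight norm explode over the first few gradient steps.
for step, w in enumerate(sgd.w_history[:8]):
    print(step, np.linalg.norm(w))

print(np.isnan(sgd.w).any())                # True: the weights overflowed
print(np.isnan(sgd.predict(X_test)).any())  # nan predictions -> nan MAPE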

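The standard fix is to rescale the features before fitting, so that a single step size is stable for every coordinate. Below is a minimal sketch using sklearn's StandardScaler, fitted on the training split only; because LinearRegressionSGD fits no intercept, a constant column is appended so the model can learn a bias. The X_train_s / X_test_s names are illustrative, not from the original post:

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)  # learn mean/std on the training split only
X_test_s = scaler.transform(X_test)        # reuse the same mean/std on the test split

# The model has no intercept term, so append a constant column as a bias feature.
X_train_s = np.hstack([X_train_s, np.ones((X_train_s.shape[0], 1))])
X_test_s = np.hstack([X_test_s, np.ones((X_test_s.shape[0], 1))])

sgd_scaled = LinearRegressionSGD()
sgd_scaled.fit(X_train_s, y_train)
print(MAPE(y_test, sgd_scaled.predict(X_test_s)))  # a finite float now

Lowering the learning rate alone (say alpha=1e-7) also stops the overflow, but convergence on the raw features is then far too slow for 1000 steps; rescaling is the usual remedy.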

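Two side notes. First, the nested loops in calc_gradient compute exactly the matrix expression (2/l) * X^T (Xw - y), so they can be replaced by one vectorized line with identical output and much better speed (an optional refactor):

def calc_gradient(self, X, y):
    l = X.shape[0]
    # Same MSE gradient as the nested loops, in a single matrix product.
    return (2 / l) * X.T.dot(X.dot(self.w) - y)

Second, load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so running the snippets above as-is assumes an older scikit-learn version.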