python - 在使用类线性回归时获得 nan
问题描述
我有一个线性回归类,想检查它如何与数据集 load_boston 一起工作。我计算了平均绝对百分比误差 (MAPE),结果为 nan。
import numpy as np
import warnings
from sklearn.base import BaseEstimator
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import pandas as pd
warnings.filterwarnings('ignore')
class LinearRegressionSGD(BaseEstimator):
def __init__(self, epsilon=1e-4, max_steps=1000, w0=None, alpha=1e-2):
'''
epsilon: difference for the rate of change of weights
max_steps: maximum number of steps in gradient descent
w0: np.array (d,) - initial weights
alpha: learning step
'''
self.epsilon = epsilon
self.max_steps = max_steps
self.w0 = w0
self.alpha = alpha
self.w = None
self.w_history = []
def fit(self, X, y):
"""
X: np.array (l, d)
y: np.array (l)
---
output: self
"""
l, d = X.shape
if self.w0 is None:
self.w0 = np.zeros(d)
self.w = self.w0
for step in range(self.max_steps):
self.w_history.append(self.w)
w_new = self.w - self.alpha * self.calc_gradient(X, y)
if (np.linalg.norm(w_new - self.w) < self.epsilon):
break
self.w = w_new
return self
def predict(self, X):
"""
X: np.array (l, d)
---
output: np.array (l)
"""
if self.w is None:
raise Exception('Not trained yet')
l, d = X.shape
y_pred = []
for i in range(l):
y_pred.append(np.dot(X[i], self.w))
return np.array(y_pred)
def calc_gradient(self, X, y):
"""
X: np.array (l, d)
y: np.array (l)
---
output: np.array (d)
"""
l, d = X.shape
gradient = []
for j in range(d):
dQ = 0
for i in range(l):
dQ += (2 / l) * X[i][j] * (np.dot(X[i], self.w) - y[i])
gradient.append(dQ)
return np.array(gradient)
data = load_boston()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(np.array(X), y, test_size=0.3, random_state=10)
def MAPE(y_true, y_pred):
"""
y_true: np.array (l)
y_pred: np.array (l)
---
output: float [0, +inf)
"""
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
# Task 2
sgd = LinearRegressionSGD()
sgd.fit(X_train, y_train)
y_pred_sgd = sgd.predict(X_test)
print(MAPE(y_test, y_pred_sgd))
# Task 3
a, b = X_test.shape
w_0 = np.random.uniform(-2, 2, (b))
lr = LinearRegressionSGD(w0=w_0)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
print(MAPE(y_test, y_pred_lr))
但是当我像下面这样创建 X, y 时,代码可以正常工作并且 MAPE 会给出浮点值
n_features = 2
n_objects = 300
num_steps = 100
np.random.seed(1)
w_true = np.random.normal(0, 0.1, size=(n_features, ))
w_0 = np.random.uniform(-2, 2, (n_features))
X = np.random.uniform(-5, 5, (n_objects, n_features))
y = np.dot(X, w_true) + np.random.normal(0, 1, (n_objects))
我的代码有什么问题?以及如何修复它以获得浮点值?(对不起我的英语不好,它不是我的母语)
解决方案
推荐阅读
- php - laravel如何动态创建多个数据库
- python - Python 列表未按预期打印
- c# - If Condition inside SqlReader.Read() While 循环未执行
- asp.net - 带有借记卡号的贝宝显示无效的卡号
- php - Yii2 队列扩展:单个队列服务器的多个队列通道/管
- adonis.js - “E_INVALID_HASHER_DRIVER:哈希驱动程序 bcryptsu 不存在。> 更多详细信息:https://err.sh/adonisjs/errors/E_INVALID_HASHER_DRIVER”
- hibernate - 休眠@OneToMany DTO @Where 无效
- python - 无法在 Google AI 平台 (CMLE) 上安装 pycocotools
- javascript - React-materialize-css Modals 无法正常工作,因为它无法读取 M 的 Modal 类
- python - 在两个 Rdd 上使用联合后,无法将 Pyspark 作业结果保存到单个文本文件