python - Python 中的正则化逻辑回归(Andrew ng 课程)
问题描述
我正在开始 ML 之旅,但我在这个编码练习中遇到了麻烦,这是我的代码
import numpy as np
import pandas as pd
import scipy.optimize as op
# Read the data and give it labels
data = pd.read_csv('ex2data2.txt', header=None, name['Test1', 'Test2', 'Accepted'])
# Separate the features to make it fit into the mapFeature function
X1 = data['Test1'].values.T
X2 = data['Test2'].values.T
# This function makes more features (degree)
def mapFeature(x1, x2):
degree = 6
out = np.ones((x1.shape[0], sum(range(degree + 2))))
curr_column = 1
for i in range(1, degree + 1):
for j in range(i+1):
out[:,curr_column] = np.power(x1, i-j) * np.power(x2, j)
curr_column += 1
return out
# Separate the data into training and target, also initialize theta
X = mapFeature(X1, X2)
y = np.matrix(data['Accepted'].values).T
m, n = X.shape
cols = X.shape[1]
theta = np.matrix(np.zeros(cols))
#Initialize the learningRate(sigma)
learningRate = 1
# Define the Sigmoid Function (Output between 0 and 1)
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def cost(theta, X, y, learningRate):
# This is require to make the optimize function work
theta = theta.reshape(-1, 1)
error = sigmoid(X @ theta)
first = np.multiply(-y, np.log(error))
second = np.multiply(1 - y, np.log(1 - error))
j = np.sum((first - second)) / m + (learningRate * np.sum(np.power(theta, 2)) / 2 * m)
return j
# Define the gradient of the cost function
def gradient(theta, X, y, learningRate):
# This is require to make the optimize function work
theta = theta.reshape(-1, 1)
error = sigmoid(X @ theta)
grad = (X.T @ (error - y)) / m + ((learningRate * theta) / m)
grad_no = (X.T @ (error - y)) / m
grad[0] = grad_no[0]
return grad
Result = op.minimize(fun=cost, x0=theta, args=(X, y, learningRate), method='TNC', jac=gradient)
opt_theta = np.matrix(Result.x)
def predict(theta, X):
sigValue = sigmoid(X @ theta.T)
p = sigValue >= 0.5
return p
p = predict(opt_theta, X)
print('Train Accuracy: {:f}'.format(np.mean(p == y) * 100))
所以,当 时learningRate = 1
,准确度应该在附近,83,05%
但我得到了80.5%
,当 时learningRate = 0
,准确度应该在,91.52%
但我得到了87.28%
所以问题是我做错了什么?为什么我的准确性低于问题的默认答案?
希望有人可以指导我正确的方向。谢谢!
PD:这是数据集,也许它可以提供帮助
解决方案
嘿伙计们,我找到了一种让它变得更好的方法!这是代码
import numpy as np
import pandas as pd
import scipy.optimize as op
from sklearn.preprocessing import PolynomialFeatures
# Read the data and give it labels
data = pd.read_csv('ex2data2.txt', header=None, names=['Test1', 'Test2', 'Accepted'])
# Separate the data into training and target
X = (data.iloc[:, 0:2]).values
y = (data.iloc[:, 2:3]).values
# Modify the features to a certain degree (Polynomial)
poly = PolynomialFeatures(6)
m = y.size
XX = poly.fit_transform(data.iloc[:, 0:2].values)
# Initialize Theta
theta = np.zeros(XX.shape[1])
# Define the Sigmoid Function (Output between 0 and 1)
def sigmoid(z):
return(1 / (1 + np.exp(-z)))
# Define the Regularized cost function
def costFunctionReg(theta, reg, *args):
# This is require to make the optimize function work
h = sigmoid(XX @ theta)
first = np.log(h).T @ - y
second = np.log(1 - h).T @ (1 - y)
J = (1 / m) * (first - second) + (reg / (2 * m)) * np.sum(np.square(theta[1:]))
return J
# Define the Regularized gradient function
def gradientReg(theta, reg, *args):
theta = theta.reshape(-1, 1)
h = sigmoid(XX @ theta)
grad = (1 / m) * (XX.T @ (h - y)) + (reg / m) * np.r_[[[0]], theta[1:]]
return grad.flatten()
# Define the predict Function
def predict(theta, X):
sigValue = sigmoid(X @ theta.T)
p = sigValue >= 0.5
return p
# A loop to test between different values for sigma (reg parameter)
for i, Sigma in enumerate([0, 1, 100]):
# Optimize costFunctionReg
res2 = op.minimize(costFunctionReg, theta, args=(Sigma, XX, y), method=None, jac=gradientReg)
# Get the accuracy of the model
accuracy = 100 * sum(predict(res2.x, XX) == y.ravel()) / y.size
# Get the Error between different weights
error1 = costFunctionReg(res2.x, Sigma, XX, y)
# print the accuracy and error
print('Train accuracy {}% with Lambda = {}'.format(np.round(accuracy, decimals=4), Sigma))
print(error1)
感谢你的帮助!
推荐阅读
- google-analytics - 获取实时分析数据
- sql-server - 指导主键数据库迁移的整数
- r - 从 grid.arrange 中删除 textGrob 的空白
- ajv - 使用 AJV 强制时刻/日期字符串
- python - pyqt5。运行字符串和时钟
- angular - 如何测试“管道”操作的结果是否为“未定义”。TypeError:无法读取未定义的属性“订阅”
- oracle - 如何在单行中显示所有记录
- templates - 可以在节头中包含在包含文件中定义的样式类和脚本函数,该文件包含在另一个包含文件中
- amazon-web-services - 如何在 Xamarin 应用程序中集成 AWS Lex 聊天机器人?
- python - 在 python 3.7.4 中打开大约 1.5GB 的大型 Zip 文件时出现 MemoryError