python - Python KNN 回归
问题描述
我正在尝试使用 KNN 算法预测汽车的 MPG(每加仑英里数)。我先清洗了数据,划分出训练集和测试集,然后分别编写了未归一化和归一化两个版本的 KNN 函数。现在我想把测试集中的每条数据依次传入 KNN 算法,得到所有预测值的列表,再用均方误差(MSE)来评估这些预测。目前,我写不出能把测试数据逐条传入的函数。任何指导都将不胜感激!
import pandas as pd
import numpy as np
import math
from google.colab import drive
# Mount Google Drive so the CSV path below is reachable from the runtime.
drive.mount('/content/drive')
pd.set_option('display.max_columns', 100)

vehicles = pd.read_csv('/content/drive/MyDrive/CS_167/vehicles (2).csv')

# Keep only rows whose fuelType is 'Regular', then the prediction target
# (comb08) and the three feature columns used by the KNN distance.
is_regular = vehicles["fuelType"] == 'Regular'
subset_cars = vehicles[is_regular]
final_sub = subset_cars[["comb08", "year", "cylinders", "displ"]]

# Null diagnostics: which columns contain NaN, plus NaN-free views of the
# two columns inspected individually.
column_nulls = final_sub.isna().any()
Cylinder_no_null = final_sub["cylinders"].dropna()
displ_no_null = final_sub["displ"].dropna()

# Rows with any missing value are dropped before splitting.
pure_data = final_sub.dropna()

# Reproducible shuffle; the first 500 rows become the test set and the
# remainder the training set.
shuffled_data = pure_data.sample(frac=1, random_state=41)
test_data = shuffled_data.iloc[:500]
train_data = shuffled_data.iloc[500:]

# Working copies, so columns added later do not touch the original split.
train_data_euc = train_data.copy()
test_data_euc = test_data.copy()
def Regression_KNN(MPG, train_data_euc, k):
    """Predict ``comb08`` for one car as the mean over its k nearest rows.

    Distance is the Euclidean distance over the ``year``, ``cylinders`` and
    ``displ`` values. No scaling is applied here.

    Args:
        MPG: Mapping-like query (dict or DataFrame row) providing ``'year'``,
            ``'cylinders'`` and ``'displ'``.
        train_data_euc: DataFrame with those three columns plus ``'comb08'``.
        k: Number of neighbours to average; must be at least 1.

    Returns:
        Mean ``comb08`` of the ``k`` closest training rows (of all rows when
        the frame has fewer than ``k``).

    Raises:
        ValueError: If ``k`` is smaller than 1.

    Side effects:
        Writes the computed distances into ``train_data_euc['euc_dis']``.
        This is kept on purpose: later code in this script copies the frame
        and reads that column.
    """
    # k == 0 would silently yield NaN, and a negative k would slice off the
    # wrong rows, so reject both up front.
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    train_data_euc['euc_dis'] = np.sqrt(
        (MPG['year'] - train_data_euc['year']) ** 2
        + (MPG['cylinders'] - train_data_euc['cylinders']) ** 2
        + (MPG['displ'] - train_data_euc['displ']) ** 2
    )
    nearest_rows = train_data_euc.sort_values(['euc_dis']).iloc[:k]
    return nearest_rows['comb08'].mean()
# Example query car, described by the three features the distance uses.
MPG = {'year': 2020, 'cylinders': 4, 'displ': 5.2}

# %d prints only the integer part of the predicted value.
print("The average MPG for this car is: %d" % Regression_KNN(MPG, train_data_euc, 5))
# Z-score a copy of the training frame: (value - mean) / std per column.
# NOTE: the 'euc_dis' column is present only because Regression_KNN has
# already been called on train_data_euc above and wrote it there.
z_train_copy = train_data_euc.copy()

# Column statistics, all taken before any column is overwritten.
z_train_year_mean = z_train_copy['year'].mean()
z_train_year_std = z_train_copy['year'].std()
z_train_cylinders_mean = z_train_copy['cylinders'].mean()
z_train_cylinders_std = z_train_copy['cylinders'].std()
z_train_displ_mean = z_train_copy['displ'].mean()
z_train_displ_std = z_train_copy['displ'].std()
z_train_euc_mean = z_train_copy['euc_dis'].mean()
z_train_euc_std = z_train_copy['euc_dis'].std()

# Replace each column with its standardized version; 'comb08' (the value
# being predicted) is left in its original units.
z_train_copy['year'] = z_train_copy['year'].sub(z_train_year_mean).div(z_train_year_std)
z_train_copy['cylinders'] = (
    z_train_copy['cylinders'].sub(z_train_cylinders_mean).div(z_train_cylinders_std)
)
z_train_copy['displ'] = z_train_copy['displ'].sub(z_train_displ_mean).div(z_train_displ_std)
z_train_copy['euc_dis'] = z_train_copy['euc_dis'].sub(z_train_euc_mean).div(z_train_euc_std)
def Z_TRAIN_KNN(MPG, z_train_copy, k):
    """Predict ``comb08`` for one car from its k nearest rows in a frame.

    Distance is the Euclidean distance over ``year``, ``cylinders`` and
    ``displ``. This function performs no scaling itself, so the query must
    already be on the same scale as the columns of ``z_train_copy``.

    Args:
        MPG: Mapping-like query (dict or DataFrame row) providing ``'year'``,
            ``'cylinders'`` and ``'displ'``.
        z_train_copy: DataFrame with those three columns plus ``'comb08'``.
        k: Number of neighbours to average; must be at least 1.

    Returns:
        Mean ``comb08`` of the ``k`` closest rows (of all rows when the frame
        has fewer than ``k``).

    Raises:
        ValueError: If ``k`` is smaller than 1.

    Side effects:
        Overwrites ``z_train_copy['euc_dis']`` with the computed distances
        (behaviour retained from the original implementation).
    """
    # k == 0 would silently yield NaN, and a negative k would slice off the
    # wrong rows, so reject both up front.
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    z_train_copy['euc_dis'] = np.sqrt(
        (MPG['year'] - z_train_copy['year']) ** 2
        + (MPG['cylinders'] - z_train_copy['cylinders']) ** 2
        + (MPG['displ'] - z_train_copy['displ']) ** 2
    )
    nearest_rows = z_train_copy.sort_values(['euc_dis']).iloc[:k]
    return nearest_rows['comb08'].mean()
# Example query car in raw units.
MPG = {'year': 2020, 'cylinders': 4, 'displ': 5.2}

# The feature columns of z_train_copy were z-scored above, so the query has
# to be put on the same scale using the *training* mean and std. Passing the
# raw values would make the distance depend almost entirely on the raw/scaled
# offset rather than on how similar the cars are.
MPG_z = {
    'year': (MPG['year'] - z_train_year_mean) / z_train_year_std,
    'cylinders': (MPG['cylinders'] - z_train_cylinders_mean) / z_train_cylinders_std,
    'displ': (MPG['displ'] - z_train_displ_mean) / z_train_displ_std,
}

# %d prints only the integer part of the predicted value.
print("The average MPG for this car is: %d" % Z_TRAIN_KNN(MPG_z, z_train_copy, 5))
def regression_all_kNN(test_data_euc, z_train_data, k):
    """Predict ``comb08`` for every test row with k-nearest-neighbour regression.

    For each row of ``test_data_euc`` the Euclidean distance over ``year``,
    ``cylinders`` and ``displ`` to every training row is computed, and the
    prediction is the mean ``comb08`` of the ``k`` closest training rows.
    No scaling is done here: both frames must already be on the same scale
    (both raw, or both normalized with the same statistics).

    Args:
        test_data_euc: DataFrame of rows to predict; needs the columns
            ``'year'``, ``'cylinders'`` and ``'displ'``.
        z_train_data: DataFrame of training rows with the same three columns
            plus ``'comb08'``. It is not modified.
        k: Number of neighbours to average; must be at least 1.

    Returns:
        A float Series of predictions, one per test row, carrying the index
        of ``test_data_euc``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    feature_columns = ['year', 'cylinders', 'displ']
    # Plain arrays side-step pandas index alignment, which would otherwise
    # pair test and training rows by label instead of comparing each test
    # row against every training row.
    train_features = z_train_data[feature_columns].to_numpy(dtype=float)
    train_targets = z_train_data['comb08'].to_numpy(dtype=float)
    test_features = test_data_euc[feature_columns].to_numpy(dtype=float)

    predictions = []
    for query in test_features:
        distances = np.sqrt(((train_features - query) ** 2).sum(axis=1))
        # Stable sort so ties are broken by training-row order.
        nearest = np.argsort(distances, kind='stable')[:k]
        predictions.append(train_targets[nearest].mean())

    return pd.Series(predictions, index=test_data_euc.index, dtype=float)
# Run the batch predictor over the held-out rows against the training rows,
# averaging the 5 nearest neighbours.
predictions5NN = regression_all_kNN(
    test_data_euc=test_data,
    z_train_data=train_data,
    k=5,
)
解决方案
推荐阅读
- keras - 为什么我的自动编码器模型没有学习?
- python - 在 Google Colab 中训练多个模型
- r - R中的网页抓取:如何处理重定向?
- maven - 在 renovate 中排除特定版本模式
- linux - icmp.ListenPacket("udp4", "127.0.0.1") 返回权限被拒绝
- java - 使用 Spring Integration 和 Jms 将 Soap 消息放入队列的 Web 服务
- java - 在 Postman 中显示主从响应
- c# - 如何从此图像中删除随机生成的线条和点?[使用 Tesseract 的预 OCR]
- php - ldap_add symfony : 服务器不愿意执行
- javascript - 类型错误:_dataProvider_server_rest__WEBPACK_IMPORTED_MODULE_3 react-admin,strapi