r - 计算训练和测试数据集的准确性
问题描述
我在计算训练数据和测试数据的预测准确率时感到困惑:每次计算得到的值都完全相同,而且测试准确率也不太理想。
library(dbplyr)
library(tidyverse)
library(caret)
# Placeholders for the per-run train and test accuracies (one slot per run);
# every slot starts at 1 and is overwritten by the evaluation loop.
train_Data_prediction <- rep(1, 100)
test_Data_prediction <- rep(1, 100)
# Run the evaluation 100 times, then average the accuracy.
#
# NOTE(review): nothing below is randomised -- the same two CSV files are used
# on every pass and there is no resampling -- so all 100 iterations produce
# exactly the same accuracy. To get run-to-run variation, draw a fresh
# train/test split inside the loop.

# The input files never change, so read them once instead of on every pass.
data_train <- read.csv("parktraining.csv", header = FALSE, sep = ",")
data_train <- as.matrix(data_train)
tested_data <- read.csv("parktesting.csv", header = FALSE, sep = ",")
tested_data <- as.matrix(tested_data)

# The last column is the class label (compared against 1 and 0 below); every
# other column is a feature. drop = FALSE keeps a matrix even when only one
# feature column is left.
x_index <- ncol(data_train)
data_Without_lable <- data_train[, -x_index, drop = FALSE]
lable <- data_train[, x_index]
testing_data <- tested_data[, -x_index, drop = FALSE]
target <- tested_data[, x_index]

# Classify one standardised feature row with Gaussian naive Bayes.
#
# testing_data_row: numeric vector with one value per feature column.
# Returns 1 when the positive class has the larger log-posterior, otherwise 0.
#
# The per-class means/sds and the id/label vectors are looked up in the
# enclosing (global) environment at call time, so they must be assigned
# before this function is called.
# NOTE(review): this name masks stats::predict() for code run from the global
# environment.
predict <- function(testing_data_row) {
  # Use the normal density for the per-feature likelihoods. log = TRUE returns
  # log-densities directly, so tiny densities cannot underflow to log(0).
  # The trailing `+` keeps the class prior inside the same expression; a `+`
  # at the start of the next line would be parsed as a separate statement.
  p_pos <- sum(dnorm(testing_data_row, pos_Mean, positive_sd, log = TRUE)) +
    log(length(positive_ids) / length(lable))
  p_neg <- sum(dnorm(testing_data_row, neg_Mean, negative_sd, log = TRUE)) +
    log(length(negative_ids) / length(lable))
  if (p_pos > p_neg) 1 else 0
}

for (count in seq_len(100)) {
  # Standardise the training features column by column.
  Train_mean <- apply(data_Without_lable, 2, mean)
  Train_sd <- apply(data_Without_lable, 2, sd)
  Train_offsets <- sweep(data_Without_lable, 2, Train_mean, "-")
  Train_scaled_data <- sweep(Train_offsets, 2, Train_sd, "/")

  # Split the standardised training rows by class and fit a per-feature
  # mean and standard deviation for each class.
  positive_ids <- which(lable == 1)
  negative_ids <- which(lable == 0)
  positive_data <- Train_scaled_data[positive_ids, , drop = FALSE]
  negative_data <- Train_scaled_data[negative_ids, , drop = FALSE]
  pos_Mean <- apply(positive_data, 2, mean)
  positive_sd <- apply(positive_data, 2, sd)
  neg_Mean <- apply(negative_data, 2, mean)
  negative_sd <- apply(negative_data, 2, sd)

  # Standardise the test features with the TRAINING mean/sd: the class
  # parameters above live in that scale, so the test rows must be mapped
  # through the same transform rather than through their own statistics.
  test_offset <- sweep(testing_data, 2, Train_mean, "-")
  test_scaled_data <- sweep(test_offset, 2, Train_sd, "/")

  # Accuracy = share of rows whose predicted label equals the true label
  # (rows with a missing label count as misclassified).
  test_prediction <- apply(test_scaled_data, 1, predict)
  test_Data_prediction[count] <-
    sum(test_prediction == target, na.rm = TRUE) / length(target)

  # Predict for train data ( optional, output not printed )
  train_prediction <- apply(Train_scaled_data, 1, predict)
  train_Data_prediction[count] <-
    sum(train_prediction == lable, na.rm = TRUE) / length(lable)
}
# Show the per-run accuracy vectors, then the mean accuracy of each as a
# percentage.
test_Data_prediction
train_Data_prediction
print(paste("Average Train Data Accuracy:", 100 * mean(train_Data_prediction)))
print(paste("Average Test Data Accuracy:", 100 * mean(test_Data_prediction)))
解决方案
推荐阅读
- audio - OpenAL 未检测到任何播放或捕获设备
- ansible - 即使项目失败,如何获得循环的结果?
- java - 我将如何将 Sentinel 迭代实施到两种方法程序?
- python - 使用 Python Twisted,编写一个能够响应传入 APL 查询的 DNS 服务器
- python - 为什么 PyCharm 认为这个变量可能在赋值之前被引用了 while True 循环?
- elasticsearch - 使用 .NET Nest 在 POST 上的不成功 () 低级调用构建的无效 NEST 响应
- c++ - 为什么 random_shuffle 使用 std::swap 而不是 std::iter_swap?
- flutter - 在 Flutter 中从 Firebase Storage 拉取视频
- c - strncat 返回垃圾值
- reactjs - 在 React 中刷新页面后保持选中多个复选框?