首页 > 解决方案 > 在 R 中循环一个函数

问题描述

我在 R 中编写了一个交叉验证/网格搜索样式代码,它试图找到给定值的最佳阈值mtry(使用随机森林算法)。我已经使用Sonar库中的数据在下面发布了我的代码mlbench但是,此代码似乎存在一些问题。

library(caret)
library(mlbench)
library(randomForest)

res <- matrix(0, nrow = 10, ncol = 6)
colnames(res) <- c("mtry","Threshhold","Accuracy", "PositivePred", "NegativePred", "F-value")
out <- matrix(0, nrow = 17, ncol = 6)
colnames(out) <- c("mtry","Threshhold","Avg.Accuracy", "Avg.PosPred", "Avg.NegPred", "Avg.F_Value")
rep <- matrix(0, nrow = 10, ncol = 6)
colnames(out) <- c("mtry","Threshhold","Avg_Accuracy", "Avg_PosPred", "Avg_NegPred", "Avg_F_Value")

data(Sonar)
N=Sonar

### creating 10 folds 


folds <- cut(seq(1,nrow(N)),breaks=10,labels=FALSE)
for (mtry in 5:14)  {
  K=mtry-4
for(thresh in seq(1,9,0.5)) {
   J = 2*thresh-1
  dataset<-N[sample(nrow(N)),]              ####  mix up the dataset N
   for(I in 1:10){
    #Segement your data by fold using the which() function 
    testIndexes <- which(folds==I,arr.ind=TRUE)
    N_test <- dataset[testIndexes, ]              ### select each fold for test
    N_train <- dataset[-testIndexes, ]            ### select rest for training 
    rf = randomForest(Class~., data = N_train, mtry=mtry, ntree=500)
    pred = predict(rf, N_test, type="prob")
    label = as.factor(ifelse(pred[,2]>=thresh,"M","R"))
    confusion = confusionMatrix(N_test$Class, label)
    res[I,1]=mtry
    res[I,2]=thresh   
    res[I,3]=confusion$overall[1]
    res[I,4]=confusion$byClass[3]
    res[I,5]=confusion$byClass[4]
    res[I,6]=confusion$byClass[7]
   } 
  print(res)
  out[J,1] = mtry
  out[J,2] = thresh
  out[J,3] = mean(res[,2])
  out[J,4] = mean(res[,3])
  out[J,5] = mean(res[,4])
  out[J,6] = mean(res[,5])

 }
 print(out)
  rep[K,1] = mtry
  rep[K,2] = thresh
  rep[K,3] = mean(out[,2])
  rep[K,4] = mean(out[,3])
  rep[K,5] = mean(out[,4])
  rep[K,6] = mean(out[,5])
}
print(rep)

早些时候,我用“iris”数据集写了一个类似的代码,我似乎没有任何问题:

library(caret)
library(randomForest)

data(iris)
N <- iris
N$Species = ifelse(N$Species == "setosa", "a", "b") 

N$Species = as.factor(N$Species) 

res <- matrix(0, nrow = 10, ncol = 5)
colnames(res) <- c("Threshhold","Accuracy", "PositivePred", "NegativePred", "F-value")
out <- matrix(0, nrow = 9, ncol = 5)
colnames(out) <- c("Threshhold","Avg.Accuracy", "Avg.PosPred", "Avg.NegPred", "Avg.F_Value")


### creating 10 folds 

folds <- cut(seq(1,nrow(N)),breaks=10,labels=FALSE)

for(J in 1:9) {
 thresh = J/10
 dataset<-N[sample(nrow(N)),]              ####  mix up the dataset N
 for(I in 1:10){
    #Segement your data by fold using the which() function 
    testIndexes <- which(folds==I,arr.ind=TRUE)
    N_test <- dataset[testIndexes, ]              ### select each fold for test
    N_train <- dataset[-testIndexes, ]            ### select rest for training 
    rf = randomForest(Species~., data = N_train, mtry=3, ntree=10)
    pred = predict(rf, N_test, type="prob")
    label = as.factor(ifelse(pred[,1]>=thresh,"a","b"))
    confusion = confusionMatrix(N_test$Species, label)
    res[I,1]=thresh   
    res[I,2]=confusion$overall[1]
    res[I,3]=confusion$byClass[3]
    res[I,4]=confusion$byClass[4]
    res[I,5]=confusion$byClass[7]
 } 
print(res)
out[J,1] = thresh
out[J,2] = mean(res[,2])
out[J,3] = mean(res[,3])
out[J,4] = mean(res[,4])
out[J,5] = mean(res[,5])

}
print(out)

有人可以帮我调试第一个代码吗?谢谢

标签: rrandom-forestcross-validation

解决方案


您需要)在 for 循环中关闭括号。

替换这个

for(thresh in seq(1,9,0.5) {

for(thresh in seq(1,9,0.5)) {

更新:

此外,您似乎thresh总是高于 1 在标签中给出单个值R,因为它永远不会高于thresh

label = as.factor(ifelse(pred[,2]>=thresh,"M","R"))

这在下一个语句中产生了一个问题

confusion = confusionMatrix(N_test$Class, label)

我用 0.5 进行了测试,我没有收到任何错误。

label = as.factor(ifelse(pred[,2]>=0.5,"M","R"))

如果你能定义一个更好的thresh- 保持在 0 和 1 之间,你应该没问题。


推荐阅读