r - R:为什么第一个代码运行,但第二个代码导致错误?
问题描述
我正在与 R 合作。我正在尝试使用以下库来优化我编写的任意函数:https ://cran.r-project.org/web/packages/nsga2R/nsga2R.pdf
首先,我为此示例创建了一些数据:
#load library
set.seed(123)
library(dplyr)
library(nsga2R)
#create data for this example
# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,10)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)
然后,我定义了以下两个函数进行优化(funct_set 和 funct_set2)
这是第一个功能:
#define first function
funct_set <- function (x) {
x1 <- x[1]; x2 <- x[2]; x3 <- x[3] ; x4 <- x[4]; x5 <- x[5]; x6 <- x[6]; x[7] <- x[7]
f <- numeric(4)
#bin data according to random criteria
train_data <- train_data %>%
mutate(cat = ifelse(a1 <= x1 & b1 <= x3, "a",
ifelse(a1 <= x2 & b1 <= x4, "b", "c")))
train_data$cat = as.factor(train_data$cat)
#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)
b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)
c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)
#calculate quantile ("quant") for each bin
table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[5],1,0 )))
table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[6],1,0 )))
table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[7],1,0 )))
f[1] = mean(table_a$quant)
f[2] = mean(table_b$quant)
f[3] = mean(table_c$quant)
#group all tables
final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
f[4] = mean(final_table$quant)
#this is the only line of code that is different between the two functions
return (f);
}
这是第二个功能:
#define second function
funct_set_2 <- function (x) {
x1 <- x[1]; x2 <- x[2]; x3 <- x[3] ; x4 <- x[4]; x5 <- x[5]; x6 <- x[6]; x[7] <- x[7]
f <- numeric(4)
#bin data according to random criteria
train_data <- train_data %>%
mutate(cat = ifelse(a1 <= x1 & b1 <= x3, "a",
ifelse(a1 <= x2 & b1 <= x4, "b", "c")))
train_data$cat = as.factor(train_data$cat)
#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)
b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)
c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)
#calculate quantile ("quant") for each bin
table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[5],1,0 )))
table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[6],1,0 )))
table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > x[7],1,0 )))
f[1] = mean(table_a$quant)
f[2] = mean(table_b$quant)
f[3] = mean(table_c$quant)
#group all tables
final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
f[4] = mean(final_table$quant)
#here is the only line of code that is different
return(f[3:4])
}
从这里开始,我尝试优化这两个功能。
第一次优化导致错误:
results_1 <- nsga2R(fn=funct_set, varNo=7, objDim=4,
lowerBounds=c(80,80,80,80, 100, 200, 300),
upperBounds=c(120,120,120,120,200,300,400),
popSize=50, tourSize=2, generations=50,
cprob=0.9, XoverDistIdx=20, mprob=0.1,MuDistIdx=3)
********** R based Nondominated Sorting Genetic Algorithm II *********
initializing the population
ranking the initial population
Error in if (all(xi <= xj) && any(xi < xj)) { :
missing value where TRUE/FALSE needed
但第二个优化代码似乎运行良好:
results_2 <- nsga2R(fn=funct_set_2, varNo=7, objDim=2,
lowerBounds=c(80,80,80,80, 100, 200, 300),
upperBounds=c(120,120,120,120,200,300,400),
popSize=50, tourSize=2, generations=50,
cprob=0.9, XoverDistIdx=20, mprob=0.1,MuDistIdx=3)
问题:有谁知道为什么“results_1”会产生错误但“results_2”不会产生错误?是因为我设置函数或数据的方式吗?
谢谢
解决方案
A partial solution :
#define function
funct_set <- function (x) {
x1 <- x[1]; x2 <- x[2]; x3 <- x[3] ; x4 <- x[4]
f <- numeric(4)
#bin data according to random criteria
train_data <- train_data %>%
mutate(cat = ifelse(a1 <= x1 & b1 <= x3, "a",
ifelse(a1 <= x2 & b1 <= x4, "b", "c")))
train_data$cat = as.factor(train_data$cat)
#new splits
a_table = train_data %>%
filter(cat == "a") %>%
select(a1, b1, c1, cat)
b_table = train_data %>%
filter(cat == "b") %>%
select(a1, b1, c1, cat)
c_table = train_data %>%
filter(cat == "c") %>%
select(a1, b1, c1, cat)
#calculate quantile ("quant") for each bin
table_a = data.frame(a_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > 100,1,0 )))
table_b = data.frame(b_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > 300,1,0 )))
table_c = data.frame(c_table%>% group_by(cat) %>%
mutate(quant = ifelse(c1 > 500,1,0 )))
f[1] = mean(table_a$quant)
f[2] = mean(table_b$quant)
f[3] = mean(table_c$quant)
#group all tables
final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
f[4] = mean(final_table$quant)
return (f[2:4]);
}
#optimization
results_2 <- nsga2R(fn=funct_set, varNo=4, objDim=3,
lowerBounds=c(70,90,70,90),
upperBounds=c(90,110,90,110),
popSize=50, tourSize=2, generations=50,
cprob=0.9, XoverDistIdx=20, mprob=0.1,MuDistIdx=3)