r - 使用for循环进行模拟
问题描述
我正在使用 parSim 包运行多个场景的模拟(最少 n = 1000),即样本大小和 beta ,我评估的每个估计都需要近 4 天。
为了解决这个问题,我决定使用 for 循环,它应该更快。但是,我不断得到的输出似乎是不同场景的平均值,而不是考虑的每个重复/场景的估计值。如果有人能就如何解决这个问题提供一些建议,我将不胜感激。谢谢
为清楚起见,已包含示例代码和图像。
library(foreach)
fail = 0
reps = 100
sampleSize = c(120, 250, 350, 500, 730)
b0 = c(-1.09,-0.3962,-0.51,-0.015,-0.5965)
b1 = c(0, 0, 0, 0, 0)
b2 = c(0.063626, 0.0321, 0.2031, 0.52031, 0.8101)
b3 = c(0.3954, 0.5954, 0.8012, 0.9547, 0.9801)
df.scenarios = expand.grid(sampleSize, b0, b1, b2, b3)
colnames(df.scenarios) = c("sampleSize", "b0", "b1", "b2", "b3")
foreach::foreach (i = 1:length(df.scenarios)) %do%
{
for (r in 1:reps) {
sex = rbinom(sampleSize, size = 1, p = 0.5)
treatmentgroup = rbinom(sampleSize, size = 1, p = 0.5)
age_cat = rbinom(sampleSize, size = 1, p = 0.5)
prob = 1 / (1 + exp(-b0 + b1 * treatmentgroup + b2 * sex + b3 * age_cat))
mortality = rbinom(sampleSize, size = 1, p = prob)
dat = data.frame(mortality, sex, treatmentgroup, age_cat)
dat <- as.data.frame(lapply(dat, as.numeric))
dat = dat %>% mutate(id = row_number())
dat$id <- paste0(rep("S0"), sep = "_", dat$id)
dat = dat %>% relocate(id)
or = summary(glm(mortality ~ treatmentgroup, data = dat), family = binomial(link = "logit"))
b <- or$coef[2]
se <- or$coef[2,2]
t <- (or$coef[2,3])/or$coef[2,4]
p.val <- or$coef[2,4]
##
OR <- exp(b)
LCI <- exp(b-1.96*or$coef[2,2])
UCI <- exp(b+1.96*or$coef[2,2])
method <- rep("Unadjusted.logit", 1)
results <- list(id = reps, estimate = b, std.error = se, z.value = t, p.value = p.val, OR = OR, lower_bound = LCI, upper_bound = UCI, method = "Unadjusted.logit", fail = fail)
or.models = data.frame(results)
return(or.models)
}
}
解决方案
好吧,我认为现在是可能的。首先我们加载包并设置重复次数。我正在使用tidyverse
和foreach
。
library(tidyverse)
library(foreach)
reps = 100
然后,我们创建数据集。
sampleSize = c(120, 250, 350, 500, 730)
b0 = c(-1.09,-0.3962,-0.51,-0.015,-0.5965)
b1 = c(0, 0, 0, 0, 0)
b2 = c(0.063626, 0.0321, 0.2031, 0.52031, 0.8101)
b3 = c(0.3954, 0.5954, 0.8012, 0.9547, 0.9801)
df.scenarios = expand.grid(sampleSize, b0, b1, b2, b3)
colnames(df.scenarios) = c("sampleSize", "b0", "b1", "b2", "b3")
然后我们创建get_repetition()
. 此函数将为我们提供一个与您之前的代码具有相同参数的数据集。由于我们在 的子集中使用它df.scenarios
,这与使用 相同sampleSize[i]
。
get_repetition <- function(df_subset) {
df_subset %>%
mutate(sex = list(rbinom(sampleSize, size = 1, p = 0.5)),
treatmentgroup = list(rbinom(sampleSize, size = 1, p = 0.5)),
age_cat = list(rbinom(sampleSize, size = 1, p = 0.5))) %>%
unnest(sex:age_cat) %>%
mutate(prob = 1 / (1 + exp(-b0 + b1 * treatmentgroup + b2 * sex + b3 * age_cat)),
mortality = rbinom(nrow(.), size = 1, p = prob)) %>%
select(mortality, sex, treatmentgroup, age_cat) %>%
mutate(across(everything(), as.numeric),
id = paste0(rep("S0"), sep = "_", row_number())) %>%
relocate(id)
}
现在我们创建get_results()
,以便我们可以从模型的摘要中获得一个 df。这也将使我们能够与其他所有重复绑定。
get_results <- function(or_summary) {
b <- or_summary$coef[2]
se <- or_summary$coef[2, 2]
t <- (or_summary$coef[2, 3]) / or_summary$coef[2, 4]
p.val <- or_summary$coef[2, 4]
##
OR <- exp(b)
LCI <- exp(b - 1.96 * or_summary$coef[2, 2])
UCI <- exp(b + 1.96 * or_summary$coef[2, 2])
method <- rep("Unadjusted.logit", 1)
results <-
list(
id = r,
estimate = b,
std.error = se,
z.value = t,
p.value = p.val,
OR = OR,
lower_bound = LCI,
upper_bound = UCI,
method = "Unadjusted.logit"
)
data.frame(results)
}
最后,我们应用循环。我为前五行做了修改df.scenarios
。您可以删除[1:5,]
以使用完整的数据集。列表中的每个元素都是一个场景,每个元素都是id
重复次数。不要忘记使用<-
or为最终输出命名=
。
foreach::foreach (i = 1:nrow(df.scenarios[1:5,])) %do%
{
summaries <- tibble()
df_subset = df.scenarios[i,]
for (r in 1:reps) {
dat = get_repetition(df_subset)
model = glm(mortality ~ treatmentgroup, data = dat)
or = summary(model, family = binomial(link = "logit"))
or.models = get_results(or)
summaries <- bind_rows(summaries, or.models)
}
summaries
}
#> [[1]]
#> # A tibble: 100 x 9
#> id estimate std.error z.value p.value OR lower_bound upper_bound method
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 1 -0.100 0.0731 -7.88 0.174 0.905 0.784 1.04 Unadj~
#> 2 2 -0.0539 0.0781 -1.40 0.492 0.948 0.813 1.10 Unadj~
#> 3 3 0.00528 0.0873 0.0635 0.952 1.01 0.847 1.19 Unadj~
#> 4 4 0.0893 0.0745 5.14 0.233 1.09 0.945 1.27 Unadj~
#> 5 5 0.0578 0.0624 2.61 0.356 1.06 0.938 1.20 Unadj~
#> 6 6 -0.0166 0.0646 -0.322 0.798 0.984 0.867 1.12 Unadj~
#> 7 7 -0.0662 0.0750 -2.32 0.380 0.936 0.808 1.08 Unadj~
#> 8 8 -0.100 0.0731 -7.88 0.174 0.905 0.784 1.04 Unadj~
#> 9 9 0.0684 0.0722 2.74 0.345 1.07 0.930 1.23 Unadj~
#> 10 10 -0.0364 0.0714 -0.833 0.612 0.964 0.838 1.11 Unadj~
#> # ... with 90 more rows
#>
#> [[2]]
#> # A tibble: 100 x 9
#> id estimate std.error z.value p.value OR lower_bound upper_bound method
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 1 0.00800 0.0504 0.182 0.874 1.01 0.913 1.11 Unadj~
#> 2 2 -0.0487 0.0511 -2.79 0.341 0.952 0.862 1.05 Unadj~
#> 3 3 0.0327 0.0488 1.33 0.504 1.03 0.939 1.14 Unadj~
#> 4 4 0.0678 0.0544 5.82 0.214 1.07 0.962 1.19 Unadj~
#> 5 5 -0.0999 0.0567 -22.3 0.0791 0.905 0.810 1.01 Unadj~
#> 6 6 -0.104 0.0494 -57.2 0.0367 0.901 0.818 0.993 Unadj~
#> 7 7 0.0171 0.0462 0.519 0.712 1.02 0.929 1.11 Unadj~
#> 8 8 0.0250 0.0519 0.763 0.631 1.03 0.926 1.14 Unadj~
#> 9 9 -0.0166 0.0488 -0.465 0.733 0.983 0.894 1.08 Unadj~
#> 10 10 0.0672 0.0495 7.69 0.176 1.07 0.971 1.18 Unadj~
#> # ... with 90 more rows
#>
#> [[3]]
#> # A tibble: 100 x 9
#> id estimate std.error z.value p.value OR lower_bound upper_bound method
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 1 0.00571 0.0436 0.146 0.896 1.01 0.923 1.10 Unadj~
#> 2 2 0.0366 0.0429 2.17 0.394 1.04 0.954 1.13 Unadj~
#> 3 3 -0.0329 0.0427 -1.74 0.442 0.968 0.890 1.05 Unadj~
#> 4 4 0.0412 0.0473 2.27 0.384 1.04 0.950 1.14 Unadj~
#> 5 5 0.0258 0.0425 1.12 0.544 1.03 0.944 1.12 Unadj~
#> 6 6 0.0276 0.0410 1.34 0.501 1.03 0.949 1.11 Unadj~
#> 7 7 -0.0571 0.0432 -7.07 0.187 0.944 0.868 1.03 Unadj~
#> 8 8 0.0607 0.0395 12.2 0.125 1.06 0.983 1.15 Unadj~
#> 9 9 -0.0346 0.0433 -1.88 0.424 0.966 0.887 1.05 Unadj~
#> 10 10 0.0394 0.0446 2.34 0.377 1.04 0.953 1.14 Unadj~
#> # ... with 90 more rows
#>
#> [[4]]
#> # A tibble: 100 x 9
#> id estimate std.error z.value p.value OR lower_bound upper_bound
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 -0.0728 0.0358 -47.4 0.0428 0.930 0.867 0.997
#> 2 2 0.00578 0.0378 0.174 0.878 1.01 0.934 1.08
#> 3 3 -0.00934 0.0354 -0.333 0.792 0.991 0.924 1.06
#> 4 4 0.0416 0.0391 3.68 0.289 1.04 0.966 1.13
#> 5 5 -0.0136 0.0360 -0.537 0.705 0.986 0.919 1.06
#> 6 6 0.143 0.0375 25092. 0.000152 1.15 1.07 1.24
#> 7 7 0.000384 0.0375 0.0103 0.992 1.00 0.930 1.08
#> 8 8 0.0267 0.0330 1.93 0.419 1.03 0.963 1.10
#> 9 9 -0.0183 0.0357 -0.845 0.608 0.982 0.915 1.05
#> 10 10 0.0260 0.0350 1.62 0.458 1.03 0.958 1.10
#> # ... with 90 more rows, and 1 more variable: method <chr>
#>
#> [[5]]
#> # A tibble: 100 x 9
#> id estimate std.error z.value p.value OR lower_bound upper_bound
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.0647 0.0296 75.5 0.0290 1.07 1.01 1.13
#> 2 2 -0.0707 0.0291 -159. 0.0153 0.932 0.880 0.986
#> 3 3 -0.00496 0.0314 -0.181 0.874 0.995 0.936 1.06
#> 4 4 -0.0613 0.0291 -59.6 0.0354 0.941 0.888 0.996
#> 5 5 -0.0229 0.0303 -1.68 0.450 0.977 0.921 1.04
#> 6 6 -0.00424 0.0303 -0.157 0.889 0.996 0.938 1.06
#> 7 7 -0.0364 0.0314 -4.70 0.247 0.964 0.907 1.03
#> 8 8 -0.0234 0.0307 -1.71 0.446 0.977 0.920 1.04
#> 9 9 -0.0156 0.0307 -0.826 0.613 0.985 0.927 1.05
#> 10 10 -0.00270 0.0297 -0.0980 0.928 0.997 0.941 1.06
#> # ... with 90 more rows, and 1 more variable: method <chr>
由reprex 包于 2021-08-31 创建(v2.0.1)
推荐阅读
- ruby-on-rails - Rails 5.2 credentials.yaml.enc 和 master.key 在 Heroku 上不起作用
- java - 为字符串“008949679851”引发 java.lang.NumberFormatException
- android - Android Room 多个相同类型的嵌入字段
- javascript - SunburstR 图的 Javascript 排序
- javacard - Java卡通过服务器发送APDU命令
- php - Websocket在服务器上不起作用
- ios - 如何在开发过程中持续演示您的 react-native 应用程序
- javascript - jquery更新价格onclick单选或带有条件的复选框
- objective-c - 如何在 Swift 中使用 RLMArray 和 Realm for Objective-C?
- db2 - db2 更新 dbm cfg 立即