首页 > 解决方案 > 使用for循环进行模拟

问题描述

我正在使用 parSim 包运行多个场景的模拟(最少 n = 1000),即样本大小和 beta ,我评估的每个估计都需要近 4 天。

为了解决这个问题,我决定使用 for 循环,它应该更快。但是,我不断得到的输出似乎是不同场景的平均值,而不是考虑的每个重复/场景的估计值。如果有人能就如何解决这个问题提供一些建议,我将不胜感激。谢谢

为清楚起见,已包含示例代码和图像。

library(foreach)

    fail = 0
    reps = 100
    sampleSize = c(120, 250, 350, 500, 730)
    b0 = c(-1.09,-0.3962,-0.51,-0.015,-0.5965)
    b1 = c(0, 0, 0, 0, 0)
    b2 = c(0.063626, 0.0321, 0.2031, 0.52031, 0.8101)
    b3 = c(0.3954, 0.5954, 0.8012, 0.9547, 0.9801)


    df.scenarios = expand.grid(sampleSize, b0, b1, b2, b3)
    colnames(df.scenarios) = c("sampleSize", "b0", "b1", "b2", "b3")
    
    foreach::foreach (i = 1:length(df.scenarios))  %do%
        

{
        for (r in 1:reps)  {
            sex = rbinom(sampleSize, size = 1, p = 0.5)
            treatmentgroup = rbinom(sampleSize, size = 1, p = 0.5)
            age_cat  = rbinom(sampleSize, size = 1, p = 0.5)
            prob = 1 / (1 + exp(-b0 + b1 * treatmentgroup + b2 * sex + b3 * age_cat))
            mortality = rbinom(sampleSize, size = 1, p = prob)
            
            dat = data.frame(mortality, sex, treatmentgroup, age_cat)
            dat <- as.data.frame(lapply(dat, as.numeric))
            dat = dat %>% mutate(id = row_number())
            dat$id <- paste0(rep("S0"), sep = "_", dat$id)
            dat = dat %>%  relocate(id)
            
            or = summary(glm(mortality ~ treatmentgroup, data = dat), family = binomial(link = "logit"))
    

   

    b <- or$coef[2]
    se <- or$coef[2,2]
    t <- (or$coef[2,3])/or$coef[2,4]
    p.val <- or$coef[2,4]
    ##
    OR <- exp(b)
    LCI <- exp(b-1.96*or$coef[2,2])
    UCI <- exp(b+1.96*or$coef[2,2])
    method <- rep("Unadjusted.logit", 1) 
    
     results <- list(id = reps,  estimate = b, std.error = se, z.value = t, p.value = p.val, OR = OR, lower_bound = LCI, upper_bound = UCI, method =  "Unadjusted.logit", fail = fail)
    
      or.models = data.frame(results)
    
     return(or.models)
       
              }
       }

100 个模拟的输出显示一行而不是 100

df.scenarios

标签: rsimulation

解决方案


好吧,我认为现在是可能的。首先我们加载包并设置重复次数。我正在使用tidyverseforeach

library(tidyverse)
library(foreach)

reps = 100

然后,我们创建数据集。

sampleSize = c(120, 250, 350, 500, 730)
b0 = c(-1.09,-0.3962,-0.51,-0.015,-0.5965)
b1 = c(0, 0, 0, 0, 0)
b2 = c(0.063626, 0.0321, 0.2031, 0.52031, 0.8101)
b3 = c(0.3954, 0.5954, 0.8012, 0.9547, 0.9801)


df.scenarios = expand.grid(sampleSize, b0, b1, b2, b3)
colnames(df.scenarios) = c("sampleSize", "b0", "b1", "b2", "b3")

然后我们创建get_repetition(). 此函数将为我们提供一个与您之前的代码具有相同参数的数据集。由于我们在 的子集中使用它df.scenarios,这与使用 相同sampleSize[i]

get_repetition <- function(df_subset) {
    df_subset %>% 
        mutate(sex = list(rbinom(sampleSize, size = 1, p = 0.5)),
               treatmentgroup = list(rbinom(sampleSize, size = 1, p = 0.5)),
               age_cat  = list(rbinom(sampleSize, size = 1, p = 0.5))) %>%
        unnest(sex:age_cat) %>%
        mutate(prob = 1 / (1 + exp(-b0 + b1 * treatmentgroup + b2 * sex + b3 * age_cat)),
               mortality = rbinom(nrow(.), size = 1, p = prob)) %>% 
        select(mortality, sex, treatmentgroup, age_cat) %>% 
        mutate(across(everything(), as.numeric),
               id = paste0(rep("S0"), sep = "_", row_number())) %>% 
        relocate(id)
}

现在我们创建get_results(),以便我们可以从模型的摘要中获得一个 df。这也将使我们能够与其他所有重复绑定。

get_results <- function(or_summary) {
    b <- or_summary$coef[2]
    se <- or_summary$coef[2, 2]
    t <- (or_summary$coef[2, 3]) / or_summary$coef[2, 4]
    p.val <- or_summary$coef[2, 4]
    ##
    OR <- exp(b)
    LCI <- exp(b - 1.96 * or_summary$coef[2, 2])
    UCI <- exp(b + 1.96 * or_summary$coef[2, 2])
    method <- rep("Unadjusted.logit", 1)
    
    results <-
        list(
            id = r,
            estimate = b,
            std.error = se,
            z.value = t,
            p.value = p.val,
            OR = OR,
            lower_bound = LCI,
            upper_bound = UCI,
            method =  "Unadjusted.logit"
        )
    
    data.frame(results)
}

最后,我们应用循环。我为前五行做了修改df.scenarios。您可以删除[1:5,]以使用完整的数据集。列表中的每个元素都是一个场景,每个元素都是id重复次数。不要忘记使用<-or为最终输出命名=

foreach::foreach (i = 1:nrow(df.scenarios[1:5,]))  %do%
    {
        summaries <- tibble()
        df_subset = df.scenarios[i,]
        
        for (r in 1:reps)  {
            dat = get_repetition(df_subset)
            
            model = glm(mortality ~ treatmentgroup, data = dat)
            or = summary(model, family = binomial(link = "logit"))
            
            or.models = get_results(or)
            
            summaries <- bind_rows(summaries, or.models)
        }
        summaries
    }
#> [[1]]
#> # A tibble: 100 x 9
#>       id estimate std.error z.value p.value    OR lower_bound upper_bound method
#>    <int>    <dbl>     <dbl>   <dbl>   <dbl> <dbl>       <dbl>       <dbl> <chr> 
#>  1     1 -0.100      0.0731 -7.88     0.174 0.905       0.784        1.04 Unadj~
#>  2     2 -0.0539     0.0781 -1.40     0.492 0.948       0.813        1.10 Unadj~
#>  3     3  0.00528    0.0873  0.0635   0.952 1.01        0.847        1.19 Unadj~
#>  4     4  0.0893     0.0745  5.14     0.233 1.09        0.945        1.27 Unadj~
#>  5     5  0.0578     0.0624  2.61     0.356 1.06        0.938        1.20 Unadj~
#>  6     6 -0.0166     0.0646 -0.322    0.798 0.984       0.867        1.12 Unadj~
#>  7     7 -0.0662     0.0750 -2.32     0.380 0.936       0.808        1.08 Unadj~
#>  8     8 -0.100      0.0731 -7.88     0.174 0.905       0.784        1.04 Unadj~
#>  9     9  0.0684     0.0722  2.74     0.345 1.07        0.930        1.23 Unadj~
#> 10    10 -0.0364     0.0714 -0.833    0.612 0.964       0.838        1.11 Unadj~
#> # ... with 90 more rows
#> 
#> [[2]]
#> # A tibble: 100 x 9
#>       id estimate std.error z.value p.value    OR lower_bound upper_bound method
#>    <int>    <dbl>     <dbl>   <dbl>   <dbl> <dbl>       <dbl>       <dbl> <chr> 
#>  1     1  0.00800    0.0504   0.182  0.874  1.01        0.913       1.11  Unadj~
#>  2     2 -0.0487     0.0511  -2.79   0.341  0.952       0.862       1.05  Unadj~
#>  3     3  0.0327     0.0488   1.33   0.504  1.03        0.939       1.14  Unadj~
#>  4     4  0.0678     0.0544   5.82   0.214  1.07        0.962       1.19  Unadj~
#>  5     5 -0.0999     0.0567 -22.3    0.0791 0.905       0.810       1.01  Unadj~
#>  6     6 -0.104      0.0494 -57.2    0.0367 0.901       0.818       0.993 Unadj~
#>  7     7  0.0171     0.0462   0.519  0.712  1.02        0.929       1.11  Unadj~
#>  8     8  0.0250     0.0519   0.763  0.631  1.03        0.926       1.14  Unadj~
#>  9     9 -0.0166     0.0488  -0.465  0.733  0.983       0.894       1.08  Unadj~
#> 10    10  0.0672     0.0495   7.69   0.176  1.07        0.971       1.18  Unadj~
#> # ... with 90 more rows
#> 
#> [[3]]
#> # A tibble: 100 x 9
#>       id estimate std.error z.value p.value    OR lower_bound upper_bound method
#>    <int>    <dbl>     <dbl>   <dbl>   <dbl> <dbl>       <dbl>       <dbl> <chr> 
#>  1     1  0.00571    0.0436   0.146   0.896 1.01        0.923        1.10 Unadj~
#>  2     2  0.0366     0.0429   2.17    0.394 1.04        0.954        1.13 Unadj~
#>  3     3 -0.0329     0.0427  -1.74    0.442 0.968       0.890        1.05 Unadj~
#>  4     4  0.0412     0.0473   2.27    0.384 1.04        0.950        1.14 Unadj~
#>  5     5  0.0258     0.0425   1.12    0.544 1.03        0.944        1.12 Unadj~
#>  6     6  0.0276     0.0410   1.34    0.501 1.03        0.949        1.11 Unadj~
#>  7     7 -0.0571     0.0432  -7.07    0.187 0.944       0.868        1.03 Unadj~
#>  8     8  0.0607     0.0395  12.2     0.125 1.06        0.983        1.15 Unadj~
#>  9     9 -0.0346     0.0433  -1.88    0.424 0.966       0.887        1.05 Unadj~
#> 10    10  0.0394     0.0446   2.34    0.377 1.04        0.953        1.14 Unadj~
#> # ... with 90 more rows
#> 
#> [[4]]
#> # A tibble: 100 x 9
#>       id  estimate std.error    z.value  p.value    OR lower_bound upper_bound
#>    <int>     <dbl>     <dbl>      <dbl>    <dbl> <dbl>       <dbl>       <dbl>
#>  1     1 -0.0728      0.0358   -47.4    0.0428   0.930       0.867       0.997
#>  2     2  0.00578     0.0378     0.174  0.878    1.01        0.934       1.08 
#>  3     3 -0.00934     0.0354    -0.333  0.792    0.991       0.924       1.06 
#>  4     4  0.0416      0.0391     3.68   0.289    1.04        0.966       1.13 
#>  5     5 -0.0136      0.0360    -0.537  0.705    0.986       0.919       1.06 
#>  6     6  0.143       0.0375 25092.     0.000152 1.15        1.07        1.24 
#>  7     7  0.000384    0.0375     0.0103 0.992    1.00        0.930       1.08 
#>  8     8  0.0267      0.0330     1.93   0.419    1.03        0.963       1.10 
#>  9     9 -0.0183      0.0357    -0.845  0.608    0.982       0.915       1.05 
#> 10    10  0.0260      0.0350     1.62   0.458    1.03        0.958       1.10 
#> # ... with 90 more rows, and 1 more variable: method <chr>
#> 
#> [[5]]
#> # A tibble: 100 x 9
#>       id estimate std.error   z.value p.value    OR lower_bound upper_bound
#>    <int>    <dbl>     <dbl>     <dbl>   <dbl> <dbl>       <dbl>       <dbl>
#>  1     1  0.0647     0.0296   75.5     0.0290 1.07        1.01        1.13 
#>  2     2 -0.0707     0.0291 -159.      0.0153 0.932       0.880       0.986
#>  3     3 -0.00496    0.0314   -0.181   0.874  0.995       0.936       1.06 
#>  4     4 -0.0613     0.0291  -59.6     0.0354 0.941       0.888       0.996
#>  5     5 -0.0229     0.0303   -1.68    0.450  0.977       0.921       1.04 
#>  6     6 -0.00424    0.0303   -0.157   0.889  0.996       0.938       1.06 
#>  7     7 -0.0364     0.0314   -4.70    0.247  0.964       0.907       1.03 
#>  8     8 -0.0234     0.0307   -1.71    0.446  0.977       0.920       1.04 
#>  9     9 -0.0156     0.0307   -0.826   0.613  0.985       0.927       1.05 
#> 10    10 -0.00270    0.0297   -0.0980  0.928  0.997       0.941       1.06 
#> # ... with 90 more rows, and 1 more variable: method <chr>

reprex 包于 2021-08-31 创建(v2.0.1)


推荐阅读