首页 > 解决方案 > 为功率分析的每次迭代创建新数据集

问题描述

我有以下代码来估计我的研究的能力,它运行得很好。问题是我正在运行 n = 1000 次迭代,但每次迭代都会生成完全相同的数据集。我认为这是因为我创建的函数(powercrosssw)中的命令利用了上面固定值的数据定义?如何确保生成的每个数据集(下面命名为 dx)是不同的(即每次迭代的 u_3、error 和 y 的值都不同),以便我正确计算功率?

library(simstudy)
library(nlme)
library(gendata)
library(data.table)
library(geepack)

set.seed(12345)

clusterDef <- defDataAdd(varname = "u_3", dist = "normal", formula = 0, variance = 25.77) #cluster-level random effect
patError <- defDataAdd(varname = "error", dist = "normal", formula = 0, variance = 38.35) #error term 

#Generate cluster-level data
cohortsw <- genData(3, id = "cluster")
cohortsw <- addColumns(clusterDef, cohortsw) 
cohortswTm <- addPeriods(cohortsw, nPeriods = 6, idvars = "cluster", perName = "period")
cohortstep <- trtStepWedge(cohortswTm, "cluster", nWaves = 3, lenWaves = 1, startPer = 1, grpName = "Ijt")
cohortstep

#Generate individual patient-level data
pat <- genCluster(cohortswTm, cLevelVar = "timeID", numIndsVar = 5, level1ID = "id")
pat

dx <- merge(pat[, .(cluster, period, id)], cohortstep, by = c("cluster", "period")) 
dx <- addColumns(patError, dx)

setkey(dx, id, cluster, period)

#Define outcome y 
outDef <- defDataAdd(varname = "y", formula = "17.87 + 5.0*Ijt - 5.42*I(period == 1) - 5.72*I(period == 2) - 7.03*I(period == 3) - 6.13*I(period == 4) - 9.13*I(period == 5) + u_3 + error", dist = "normal")
dx <- addColumns(outDef, dx)

#Fit GLMM model to simulated dataset
model1 <- lme(y ~ factor(period) + factor(Ijt), random = ~1|cluster, data = dx, method = "REML")
summary(model1)

#Power analysis 
powercrosssw <- function(nclus = 3, clsize = 5) {

  cohortsw <- genData(nclus, id = "cluster")
  cohortsw <- addColumns(clusterDef, cohortsw) 
  cohortswTm <- addPeriods(cohortsw, nPeriods = 6, idvars = "cluster", perName = "period")
  cohortstep <- trtStepWedge(cohortswTm, "cluster", nWaves = 3, lenWaves = 1, startPer = 1, grpName = "Ijt")

  pat <- genCluster(cohortswTm, cLevelVar = "timeID", numIndsVar = clsize, level1ID = "id")

  dx <- merge(pat[, .(cluster, period, id)], cohortstep, by = c("cluster", "period"))
  dx <- addColumns(patError, dx)

  setkey(dx, id, cluster, period)

  return(dx)

}

bresult <- NULL
presult <- NULL
eresult <- NULL

intercept <- NULL 
trt <- NULL 
timecoeff1 <- NULL
timecoeff2 <- NULL
timecoeff3 <- NULL
timecoeff4 <- NULL
timecoeff5 <- NULL
ranclus <- NULL
error <- NULL 

i=1

while (i < 1000) {

  cohortsw <- powercrosssw()

  #Fit multi-level model to simulated dataset
  model1 <- tryCatch(lme(y ~ factor(period) + factor(Ijt), data = dx, random = ~1|cluster, method = "REML"),
                     warning = function(w) { "warning" }
  )

  if (! is.character(model1)) {

    coeff <- coef(summary(model1))["factor(Ijt)1", "Value"]
    pvalue <- coef(summary(model1))["factor(Ijt)1", "p-value"]
    error <- coef(summary(model1))["factor(Ijt)1", "Std.Error"]
    bresult <- c(bresult, coeff)
    presult <- c(presult, pvalue)
    eresult <- c(eresult, error)

   i <- i + 1
  }
}

标签: rsimulate

解决方案


推荐阅读