首页 > 解决方案 > Cox 模型,coxph(),无事件对照处理,种子萌发

问题描述

我正在做生存分析,但不确定做法是否正确。我的数据集来自种子萌发实验。主要关注的变量是“处理”(treat)变量(3 个水平的分类变量)。在我的脚本中,我试图通过比较比例风险 (PH) 系数的百分比来判断各处理之间是否存在差异、哪一种处理最好,以及差异有多大。谁能帮我解决我遇到的一些问题?

1) 我是否需要将变量声明为 as.factor() 才能使用它们?还是整数被平等解释?

2) 如果违反比例风险 (PH) 假设,我应该如何处理数据来构建 Cox 模型?我查阅了很多资料,但仍不明白如何在代码中为模型添加协变量×时间交互项或进行分层。

3) 如何在cox 模型中包含脆弱项并检测随机效应(例如,种子发芽的板,4 个级别的分类变量,代表重复)。

4) 我也无法解释 print(summary(cox.fra)).*

*见下文

请参阅下面我的两个带有评论的完整脚本。

脚本 1

    # Load the raw seed-survival dataset (first row holds the column names).
    rd01 <- read.table("sa_kb01.txt", header = TRUE)
    rd01

    str(rd01)

    # Recode the integer-coded grouping columns as factors so that model
    # formulas treat them as categorical levels, not as numeric covariates.
    factor_cols <- c("begin", "spp", "cit", "treat", "plate")
    rd01[factor_cols] <- lapply(rd01[factor_cols], as.factor)

    str(rd01)

    summary(rd01)

    names(rd01) # headers

    ### Survival analysis

    # install.packages("survival")

    library(survival)
    library(survminer)

    # Interactive help pages for the functions used below
    ?survfit
    ?survfit.formula
    ?survfit.coxph
    ?ggsurvplot

    ## Fit Kaplan-Meier survivor function

    # One Kaplan-Meier curve per level of `treat`
    km.fit <- survfit(
      Surv(day, status) ~ treat,
      data = rd01, type = "kaplan-meier"
    )
    km.fit
    print(summary(km.fit))

    # fun = "event" draws 1 - S(t), i.e. the cumulative proportion of events
    # on a 0-1 scale (the "[%]" in the axis label is nominal).
    plot(
      km.fit,
      conf.int = TRUE, fun = "event", mark.time = c(140),
      pch = c("S", "W", "A"),
      col = c("darkred", "darkblue", "darkgreen"),
      lty = c("solid", "dotted", "longdash"),
      lwd = 1.5,
      xlab = "time [days]", ylab = "germination probability [%]"
    )

    ## Comparison of Survivor Functions

    # Log-rank tests

    ?survdiff

    # rho = 0: log-rank (Mantel-Haenszel) test
    # rho = 1: Peto & Peto modification of the Gehan-Wilcoxon test
    # ... Global test of heterogeneity across all treatment groups
    lrmh.123 <- survdiff(
      Surv(day, status) ~ treat,
      data = rd01, rho = 0
    )

    print(lrmh.123) # p < 0.05: at least one group differs from the others

    # ... Pairwise comparisons, each one leaving out a single treatment level

    lrmh.120 <- survdiff(
      Surv(day, status) ~ treat,
      data = rd01, subset = {treat != 3}, rho = 0
    )
    lrmh.103 <- survdiff(
      Surv(day, status) ~ treat,
      data = rd01, subset = {treat != 2}, rho = 0
    )
    lrmh.023 <- survdiff(
      Surv(day, status) ~ treat,
      data = rd01, subset = {treat != 1}, rho = 0
    )

    # p < 0.05 in a pairwise test: the two remaining groups differ.
    # NOTE(review): the three pairwise p-values are not adjusted for
    # multiple comparisons.
    print(lrmh.120)
    print(lrmh.103)
    print(lrmh.023)

    ## Checking Proportional Hazard (PH) assumption

    # mlogmlog(): transform a survival probability y into -log(-log(y)).
    # Vectorised over y.
    mlogmlog <- function(y) {
      neg_log_surv <- -log(y)
      -log(neg_log_surv)
    }

    # Start from the Kaplan-Meier fit estimated earlier
    km.fit

    # ... and draw -log(-log(S(t))) against time on a log x-axis.
    # Roughly parallel curves that do not cross make the PH assumption
    # plausible.
    plot(
      km.fit,
      fun = mlogmlog, log = "x", mark.time = c(140),
      pch = c("S", "W", "A"),
      col = c("darkred", "darkblue", "darkgreen"),
      lty = c("solid", "dotted", "longdash"),
      lwd = 1.5,
      xlab = "time [days]", ylab = "-log(-log(S(t)))"
    )

    # Interpretation: http://www.sthda.com/english/wiki/cox-model-assumptions#testing-proportional-hazards-assumption

    ## Checking for multicollinearity

    # install.packages("HH")
    library(HH)

    # GLM of day on treatment. It is fitted only so that vif() has a model
    # object to work on; its coefficients are not interpreted.
    ?glm
    mc.glm <- glm(day ~ treat, data = rd01)
    print(mc.glm)

    # Variance inflation factors (VIF): how much of the variation in each
    # covariate is explained by the other covariates in the model.
    ?vif
    mc.vif <- vif(mc.glm)
    print(mc.vif)
    # Rule of thumb: VIF close to 1, no evidence of multicollinearity;
    # VIF around 5, evidence of multicollinearity; VIF > 10, serious
    # multicollinearity. (A VIF cannot be smaller than 1.)
    # NOTE(review): `treat` is the only predictor in this model, so there
    # are no other covariates it could be collinear with.

    ## Adding covariates to the Cox model

    # Cox proportional hazards model with treatment as the only covariate
    cox.mod <- coxph(
      Surv(day, status) ~ treat,
      data = rd01
    )
    print(summary(cox.mod))

    # Interpretation: http://www.sthda.com/english/wiki/cox-proportional-hazards-model

    # Re-check the PH assumption on the fitted Cox model.
    # Global and per-term p-values > 0.05 make the PH assumption plausible.
    dc.ph <- cox.zph(cox.mod)
    dc.ph
    ggcoxzph(dc.ph)

    ## Including random effects
    ?frailty

    # Same Cox model, with the plate (replicate) entered as a frailty term
    cox.fra <- coxph(
      Surv(day, status) ~ treat + frailty(plate),
      data = rd01
    )
    # If the global and individual p-values are < 0.05, keep the frailty
    # term while adding further covariates to the Cox model one at a time.
    print(summary(cox.fra))

脚本 2 - 代码相同但数据集不同,对照处理 treat1 中没有事件!

    # Load the raw seed-survival dataset (first row holds the column names).
    rd01 <- read.table("sa_hal01.txt", header = TRUE)
    rd01

    str(rd01)

    # Recode the integer-coded grouping columns as factors so that model
    # formulas treat them as categorical levels, not as numeric covariates.
    factor_cols <- c("begin", "spp", "cit", "treat", "plate")
    rd01[factor_cols] <- lapply(rd01[factor_cols], as.factor)

    str(rd01)

    summary(rd01)

    names(rd01) # headers

    ### Survival analysis

    # install.packages("survival")

    library(survival)
    library(survminer)

    # Interactive help pages for the functions used below
    ?survfit
    ?survfit.formula
    ?survfit.coxph
    ?ggsurvplot

    ## Fit Kaplan-Meier survivor function

    # One Kaplan-Meier curve per level of `treat`
    km.fit <- survfit(
      Surv(day, status) ~ treat,
      data = rd01, type = "kaplan-meier"
    )
    km.fit
    print(summary(km.fit))

    # fun = "event" draws 1 - S(t), i.e. the cumulative proportion of events
    # on a 0-1 scale (the "[%]" in the axis label is nominal).
    plot(
      km.fit,
      conf.int = TRUE, fun = "event", mark.time = c(140),
      pch = c("S", "W", "A"),
      col = c("darkred", "darkblue", "darkgreen"),
      lty = c("solid", "dotted", "longdash"),
      lwd = 1.5,
      xlab = "time [days]", ylab = "germination probability [%]"
    )

    ## Comparison of Survivor Functions

    # Log-rank tests

    ?survdiff

    # rho = 0: log-rank (Mantel-Haenszel) test
    # rho = 1: Peto & Peto modification of the Gehan-Wilcoxon test
    # ... Global test of heterogeneity across all treatment groups
    lrmh.123 <- survdiff(
      Surv(day, status) ~ treat,
      data = rd01, rho = 0
    )

    print(lrmh.123) # p < 0.05: at least one group differs from the others

    # ... Pairwise comparisons, each one leaving out a single treatment level

    lrmh.120 <- survdiff(
      Surv(day, status) ~ treat,
      data = rd01, subset = {treat != 3}, rho = 0
    )
    lrmh.103 <- survdiff(
      Surv(day, status) ~ treat,
      data = rd01, subset = {treat != 2}, rho = 0
    )
    lrmh.023 <- survdiff(
      Surv(day, status) ~ treat,
      data = rd01, subset = {treat != 1}, rho = 0
    )

    # p < 0.05 in a pairwise test: the two remaining groups differ.
    # NOTE(review): the three pairwise p-values are not adjusted for
    # multiple comparisons.
    print(lrmh.120)
    print(lrmh.103)
    print(lrmh.023)

    ## Checking Proportional Hazard (PH) assumption

    # mlogmlog(): transform a survival probability y into -log(-log(y)).
    # Vectorised over y.
    mlogmlog <- function(y) {
      neg_log_surv <- -log(y)
      -log(neg_log_surv)
    }

    # Start from the Kaplan-Meier fit estimated earlier
    km.fit

    # ... and draw -log(-log(S(t))) against time on a log x-axis.
    # Roughly parallel curves that do not cross make the PH assumption
    # plausible.
    plot(
      km.fit,
      fun = mlogmlog, log = "x", mark.time = c(140),
      pch = c("S", "W", "A"),
      col = c("darkred", "darkblue", "darkgreen"),
      lty = c("solid", "dotted", "longdash"),
      lwd = 1.5,
      # Axis label restored: a line wrap had injected a run of spaces into
      # the string literal.
      xlab = "time [days]", ylab = "-log(-log(S(t)))"
    )

    # Interpretation: http://www.sthda.com/english/wiki/cox-model-assumptions#testing-proportional-hazards-assumption

    ## Checking for multicollinearity

    # install.packages("HH")
    library(HH)

    # GLM of day on treatment. It is fitted only so that vif() has a model
    # object to work on; its coefficients are not interpreted.
    ?glm
    mc.glm <- glm(day ~ treat, data = rd01)
    print(mc.glm)

    # Variance inflation factors (VIF): how much of the variation in each
    # covariate is explained by the other covariates in the model.
    ?vif
    mc.vif <- vif(mc.glm)
    print(mc.vif)
    # Rule of thumb: VIF close to 1, no evidence of multicollinearity;
    # VIF around 5, evidence of multicollinearity; VIF > 10, serious
    # multicollinearity. (A VIF cannot be smaller than 1.)
    # NOTE(review): `treat` is the only predictor in this model, so there
    # are no other covariates it could be collinear with.

    ## Adding covariates to the Cox model

    # Cox proportional hazards model with treatment as the only covariate.
    # NOTE(review): this dataset is described as having no events in the
    # control treatment (treat 1). If that level is the reference, the
    # treat coefficients are presumably not finitely estimable and coxph()
    # may warn about convergence. Consider another reference level via
    # relevel() or a penalised fit. TODO confirm.
    cox.mod <- coxph(
      Surv(day, status) ~ treat,
      data = rd01
    )
    print(summary(cox.mod))

    # Interpretation: http://www.sthda.com/english/wiki/cox-proportional-hazards-model

    # Re-check the PH assumption on the fitted Cox model.
    # Global and per-term p-values > 0.05 make the PH assumption plausible.
    dc.ph <- cox.zph(cox.mod)
    dc.ph
    ggcoxzph(dc.ph)

    ## Including random effects
    ?frailty

    # Same Cox model, with the plate (replicate) entered as a frailty term
    cox.fra <- coxph(
      Surv(day, status) ~ treat + frailty(plate),
      data = rd01
    )
    # If the global and individual p-values are < 0.05, keep the frailty
    # term while adding further covariates to the Cox model one at a time.
    print(summary(cox.fra))

似乎存在统计学上的显著差异,并且在两个脚本中 treat3 都与其他组不同。脚本 1 违反了 PH 假设,我不知道接下来该怎么做。除此之外,脚本 1 中的 Cox 模型似乎运行正常,对风险比的解释也没有问题;但在脚本 2 中,我不知道如何解释或解决这个问题(对照处理 treat1 中没有事件)。

标签: r survival-analysis cox-regression

解决方案


1) 我是否需要将变量声明为 as.factor() 才能使用它们?还是整数被平等解释?

我认为在你的情况下使用 as.factor 是正确的。对于连续的数值变量,可以直接使用整数(数值)——例如,如果你记录了种子在实验前的储存时间,就可以用 as.numeric 把这个时间变量作为数值型变量使用。

2) 如果违反了 PH,我应该如何处理我的数据以进行 cox 模型构建?我进行了深入研究,但无法理解向我的模型添加协变量 x 时间交互或分层的编程。

Cox 回归,又称 Cox 比例风险模型,是基于比例风险假设的。如果违反该假设,您将无法获得可靠的结果。您可能可以尝试一些数据转换,看看是否有帮助。或者,如果在某些子实验/组中违反了它,您可以将其排除在外。


推荐阅读