library(tidyverse) library("survival") library("survminer") test_data=read.table("survival.txt",header = T,sep = "\t",stringsAsFactors = F) test_data$over55=ifelse(test_data$age >= 55,1,0) test_data$over55=as.factor(test_data$over55) test_data$drug=as.factor(test_data$drug) head(test_data) # studytime died drug age over55 # 1 1 1 0 61 1 # 2 1 1 0 65 1 # 3 2 1 0 59 1 # 4 3 1 0 52 0 # 5 4 1 0 56 1 # 6 4 1 0 67 1 summary(test_data) # studytime died drug age over55 # Min. : 1.00 Min. :0.0000 0:20 Min. :47.00 0:19 # 1st Qu.: 7.75 1st Qu.:0.0000 1:28 1st Qu.:50.75 1:29 # Median :12.50 Median :1.0000 Median :56.00 # Mean :15.50 Mean :0.6458 Mean :55.88 # 3rd Qu.:23.00 3rd Qu.:1.0000 3rd Qu.:60.00 # Max. :39.00 Max. :1.0000 Max. :67.00 attach(test_data)
注意,由於我爲了自動補全變量名,使用了attach()。若是沒有用這個,像survfit() coxph()這些函數還須要加上data參數函數
drug_KM=survfit(Surv(studytime,died) ~ drug,data=test_data) ggsurvplot(drug_KM,data=test_data,legend.title = "drug",pval = TRUE,pval.method=T,palette=c("red","blue")) ggsave("drug.png",width = 5,height = 5) over55_KM=survfit(Surv(studytime,died) ~ over55,data=test_data) ggsurvplot(over55_KM,data=test_data,legend.title = "over55",pval = TRUE,pval.method=T,palette=c("red","blue")) ggsave("over55.png",width = 5,height = 5)
用兩個變量進行Cox-PH model,都是分類變量,固然數值變量也能夠作3d
cox.mod=coxph(Surv(studytime,died) ~ drug + over55) #描述一下 summary(cox.mod) # Call: # coxph(formula = Surv(studytime, died) ~ drug + over55) # # n= 48, number of events= 31 # # coef exp(coef) se(coef) z Pr(>|z|) # drug1 -2.2632 0.1040 0.4582 -4.940 7.82e-07 *** # over551 0.9274 2.5278 0.3935 2.356 0.0184 * # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # exp(coef) exp(-coef) lower .95 upper .95 # drug1 0.104 9.6135 0.04238 0.2553 # over551 2.528 0.3956 1.16888 5.4668 # # Concordance= 0.784 (se = 0.039 ) # Likelihood ratio test= 31.03 on 2 df, p=2e-07 # Wald test = 26.92 on 2 df, p=1e-06 # Score (logrank) test = 34.49 on 2 df, p=3e-08
通常文獻裏面比較關心HR,咱們能夠用森林圖來表示rest
ggforest(cox.mod, main = '',data = test_data) ggsave(filename = "cox.mod.png",device = "png",width = 20,height = 10,units = c("cm"))
有時候也能看到KM曲線裏面加HR的圖,這須要結合KM的結果和COX的結果。
到這裏,對於paper的畫圖已經足夠了,若是想深刻了解Cox比例風險模型,能夠看看下面的內容code
用LRT (likelihood ratio tests) 比較嵌套模型(一個模型含有另外一個模型的所有變量),零假設是兩個模型沒有差別orm
cox.mod=coxph(Surv(studytime,died) ~ drug + over55) cox.mod2=coxph(Surv(studytime,died) ~ over55) anova(cox.mod2,cox.mod,test = "LRT") # Analysis of Deviance Table # Cox model: response is Surv(studytime, died) # Model 1: ~ over55 # Model 2: ~ drug + over55 # loglik Chisq Df P(>|Chi|) # 1 -98.062 # 2 -83.994 28.136 1 1.131e-07 ***
p值很小,說明有差別,因此咱們不能去掉drug變量blog
cox.mod3=coxph(Surv(studytime,died) ~ age) summary(cox.mod3)
exp(coef)等於1.08表示:每增長1歲,風險增長0.08事件
cox.mod4=coxph(Surv(studytime,died) ~ age) png("Martingale_residuals.png") plot(predict(cox.mod4),residuals(cox.mod4,type = "martingale"), xlab = "fitted values",ylab = "Martingale residuals", main = "Residual Plot",las=1) #加水平線 abline(h=0) #畫殘差的擬合線 lines(smooth.spline(predict(cox.mod4),residuals(cox.mod4,type = "martingale")),col="red") dev.off()
換一種殘差檢查線性,deviance residualsstring
png("deviance_residuals.png") plot(predict(cox.mod4),residuals(cox.mod4,type = "deviance"), xlab = "fitted values",ylab = "Deviance residuals", main = "Residual Plot",las=1) abline(h=0) lines(smooth.spline(predict(cox.mod4),residuals(cox.mod4,type = "deviance")),col="red") dev.off()
check proportional hazards assumption
using Schoenfeld test for PH, Ho: HAZARDS are prop, Ha: HAZARDS are NOT prop
結果會返回每一個變量,以及總體的p值it
cox.zph(cox.mod4) # chisq df p # age 0.662 1 0.42 # GLOBAL 0.662 1 0.42
p值較大能夠接受原假設。io
也能夠看某個變量的係數(β)是否是隨着時間而改變,若是是(也就是說HR隨時間而改變),則說明爲non-prop hazard
par(mfrow=c(1,1)) png("cox.zph.png") plot(cox.zph(cox.mod4)[1]) dev.off() detach(test_data)