We discuss connections between the Cox proportional hazards model and Poisson generalized linear models as described in Whitehead (1980). We fit comparable models to a sample dataset using coxph()
, glm()
, phmm()
, and glmer()
and explore similarities.
We generate proportional hazards mixed model data.
library(phmm)
n <- 50 # total sample size
nclust <- 5 # number of clusters
clusters <- rep(1:nclust,each=n/nclust)
beta0 <- c(1,2)
set.seed(13)
Z <-cbind(Z1=sample(0:1,n,replace=TRUE),
Z2=sample(0:1,n,replace=TRUE),
Z3=sample(0:1,n,replace=TRUE))
b <- cbind(rep(rnorm(nclust), each=n/nclust),
rep(rnorm(nclust), each=n/nclust))
Wb <- matrix(0,n,2)
for( j in 1:2) Wb[,j] <- Z[,j]*b[,j]
Wb <- apply(Wb,1,sum)
T <- -log(runif(n,0,1))*exp(-Z[,c('Z1','Z2')]%*%beta0-Wb)
C <- runif(n,0,1)
time <- ifelse(T<C,T,C)
event <- ifelse(T <= C,1,0)
sum(event)
## [1] 30
fit.ph <- coxph(Surv(time, event) ~ Z1 + Z2,
phmmd, method="breslow", x=TRUE, y=TRUE)
summary(fit.ph)
## Call:
## coxph(formula = Surv(time, event) ~ Z1 + Z2, data = phmmd, x = TRUE,
## y = TRUE, method = "breslow")
##
## n= 50, number of events= 30
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Z1 1.5061 4.5091 0.4313 3.492 0.00048 ***
## Z2 0.4376 1.5490 0.3708 1.180 0.23798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Z1 4.509 0.2218 1.9361 10.501
## Z2 1.549 0.6456 0.7488 3.204
##
## Concordance= 0.696 (se = 0.053 )
## Likelihood ratio test= 16.81 on 2 df, p=2e-04
## Wald test = 14.93 on 2 df, p=6e-04
## Score (logrank) test = 17.2 on 2 df, p=2e-04
Next we create data to fit an auxilary Poisson model as described in Whitehead (1980) using the pseudoPoisPHMM()
function provided in the phmm
package. This function also extracts the linear predictors as estimated from the Cox PH model so that we can calculate likelihoods and degrees of freedom.
ppd <- as.data.frame(as.matrix(pseudoPoisPHMM(fit.ph)))
# pois likelihood
poisl <- c()
eventtimes <- sort(phmmd$time[phmmd$event == 1])
for(h in 1:length(eventtimes)){
js <- ppd$time == eventtimes[h] & ppd$m >= 1 # j star
j <- ppd$time == eventtimes[h]
if(sum(js) > 1) stop("tied event times")
poisl <- c(poisl,
ppd[js, "N"]*exp(-1)*exp(ppd[js, "linear.predictors"])/
sum(ppd[j, "N"]*exp(ppd[j, "linear.predictors"])))
}
Poisson likelihood:
## [1] -65.95449
## [1] 25.25443
Poisson degrees of freedom
## [1] 32
We fit an auxiliary Poisson GLM and note that the parameter estimates for z1 and z2 are identical to the coxph() fit, and the likelihood and degrees of freedom are as expected.
ppd$t <- as.factor(ppd$time)
fit.glm <- glm(m~-1+t+z1+z2+offset(log(N)),
ppd, family=poisson)
summary(fit.glm)
##
## Call:
## glm(formula = m ~ -1 + t + z1 + z2 + offset(log(N)), family = poisson,
## data = ppd)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0148 -0.8050 -0.4622 0.3374 1.7482
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## t0.000446182539531382 -5.1899 1.0759 -4.824 1.41e-06 ***
## t0.00163087691684562 -5.1813 1.0767 -4.812 1.49e-06 ***
## t0.00275830723468582 -5.1725 1.0774 -4.801 1.58e-06 ***
## t0.00280059794673464 -5.1466 1.0778 -4.775 1.80e-06 ***
## t0.00318134995082413 -5.1051 1.0759 -4.745 2.09e-06 ***
## t0.00320419136302427 -5.0618 1.0739 -4.713 2.44e-06 ***
## t0.00395636869870054 -5.0166 1.0718 -4.681 2.86e-06 ***
## t0.00511123775406486 -5.0063 1.0727 -4.667 3.05e-06 ***
## t0.00559427171447325 -4.9756 1.0729 -4.637 3.53e-06 ***
## t0.00766727170160828 -4.9262 1.0705 -4.602 4.19e-06 ***
## t0.00808285780728387 -4.9189 1.0715 -4.591 4.42e-06 ***
## t0.019339488197591 -4.8312 1.0691 -4.519 6.22e-06 ***
## t0.0299199501201303 -4.7739 1.0662 -4.477 7.56e-06 ***
## t0.0531838782317072 -4.7040 1.0642 -4.420 9.86e-06 ***
## t0.066999301944422 -4.6476 1.0652 -4.363 1.28e-05 ***
## t0.0855879977109686 -4.6034 1.0652 -4.322 1.55e-05 ***
## t0.128630049328015 -4.5408 1.0664 -4.258 2.06e-05 ***
## t0.131437682173085 -4.4634 1.0623 -4.202 2.65e-05 ***
## t0.15257919709977 -4.3669 1.0591 -4.123 3.74e-05 ***
## t0.157383776779992 -4.2092 1.0531 -3.997 6.41e-05 ***
## t0.163824053514786 -4.1398 1.0522 -3.934 8.34e-05 ***
## t0.168953982363505 -4.0653 1.0515 -3.866 0.000111 ***
## t0.227852125295401 -3.8120 1.0431 -3.654 0.000258 ***
## t0.280623578426198 -3.7668 1.0471 -3.597 0.000322 ***
## t0.314323389014675 -3.6567 1.0463 -3.495 0.000474 ***
## t0.351296650884504 -3.5733 1.0492 -3.406 0.000660 ***
## t0.485749622685594 -3.0723 1.0355 -2.967 0.003007 **
## t0.509510538708177 -2.9979 1.0372 -2.890 0.003849 **
## t0.529434934651452 -2.7430 1.0338 -2.653 0.007969 **
## t0.540077948287249 -2.6765 1.0385 -2.577 0.009960 **
## z1 1.5061 0.4313 3.492 0.000480 ***
## z2 0.4376 0.3708 1.180 0.237981
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1701.491 on 118 degrees of freedom
## Residual deviance: 71.909 on 86 degrees of freedom
## AIC: 195.91
##
## Number of Fisher Scoring iterations: 7
## coxph.coef glm.coef
## Z1 1.5060975 1.5060975
## Z2 0.4376126 0.4376126
## coxph.pois.loglik glm.loglik
## [1,] -65.95449 -65.95449
The additional parameter estimates correspond to the estimated log baseline hazard, which we verify using the basehaz()
function.
bh <- basehaz(fit.ph, centered = FALSE)
cbind(
coxph.bh.step = log(bh$hazard - c(0,bh$hazard[1:(length(bh$hazard)-1)]))[1:5],
glm.bh.step = coef(fit.glm)[1:5]
)
## coxph.bh.step glm.bh.step
## t0.000446182539531382 -5.189938 -5.189938
## t0.00163087691684562 -5.181269 -5.181269
## t0.00275830723468582 -5.172524 -5.172524
## t0.00280059794673464 -5.146624 -5.146624
## t0.00318134995082413 -5.105131 -5.105131
set.seed(20200316)
fit.phmm <- phmm(Surv(time, event) ~ Z1 + Z2 + (Z1 + Z2|cluster),
phmmd, Gbs = 100, Gbsvar = 1000, VARSTART = 1,
NINIT = 10, MAXSTEP = 100, CONVERG=90)
summary(fit.phmm)
##
## Proportional Hazards Mixed-Effects Model fit by MCMC-EM
## Model: Surv(time, event) ~ Z1 + Z2 + (Z1 + Z2 | cluster)
## Data: phmmd
## Log-likelihood:
## Conditional Laplace RIS
## -83.08 -118.76 -118.70
##
## Fixed effects: Surv(time, event) ~ Z1 + Z2
## Estimate Std.Error
## Z1 1.6170 0.4564
## Z2 0.5818 0.5866
##
## Random effects: (Z1 + Z2 | cluster)
## Estimated variance-covariance matrix:
## (Intercept) Z1 Z2
## (Intercept) 0.01026 0.000000 0.000
## Z1 0.00000 0.006868 0.000
## Z2 0.00000 0.000000 1.056
##
## Number of Observations: 50
## Number of Groups: 5
ppd <- as.data.frame(as.matrix(pseudoPoisPHMM(fit.phmm)))
poisl <- c()
eventtimes <- sort(phmmd$time[phmmd$event == 1])
for(h in 1:length(eventtimes)){
js <- ppd$time == eventtimes[h] & ppd$m >= 1 # j star
j <- ppd$time == eventtimes[h]
if(sum(js) > 1) stop("tied event times")
poisl <- c(poisl,
ppd[js, "N"]*exp(-1)*exp(ppd[js, "linear.predictors"])/
sum(ppd[j, "N"]*exp(ppd[j, "linear.predictors"])))
}
Poisson likelihood:
## Conditional
## -13.58648
Poisson degrees of freedom
# Poisson GLMM degrees of freedom length(unique(x$cluster)) * x$nrandom + x$nfixed
traceHat(fit.phmm, "pseudoPois") # + 2*sum(phmmd$event)
## [1] 5.018931
We fit an auxiliary Poisson GLMM, although with a general variance-covariance matrix for the random effects (phmm() only fits models with diagonal variance-covariance matrix).
library(lme4)
ppd$t <- as.factor(ppd$time)
fit.lmer <- glmer(m~-1+t+z1+z2+
(z1+z2|cluster)+offset(log(N)),
data=ppd, family=poisson, nAGQ=0)
summary(fit.lmer)$coef
## Estimate Std. Error z value Pr(>|z|)
## t0.000446182539531382 -5.7813397 1.1440995 -5.053179 4.345156e-07
## t0.00163087691684562 -5.7685008 1.1453474 -5.036464 4.742104e-07
## t0.00275830723468582 -5.7531424 1.1471637 -5.015101 5.300550e-07
## t0.00280059794673464 -5.7378389 1.1486000 -4.995507 5.868147e-07
## t0.00318134995082413 -5.6452747 1.1414477 -4.945715 7.586501e-07
## t0.00320419136302427 -5.5432601 1.1349421 -4.884179 1.038609e-06
## t0.00395636869870054 -5.3975473 1.1228024 -4.807210 1.530511e-06
## t0.00511123775406486 -5.3752136 1.1248473 -4.778616 1.765056e-06
## t0.00559427171447325 -5.3521882 1.1261561 -4.752616 2.008011e-06
## t0.00766727170160828 -5.1728904 1.1132452 -4.646677 3.373250e-06
## t0.00808285780728387 -5.1646583 1.1148223 -4.632719 3.608942e-06
## t0.019339488197591 -4.9601814 1.1019248 -4.501379 6.751395e-06
## t0.0299199501201303 -4.6818910 1.0850479 -4.314916 1.596633e-05
## t0.0531838782317072 -4.6126510 1.0816498 -4.264459 2.003875e-05
## t0.066999301944422 -4.5481475 1.0821894 -4.202728 2.637175e-05
## t0.0855879977109686 -4.4951855 1.0818533 -4.155079 3.251755e-05
## t0.128630049328015 -4.4249733 1.0829876 -4.085895 4.390729e-05
## t0.131437682173085 -4.3506932 1.0767235 -4.040678 5.329686e-05
## t0.15257919709977 -4.2702442 1.0774013 -3.963467 7.386916e-05
## t0.157383776779992 -4.1073777 1.0689456 -3.842457 1.218087e-04
## t0.163824053514786 -4.0253605 1.0673062 -3.771514 1.622599e-04
## t0.168953982363505 -3.9362644 1.0656891 -3.693633 2.210728e-04
## t0.227852125295401 -3.6778267 1.0541765 -3.488815 4.851671e-04
## t0.280623578426198 -3.6268487 1.0584957 -3.426418 6.115984e-04
## t0.314323389014675 -3.4875614 1.0563771 -3.301436 9.619133e-04
## t0.351296650884504 -3.3234060 1.0598050 -3.135866 1.713477e-03
## t0.485749622685594 -2.9208484 1.0476710 -2.787944 5.304365e-03
## t0.509510538708177 -2.8564227 1.0482173 -2.725029 6.429581e-03
## t0.529434934651452 -2.5278619 1.0453509 -2.418195 1.559773e-02
## t0.540077948287249 -2.4051694 1.0540487 -2.281839 2.249885e-02
## z1 1.5783789 0.4787804 3.296666 9.783994e-04
## z2 0.6575325 0.6543760 1.004824 3.149818e-01
## Z1 Z2
## 1.6169634 0.5818235
## 'log Lik.' -102.0292 (df=38)
## [1] 5.364446
## phmm.bh.step glm.bh.step
## t0.000446182539531382 -5.786086 -5.781340
## t0.00163087691684562 -5.770504 -5.768501
## t0.00275830723468582 -5.749254 -5.753142
## t0.00280059794673464 -5.732263 -5.737839
## t0.00318134995082413 -5.644879 -5.645275