The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
MetaIntegration
This R package is an ensemble meta-prediction framework to integrate multiple regression models into a current study.
If the devtools package is not yet installed, install it first:
install.packages('devtools')
# install the package from Github:
devtools::install_github('umich-biostatistics/MetaIntegration')
Once installed, load the package:
library(MetaIntegration)
For this example, we simulate data and test each of the functions.
############## Generating 1 internal dataset of size n ###########################################
# Full model: Y|X1, X2, X3, B
# (X1, X2, X3, B) follows normal distribution with mean zero, variance one and correlation 0.3
# Y|X1, X2, X3, B follows Bernoulli[expit(-1 - 0.5*(X1+X2+X3) + 0.5*B)], where expit(x)=exp(x)/[1+exp(x)]
##################################################################################################
# Simulate one internal dataset of size n = 200.
set.seed(2333)
n = 200
data.n = data.frame(matrix(ncol = 5, nrow = n))
colnames(data.n) = c('Y', 'X1', 'X2', 'X3', 'B')
# Correlated normal covariates: mean zero, variance one, pairwise correlation 0.3
data.n[,c('X1', 'X2', 'X3', 'B')] = MASS::mvrnorm(n, rep(0,4), diag(0.7,4)+0.3)
# Binary outcome drawn from the full logistic model
data.n$Y = rbinom(n, 1, expit(-1 - 0.5*(data.n$X1 + data.n$X2 + data.n$X3) + 0.5*data.n$B))
############# Generate k=3 external models #########################################
# Full model: Y|X1, X2, X3, B
# Reduced external model 1: Y|X1, X2 with sample size m1
# Reduced external model 2: Y|X1, X3 with sample size m2
# Reduced external model 3: Y|X1, X2, X3 with sample size m3
####################################################################################
# generate data from the full model first, then fit the reduced logistic regression
# on the data to obtain the beta estimates and the corresponding estimated variance
####################################################################################
# External sample sizes for the three reduced models
m1 = 500
m2 = 2000
m3 = 30000
data.m1 = data.frame(matrix(ncol = 5, nrow = m1))
data.m2 = data.frame(matrix(ncol = 5, nrow = m2))
data.m3 = data.frame(matrix(ncol = 5, nrow = m3))
names(data.m1) = names(data.m2) = names(data.m3) = c('Y', 'X1', 'X2', 'X3', 'B')
# Covariates from the same distribution as the internal study
data.m1[,c('X1', 'X2', 'X3', 'B')] = MASS::mvrnorm(m1, rep(0,4), diag(0.7,4)+0.3)
data.m2[,c('X1', 'X2', 'X3', 'B')] = MASS::mvrnorm(m2, rep(0,4), diag(0.7,4)+0.3)
data.m3[,c('X1', 'X2', 'X3', 'B')] = MASS::mvrnorm(m3, rep(0,4), diag(0.7,4)+0.3)
# Outcomes generated from the same full logistic model
data.m1$Y = rbinom(m1, 1, expit(-1 - 0.5*(data.m1$X1 + data.m1$X2 + data.m1$X3) + 0.5*data.m1$B))
data.m2$Y = rbinom(m2, 1, expit(-1 - 0.5*(data.m2$X1 + data.m2$X2 + data.m2$X3) + 0.5*data.m2$B))
data.m3$Y = rbinom(m3, 1, expit(-1 - 0.5*(data.m3$X1 + data.m3$X2 + data.m3$X3) + 0.5*data.m3$B))
# Fit Y|X logistic regressions to obtain the external beta estimates
fit.E1 = glm(Y ~ X1 + X2, data = data.m1, family = binomial(link='logit'))
fit.E2 = glm(Y ~ X1 + X3, data = data.m2, family = binomial(link='logit'))
fit.E3 = glm(Y ~ X1 + X2 + X3, data = data.m3, family = binomial(link='logit'))

# Save the beta estimates and the corresponding estimated variance of the reduced external model 1
beta.E1 = coef(fit.E1)
names(beta.E1) = c('int', 'X1', 'X2')
V.E1 = vcov(fit.E1)

# Save the beta estimates and the corresponding estimated variance of the reduced external model 2
beta.E2 = coef(fit.E2)
names(beta.E2) = c('int','X1','X3')
V.E2 = vcov(fit.E2)

# Save the beta estimates and the corresponding estimated variance of the reduced external model 3
beta.E3 = coef(fit.E3)
names(beta.E3) = c('int','X1','X2','X3')
V.E3 = vcov(fit.E3)

# Save all the external model information into lists for later use
betaHatExt_list = list(Ext1 = beta.E1, Ext2 = beta.E2, Ext3 = beta.E3)
CovExt_list = list(Ext1 = V.E1, Ext2 = V.E2, Ext3 = V.E3)
rho = list(Ext1 = n/m1, Ext2 = n/m2, Ext3 = n/m3)  # internal/external sample-size ratios
Constrained maximum likelihood (CML) method for logistic regression (binary outcome Y).
# Fit the full internal model Y|X1, X2, X3, B to get the internal gamma estimates
fit.gamma.I = glm(Y ~ X1 + X2 + X3 + B, data = data.n, family = binomial(link='logit'))
gamma.I = coef(fit.gamma.I)

# CML estimate constrained by external model 1 (Y|X1, X2)
gamma.CML1 = fxnCC_LogReg(p=3,
                          q=5,
                          YInt=data.n$Y,
                          XInt=cbind(data.n$X1,data.n$X2),
                          BInt=cbind(data.n$X3,data.n$B),
                          betaHatExt=beta.E1,
                          gammaHatInt=gamma.I,
                          n=n,
                          tol=1e-8,
                          maxIter=400,
                          factor=1)[["gammaHat"]]

# CML estimate constrained by external model 2 (Y|X1, X3);
# reorder gamma.I so its entries match (int, X1, X3, X2, B)
gamma.CML2 = fxnCC_LogReg(p=3,
                          q=5,
                          YInt=data.n$Y,
                          XInt=cbind(data.n$X1,data.n$X3),
                          BInt=cbind(data.n$X2, data.n$B),
                          betaHatExt=beta.E2,
                          gammaHatInt=c(gamma.I[1:2],gamma.I[4],gamma.I[3],gamma.I[5]),
                          n=n,
                          tol=1e-8,
                          maxIter=400,
                          factor=1)[["gammaHat"]]
# restore the original coefficient order (int, X1, X2, X3, B)
gamma.CML2 = c(gamma.CML2[1:2], gamma.CML2[4], gamma.CML2[3], gamma.CML2[5])

# CML estimate constrained by external model 3 (Y|X1, X2, X3)
gamma.CML3 = fxnCC_LogReg(p=4,
                          q=5,
                          YInt=data.n$Y,
                          XInt=cbind(data.n$X1,data.n$X2,data.n$X3),
                          BInt=cbind(data.n$B),
                          betaHatExt=beta.E3,
                          gammaHatInt=gamma.I,
                          n=n,
                          tol=1e-8,
                          maxIter=400,
                          factor=1)[["gammaHat"]]
Asymptotic variance-covariance matrix for gamma_Int and gamma_CML for logistic regression (binary outcome Y).
# Asymptotic variance-covariance matrices of gamma.I and the CML estimates
asy.CML = asympVar_LogReg(k=3,
                          p=4,
                          q=5,
                          YInt=data.n$Y,
                          XInt=data.n[,c('X1','X2','X3')], #covariates that appeared in at least one external model
                          BInt=data.n$B, #covariates not used in any of the external models
                          gammaHatInt=gamma.I,
                          betaHatExt_list=betaHatExt_list,
                          CovExt_list=CovExt_list,
                          rho=rho,
                          ExUncertainty=TRUE)
asyV.I = asy.CML[['asyV.I']] #variance of gamma.I
asyV.CML1 = asy.CML[['asyV.CML']][[1]] #variance of gamma.CML1
asyV.CML2 = asy.CML[['asyV.CML']][[2]] #variance of gamma.CML2
asyCov.CML1.I = asy.CML[['asyCov.CML.I']][[1]] #covariance of gamma.CML1 and gamma.I
asyCov.CML2.I = asy.CML[['asyCov.CML.I']][[2]] #covariance of gamma.CML2 and gamma.I
asyCov.CML12 = asy.CML[['asyCov.CML']][['12']] #covariance of gamma.CML1 and gamma.CML2
Calculate the empirical Bayes (EB) estimates.
# Empirical Bayes (EB) estimates shrinking each CML estimate toward gamma.I
gamma.EB1 = get_gamma_EB(gamma_I=gamma.I, gamma_CML=gamma.CML1, asyV.I=asyV.I)[['gamma.EB']]
gamma.EB2 = get_gamma_EB(gamma_I=gamma.I, gamma_CML=gamma.CML2, asyV.I=asyV.I)[['gamma.EB']]
gamma.EB3 = get_gamma_EB(gamma_I=gamma.I, gamma_CML=gamma.CML3, asyV.I=asyV.I)[['gamma.EB']]
Using simulation to obtain the asymptotic variance-covariance matrix of gamma_EB, package corpcor and MASS are required.
# Get the asymptotic variance of the EB estimates
# (simulation-based; packages corpcor and MASS are required)
V.EB = get_var_EB(k=3,
                  q=5,
                  gamma.CML=c(gamma.CML1, gamma.CML2, gamma.CML3),
                  gamma.I=gamma.I,
                  asy.CML=asy.CML,
                  seed=2333,
                  nsim=2000)
asyV.EB1 = V.EB[['asyV.EB']][[1]] #variance of gamma.EB1
asyV.EB2 = V.EB[['asyV.EB']][[2]] #variance of gamma.EB2
asyV.EB3 = V.EB[['asyV.EB']][[3]] #variance of gamma.EB3
asyCov.EB1.I = V.EB[['asyCov.EB.I']][[1]] #covariance of gamma.EB1 and gamma.I
asyCov.EB2.I = V.EB[['asyCov.EB.I']][[2]] #covariance of gamma.EB2 and gamma.I
asyCov.EB3.I = V.EB[['asyCov.EB.I']][[3]] #covariance of gamma.EB3 and gamma.I
asyCov.EB12 = V.EB[['asyCov.EB']][['12']] #covariance of gamma.EB1 and gamma.EB2
asyCov.EB13 = V.EB[['asyCov.EB']][['13']] #covariance of gamma.EB1 and gamma.EB3
asyCov.EB23 = V.EB[['asyCov.EB']][['23']] #covariance of gamma.EB2 and gamma.EB3
Obtain the proposed Optimal covariate-Weighted (OCW) estimates, package Rsolnp is required.
# Optimal covariate-Weighted (OCW) ensemble of the three EB estimates
get_OCW(k=3,
        q=5,
        data.XB=data.n[,c('X1','X2','X3','B')],
        gamma.EB=c(gamma.EB1, gamma.EB2, gamma.EB3),
        V.EB=V.EB)
Obtain the proposed Selective Coefficient-Learner (SC-Learner) estimates.
# Prediction structure: rows = coefficients, columns = external models;
# entry 1 means that coefficient appears in the corresponding external model.
pred.matrix = matrix(c(1,1,1,0,0,
                       1,1,0,1,0,
                       1,1,1,1,0), 5, 3)
rownames(pred.matrix) = c('int','X1','X2','X3','B')
colnames(pred.matrix) = c('E1','E2','E3')
# Selective Coefficient-Learner (SC-Learner) estimates;
# the final argument is passed by name (the scraped text had lost "V.EB=")
get_SCLearner(k=3,
              q=5,
              pred.matrix=pred.matrix,
              gamma.EB=cbind(gamma.EB1, gamma.EB2, gamma.EB3),
              V.EB=V.EB)
Gu, T., Taylor, J.M.G. and Mukherjee, B. (2020). An ensemble meta-prediction framework to integrate multiple regression models into a current study. Manuscript in preparation.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.