The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

medicalrisk: Advanced comorbidity analysis

Patrick McCormick patrick.mccormick@alum.mit.edu

2020-02-28

Introduction

To demonstrate how the medicalrisk package can be useful, this vignette shows the kinds of descriptive statistics and inferences that can be made from a simple administrative dataset.

This vignette assumes you have read the introductory vignette for medicalrisk.

Calculating Mortality Risk

First, use the medicalrisk package to create a single dataframe with information on each patient:

library(medicalrisk)
library(plyr)
data(vt_inp_sample)
cm_df <- generate_comorbidity_df(vt_inp_sample, icd9mapfn=icd9cm_charlson_quan)
cci_df <- generate_charlson_index_df(cm_df)
rsi_df <- ddply(vt_inp_sample, .(id), function(x) { icd9cm_sessler_rsi(x$icd9cm) } )
num_icd9_df <- count(vt_inp_sample, c('id'))
num_icd9_df <- rename(num_icd9_df, c("freq" = "num_icd9"))
wide_df <- merge(merge(merge(merge(
  rsi_df, cci_df), 
    cm_df), 
      unique(vt_inp_sample[,c('id','scu_days','drg','mdc')])),
        num_icd9_df)
id rsi_1yrpod rsi_30dlos rsi_30dpod rsi_inhosp index mi chf perivasc cvd dementia chrnlung rheum
1 -2.019 0.156 -1.699 -1.848 2 FALSE TRUE FALSE FALSE FALSE FALSE FALSE
2 -4.142 0.893 -3.802 -3.543 0 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
3 -2.627 0.831 -2.911 -2.861 0 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
4 -0.798 0.336 -1.551 -0.267 4 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
5 2.580 -1.790 2.455 1.762 2 FALSE FALSE FALSE FALSE FALSE TRUE FALSE
id ulcer liver dm dmcx para renal tumor modliver mets aids scu_days drg mdc num_icd9
1 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 0 470 8 12
2 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 0 470 8 15
3 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 0 462 8 7
4 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE 0 470 8 10
5 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 0 689 11 24

Let’s explore the data here with some graphs. First, a histogram:

library(reshape2)
library(ggplot2)
# generate a 100 pt x 17 comorbidity table (1700 rows)
cm_melted <- melt(cm_df, id.vars=c('id'), variable.name='cm')
# get rid of all the false entries
cm_melted <- cm_melted[cm_melted$value,]
## count only flags that are true
ggplot(cm_melted, aes(cm, fill=cm)) + 
  geom_bar() + 
  scale_fill_discrete()

The chrnlung comorbidity seems well represented. Let’s create a histogram breaking down which ICD-9-CM codes are mapping to chrnlung in this dataset:

# make a histogram dataframe for all the icd-9 codes
icd9cm_df <- count(vt_inp_sample, vars='icd9cm')
# create a charlson comorbidity map for all icd-9 codes
icd9cm_charlson_df <- icd9cm_charlson_quan(icd9cm_df$icd9cm)
# isolate just the chrnlung icd_9_cm codes
icd9cm_chrnlung <- row.names(icd9cm_charlson_df[icd9cm_charlson_df$chrnlung,])
# create a hist df
icd9cm_chrnlung_hist <- icd9cm_df[icd9cm_df$icd9cm %in% icd9cm_chrnlung,]
# plot it
ggplot(icd9cm_chrnlung_hist, aes(icd9cm, freq)) + 
  geom_bar(stat="identity")

Let’s see how often ICD-9-CM codes used for chrnlung coincide within patients:

# create base dataset
pairs <- unique(
  vt_inp_sample[vt_inp_sample$icd9cm %in% icd9cm_chrnlung, c('id','icd9cm')])

# create coincidence matrix
t <- table(
    ddply(pairs, c('id'), function(x) { if (length(x$icd9cm) > 1) {
      data.frame(t(combn(as.character(x$icd9cm),2))) } })[c('X1','X2')])
D4168 D49390
D49122 1 0
D4168 0 1

How often do comorbidities coincide?

# create coincidence matrix
t <- table(
    ddply(cm_melted, c('id'), function(x) { if (length(x$cm) > 1) {
      data.frame(t(combn(as.character(x$cm),2))) } })[c('X1','X2')])
# sort it
t <- t[order(rownames(t)),order(colnames(t))]
chf chrnlung cvd dementia dm liver mets para perivasc renal rheum tumor ulcer
chf 0 8 0 0 3 0 1 0 4 3 3 1 1
chrnlung 0 0 0 0 7 1 0 1 0 5 3 1 0
cvd 0 1 0 2 1 0 0 1 0 1 1 0 0
dementia 0 2 0 0 0 0 0 0 0 1 1 0 0
dm 0 0 0 0 0 0 0 1 0 5 0 2 0
liver 0 0 0 0 1 0 0 1 0 0 0 0 0
mi 4 6 1 1 4 0 0 0 0 1 3 1 1
perivasc 0 3 1 1 2 0 0 0 0 3 1 1 0
rheum 0 0 0 0 1 0 1 0 0 0 0 0 0
ulcer 0 0 0 0 1 0 0 0 0 1 0 0 0

Plot the above table:

m <- melt(t)
ggplot(m[m$value>0,], aes(X1,X2)) + stat_sum(aes(group=value))

This is a scatterplot of the Charlson Comorbidity Index versus each RSI mortality estimate. A linear regression line is superimposed:

library(grid)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.2
p.inhosp <- ggplot(wide_df, aes(rsi_inhosp, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
p.30dpod <- ggplot(wide_df, aes(rsi_30dpod, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
p.1yrpod <- ggplot(wide_df, aes(rsi_1yrpod, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
grid.arrange(p.inhosp, p.30dpod, p.1yrpod, nrow=1)

As expected, the Risk Stratification Index is correlated with an increased Charlson Comorbidity Index.

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.