The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
To demonstrate how the medicalrisk package can be useful, this vignette shows the kinds of descriptive statistics and inferences that can be made from a simple administrative dataset.
This vignette assumes you have read the introductory vignette for medicalrisk.
First, use the medicalrisk package to create a single dataframe with information on each patient:
library(medicalrisk)
library(plyr)
data(vt_inp_sample)
cm_df <- generate_comorbidity_df(vt_inp_sample, icd9mapfn=icd9cm_charlson_quan)
cci_df <- generate_charlson_index_df(cm_df)
rsi_df <- ddply(vt_inp_sample, .(id), function(x) { icd9cm_sessler_rsi(x$icd9cm) } )
num_icd9_df <- count(vt_inp_sample, c('id'))
num_icd9_df <- rename(num_icd9_df, c("freq" = "num_icd9"))
wide_df <- merge(merge(merge(merge(
rsi_df, cci_df),
cm_df),
unique(vt_inp_sample[,c('id','scu_days','drg','mdc')])),
num_icd9_df)
id | rsi_1yrpod | rsi_30dlos | rsi_30dpod | rsi_inhosp | index | mi | chf | perivasc | cvd | dementia | chrnlung | rheum |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | -2.019 | 0.156 | -1.699 | -1.848 | 2 | FALSE | TRUE | FALSE | FALSE | FALSE | FALSE | FALSE |
2 | -4.142 | 0.893 | -3.802 | -3.543 | 0 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE |
3 | -2.627 | 0.831 | -2.911 | -2.861 | 0 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE |
4 | -0.798 | 0.336 | -1.551 | -0.267 | 4 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE |
5 | 2.580 | -1.790 | 2.455 | 1.762 | 2 | FALSE | FALSE | FALSE | FALSE | FALSE | TRUE | FALSE |
id | ulcer | liver | dm | dmcx | para | renal | tumor | modliver | mets | aids | scu_days | drg | mdc | num_icd9 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | 0 | 470 | 8 | 12 |
2 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | 0 | 470 | 8 | 15 |
3 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | 0 | 462 | 8 | 7 |
4 | FALSE | FALSE | TRUE | FALSE | FALSE | TRUE | FALSE | FALSE | FALSE | FALSE | 0 | 470 | 8 | 10 |
5 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | 0 | 689 | 11 | 24 |
Let’s explore the data here with some graphs. First, a histogram:
library(reshape2)
library(ggplot2)
# generate a 100 pt x 17 comorbidity table (1700 rows)
cm_melted <- melt(cm_df, id.vars=c('id'), variable.name='cm')
# get rid of all the false entries
cm_melted <- cm_melted[cm_melted$value,]
## count only flags that are true
ggplot(cm_melted, aes(cm, fill=cm)) +
geom_bar() +
scale_fill_discrete()
The chrnlung comorbidity seems well represented. Let’s create a histogram breaking down which ICD-9-CM codes are mapping to chrnlung in this dataset:
# make a histogram dataframe for all the icd-9 codes
icd9cm_df <- count(vt_inp_sample, vars='icd9cm')
# create a charlson comorbidity map for all icd-9 codes
icd9cm_charlson_df <- icd9cm_charlson_quan(icd9cm_df$icd9cm)
# isolate just the chrnlung icd_9_cm codes
icd9cm_chrnlung <- row.names(icd9cm_charlson_df[icd9cm_charlson_df$chrnlung,])
# create a hist df
icd9cm_chrnlung_hist <- icd9cm_df[icd9cm_df$icd9cm %in% icd9cm_chrnlung,]
# plot it
ggplot(icd9cm_chrnlung_hist, aes(icd9cm, freq)) +
geom_bar(stat="identity")
Let’s see how often ICD-9-CM codes used for chrnlung coincide within patients:
# create base dataset
pairs <- unique(
vt_inp_sample[vt_inp_sample$icd9cm %in% icd9cm_chrnlung, c('id','icd9cm')])
# create coincidence matrix
t <- table(
ddply(pairs, c('id'), function(x) { if (length(x$icd9cm) > 1) {
data.frame(t(combn(as.character(x$icd9cm),2))) } })[c('X1','X2')])
D4168 | D49390 | |
---|---|---|
D49122 | 1 | 0 |
D4168 | 0 | 1 |
How often do comorbidities coincide?
# create coincidence matrix
t <- table(
ddply(cm_melted, c('id'), function(x) { if (length(x$cm) > 1) {
data.frame(t(combn(as.character(x$cm),2))) } })[c('X1','X2')])
# sort it
t <- t[order(rownames(t)),order(colnames(t))]
chf | chrnlung | cvd | dementia | dm | liver | mets | para | perivasc | renal | rheum | tumor | ulcer | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
chf | 0 | 8 | 0 | 0 | 3 | 0 | 1 | 0 | 4 | 3 | 3 | 1 | 1 |
chrnlung | 0 | 0 | 0 | 0 | 7 | 1 | 0 | 1 | 0 | 5 | 3 | 1 | 0 |
cvd | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 |
dementia | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
dm | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 5 | 0 | 2 | 0 |
liver | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
mi | 4 | 6 | 1 | 1 | 4 | 0 | 0 | 0 | 0 | 1 | 3 | 1 | 1 |
perivasc | 0 | 3 | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 3 | 1 | 1 | 0 |
rheum | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
ulcer | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
Plot the above table:
This is a scatterplot of the Charlson Comorbidity Index versus each RSI mortality estimate. A linear regression line is superimposed:
## Warning: package 'gridExtra' was built under R version 3.6.2
p.inhosp <- ggplot(wide_df, aes(rsi_inhosp, index)) + geom_point() + geom_smooth(method=lm) +
scale_y_continuous(limits=c(-3,10))
p.30dpod <- ggplot(wide_df, aes(rsi_30dpod, index)) + geom_point() + geom_smooth(method=lm) +
scale_y_continuous(limits=c(-3,10))
p.1yrpod <- ggplot(wide_df, aes(rsi_1yrpod, index)) + geom_point() + geom_smooth(method=lm) +
scale_y_continuous(limits=c(-3,10))
grid.arrange(p.inhosp, p.30dpod, p.1yrpod, nrow=1)
As expected, the Risk Stratification Index is correlated with an increased Charlson Comorbidity Index.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.