Let’s first load the packages required.
library(CDMConnector)
library(CohortSurvival)
library(dplyr)
library(ggplot2)
We’ll create a cdm reference to use our example MGUS2 survival dataset. In practice you would use the CDMConnector package to connect to your data mapped to the OMOP CDM.
# Create a cdm reference backed by the package's example MGUS2 dataset.
cdm <- CohortSurvival::mockMGUS2cdm()
In this vignette we’ll first estimate survival following a diagnosis of MGUS, with death our outcome of interest.
We would typically need to define study cohorts ourselves, but in the case of our example data we already have these cohorts available. You can see for our diagnosis cohort we also have a number of additional features recorded for individuals which we’ll use for stratification.
# Preview the columns of the diagnosis (target) cohort table.
cdm$mgus_diagnosis %>%
  glimpse()
#> Rows: ??
#> Columns: 10
#> Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/:memory:]
#> $ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
#> $ subject_id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15…
#> $ cohort_start_date <date> 1981-01-01, 1968-01-01, 1980-01-01, 1977-01-01, …
#> $ cohort_end_date <date> 1981-01-01, 1968-01-01, 1980-01-01, 1977-01-01, …
#> $ age <dbl> 88, 78, 94, 68, 90, 90, 89, 87, 86, 79, 86, 89, 8…
#> $ sex <fct> F, F, M, M, F, M, F, F, F, F, M, F, M, F, M, F, F…
#> $ hgb <dbl> 13.1, 11.5, 10.5, 15.2, 10.7, 12.9, 10.5, 12.3, 1…
#> $ creat <dbl> 1.30, 1.20, 1.50, 1.20, 0.80, 1.00, 0.90, 1.20, 0…
#> $ mspike <dbl> 0.5, 2.0, 2.6, 1.2, 1.0, 0.5, 1.3, 1.6, 2.4, 2.3,…
#> $ age_group <chr> ">=70", ">=70", ">=70", "<70", ">=70", ">=70", ">…
# Preview the columns of the death (outcome) cohort table.
cdm$death_cohort %>%
  glimpse()
#> Rows: ??
#> Columns: 4
#> Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/:memory:]
#> $ cohort_definition_id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
#> $ subject_id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 1…
#> $ cohort_start_date <date> 1981-01-31, 1968-01-26, 1980-02-16, 1977-04-03, …
#> $ cohort_end_date <date> 1981-01-31, 1968-01-26, 1980-02-16, 1977-04-03, …
First, we can estimate survival for the cohort overall like so.
# Estimate survival from MGUS diagnosis to death for the whole cohort,
# then inspect the structure of the returned result.
MGUS_death <- estimateSingleEventSurvival(cdm,
  targetCohortTable = "mgus_diagnosis",
  outcomeCohortTable = "death_cohort"
)
MGUS_death %>%
  glimpse()
#> Rows: 2,550
#> Columns: 14
#> $ cdm_name <chr> "mock", "mock", "mock", "mock", "mock", "mock", "mock",…
#> $ result_type <chr> "Survival estimate", "Survival estimate", "Survival est…
#> $ group_name <chr> "Cohort", "Cohort", "Cohort", "Cohort", "Cohort", "Coho…
#> $ group_level <chr> "mgus_diagnosis", "mgus_diagnosis", "mgus_diagnosis", "…
#> $ strata_name <chr> "Overall", "Overall", "Overall", "Overall", "Overall", …
#> $ strata_level <chr> "Overall", "Overall", "Overall", "Overall", "Overall", …
#> $ variable <chr> "Outcome", "Outcome", "Outcome", "Outcome", "Outcome", …
#> $ variable_level <chr> "death_cohort", "death_cohort", "death_cohort", "death_…
#> $ variable_type <chr> "estimate", "estimate_95CI_lower", "estimate_95CI_upper…
#> $ estimate_type <chr> "Survival probability", "Survival probability", "Surviv…
#> $ time <dbl> 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6…
#> $ analysis_type <chr> "Single event", "Single event", "Single event", "Single…
#> $ outcome <chr> "death_cohort", "death_cohort", "death_cohort", "death_…
#> $ estimate <dbl> 1.0000, 1.0000, 1.0000, 0.9697, 0.9607, 0.9787, 0.9494,…
As we can see above our results have been outputted in long format. We can plot these results like so.
# Draw the survival curve from the estimates held in MGUS_death.
CohortSurvival::plotSurvival(MGUS_death)
Our returned results also have attributes containing information that summarises survival.
# Spread the summary so each variable_type becomes its own column, then
# build a single "median (lower to upper)" label and keep only that column.
survivalSummary(MGUS_death) %>%
  tidyr::pivot_wider(names_from = "variable_type", values_from = "estimate") %>%
  dplyr::mutate(
    "Median survival (95% CI)" = paste0(
      median_survival,
      " (", median_survival_95CI_lower,
      " to ", median_survival_95CI_higher, ")"
    )
  ) %>%
  dplyr::select("Median survival (95% CI)")
#> # A tibble: 1 × 1
#> `Median survival (95% CI)`
#> <chr>
#> 1 98 (92 to 103)
To estimate survival for particular strata of interest we need these features to have been added to the target cohort table. Once we have them defined, and as seen above we already have a number of example characteristics added to our diagnosis cohort, we can add stratifications like so.
# Re-estimate survival with stratification. Each element of `strata` names
# the target-cohort column(s) defining one stratification: age group alone,
# sex alone, and the two combined.
MGUS_death <- estimateSingleEventSurvival(cdm,
  targetCohortTable = "mgus_diagnosis",
  outcomeCohortTable = "death_cohort",
  strata = list(
    c("age_group"),
    c("sex"),
    c("age_group", "sex")
  )
)
As we can see, in addition to results for each stratum, overall results are always returned as well.
# One panel per strata_name, with curves coloured by strata_level.
CohortSurvival::plotSurvival(
  MGUS_death,
  facet = "strata_name",
  colour = "strata_level"
)
And we also now have summary statistics for each of the strata as well as overall.
# Spread the summary so each variable_type becomes its own column, build a
# "median (lower to upper)" label, and keep it alongside the strata columns.
survivalSummary(MGUS_death) %>%
  tidyr::pivot_wider(names_from = "variable_type", values_from = "estimate") %>%
  dplyr::mutate(
    "Median survival (95% CI)" = paste0(
      median_survival,
      " (", median_survival_95CI_lower,
      " to ", median_survival_95CI_higher, ")"
    )
  ) %>%
  dplyr::select(strata_name, strata_level, "Median survival (95% CI)")
#> # A tibble: 9 × 3
#> strata_name strata_level `Median survival (95% CI)`
#> <chr> <chr> <chr>
#> 1 Overall Overall 98 (92 to 103)
#> 2 age_group <70 180 (158 to 206)
#> 3 age_group >=70 71 (66 to 77)
#> 4 sex F 108 (100 to 121)
#> 5 sex M 88 (79 to 97)
#> 6 age_group and sex <70 and F 215 (179 to 260)
#> 7 age_group and sex <70 and M 158 (139 to 189)
#> 8 age_group and sex >=70 and F 82 (75 to 94)
#> 9 age_group and sex >=70 and M 61 (54 to 70)
If we set returnParticipants to TRUE then we will also be able to access the individuals that contributed to the analysis.
# Ask for the participants to be kept with the result, then retrieve the
# cohort records of the individuals who contributed to the analysis.
MGUS_death <- estimateSingleEventSurvival(cdm,
  targetCohortTable = "mgus_diagnosis",
  outcomeCohortTable = "death_cohort",
  returnParticipants = TRUE
)
survivalParticipants(MGUS_death)
#> # Source: table<dbplyr_028> [?? x 4]
#> # Database: DuckDB 0.8.1 [eburn@Windows 10 x64:R 4.2.1/:memory:]
#> cohort_definition_id subject_id cohort_start_date cohort_end_date
#> <int> <dbl> <date> <date>
#> 1 1 1 1981-01-01 1981-01-01
#> 2 1 2 1968-01-01 1968-01-01
#> 3 1 4 1977-01-01 1977-01-01
#> 4 1 5 1973-01-01 1973-01-01
#> 5 1 6 1990-01-01 1990-01-01
#> 6 1 10 1981-01-01 1981-01-01
#> 7 1 11 1972-01-01 1972-01-01
#> 8 1 12 1983-01-01 1983-01-01
#> 9 1 13 1968-01-01 1968-01-01
#> 10 1 17 1975-01-01 1975-01-01
#> # ℹ more rows
# Close the connection behind the cdm reference now that we are finished.
CDMConnector::cdm_disconnect(cdm)