The hardware and bandwidth for this mirror are donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

OdysseusCharacterizationModule — Eunomia Walkthrough

This vignette demonstrates every major feature of OdysseusCharacterizationModule using the Eunomia synthetic OMOP CDM database.

Prerequisites

# Install the packages this walkthrough depends on, but only the ones that are
# not already available. requireNamespace() checks availability without
# attaching the package; the library() calls below do the attaching.
required_pkgs <- c("DatabaseConnector", "Eunomia")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]

if (length(missing_pkgs) > 0) {
  # A non-interactive session (e.g. knitting this vignette) cannot prompt for a
  # CRAN mirror, so install.packages() would stop if none is configured.
  # Fall back to the CRAN cloud mirror only when no mirror has been chosen.
  repos <- getOption("repos")
  cran <- unname(repos["CRAN"])
  if (length(cran) == 0 || is.na(cran) || cran == "@CRAN@") {
    repos["CRAN"] <- "https://cloud.r-project.org"
  }
  # One call for all missing packages lets dependencies be resolved together.
  install.packages(missing_pkgs, repos = repos)
}

library(OdysseusCharacterizationModule)
library(DatabaseConnector)
library(Eunomia)

1. Connect to Eunomia and create cohorts

Eunomia ships four built-in cohorts — Celecoxib (id = 1), Diclofenac (id = 2), GiBleed (id = 3), and NSAIDs (id = 4).


# Build the Eunomia connection details, create the example cohorts, and then
# open the connection that every later step of the walkthrough reuses.
# All three calls are namespace-qualified so it is clear which package
# provides each one.
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
Eunomia::createCohorts(connectionDetails)
connection <- DatabaseConnector::connect(connectionDetails)

Verify the cohort table:

# Count the rows of main.cohort per cohort_definition_id to confirm that the
# example cohorts were created. The result is printed by the bare
# `cohortCounts` expression.
cohortCounts <- querySql(connection, "
  SELECT cohort_definition_id, COUNT(*) AS cnt
  FROM main.cohort
  GROUP BY cohort_definition_id
  ORDER BY cohort_definition_id
")
cohortCounts

We will characterise the Celecoxib new-user cohort (id = 1) throughout this vignette.

# Shared settings reused by every singleNodeSetting() / executeSpecs() call
# below.
COHORT_ID   <- 1L       # target cohort: Celecoxib
CDM_SCHEMA  <- "main"   # also passed as the cohort and vocabulary schema below
COHORT_TBL  <- "cohort" # the table queried in the verification step above
TEMP_SCHEMA <- "main"   # SQLite temp-table emulation

2. Define analysis windows

# Two analysis windows, paired element-wise: days -365 to -1 and days 1 to 365.
# NOTE(review): days are presumably relative to the cohort start date; confirm
# against the defineAnalysisWindows() documentation.
windows <- defineAnalysisWindows(
  startDays = c(-365, 1),
  endDays   = c(  -1, 365)
)
windows

3. Base feature — Condition Occurrence (start type)

The simplest case: one domain, start-date logic, aggregated.

# Plan: only condition_occurrence is enabled (start-date logic); every other
# base domain, cohort features and concept-set features are switched off.
# `plan_cond` is reused later for the patient-level and Diclofenac examples.
plan_cond <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Turn the plan into a list of specs for the Celecoxib cohort. The same schema
# ("main") serves as cohort, CDM and vocabulary schema.
# aggregated = TRUE requests results summed across patients.
specs_cond <- singleNodeSetting(
  plan                     = plan_cond,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

cat("Specs generated:", length(specs_cond), "\n")

# Run every spec on the open connection.
# NOTE(review): cleanTempTables = TRUE presumably drops intermediate temp
# tables after the run; confirm against the executeSpecs() documentation.
results_cond <- executeSpecs(
  connection, specs_cond,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# Results are indexed by analysis ID; this walkthrough uses "1001" as the
# result for the first window. Show its first 10 rows.
head(results_cond[["1001"]], 10)

4. Base feature — Drug Exposure

# Plan: only drug_exposure is enabled. No `type` is given for it, so whatever
# default planAnalysis() applies is used -- TODO confirm which logic that is.
plan_drug <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = TRUE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
specs_drug <- singleNodeSetting(
  plan                     = plan_drug,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Execute the drug-exposure specs on the shared connection.
results_drug <- executeSpecs(
  connection, specs_drug,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# First 10 rows of the result stored under analysis ID "1001".
head(results_drug[["1001"]], 10)

5. Base feature — Condition Era (overlap type)

Overlap logic checks whether the era period overlaps the analysis window, rather than simply checking the start date.

# Plan: only condition_era is enabled, with type = "overlap" so that an era is
# matched when its period overlaps the analysis window.
plan_era <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = TRUE, type = "overlap"),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
specs_era <- singleNodeSetting(
  plan                     = plan_era,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Execute the condition-era specs on the shared connection.
results_era <- executeSpecs(
  connection, specs_era,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# First 10 rows of the result stored under analysis ID "1001".
head(results_era[["1001"]], 10)

6. Base feature — Drug Era (overlap type)

# Plan: only drug_era is enabled, using overlap logic.
plan_dera <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = TRUE, type = "overlap"),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
specs_dera <- singleNodeSetting(
  plan                     = plan_dera,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Execute the drug-era specs on the shared connection.
results_dera <- executeSpecs(
  connection, specs_dera,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# First 10 rows of the result stored under analysis ID "1001".
head(results_dera[["1001"]], 10)

7. Base feature — Procedure Occurrence

# Plan: only procedure_occurrence is enabled, using start-date logic.
plan_proc <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = TRUE, type = "start"),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
specs_proc <- singleNodeSetting(
  plan                     = plan_proc,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Execute the procedure specs on the shared connection.
results_proc <- executeSpecs(
  connection, specs_proc,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# First 10 rows of the result stored under analysis ID "1001".
head(results_proc[["1001"]], 10)

8. Base feature — Measurement

# Plan: only measurement is enabled. No `type` is given for it, so whatever
# default planAnalysis() applies is used -- TODO confirm which logic that is.
plan_meas <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = TRUE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
specs_meas <- singleNodeSetting(
  plan                     = plan_meas,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Execute the measurement specs on the shared connection.
results_meas <- executeSpecs(
  connection, specs_meas,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# First 10 rows of the result stored under analysis ID "1001".
head(results_meas[["1001"]], 10)

9. Base feature — Observation

# Plan: only observation is enabled. No `type` is given for it, so whatever
# default planAnalysis() applies is used -- TODO confirm which logic that is.
plan_obs <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = TRUE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
specs_obs <- singleNodeSetting(
  plan                     = plan_obs,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Execute the observation specs on the shared connection.
results_obs <- executeSpecs(
  connection, specs_obs,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# head() without `n` shows the default first 6 rows of analysis "1001".
head(results_obs[["1001"]])

10. Base feature — Visit Occurrence (overlap type)

# Plan: only visit_occurrence is enabled, using overlap logic.
plan_visit <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = TRUE, type = "overlap"),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
specs_visit <- singleNodeSetting(
  plan                     = plan_visit,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Execute the visit specs on the shared connection.
results_visit <- executeSpecs(
  connection, specs_visit,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# head() without `n` shows the default first 6 rows of analysis "1001".
head(results_visit[["1001"]])

11. Non-aggregated (patient-level) output

Setting aggregated = FALSE returns one row per patient-concept pair instead of summing across patients.

# Reuse the condition-occurrence plan (`plan_cond`) but request patient-level
# output: aggregated = FALSE is the only difference from `specs_cond`.
specs_raw <- singleNodeSetting(
  plan                     = plan_cond,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = FALSE
)

# Execute the patient-level specs on the shared connection.
results_raw <- executeSpecs(
  connection, specs_raw,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# Report the row count for window 1 (analysis "1001") and preview 10 rows.
cat("Patient-level rows (window 1):", nrow(results_raw[["1001"]]), "\n")
head(results_raw[["1001"]], 10)

12. Multiple domains at once

Enable several domains in a single plan for an integrated analysis.

# Plan: seven base domains enabled at once; observation and device_exposure
# stay off. Era and visit domains use overlap logic, condition_occurrence uses
# start logic, and the remaining enabled domains give no `type` (package
# default -- TODO confirm).
plan_multi <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era        = list(include = TRUE, type = "overlap"),
    drug_exposure        = list(include = TRUE),
    drug_era             = list(include = TRUE, type = "overlap"),
    procedure_occurrence = list(include = TRUE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = TRUE, type = "overlap"),
    measurement          = list(include = TRUE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
# `specs_multi` is reused by the stopOnError example later on.
specs_multi <- singleNodeSetting(
  plan                     = plan_multi,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

cat("Total specs:", length(specs_multi), "\n")

# Execute all multi-domain specs in one call.
results_multi <- executeSpecs(
  connection, specs_multi,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# Summary across all specs: one row per analysis ID with its result row count.
# Built with a single vectorised data.frame() call rather than rbind-ing
# one-row frames together. NROW() counts a NULL entry as 0 rows, and the
# explicitly typed columns are still present when `results_multi` is empty.
summary_df <- data.frame(
  analysis_id      = as.character(names(results_multi)),
  rows             = vapply(results_multi, NROW, integer(1)),
  row.names        = NULL, # keep 1..n row names instead of the analysis IDs
  stringsAsFactors = FALSE
)
summary_df

13. Cohort features — Using the GiBleed and NSAIDs cohorts as covariates

Use pre-defined cohorts as binary covariates. Here we test whether Celecoxib patients also appear in the GiBleed (id = 3) and NSAIDs (id = 4) cohorts within each analysis window, using start-date logic.

# Plan: every base domain is off; only cohort-based features are requested.
plan_cohort <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  # Covariate cohorts 3 (GiBleed) and 4 (NSAIDs), read from main.cohort and
  # matched with start-date logic. cohortIds and cohortNames are parallel
  # vectors.
  useCohortFeatures = list(
    include         = TRUE,
    type            = "start",
    cohortIds       = c(3L, 4L),
    cohortNames     = c("GiBleed", "NSAIDs"),
    cohortTable     = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib target cohort, aggregated across patients.
specs_cohort <- singleNodeSetting(
  plan                     = plan_cohort,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

cat("Cohort feature specs:", length(specs_cohort), "\n")

# Execute the cohort-feature specs on the shared connection.
results_cohort <- executeSpecs(
  connection, specs_cohort,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# Show results for every cohort feature spec
# (print() is required: values are not auto-printed inside a for loop).
for (nm in names(results_cohort)) {
  cat("\n--- Analysis", nm, "---\n")
  print(results_cohort[[nm]])
}

14. Cohort features — Overlap type

# Plan: cohort features only, this time with overlap logic and a single
# covariate cohort (3, GiBleed).
plan_coh_ov <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures = list(
    include         = TRUE,
    type            = "overlap",
    cohortIds       = c(3L),
    cohortNames     = c("GiBleed"),
    cohortTable     = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib target cohort, aggregated across patients.
specs_coh_ov <- singleNodeSetting(
  plan                     = plan_coh_ov,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Print the `overlap` element of the first spec.
# NOTE(review): presumably TRUE because type = "overlap" was requested; confirm
# against the spec structure returned by singleNodeSetting().
cat("Overlap flag:", specs_coh_ov[[1]]$overlap, "\n")

# Execute the overlap-type cohort specs on the shared connection.
results_coh_ov <- executeSpecs(
  connection, specs_coh_ov,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# Print each result under a header naming its analysis ID.
for (nm in names(results_coh_ov)) {
  cat("\n--- Analysis", nm, "---\n")
  print(results_coh_ov[[nm]])
}

15. SQL rendering without execution

You can inspect the generated SQL without a database connection using renderSpecSql() and renderAllSpecSql().

# Render the SQL of the first condition-occurrence spec without a target
# dialect and preview only its first 500 characters.
sql_default <- renderSpecSql(specs_cond[[1]])
cat("--- SQL Server (default) ---\n")
cat(substr(sql_default, 1, 500), "\n...\n")

Translate to other dialects:

# Render the same spec once per target dialect and preview the first 400
# characters of each translation.
for (dialect in c("postgresql", "redshift", "oracle", "spark")) {
  cat("\n--- Dialect:", dialect, "---\n")
  sql_translated <- renderSpecSql(specs_cond[[1]], targetDialect = dialect)
  cat(substr(sql_translated, 1, 400), "\n...\n")
}

Batch rendering:

# Render the SQL for every condition-occurrence spec in one call, then report
# how many statements came back and which analysis IDs they are named by.
all_sql <- renderAllSpecSql(specs_cond)
rendered_ids <- toString(names(all_sql)) # comma-and-space separated IDs
cat("Number of rendered SQL statements:", length(all_sql), "\n")
cat("Analysis IDs:", rendered_ids, "\n")

16. Multiple time windows

The number of specs scales linearly with the number of windows.

# Eight windows, paired element-wise: four ending at day -1 (starting at
# -365, -180, -90 and -30) and four consecutive ones from day 1 to day 365
# (1-30, 31-90, 91-180, 181-365).
windows_8 <- defineAnalysisWindows(
  startDays = c(-365, -180, -90, -30, 1, 31, 91, 181),
  endDays   = c(  -1,  -1,  -1,  -1, 30, 90, 180, 365)
)

# Plan: condition_occurrence only (start logic), evaluated over all 8 windows.
plan_8w <- planAnalysis(
  analysisWindows = windows_8,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = FALSE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = FALSE)
  ),
  useCohortFeatures     = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib cohort, aggregated across patients.
specs_8w <- singleNodeSetting(
  plan                     = plan_8w,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

cat("Specs with 8 windows:", length(specs_8w), "\n")

# Execute the 8-window specs on the shared connection.
results_8w <- executeSpecs(
  connection, specs_8w,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# Row count per analysis ID for the 8-window run.
# vapply() returns a vector named by analysis ID, which data.frame() would
# otherwise adopt as row names and so print every ID twice; row.names = NULL
# keeps plain 1..n row names.
data.frame(
  analysis_id = names(results_8w),
  rows        = vapply(results_8w, nrow, integer(1)),
  row.names   = NULL
)

17. Combined — Base + Cohort features in one run

# Plan: four base domains (condition_occurrence, drug_exposure,
# procedure_occurrence, measurement) plus one covariate cohort (3, GiBleed)
# with start-date logic, all in the same plan.
plan_combined <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era        = list(include = FALSE),
    drug_exposure        = list(include = TRUE),
    drug_era             = list(include = FALSE),
    procedure_occurrence = list(include = TRUE),
    observation          = list(include = FALSE),
    device_exposure      = list(include = FALSE),
    visit_occurrence     = list(include = FALSE),
    measurement          = list(include = TRUE)
  ),
  useCohortFeatures = list(
    include         = TRUE,
    type            = "start",
    cohortIds       = c(3L),
    cohortNames     = c("GiBleed"),
    cohortTable     = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the Celecoxib target cohort, aggregated across patients.
specs_combined <- singleNodeSetting(
  plan                     = plan_combined,
  cohortId                 = COHORT_ID,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# The label spells out the expected composition of the spec list.
cat("Total specs (4 base domains x 2 windows + 1 cohort x 2 windows):",
    length(specs_combined), "\n")

# Execute base and cohort specs together on the shared connection.
results_combined <- executeSpecs(
  connection, specs_combined,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# One summary row per analysis: its ID, whether it returned any rows, and how
# many. Built with a single vectorised data.frame() call rather than rbind-ing
# one-row frames together. NROW() counts a NULL entry as 0 rows, and the
# explicitly typed columns are still present when `results_combined` is empty.
combined_rows <- vapply(results_combined, NROW, integer(1))

# Label each analysis "data" when it produced at least one row, else "empty".
combined_source <- rep("empty", length(combined_rows))
combined_source[combined_rows > 0] <- "data"

summary_combined <- data.frame(
  analysis_id      = as.character(names(results_combined)),
  source           = combined_source,
  rows             = combined_rows,
  row.names        = NULL, # keep 1..n row names instead of the analysis IDs
  stringsAsFactors = FALSE
)
summary_combined

18. Characterising a different cohort — Diclofenac

All examples so far used Celecoxib (id = 1). Switching to a different cohort is as simple as changing cohortId.

# Reuse the condition-occurrence plan (`plan_cond`) for the Diclofenac cohort:
# cohortId = 2L is the only difference from `specs_cond`.
specs_diclo <- singleNodeSetting(
  plan                     = plan_cond,
  cohortId                 = 2L,
  cohortDatabaseSchema     = CDM_SCHEMA,
  cohortTable              = COHORT_TBL,
  cdmDatabaseSchema        = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated               = TRUE
)

# Execute the Diclofenac specs on the shared connection.
results_diclo <- executeSpecs(
  connection, specs_diclo,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE
)

# Analysis "1001" is the first window (days -365 to -1), hence "pre-index".
cat("Diclofenac condition covariates (pre-index):\n")
head(results_diclo[["1001"]], 10)

19. Error handling with stopOnError = FALSE

When executing many specs, you can continue past failures.

# Re-run the multi-domain specs with stopOnError = FALSE so that a failing spec
# does not abort the remaining ones.
results_safe <- executeSpecs(
  connection, specs_multi,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables     = TRUE,
  stopOnError         = FALSE
)

# Count a spec as failed when its result carries an "error" attribute.
# NOTE(review): assumes executeSpecs() marks failures this way; confirm against
# its documentation.
failed <- vapply(results_safe, function(df) !is.null(attr(df, "error")), logical(1))
cat("Failed specs:", sum(failed), "/", length(results_safe), "\n")

20. Cleanup

# Close the connection opened in section 1; no database calls follow.
disconnect(connection)

Session info

# Record the R version and package versions used to build this vignette.
sessionInfo()

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.