The hardware and bandwidth for this mirror are donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
This vignette demonstrates every major feature of OdysseusCharacterizationModule using the Eunomia synthetic OMOP CDM database.
Eunomia ships four built-in cohorts — Celecoxib (id = 1), Diclofenac (id = 2), GiBleed (id = 3), and NSAIDs (id = 4).
# Get connection details for the Eunomia database, create its built-in
# cohorts, and connect.
connectionDetails <- getEunomiaConnectionDetails()
Eunomia::createCohorts(connectionDetails)
connection <- connect(connectionDetails)

Verify the cohort table:
# Count the rows in main.cohort for each cohort_definition_id, then print
# the result.
cohortCounts <- querySql(connection, "
SELECT cohort_definition_id, COUNT(*) AS cnt
FROM main.cohort
GROUP BY cohort_definition_id
ORDER BY cohort_definition_id
")
cohortCounts

We will characterise the Celecoxib new-user cohort (id = 1) throughout this vignette.
The simplest case: one domain, start-date logic, aggregated.
# Plan: only condition_occurrence is enabled (type = "start"); every other
# base domain, cohort features and concept-set features are switched off.
plan_cond <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Turn the plan into specs for the target cohort, with aggregated output.
specs_cond <- singleNodeSetting(
  plan = plan_cond,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)
cat("Specs generated:", length(specs_cond), "\n")

# Execute the specs on the open connection.
results_cond <- executeSpecs(
  connection, specs_cond,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# First rows of the condition results stored under the name "1001".
head(results_cond[["1001"]], 10)

# Plan: only drug_exposure is enabled; all other feature sources are off.
plan_drug <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = TRUE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_drug <- singleNodeSetting(
  plan = plan_drug,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_drug <- executeSpecs(
  connection, specs_drug,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
head(results_drug[["1001"]], 10)

Overlap logic checks whether the era period overlaps the analysis window, rather than simply checking the start date.
# Plan: only condition_era is enabled, using type = "overlap".
plan_era <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = TRUE, type = "overlap"),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the target cohort, aggregated output.
specs_era <- singleNodeSetting(
  plan = plan_era,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_era <- executeSpecs(
  connection, specs_era,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# First rows of the condition-era results stored under the name "1001".
head(results_era[["1001"]], 10)

# Plan: only drug_era is enabled, using type = "overlap".
plan_dera <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = TRUE, type = "overlap"),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_dera <- singleNodeSetting(
  plan = plan_dera,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_dera <- executeSpecs(
  connection, specs_dera,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# First rows of the drug-era results stored under the name "1001".
head(results_dera[["1001"]], 10)

# Plan: only procedure_occurrence is enabled, using type = "start".
plan_proc <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = TRUE, type = "start"),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_proc <- singleNodeSetting(
  plan = plan_proc,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_proc <- executeSpecs(
  connection, specs_proc,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# First rows of the procedure results stored under the name "1001".
head(results_proc[["1001"]], 10)

# Plan: only measurement is enabled; all other feature sources are off.
plan_meas <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = TRUE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_meas <- singleNodeSetting(
  plan = plan_meas,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_meas <- executeSpecs(
  connection, specs_meas,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# First rows of the measurement results stored under the name "1001".
head(results_meas[["1001"]], 10)

# Plan: only observation is enabled; all other feature sources are off.
plan_obs <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = TRUE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_obs <- singleNodeSetting(
  plan = plan_obs,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_obs <- executeSpecs(
  connection, specs_obs,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# First rows of the observation results stored under the name "1001".
head(results_obs[["1001"]])

# Plan: only visit_occurrence is enabled, using type = "overlap".
plan_visit <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = TRUE, type = "overlap"),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_visit <- singleNodeSetting(
  plan = plan_visit,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_visit <- executeSpecs(
  connection, specs_visit,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
head(results_visit[["1001"]])

Setting aggregated = FALSE returns one row per
patient-concept pair instead of summing across patients.
# Reuse the condition plan, but request non-aggregated output.
specs_raw <- singleNodeSetting(
  plan = plan_cond,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = FALSE
)

results_raw <- executeSpecs(
  connection, specs_raw,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
cat("Patient-level rows (window 1):", nrow(results_raw[["1001"]]), "\n")
head(results_raw[["1001"]], 10)

Enable several domains in a single plan for an integrated analysis.
# Plan: seven base domains enabled at once; observation and device_exposure
# stay off, as do cohort and concept-set features.
plan_multi <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = TRUE, type = "overlap"),
    drug_exposure = list(include = TRUE),
    drug_era = list(include = TRUE, type = "overlap"),
    procedure_occurrence = list(include = TRUE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = TRUE, type = "overlap"),
    measurement = list(include = TRUE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

specs_multi <- singleNodeSetting(
  plan = plan_multi,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)
cat("Total specs:", length(specs_multi), "\n")

results_multi <- executeSpecs(
  connection, specs_multi,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Summary across all specs: one row per result, with its name and row count.
summary_df <- do.call(rbind, lapply(names(results_multi), function(nm) {
  df <- results_multi[[nm]]
  data.frame(analysis_id = nm, rows = nrow(df), stringsAsFactors = FALSE)
}))
summary_df

Use pre-defined cohorts as binary covariates. Here we test whether Celecoxib patients overlap with the GiBleed cohort.
# Plan: all base domains off; cohort features on (type = "start") for
# cohorts 3 (GiBleed) and 4 (NSAIDs) read from main.cohort.
plan_cohort <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(
    include = TRUE,
    type = "start",
    cohortIds = c(3L, 4L),
    cohortNames = c("GiBleed", "NSAIDs"),
    cohortTable = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the target cohort, aggregated output.
specs_cohort <- singleNodeSetting(
  plan = plan_cohort,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)
cat("Cohort feature specs:", length(specs_cohort), "\n")

results_cohort <- executeSpecs(
  connection, specs_cohort,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Show results for every cohort feature spec
for (nm in names(results_cohort)) {
  cat("\n--- Analysis", nm, "---\n")
  print(results_cohort[[nm]])
}

# Plan: all base domains off; cohort feature for cohort 3 (GiBleed) using
# type = "overlap".
plan_coh_ov <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = FALSE),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(
    include = TRUE,
    type = "overlap",
    cohortIds = c(3L),
    cohortNames = c("GiBleed"),
    cohortTable = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

specs_coh_ov <- singleNodeSetting(
  plan = plan_coh_ov,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)
# Print the overlap field of the first generated spec.
cat("Overlap flag:", specs_coh_ov[[1]]$overlap, "\n")

results_coh_ov <- executeSpecs(
  connection, specs_coh_ov,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)

# Print each overlap result under a header naming it.
for (nm in names(results_coh_ov)) {
  cat("\n--- Analysis", nm, "---\n")
  print(results_coh_ov[[nm]])
}

You can inspect the generated SQL without a database connection using
renderSpecSql() and renderAllSpecSql().
# Render the SQL for the first condition spec and print its first 500
# characters.
sql_default <- renderSpecSql(specs_cond[[1]])
cat("--- SQL Server (default) ---\n")
cat(substr(sql_default, 1, 500), "\n...\n")

Translate to other dialects:
# Render the first condition spec once per target dialect and print the
# first 400 characters of each result.
for (dialect in c("postgresql", "redshift", "oracle", "spark")) {
  cat("\n--- Dialect:", dialect, "---\n")
  sql_translated <- renderSpecSql(specs_cond[[1]], targetDialect = dialect)
  cat(substr(sql_translated, 1, 400), "\n...\n")
}

Batch rendering:
The number of specs scales linearly with the number of windows.
# Eight analysis windows given as paired start/end day offsets.
windows_8 <- defineAnalysisWindows(
  startDays = c(-365, -180, -90, -30, 1, 31, 91, 181),
  endDays = c( -1, -1, -1, -1, 30, 90, 180, 365)
)

# Plan: condition_occurrence only (type = "start") over the eight windows.
plan_8w <- planAnalysis(
  analysisWindows = windows_8,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = FALSE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = FALSE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = FALSE)
  ),
  useCohortFeatures = list(include = FALSE),
  useConceptSetFeatures = list(include = FALSE)
)

# Specs for the target cohort, aggregated output.
specs_8w <- singleNodeSetting(
  plan = plan_8w,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)
cat("Specs with 8 windows:", length(specs_8w), "\n")

results_8w <- executeSpecs(
  connection, specs_8w,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# Row count of each 8-window result, keyed by the result's name.
data.frame(
  analysis_id = names(results_8w),
  rows = vapply(results_8w, nrow, integer(1))
)

# Plan: four base domains (condition_occurrence, drug_exposure,
# procedure_occurrence, measurement) plus the GiBleed cohort feature.
plan_combined <- planAnalysis(
  analysisWindows = windows,
  useBaseFeatures = list(
    condition_occurrence = list(include = TRUE, type = "start"),
    condition_era = list(include = FALSE),
    drug_exposure = list(include = TRUE),
    drug_era = list(include = FALSE),
    procedure_occurrence = list(include = TRUE),
    observation = list(include = FALSE),
    device_exposure = list(include = FALSE),
    visit_occurrence = list(include = FALSE),
    measurement = list(include = TRUE)
  ),
  useCohortFeatures = list(
    include = TRUE,
    type = "start",
    cohortIds = c(3L),
    cohortNames = c("GiBleed"),
    cohortTable = "cohort",
    covariateSchema = "main"
  ),
  useConceptSetFeatures = list(include = FALSE)
)

specs_combined <- singleNodeSetting(
  plan = plan_combined,
  cohortId = COHORT_ID,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)
cat("Total specs (4 base domains x 2 windows + 1 cohort x 2 windows):",
    length(specs_combined), "\n")

results_combined <- executeSpecs(
  connection, specs_combined,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
# One summary row per result: its name, whether it has any rows, and the
# row count.
summary_combined <- do.call(rbind, lapply(names(results_combined), function(nm) {
  df <- results_combined[[nm]]
  data.frame(
    analysis_id = nm,
    source = if (nrow(df) > 0) "data" else "empty",
    rows = nrow(df),
    stringsAsFactors = FALSE
  )
}))
summary_combined

All examples so far used Celecoxib (id = 1). Switching to a different
cohort is as simple as changing cohortId.
# Reuse the condition plan for cohort id 2 (Diclofenac).
specs_diclo <- singleNodeSetting(
  plan = plan_cond,
  cohortId = 2L,
  cohortDatabaseSchema = CDM_SCHEMA,
  cohortTable = COHORT_TBL,
  cdmDatabaseSchema = CDM_SCHEMA,
  vocabularyDatabaseSchema = CDM_SCHEMA,
  aggregated = TRUE
)

results_diclo <- executeSpecs(
  connection, specs_diclo,
  tempEmulationSchema = TEMP_SCHEMA,
  cleanTempTables = TRUE
)
cat("Diclofenac condition covariates (pre-index):\n")
head(results_diclo[["1001"]], 10)

stopOnError = FALSE

When executing many specs, you can continue past failures.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.