This vignette benchmarks the generateDrugUtilisationCohortSet function from the DrugUtilisation package. The example shown in this vignette uses duckdb, but the actual computing time comparison, obtained from a postgres database, is provided at the end.
First, connect to the database.
library(DrugUtilisation)
library(CodelistGenerator)
library(Capr)
library(CDMConnector)
library(dplyr)
library(tictoc)
# Connection details are read from environment variables so that no
# credentials are hard-coded in the script.
db_name <- Sys.getenv("...")
host <- Sys.getenv("...")
user <- Sys.getenv("...")
password <- Sys.getenv("...")
port <- Sys.getenv("...")

# Open the PostgreSQL connection. `dbConnect()` is namespace-qualified because
# DBI is not attached by any of the `library()` calls above.
db <- DBI::dbConnect(
  RPostgres::Postgres(),
  dbname = db_name,
  port = port,
  host = host,
  user = user,
  password = password
)
# The name of the schema that contains the OMOP CDM with patient-level data
cdm_database_schema <- "..."

# The name of the schema where results tables will be created
results_database_schema <- "..."

stem_table <- "..."

# Create the cdm object: it reads from `cdm_database_schema` and writes
# result tables to `results_database_schema` over the `db` connection.
cdm <- CDMConnector::cdm_from_con(
  con = db,
  cdm_schema = cdm_database_schema,
  write_schema = results_database_schema
)
A function to benchmark generateDrugUtilisationCohortSet is provided below.
# Benchmark generateDrugUtilisationCohortSet() for a growing number of cohorts.
#
# For every `j` in `numberOfCohort`, the first `j` concept sets of `conceptSet`
# are instantiated into the cohort table "atc_dus_<j>" and the elapsed time
# reported by tictoc is recorded.
#
# Arguments:
#   cdm            cdm reference; it is replaced by the value returned from
#                  generateDrugUtilisationCohortSet() after every run.
#   name           Kept for backward compatibility only. The cohort table name
#                  is always "atc_dus_<j>", so this value is not used.
#   conceptSet     List of concept sets; the first `j` entries are used in
#                  the run for `j` cohorts.
#   durationRange, imputeDuration, gapEra, priorUseWashout, priorObservation,
#   cohortDateRange, limit
#                  Forwarded unchanged to generateDrugUtilisationCohortSet().
#   numberOfCohort Integer vector with the numbers of cohorts to benchmark;
#                  each value must lie between 1 and length(conceptSet).
#
# Returns a named list with one element per benchmarked size, named
# "DUs number of cohorts<j>", each holding c(timeTaken = <elapsed time>).
benchmarkGenerateDrugUtilisationCohortSet <- function(
    cdm,
    name = "test",
    conceptSet,
    durationRange = c(1, Inf),
    imputeDuration = "none",
    gapEra = 0,
    priorUseWashout = 0,
    priorObservation = 0,
    cohortDateRange = as.Date(c(NA, NA)),
    limit = "all",
    numberOfCohort = c(1:20)) {
  # Indexing past the end of a list silently pads it with NULL entries, so
  # reject sizes that `conceptSet` cannot provide.
  if (any(numberOfCohort < 1) || any(numberOfCohort > length(conceptSet))) {
    stop(
      "`numberOfCohort` values must be between 1 and length(conceptSet) (",
      length(conceptSet), ").",
      call. = FALSE
    )
  }

  time_record <- list()
  for (j in numberOfCohort) {
    # Only the first j concept sets take part in this run.
    conceptSetList <- conceptSet[seq_len(j)]
    cohortName <- paste0("atc_dus_", j)

    tic()
    cdm <- generateDrugUtilisationCohortSet(
      cdm = cdm,
      name = cohortName,
      conceptSet = conceptSetList,
      durationRange = durationRange,
      imputeDuration = imputeDuration,
      gapEra = gapEra,
      priorUseWashout = priorUseWashout,
      priorObservation = priorObservation,
      cohortDateRange = cohortDateRange,
      limit = limit
    )
    # Result is discarded; the call is kept inside the timed section --
    # presumably so that retrieving the counts is part of the measurement.
    cohort_count(cdm[[cohortName]])
    timing <- toc(quiet = TRUE)

    time_record[[paste0("DUs number of cohorts", j)]] <-
      c("timeTaken" = as.numeric(timing$toc - timing$tic))
  }
  time_record
}
Now we use CodelistGenerator to obtain the ATC and drug ingredient code lists that will be used for cohort generation.
# Code lists used as concept sets in the benchmarks.
atcCodes <- getATCCodes(cdm, level = "ATC 1st")
ingredientCodes <- getDrugIngredientCodes(cdm)

# Time generateDrugUtilisationCohortSet() for 1 to 12 ingredient cohorts.
time_record_cprdgold_dus <- benchmarkGenerateDrugUtilisationCohortSet(
  cdm,
  numberOfCohort = c(1:12),
  conceptSet = ingredientCodes
)
Now create a benchmarking function using Capr.
# Benchmark cohort generation through Capr for a growing number of cohorts.
#
# For every `j` in `numberOfCohort`, the first `j` entries of `conceptSetList`
# are turned into Capr concept sets, one drug-exposure cohort definition is
# built per concept set, and the time taken by generateCohortSet() to
# instantiate them into the table "capr_cohorts_<j>" is recorded.
#
# Arguments:
#   cdm               cdm reference; it is replaced by the value returned from
#                     generateCohortSet() after every run.
#   conceptSetList    Named list of concept sets. The default refers to a
#                     global object `atcCodes`, which must exist if the
#                     argument is not supplied.
#   numberOfCohort    Integer vector with the numbers of cohorts to benchmark;
#                     each value must lie between 1 and length(conceptSetList).
#   observationWindow, qualifiedLimit
#                     Forwarded to entry().
#   studyStartDate, studyEndDate
#                     Forwarded to era().
#   persistenceWindow Forwarded to drugExit().
#
# Returns a named list with one element per benchmarked size, named
# "CapR number of cohorts<j>", each holding c(timeTaken = <elapsed time>).
benchmarkCapr <- function(cdm,
                          conceptSetList = atcCodes,
                          numberOfCohort = c(1:20),
                          observationWindow = continuousObservation(priorDays = 365),
                          qualifiedLimit = "All",
                          studyStartDate = as.Date("2015-01-01"),
                          studyEndDate = as.Date("2022-12-31"),
                          persistenceWindow = 0) {
  # Indexing past the end of a list silently pads it with NULL entries, so
  # reject sizes that `conceptSetList` cannot provide.
  if (any(numberOfCohort < 1) || any(numberOfCohort > length(conceptSetList))) {
    stop(
      "`numberOfCohort` values must be between 1 and length(conceptSetList) (",
      length(conceptSetList), ").",
      call. = FALSE
    )
  }

  time_record <- list()
  for (j in numberOfCohort) {
    # Subset into a fresh local: overwriting `conceptSetList` here would
    # shrink the input and break every later, larger iteration.
    conceptSubset <- conceptSetList[seq_len(j)]

    atc_test <- lapply(seq_along(conceptSubset), function(i) {
      cs(conceptSubset[[i]], name = names(conceptSubset)[[i]])
    })

    # One cohort definition per concept set.
    ch <- lapply(atc_test, function(x) {
      cohort(
        entry = entry(
          drugExposure(x),
          observationWindow = observationWindow,
          qualifiedLimit = qualifiedLimit
        ),
        exit = exit(
          endStrategy = drugExit(
            conceptSet = x,
            persistenceWindow = persistenceWindow
          )
        ),
        era = era(studyStartDate = studyStartDate, studyEndDate = studyEndDate)
      )
    })
    names(ch) <- paste0("cohort_", seq_along(ch))

    # Only the generation step is timed, not the definition building above.
    tic()
    cdm <- generateCohortSet(
      cdm = cdm,
      cohortSet = ch,
      name = paste0("capr_cohorts_", j),
      overwrite = TRUE
    )
    timing <- toc(quiet = TRUE)

    time_record[[paste0("CapR number of cohorts", j)]] <-
      c("timeTaken" = as.numeric(timing$toc - timing$tic))
  }
  time_record
}
# Time Capr cohort generation for 1 to 20 ingredient cohorts.
time_record_cprdgold_capr <- benchmarkCapr(
  cdm = cdm,
  numberOfCohort = c(1:20),
  conceptSetList = ingredientCodes
)