Run multiple stations and models for the 9-Euro-Ticket

The following example trains each of the four model types for two monitoring stations (Lünen and Aachen-Burtscheid) and estimates the effect of the 9-Euro-Ticket, introduced on 2022-06-01, on NO2 concentrations.
sample_name <- "NeunEuroTicket"
target <- "NO2"
stations <- list(Luenen = "DENW006", AachenBurtscheid = "DENW094")
meteo_variables <- c("TMP", "RFE", "WIG", "WIR", "LDR")
application_start <- lubridate::ymd("20220301") # start of the reference window (predictions begin here)
date_effect_start <- lubridate::ymd_hm("20220601 00:00") # introduction of the 9-Euro-Ticket
application_end <- lubridate::ymd("20220831") # end of the effect window (predictions end here)
buffer <- 0 # number of data points to ignore directly before the effect start
trend <- "linear"
# hyperparameters can be set in params/params.yaml
model_types <- c("lightgbm", "rf", "dynamic_regression", "fnn")
window_size <- 14 # rolling-mean window (in days) applied to the prediction results
# This might take a few seconds for large files
data <- load_uba_data_from_dir(data_dir = data_dir)
params <- load_params()
params$target <- target
params$meteo_variables <- meteo_variables
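The comment above points to params/params.yaml for the model hyperparameters. To see what is available before training, the file can be inspected directly; this is a minimal sketch that assumes the file sits at that relative path and uses the yaml package rather than a function from this package:

# Sketch: list the hyperparameters available in the file referenced above
yaml_params <- yaml::read_yaml("params/params.yaml")
str(yaml_params)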
for (station_name in names(stations)) {
  station <- stations[[station_name]]
  predictions_all <- data.table::data.table()
  metrics_all <- data.table::data.table()
  env_data <- clean_data(data, station = station)
  dt_prepared <- prepare_data_for_modelling(env_data, params)
  dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
  split_data <- split_data_counterfactual(
    dt_prepared,
    application_start = application_start,
    application_end = application_end
  )
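  # Sanity check (sketch, not from the source): assuming split_data is a
  # named list of data.tables, confirm that no split came back empty.
  stopifnot(all(sapply(split_data, nrow) > 0))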
  for (model_type in model_types) {
    message(paste("start training:", station_name, station, model_type))
    res <- run_counterfactual(split_data,
      params,
      detrending_function = trend,
      model_type = model_type,
      alpha = 0.9,
      log_transform = FALSE
    )
    predictions <- data.table::copy(res$prediction)
    # plot observed values against the business-as-usual (BAU) counterfactual
    bau_plot <- plot_counterfactual(predictions, params,
      window_size = window_size,
      date_effect_start = date_effect_start,
      buffer = buffer
    )
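    # Optional (sketch): bau_plot is created but not displayed in this loop.
    # Assuming it is a ggplot object, it could be written to disk; the file
    # name below is illustrative, not from the source.
    ggplot2::ggsave(
      filename = paste0("bau_", station, "_", model_type, ".png"),
      plot = bau_plot, width = 10, height = 5
    )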
    # evaluation
    metrics <- round(calc_performance_metrics(predictions,
      date_effect_start,
      buffer = buffer
    ), 2)
    effect <- estimate_effect_size(predictions,
      date_effect_start,
      buffer = buffer,
      verbose = FALSE
    )
    metrics["effect_size"] <- effect["absolute_effect"]
    metrics["relative_effect"] <- effect["relative_effect"]
    # add information for export
    metrics["model"] <- model_type
    metrics["trend"] <- trend
    metrics["station_name"] <- station_name
    metrics["station"] <- station
    metrics["buffer_start"] <- format(
      date_effect_start - as.difftime(buffer, units = "hours"),
      "%Y-%m-%d"
    )
    metrics["effect_start"] <- format(date_effect_start, "%Y-%m-%d")
    metrics_dt <- data.table::as.data.table(t(metrics))
    metrics_all <- rbind(metrics_all, metrics_dt)
    predictions[, station := station]
    predictions[, model := model_type]
    predictions[, trend := trend]
    predictions_all <- rbind(predictions_all, predictions)
  }
  # prepare predictions (hourly data) and metrics for export
  predictions_save <- dplyr::select(
    predictions_all,
    c(
      date,
      value,
      prediction,
      prediction_lower,
      prediction_upper,
      station,
      model,
      trend
    )
  )
  predictions_save$date <- format(predictions_save$date, "%Y-%m-%d %H:%M")
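  # Sketch (not from the source): the code above only prepares the tables;
  # data.table::fwrite could persist them. File names are illustrative.
  data.table::fwrite(
    predictions_save,
    paste0("predictions_", sample_name, "_", station, ".csv")
  )
  data.table::fwrite(
    metrics_all,
    paste0("metrics_", sample_name, "_", station, ".csv")
  )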
}
#> start training: Luenen DENW006 lightgbm
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000319 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1549
#> [LightGBM] [Info] Number of data points in the train set: 60472, number of used features: 8
#> [LightGBM] [Info] Start training from score 0.000000
#> start training: Luenen DENW006 rf
#> start training: Luenen DENW006 dynamic_regression
#> Using data for dynamic regression training from 2021-01-22 01:00:00 to 2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> start training: Luenen DENW006 fnn
#> start training: AachenBurtscheid DENW094 lightgbm
#> [LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.031247 seconds.
#> You can set `force_col_wise=true` to remove the overhead.
#> [LightGBM] [Info] Total Bins 1550
#> [LightGBM] [Info] Number of data points in the train set: 60039, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> start training: AachenBurtscheid DENW094 rf
#> start training: AachenBurtscheid DENW094 dynamic_regression
#> Using data for dynamic regression training from 2021-01-10 04:00:00 to 2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.
#> start training: AachenBurtscheid DENW094 fnn
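The dynamic regression log above warns that an overly long training series can hurt performance and points to the dynamic_regression$ntrain hyperparameter. A minimal sketch of capping it on the loaded params list before rerunning run_counterfactual; the nested list path is taken from the log message, and the value 8760 (one year of hourly data) is illustrative:

# Sketch: limit the dynamic regression training window to one year of hourly
# data; 8760 is illustrative, not a recommendation from the source.
params$dynamic_regression$ntrain <- 8760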