The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
Install the package as described in the README.
library(ubair)
library(ggplot2)
# Directory that holds the input data files
data_dir <- "../../Daten/user_sample_data/"
# Loading can take a few seconds when the files are large
data <- load_uba_data_from_dir(data_dir = data_dir)

# Target variable, stations (display name = station code) and the
# meteorological variables handed to the model via `params`
target <- "NO2"
stations <- list(Lünen = "DENW006", Bottrop = "DENW021", Köln = "DENW212")
meteo_variables <- c("TMP", "RFE", "WIG", "WIR", "LDR")

# Time windows for the 9 Euro ticket effect
application_start <- lubridate::ymd("20220301") # start of reference time
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
application_end <- lubridate::ymd("20220831") # end of effect time

# Modelling and plotting settings
buffer <- 0 # number of data points to be ignored before the effect
trend <- "linear"
model_type <- "lightgbm"
window_size <- 14 # days of data used for the mean in prediction results

# Load the default parameters, then override them programmatically
params <- load_params()
params[["target"]] <- target
params[["meteo_variables"]] <- meteo_variables
# Run the counterfactual model once per station and collect the predictions.
# Per-station results go into a preallocated list that is bound once after
# the loop, instead of growing a data.table with rbind() on every iteration.
results_list <- vector("list", length(stations))
names(results_list) <- names(stations)
for (station_name in names(stations)) {
  # `[[` extracts the station code itself; `[` would return a one-element
  # list. Resolving it here (outside of `[.data.table`) also avoids the name
  # clash with the `station_name` column that is added further down.
  station_id <- stations[[station_name]]

  env_data <- clean_data(data, station = station_id)
  dt_prepared <- prepare_data_for_modelling(env_data, params)
  # keep only rows without missing values
  dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
  split_data <- split_data_counterfactual(
    dt_prepared,
    application_start = application_start,
    application_end = application_end
  )
  res <- run_counterfactual(split_data,
    params,
    detrending_function = trend,
    model_type = model_type,
    alpha = 0.9,
    log_transform = TRUE
  )

  # Tag every prediction row with its station and the model settings used
  predictions <- res$prediction
  predictions[, station_name := station_name]
  predictions[, station := station_id]
  predictions[, model := model_type]
  predictions[, trend := trend]
  results_list[[station_name]] <- predictions
}
results_all <- data.table::rbindlist(results_list, use.names = TRUE)
#> Warning in log(dt_train_new$value): NaNs wurden erzeugt
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000316 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1549
#> [LightGBM] [Info] Number of data points in the train set: 60472, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> Warning in log(dt_train_new$value): NaNs wurden erzeugt
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000346 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1550
#> [LightGBM] [Info] Number of data points in the train set: 60133, number of used features: 8
#> [LightGBM] [Info] Start training from score -0.000000
#> [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000315 seconds.
#> You can set `force_row_wise=true` to remove the overhead.
#> And if memory is not enough, you can set `force_col_wise=true`.
#> [LightGBM] [Info] Total Bins 1552
#> [LightGBM] [Info] Number of data points in the train set: 60494, number of used features: 8
#> [LightGBM] [Info] Start training from score 0.000000
# Per-row bias and a plot label of the form "<station name> (<station code>)"
results_all[, `:=`(
  bias = prediction - value,
  station_label = paste0(station_name, " (", station, ")")
)]
# Flatten `station` to a plain character column so it can be used for grouping
results_all[, station := as.character(unlist(station))]
# Centered rolling mean of the bias per station over 24 * window_size rows
results_all[,
  smoothed_bias := data.table::frollmean(
    x = bias,
    n = 24 * window_size,
    align = "center"
  ),
  by = "station"
]
# Line plot of the smoothed bias over time, one line per station, with a
# vertical marker at the start of the effect period.
bias_plot <- ggplot2::ggplot(
  results_all,
  ggplot2::aes(
    x = date,
    y = smoothed_bias,
    color = station_label,
    group = station_label
  )
) +
  ggplot2::geom_line() +
  ggplot2::labs(
    # paste0() instead of paste(): the default " " separator of paste()
    # rendered the title as "( 14 -day mean)"
    title = paste0(
      "NO2 Bias Over Time by Station (", window_size, "-day mean)"
    ),
    x = "Date",
    y = "NO2 Bias \n(prediction - observation) [µg/m³]",
    color = "Station"
  ) +
  ggplot2::geom_vline(
    xintercept = date_effect_start, linetype = 4,
    colour = "black"
  ) +
  ggplot2::theme_bw() +
  ggplot2::scale_x_datetime(
    date_minor_breaks = "1 month",
    date_breaks = "2 month"
  ) +
  ggplot2::theme(legend.position = "bottom")
bias_plot
#> Warning: Removed 1005 rows containing missing values or values outside the scale range
#> (`geom_line()`).
# Counterfactual plot for a single station, selected by its display name
station_name_plot <- "Köln"
counterfactual_plot <- plot_counterfactual(
  dplyr::filter(results_all, station_name == station_name_plot),
  params,
  window_size = window_size,
  # passed by name so the call does not depend on the order of the formals
  date_effect_start = date_effect_start,
  buffer = buffer
)
counterfactual_plot
```
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.