# nolint start
library(mlexperiments)
library(mllrnrs)
The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
# nolint start
library(mlexperiments)
library(mllrnrs)
See https://github.com/kapsner/mllrnrs/blob/main/R/learner_ranger.R for implementation details.
library(mlbench)
data("BostonHousing")
<- BostonHousing |>
dataset ::as.data.table() |>
data.tablena.omit()
<- colnames(dataset)[1:13]
feature_cols <- "medv"
target_col <- "chas" cat_vars
<- 123
seed if (isTRUE(as.logical(Sys.getenv("_R_CHECK_LIMIT_CORES_")))) {
# on cran
<- 2L
ncores else {
} <- ifelse(
ncores test = parallel::detectCores() > 4,
yes = 4L,
no = ifelse(
test = parallel::detectCores() < 2L,
yes = 1L,
no = parallel::detectCores()
)
)
}options("mlexperiments.bayesian.max_init" = 10L)
<- splitTools::partition(
data_split y = dataset[, get(target_col)],
p = c(train = 0.7, test = 0.3),
type = "stratified",
seed = seed
)
<- data.matrix(
train_x $train, .SD, .SDcols = feature_cols]
dataset[data_split
)<- log(dataset[data_split$train, get(target_col)])
train_y
<- data.matrix(
test_x $test, .SD, .SDcols = feature_cols]
dataset[data_split
)<- log(dataset[data_split$test, get(target_col)]) test_y
<- splitTools::create_folds(
fold_list y = train_y,
k = 3,
type = "stratified",
seed = seed
)
# required learner arguments, not optimized
<- NULL
learner_args
# set arguments for predict function and performance metric,
# required for mlexperiments::MLCrossValidation and
# mlexperiments::MLNestedCV
<- NULL
predict_args <- metric("rmsle")
performance_metric <- NULL
performance_metric_args <- FALSE
return_models
# required for grid search and initialization of bayesian optimization
<- expand.grid(
parameter_grid num.trees = seq(500, 1000, 500),
mtry = seq(2, 6, 2),
min.node.size = seq(1, 9, 4),
max.depth = seq(1, 9, 4),
sample.fraction = seq(0.5, 0.8, 0.3)
)# reduce to a maximum of 10 rows
if (nrow(parameter_grid) > 10) {
set.seed(123)
<- sample(seq_len(nrow(parameter_grid)), 10, FALSE)
sample_rows <- kdry::mlh_subset(parameter_grid, sample_rows)
parameter_grid
}
# required for bayesian optimization
<- list(
parameter_bounds num.trees = c(100L, 1000L),
mtry = c(2L, 9L),
min.node.size = c(1L, 20L),
max.depth = c(1L, 40L),
sample.fraction = c(0.3, 1.)
)<- list(
optim_args iters.n = ncores,
kappa = 3.5,
acq = "ucb"
)
<- mlexperiments::MLTuneParameters$new(
tuner learner = mllrnrs::LearnerRanger$new(),
strategy = "grid",
ncores = ncores,
seed = seed
)
$parameter_grid <- parameter_grid
tuner$learner_args <- learner_args
tuner$split_type <- "stratified"
tuner
$set_data(
tunerx = train_x,
y = train_y,
cat_vars = cat_vars
)
<- tuner$execute(k = 3)
tuner_results_grid #>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=====================================>----------------------------------------------------------] 4/10 ( 40%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [===============================================>------------------------------------------------] 5/10 ( 50%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=========================================================>--------------------------------------] 6/10 ( 60%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [==================================================================>-----------------------------] 7/10 ( 70%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [============================================================================>-------------------] 8/10 ( 80%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=====================================================================================>----------] 9/10 ( 90%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [===============================================================================================] 10/10 (100%)
#> Regression: using 'mean squared error' as optimization metric.
head(tuner_results_grid)
#> setting_id metric_optim_mean num.trees mtry min.node.size max.depth sample.fraction
#> 1: 1 0.04406585 500 2 9 5 0.5
#> 2: 2 0.03987001 500 2 5 5 0.8
#> 3: 3 0.03405954 500 4 9 9 0.5
#> 4: 4 0.09531892 1000 2 9 1 0.5
#> 5: 5 0.09497929 500 2 9 1 0.8
#> 6: 6 0.03046036 1000 6 1 9 0.5
<- mlexperiments::MLTuneParameters$new(
tuner learner = mllrnrs::LearnerRanger$new(),
strategy = "bayesian",
ncores = ncores,
seed = seed
)
$parameter_grid <- parameter_grid
tuner$parameter_bounds <- parameter_bounds
tuner
$learner_args <- learner_args
tuner$optim_args <- optim_args
tuner
$split_type <- "stratified"
tuner
$set_data(
tunerx = train_x,
y = train_y,
cat_vars = cat_vars
)
<- tuner$execute(k = 3)
tuner_results_bayesian #>
#> Registering parallel backend using 4 cores.
head(tuner_results_bayesian)
#> Epoch setting_id num.trees mtry min.node.size max.depth sample.fraction gpUtility acqOptimum inBounds Elapsed Score
#> 1: 0 1 500 2 9 5 0.5 NA FALSE TRUE 1.013 -0.04356188
#> 2: 0 2 500 2 5 5 0.8 NA FALSE TRUE 1.035 -0.03848441
#> 3: 0 3 500 4 9 9 0.5 NA FALSE TRUE 1.064 -0.03375279
#> 4: 0 4 1000 2 9 1 0.5 NA FALSE TRUE 0.994 -0.09582667
#> 5: 0 5 500 2 9 1 0.8 NA FALSE TRUE 0.070 -0.09470805
#> 6: 0 6 1000 6 1 9 0.5 NA FALSE TRUE 0.690 -0.03014795
#> metric_optim_mean errorMessage
#> 1: 0.04356188 NA
#> 2: 0.03848441 NA
#> 3: 0.03375279 NA
#> 4: 0.09582667 NA
#> 5: 0.09470805 NA
#> 6: 0.03014795 NA
<- mlexperiments::MLCrossValidation$new(
validator learner = mllrnrs::LearnerRanger$new(),
fold_list = fold_list,
ncores = ncores,
seed = seed
)
$learner_args <- tuner$results$best.setting[-1]
validator
$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- return_models
validator
$set_data(
validatorx = train_x,
y = train_y,
cat_vars = cat_vars
)
<- validator$execute()
validator_results #>
#> CV fold: Fold1
#>
#> CV fold: Fold2
#>
#> CV fold: Fold3
head(validator_results)
#> fold performance num.trees mtry min.node.size max.depth sample.fraction
#> 1: Fold1 0.04028795 100 9 1 9 1
#> 2: Fold2 0.05592193 100 9 1 9 1
#> 3: Fold3 0.04012856 100 9 1 9 1
<- mlexperiments::MLNestedCV$new(
validator learner = mllrnrs::LearnerRanger$new(),
strategy = "grid",
fold_list = fold_list,
k_tuning = 3L,
ncores = ncores,
seed = seed
)
$parameter_grid <- parameter_grid
validator$learner_args <- learner_args
validator$split_type <- "stratified"
validator
$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- return_models
validator
$set_data(
validatorx = train_x,
y = train_y,
cat_vars = cat_vars
)
<- validator$execute()
validator_results #>
#> CV fold: Fold1
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [===============================================>------------------------------------------------] 5/10 ( 50%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=========================================================>--------------------------------------] 6/10 ( 60%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [==================================================================>-----------------------------] 7/10 ( 70%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [============================================================================>-------------------] 8/10 ( 80%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=====================================================================================>----------] 9/10 ( 90%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [===============================================================================================] 10/10 (100%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> CV fold: Fold2
#> CV progress [====================================================================>-----------------------------------] 2/3 ( 67%)
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [===============================================>------------------------------------------------] 5/10 ( 50%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=========================================================>--------------------------------------] 6/10 ( 60%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [==================================================================>-----------------------------] 7/10 ( 70%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [============================================================================>-------------------] 8/10 ( 80%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=====================================================================================>----------] 9/10 ( 90%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [===============================================================================================] 10/10 (100%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> CV fold: Fold3
#> CV progress [========================================================================================================] 3/3 (100%)
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=====================================>----------------------------------------------------------] 4/10 ( 40%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [===============================================>------------------------------------------------] 5/10 ( 50%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=========================================================>--------------------------------------] 6/10 ( 60%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [==================================================================>-----------------------------] 7/10 ( 70%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [============================================================================>-------------------] 8/10 ( 80%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [=====================================================================================>----------] 9/10 ( 90%)
#> Regression: using 'mean squared error' as optimization metric.
#>
#> Parameter settings [===============================================================================================] 10/10 (100%)
#> Regression: using 'mean squared error' as optimization metric.
head(validator_results)
#> fold performance num.trees mtry min.node.size max.depth sample.fraction
#> 1: Fold1 0.0444887 1000 6 1 9 0.5
#> 2: Fold2 0.0481817 500 4 9 9 0.8
#> 3: Fold3 0.0442502 1000 6 1 9 0.5
<- mlexperiments::MLNestedCV$new(
validator learner = mllrnrs::LearnerRanger$new(),
strategy = "bayesian",
fold_list = fold_list,
k_tuning = 3L,
ncores = ncores,
seed = 312
)
$parameter_grid <- parameter_grid
validator$learner_args <- learner_args
validator$split_type <- "stratified"
validator
$parameter_bounds <- parameter_bounds
validator$optim_args <- optim_args
validator
$predict_args <- predict_args
validator$performance_metric <- performance_metric
validator$performance_metric_args <- performance_metric_args
validator$return_models <- TRUE
validator
$set_data(
validatorx = train_x,
y = train_y,
cat_vars = cat_vars
)
<- validator$execute()
validator_results #>
#> CV fold: Fold1
#>
#> Registering parallel backend using 4 cores.
#>
#> CV fold: Fold2
#> CV progress [====================================================================>-----------------------------------] 2/3 ( 67%)
#>
#> Registering parallel backend using 4 cores.
#>
#> CV fold: Fold3
#> CV progress [========================================================================================================] 3/3 (100%)
#>
#> Registering parallel backend using 4 cores.
head(validator_results)
#> fold performance num.trees mtry min.node.size max.depth sample.fraction
#> 1: Fold1 0.04142935 640 7 1 9 0.7460504
#> 2: Fold2 0.05358418 100 9 1 9 1.0000000
#> 3: Fold3 0.04264248 367 4 5 9 0.8388297
<- mlexperiments::predictions(
preds_ranger object = validator,
newdata = test_x
)
<- mlexperiments::performance(
perf_ranger object = validator,
prediction_results = preds_ranger,
y_ground_truth = test_y,
type = "regression"
)
perf_ranger#> model performance mse msle mae mape rmse rmsle rsq sse
#> 1: Fold1 0.04145400 0.02627203 0.001718434 0.1125978 0.03799847 0.1620865 0.04145400 0.8291229 4.072165
#> 2: Fold2 0.04849306 0.03319570 0.002351577 0.1270962 0.04379366 0.1821969 0.04849306 0.7840903 5.145334
#> 3: Fold3 0.03827309 0.02222906 0.001464829 0.1067541 0.03631993 0.1490941 0.03827309 0.8554189 3.445504
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.