The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full-Service Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

tidymodels / parsnip Integration

ggmlR registers a "ggml" engine for parsnip::mlp(), giving you GPU-accelerated neural networks inside the tidymodels ecosystem — resampling, tuning, workflows, and recipes all work out of the box.

library(ggmlR)
library(parsnip)

1. Classification

spec <- mlp(
  hidden_units = c(64L, 32L),
  epochs       = 20L,
  dropout      = 0.1
) |>
  set_engine("ggml") |>
  set_mode("classification")

fit_obj <- fit(spec, Species ~ ., data = iris)
#> Note: dropping last 22 sample(s) (150 -> 128) because batch_size=32 must divide evenly. Training metrics are computed on 128 samples only.

# Class predictions
preds <- predict(fit_obj, new_data = iris)
head(preds)
#> # A tibble: 6 × 1
#>   .pred_class
#>   <fct>      
#> 1 setosa     
#> 2 setosa     
#> 3 setosa     
#> 4 setosa     
#> 5 setosa     
#> 6 setosa

# Probability predictions
probs <- predict(fit_obj, new_data = iris, type = "prob")
head(probs)
#> # A tibble: 6 × 3
#>   .pred_setosa .pred_versicolor .pred_virginica
#>          <dbl>            <dbl>           <dbl>
#> 1        0.960           0.0272          0.0129
#> 2        0.938           0.0441          0.0179
#> 3        0.946           0.0366          0.0174
#> 4        0.919           0.0588          0.0217
#> 5        0.960           0.0271          0.0130
#> 6        0.957           0.0320          0.0112

# Accuracy
cat(sprintf("Accuracy: %.4f\n", mean(preds$.pred_class == iris$Species)))
#> Accuracy: 0.6667

2. Regression

spec_reg <- mlp(
  hidden_units = c(64L, 32L),
  epochs       = 50L
) |>
  set_engine("ggml") |>
  set_mode("regression")

fit_reg <- fit(spec_reg, mpg ~ ., data = mtcars)

preds_reg <- predict(fit_reg, new_data = mtcars)
head(preds_reg)
#> # A tibble: 6 × 1
#>   .pred
#>   <dbl>
#> 1  19.2
#> 2  19.3
#> 3  17.1
#> 4  17.1
#> 5  23.2
#> 6  16.7

3. Engine parameters

The ggml engine maps standard parsnip arguments to ggmlR internals:

| parsnip argument | ggmlR internal  | Default      |
|------------------|-----------------|--------------|
| `hidden_units`   | `hidden_layers` | `c(128, 64)` |
| `epochs`         | `epochs`        | `10`         |
| `dropout`        | `dropout`       | `0.2`        |
| `activation`     | `activation`    | `"relu"`     |
| `learn_rate`     | `learn_rate`    | `0.001`      |

# Customize architecture
spec_custom <- mlp(
  hidden_units = c(128L, 64L, 32L),
  epochs       = 30L,
  dropout      = 0.3,
  activation   = "relu"
) |>
  set_engine("ggml") |>
  set_mode("classification")

4. Resampling with rsample

library(rsample)

folds <- vfold_cv(iris, v = 5L)

spec <- mlp(hidden_units = c(32L), epochs = 10L) |>
  set_engine("ggml") |>
  set_mode("classification")

library(tune)
library(yardstick)
library(workflows)

wf <- workflow() |>
  add_model(spec) |>
  add_formula(Species ~ .)

results <- fit_resamples(wf, resamples = folds)
collect_metrics(results)

5. Recipes for preprocessing

ggmlR accepts only numeric features. Use recipes to handle factors, missing values, and scaling:

library(recipes)
library(workflows)

rec <- recipe(Species ~ ., data = iris) |>
  step_normalize(all_numeric_predictors())

spec <- mlp(hidden_units = c(32L), epochs = 10L) |>
  set_engine("ggml") |>
  set_mode("classification")

wf <- workflow() |>
  add_recipe(rec) |>
  add_model(spec)

fit_obj <- fit(wf, data = iris)
predict(fit_obj, new_data = iris)

For datasets with factors:

rec <- recipe(Species ~ ., data = iris) |>
  step_dummy(all_nominal_predictors()) |>
  step_normalize(all_numeric_predictors())

6. Hyperparameter tuning

library(tune)
library(dials)
library(workflows)

spec <- mlp(
  hidden_units = tune(),
  epochs       = tune(),
  dropout      = tune()
) |>
  set_engine("ggml") |>
  set_mode("classification")

wf <- workflow() |>
  add_model(spec) |>
  add_formula(Species ~ .)

grid <- grid_regular(
  hidden_units(range = c(16L, 128L)),
  epochs(range = c(10L, 50L)),
  dropout(range = c(0, 0.4)),
  levels = 3L
)

folds <- vfold_cv(iris, v = 3L)
results <- tune_grid(wf, resamples = folds, grid = grid)
show_best(results, metric = "accuracy")

7. Comparison with other engines

library(workflows)
library(workflowsets)

specs <- workflow_set(
  preproc = list(basic = Species ~ .),
  models  = list(
    ggml  = mlp(hidden_units = c(32L), epochs = 20L) |> set_engine("ggml"),
    nnet  = mlp(hidden_units = 32L,    epochs = 200L) |> set_engine("nnet")
  )
) |>
  workflow_map("fit_resamples",
               resamples = vfold_cv(iris, v = 5L))

rank_results(specs, rank_metric = "accuracy")

Summary

| Feature                 | Supported           |
|-------------------------|---------------------|
| Classification          | Yes (class, prob)   |
| Regression              | Yes (numeric)       |
| GPU (Vulkan)            | Yes (auto-detected) |
| Recipes / preprocessing | Yes                 |
| Resampling              | Yes                 |
| Tuning                  | Yes                 |
| Workflows               | Yes                 |

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.