## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
set.seed(1234)

## ----load---------------------------------------------------------------------
library(blockwise)
data(adult)

# Drop `native.country`: it has ~41 levels with a long tail of rare
# countries, which can leave a per-block logistic-regression model
# without an example of a level that later appears at predict time.
# Tree-based learners (`learner_rpart`, `learner_gbm`) tolerate this;
# `learner_glm_binomial` does not. Either drop the column or coarsen
# its levels before fitting.
adult <- adult[, setdiff(names(adult), "native.country")]

str(adult, list.len = 20)
table(adult$salary)

## ----mask---------------------------------------------------------------------
bike_style_groups <- list(
  c("age", "workclass", "education"),
  c("marital.status", "occupation", "relationship")
)

adult_miss <- simulate_blockwise_missing(
  adult,
  blocks       = bike_style_groups,
  prop_missing = 0.30,
  noise        = 0.02
)
round(colMeans(is.na(adult_miss)) * 100, 1)

## ----split--------------------------------------------------------------------
set.seed(1234)
idx <- sample(nrow(adult_miss), floor(0.75 * nrow(adult_miss)))
train <- adult_miss[idx, ]
test  <- adult_miss[-idx, ]

X_train <- train[, setdiff(names(train), "salary")]
y_train <- train$salary
X_test  <- test[,  setdiff(names(test),  "salary")]
y_test  <- test$salary

## ----fit----------------------------------------------------------------------
set.seed(1234)
fit <- brm(X_train, y_train,
           learner = learner_glm_binomial())
fit

## ----eval---------------------------------------------------------------------
prob <- predict(fit, X_test)
pred_class <- as.integer(prob >= 0.5)

acc <- mean(pred_class == y_test)
cat("Accuracy:", round(acc, 3), "\n")

# Confusion matrix
table(truth = y_test, predicted = pred_class)

