The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

Exploring The Variables Importance

Gabriele Pittarello

2024-11-14

Introduction

Machine learning models can capture interactions between covariates. They are often black boxes, but they can be interpreted with SHAP values. In this vignette we generate two data sets, one from scenario Alpha and one from scenario Delta, and use them to illustrate the plotting functionalities of the ReSurv package.

# Input data scenario Alpha (scenario = 0)

input_data_0 <- data_generator(
  scenario = 0,
  random_seed = 1,
  years = 4,
  yearly_exposure = 200,
  # NOTE(review): 1 / 360 presumably means one day on a 360-day year,
  # in line with input_time_granularity = "days" below -- confirm.
  time_unit = 1 / 360
)

# Wrap the simulated data with IndividualDataPP, declaring which columns
# hold the accident period, the calendar period and the model features.
individual_data_0 <- IndividualDataPP(
  data = input_data_0,
  id = NULL,
  accident_period = "AP",
  calendar_period = "RP",
  continuous_features = "AP",
  categorical_features = "claim_type",
  input_time_granularity = "days",
  output_time_granularity = "quarters",
  years = 4
)
# Input data scenario Delta (scenario = 3)

input_data3 <- data_generator(
  scenario = 3,
  random_seed = 1,
  years = 4,
  yearly_exposure = 200,
  # NOTE(review): 1 / 360 presumably means one day on a 360-day year,
  # in line with input_time_granularity = "days" below -- confirm.
  time_unit = 1 / 360
)

# Wrap the simulated data with IndividualDataPP, declaring which columns
# hold the accident period, the calendar period and the model features.
individual_data_3 <- IndividualDataPP(
  data = input_data3,
  id = NULL,
  accident_period = "AP",
  calendar_period = "RP",
  continuous_features = "AP",
  categorical_features = "claim_type",
  input_time_granularity = "days",
  output_time_granularity = "quarters",
  years = 4
)

Here we fit a neural network (NN) and an XGBoost (XGB) model to each data set. To keep this vignette simple, we provide the optimal hyperparameters in advance.

# Pre-tuned XGB hyperparameters for scenario Alpha.
hp_scenario_alpha_xgb <- list(
  # Booster-level settings, kept together in the `params` sub-list.
  params = list(
    booster = "gbtree",
    eta = 0.9887265,
    subsample = 0.7924135,
    alpha = 10.85342,
    lambda = 6.213317,
    min_child_weight = 3.042204,
    max_depth = 1
  ),
  # Training-run settings (rounds, logging, early stopping).
  print_every_n = 0,
  nrounds = 3000,
  verbose = FALSE,
  early_stopping_rounds = 500
)

# Pre-tuned NN hyperparameters for scenario Alpha.
# Each name appears exactly once: the list previously carried `batch_size`
# twice (same value), which left a duplicated name in the list and made
# `$batch_size` silently resolve to the first entry only.
hp_scenario_alpha_nn <- list(
  # Training loop
  batch_size = 5000L,
  epochs = 5500L,
  num_workers = 0,
  tie = "Efron",
  # Network architecture
  num_layers = 2,
  num_nodes = 10,
  # Optimiser settings
  optim = "SGD",
  lr = 0.3023043,
  xi = 0.426443,
  eps = 0,
  activation = "SELU",
  # Early stopping and logging
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  network_structure = NULL
)

# Pre-tuned XGB hyperparameters for scenario Delta.
hp_scenario_delta_xgb <- list(
  # Booster-level settings, kept together in the `params` sub-list.
  params = list(
    booster = "gbtree",
    eta = 0.2717736,
    subsample = 0.9043068,
    alpha = 7.789214,
    lambda = 12.09398,
    min_child_weight = 22.4837,
    max_depth = 4
  ),
  # Training-run settings: these are elements of the outer list,
  # not of `params`.
  print_every_n = 0,
  nrounds = 3000,
  verbose = FALSE,
  early_stopping_rounds = 500
)

# Pre-tuned NN hyperparameters for scenario Delta.
# Each name appears exactly once: the list previously carried `batch_size`
# twice (same value), which left a duplicated name in the list and made
# `$batch_size` silently resolve to the first entry only.
hp_scenario_delta_nn <- list(
  # Training loop
  batch_size = 5000L,
  epochs = 5500L,
  num_workers = 0,
  tie = "Efron",
  # Network architecture
  num_layers = 2,
  num_nodes = 2,
  # Optimiser settings
  optim = "Adam",
  lr = 0.3542422,
  xi = 0.1803953,
  eps = 0,
  activation = "LeakyReLU",
  # Early stopping and logging
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  network_structure = NULL
)
# Fit one XGB and one NN hazard model per scenario, each with its own
# pre-tuned hyperparameter list. Suffix _A = scenario Alpha data
# (individual_data_0), suffix _D = scenario Delta data (individual_data_3).

# Scenario Alpha
resurv_model_xgb_A <- ReSurv(
  individual_data_0,
  hazard_model = "XGB",
  hparameters = hp_scenario_alpha_xgb
)

resurv_model_nn_A <- ReSurv(
  individual_data_0,
  hazard_model = "NN",
  hparameters = hp_scenario_alpha_nn
)

# Scenario Delta
resurv_model_xgb_D <- ReSurv(
  individual_data_3,
  hazard_model = "XGB",
  hparameters = hp_scenario_delta_xgb
)

resurv_model_nn_D <- ReSurv(
  individual_data_3,
  hazard_model = "NN",
  hparameters = hp_scenario_delta_nn
)

SHAP values (XGB)

# Plot the fitted XGB models: scenario Alpha first, then scenario Delta.
# Kept as two separate top-level calls so each result is displayed on its own.
plot(resurv_model_xgb_A)
plot(resurv_model_xgb_D)

SHAP values (NN)

# Plot the fitted NN models: scenario Alpha first, then scenario Delta.
# Both calls pass the same nsamples value to the plot method.
plot(resurv_model_nn_A, nsamples = 10000)
plot(resurv_model_nn_D, nsamples = 10000)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.