The modelStudio() function uses DALEX explainers created with DALEX::explain() or DALEXtra::explain_*().
# packages for the explainer objects
install.packages("DALEX")
install.packages("DALEXtra")
In this example, we make a studio for the ranger model on the apartments data.

# load packages and data
library(mlr)
library(DALEXtra)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
task <- makeRegrTask(id = "apartments", data = train, target = "m2.price")
learner <- makeLearner("regr.ranger", predict.type = "response")
model <- train(learner, task)

# create an explainer for the model
explainer <- explain_mlr(model,
                         data = test,
                         y = test$m2.price,
                         label = "mlr")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
In this example, we make a studio for the ranger model on the titanic data.

# load packages and data
library(mlr3)
library(mlr3learners)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# mlr3 TaskClassif takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
task <- TaskClassif$new(id = "titanic", backend = train, target = "survived")
learner <- lrn("classif.ranger", predict_type = "prob")
learner$train(task)

# create an explainer for the model
explainer <- explain_mlr3(learner,
                          data = test,
                          y = test$survived,
                          label = "mlr3")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
In this example, we make a studio for the xgboost model on the titanic data.

# load packages and data
library(xgboost)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

train_matrix <- model.matrix(survived ~ . - 1, train)
test_matrix <- model.matrix(survived ~ . - 1, test)

# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(max_depth = 3, objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 500)

# create an explainer for the model
explainer <- explain(model,
                     data = test_matrix,
                     y = test$survived,
                     type = "classification",
                     label = "xgboost")

# pick observations
new_observation <- test_matrix[1:2, , drop=FALSE]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
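Note that xgboost operates on numeric matrices, which is why model.matrix() encodes the data here; the data passed to the explainer and the new_observation must stay in the same matrix format the model was trained on.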
In this example, we make a studio for the gbm model on the titanic data.

# load packages and data
library(caret)
library(DALEX)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# caret train takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
cv <- trainControl(method = "repeatedcv", number = 3, repeats = 3)
model <- train(survived ~ ., data = train, method = "gbm", trControl = cv, verbose = FALSE)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$survived,
                     label = "caret")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
In this example, we make a studio for the h2o.automl model on the titanic data.

# load packages and data
library(h2o)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# init h2o
h2o.init()
h2o.no_progress()

# split the data
h2o_split <- h2o.splitFrame(as.h2o(data))
train <- h2o_split[[1]]
test <- as.data.frame(h2o_split[[2]])

# h2o automl takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
automl <- h2o.automl(y = "survived", training_frame = train, max_runtime_secs = 30)
model <- automl@leader

# create an explainer for the model
explainer <- explain_h2o(model,
                         data = test,
                         y = test$survived,
                         label = "h2o")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation,
            B = 5)
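# note: B sets the number of permutation rounds used to compute SHAP and feature importance; a smaller B trades precision for speed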
# shutdown h2o
h2o.shutdown(prompt = FALSE)
In this example, we make a studio for the ranger model on the apartments data.

# load packages and data
library(parsnip)
library(DALEX)
library(modelStudio)

data <- DALEX::apartments

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# fit a model
model <- rand_forest() %>%
  set_engine("ranger", importance = "impurity") %>%
  set_mode("regression") %>%
  fit(m2.price ~ ., data = train)

# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$m2.price,
                     label = "parsnip")

# make a studio for the model
modelStudio(explainer)
In this example, we make a studio for the ranger model on the titanic data.

# load packages and data
library(tidymodels)
library(DALEXtra)
library(modelStudio)

data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]

# tidymodels fit takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
rec <- recipe(survived ~ ., data = train) %>%
  step_normalize(fare)
clf <- rand_forest(mtry = 2) %>%
  set_engine("ranger") %>%
  set_mode("classification")
wflow <- workflow() %>%
  add_recipe(rec) %>%
  add_model(clf)
model <- wflow %>% fit(data = train)

# create an explainer for the model
explainer <- explain_tidymodels(model,
                                data = test,
                                y = test$survived,
                                label = "tidymodels")

# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
modelStudio(explainer, new_observation)
The modelStudio() function uses dalex explainers created with dalex.Explainer().
# package for the Explainer object
pip install dalex -U
Use the pickle Python module and the reticulate R package to easily make a studio for a model.
# package for pickle load
install.packages("reticulate")
In this example, we make a studio for the Pipeline SVR model on the fifa data.
First, use dalex in Python:
# load packages and data
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from numpy import log

data = dx.datasets.load_fifa()
X = data.drop(columns=['overall', 'potential', 'value_eur', 'wage_eur', 'nationality'], axis=1)
y = log(data.value_eur)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
model = Pipeline([('scale', StandardScaler()), ('svm', SVR())])
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')

# pack the explainer into a pickle file
explainer.dump(open('explainer_scikitlearn.pickle', 'wb'))
Then, use modelStudio in R:

# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer, B = 5)
In this example, we make a studio for the Pipeline LGBMClassifier model on the titanic data.
First, use dalex in Python:
# load packages and data
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from lightgbm import LGBMClassifier

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

classifier = LGBMClassifier(n_estimators=300)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='lightgbm')

# pack the explainer into a pickle file
explainer.dump(open('explainer_lightgbm.pickle', 'wb'))
Then, use modelStudio in R:

# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_lightgbm.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer)
In this example, we make a studio for the Pipeline KerasClassifier model on the titanic data.
First, use dalex in Python:
# load packages and data
import dalex as dx

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from keras.models import Sequential

data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

def create_architecture():
    model = Sequential()
    # there are 17 inputs after the pipeline
    model.add(Dense(60, input_dim=17, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

classifier = KerasClassifier(build_fn=create_architecture,
                             epochs=100, batch_size=32, verbose=False)

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='keras')

# pack the explainer into a pickle file
explainer.dump(open('explainer_keras.pickle', 'wb'))
Then, use modelStudio in R:
# load the explainer from the pickle file
library(reticulate)
#! add blank create_architecture function before load !
py_run_string('
def create_architecture():
    return True
')

explainer <- py_load_object("explainer_keras.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer)
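The blank create_architecture() stub is needed because pickle stores only a reference to the build function by name; a function with that name must exist in the Python session before the explainer can be loaded.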