To begin, load the package.
library(smoothic)
#> Loading required package: MASS
#> Loading required package: numDeriv
# For data manipulation and plotting if required
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:MASS':
#>
#> select
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
Boston Housing Data
Perform automatic variable selection using a smooth information criterion.
fit <- smoothic(
formula = lcmedv ~ .,
data = bostonhouseprice2,
family = "sgnd", # Smooth Generalized Normal Distribution
model = "mpr" # model location and scale
)
Display the estimates and standard errors.
summary(fit)
#> Call:
#> smoothic(formula = lcmedv ~ ., data = bostonhouseprice2, family = "sgnd",
#> model = "mpr")
#> Family:
#> [1] "sgnd"
#> Model:
#> [1] "mpr"
#>
#> Coefficients:
#> Estimate SEE Z Pvalue
#> intercept_0_beta 3.66846106 0.14300775 25.6522 < 2.2e-16 ***
#> crim_1_beta -0.01580316 0.00238199 -6.6344 7.903e-09 ***
#> zn_2_beta 0 0 0 0
#> indus_3_beta 0 0 0 0
#> rm_4_beta 0.23056343 0.02153543 10.7062 < 2.2e-16 ***
#> age_5_beta -0.00110288 0.00050773 -2.1722 0.0195869 *
#> rad_6_beta 0.00809799 0.00211141 3.8353 0.0001971 ***
#> ptratio_7_beta -0.02495948 0.00329515 -7.5746 1.482e-10 ***
#> lnox_8_beta -0.29896928 0.13694515 -2.1831 0.0190779 *
#> ldis_9_beta -0.16497074 0.02977220 -5.5411 5.667e-07 ***
#> ltax_10_beta -0.19372179 0.01704358 -11.3663 < 2.2e-16 ***
#> llstat_11_beta -0.17098692 0.02250256 -7.5986 1.335e-10 ***
#> chast_12_beta 0.05397796 0.02081743 2.5929 0.0068622 **
#> intercept_0_alpha -8.30433637 1.98647604 -4.1804 6.579e-05 ***
#> crim_1_alpha 0 0 0 0
#> zn_2_alpha 0 0 0 0
#> indus_3_alpha 0 0 0 0
#> rm_4_alpha 0 0 0 0
#> age_5_alpha 0 0 0 0
#> rad_6_alpha 0.05458394 0.01528126 3.5720 0.0004410 ***
#> ptratio_7_alpha 0 0 0 0
#> lnox_8_alpha 0 0 0 0
#> ldis_9_alpha -0.84897004 0.18717010 -4.5358 2.023e-05 ***
#> ltax_10_alpha 0.85582818 0.33280742 2.5715 0.0072450 **
#> llstat_11_alpha 0 0 0 0
#> chast_12_alpha 0 0 0 0
#> nu_0 0.27682140 0.11205396 2.4704 0.0094034 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> Penalized Likelihood:
#> [1] 228.3027
fit$kappa # shape estimate
#> [1] 1.518931
Plot the standardized coefficient values with respect to the epsilon-telescope.
telescope_df <- fit$telescope_df # dataframe with standardized coefficient values for each epsilon in the telescope
# Variable names (without the response & intercept terms)
p <- ncol(bostonhouseprice2) - 1
names_coef <- names(coef(fit))
names_coef <- names_coef[!(names_coef %in% c(
"intercept_0_beta",
"intercept_0_alpha",
"nu_0"
))]
# Tidy dataframe for plotting
plot_df <- telescope_df %>%
select(
epsilon,
contains(c("beta", "alpha")),
-c("beta_0", "alpha_0")
) %>%
rename_all(~ c("epsilon", names_coef)) %>% # rename columns
pivot_longer(-epsilon) %>%
mutate(type = case_when( # extract whether variable is location or scale
grepl("_beta", name) ~ "location",
grepl("alpha", name) ~ "scale"
)) %>%
mutate(coef = sub("_.*", "", name)) # extract variable name
# Plot
plot_df %>%
ggplot(aes(
x = epsilon,
y = value,
colour = coef
)) +
facet_wrap(~type) +
geom_line() +
labs(y = "Standardized Coefficient Value") +
theme_bw()