Goodness-of-fit tests and the bootstrap

The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

The three nested statistics

For a candidate family fitted by maximum likelihood, the package computes a Hotelling-type quadratic form in the discrepancy between the sample and fitted log-cumulants. Three nested choices of cumulant orders give three statistics:

\(T^2_{(2,3)}\): log-scale shape (dispersion, skewness);
\(T^2_{(1,2,3)}\): adds the first log-cumulant (entropy/scale);
\(T^2_{(1,\ldots,6)}\): adds higher-order cumulants for tail discrimination.

x <- reliability_datasets$Yarn
T2_all(x, "Weibull")
#> $T2_23
#> $T2_23$T2
#> [1] 3.205122
#> 
#> $T2_23$df
#> [1] 1
#> 
#> $T2_23$p_chisq
#> [1] 0.07340803
#> 
#> $T2_23$p_F
#> [1] 0.07646484
#> 
#> $T2_23$theta
#> [1]   1.605465 248.033500
#> 
#> $T2_23$d
#> [1] -0.04889879  0.16479398
#> 
#> $T2_23$Kd
#>           [,1]      [,2]
#> [1,] 0.0732021 0.2102838
#> [2,] 0.2102838 0.3415546
#> 
#> $T2_23$eigmin
#> [1] -0.04206613
#> 
#> $T2_23$conv
#> [1] TRUE
#> 
#> 
#> $T2_123
#> $T2_123$T2
#> [1] 4.984082
#> 
#> $T2_123$df
#> [1] 2
#> 
#> $T2_123$p_chisq
#> [1] 0.08274091
#> 
#> $T2_123$p_F
#> [1] 0.09010487
#> 
#> $T2_123$theta
#> [1]   1.605465 248.033500
#> 
#> $T2_123$d
#> [1]  0.009550301 -0.048898794  0.164793978
#> 
#> $T2_123$Kd
#>             [,1]       [,2]       [,3]
#> [1,] -0.02944059 0.04955572 -0.3173502
#> [2,]  0.04955572 0.07320210  0.2102838
#> [3,] -0.31735019 0.21028381  0.3415546
#> 
#> $T2_123$eigmin
#> [1] -0.2793186
#> 
#> $T2_123$conv
#> [1] TRUE
#> 
#> 
#> $T2_123456
#> $T2_123456$T2
#> [1] 13.61917
#> 
#> $T2_123456$df
#> [1] 4
#> 
#> $T2_123456$p_chisq
#> [1] 0.008615166
#> 
#> $T2_123456$p_F
#> [1] 0.01399289
#> 
#> $T2_123456$theta
#> [1]   1.605465 248.033500
#> 
#> $T2_123456$d
#> [1]  0.009550301 -0.048898794  0.164793978 -0.658861256  2.731493015
#> [6] -9.511474432
#> 
#> $T2_123456$Kd
#>             [,1]         [,2]        [,3]        [,4]       [,5]       [,6]
#> [1,] -0.02944059   0.04955572  -0.3173502    1.824940  -6.639026   20.13131
#> [2,]  0.04955572   0.07320210   0.2102838   -2.720260  12.809123  -45.83800
#> [3,] -0.31735019   0.21028381   0.3415546    2.090514 -15.927277   68.35576
#> [4,]  1.82494003  -2.72025951   2.0905143   -4.777864  24.301953 -117.77434
#> [5,] -6.63902601  12.80912265 -15.9272766   24.301953 -55.470330  231.31724
#> [6,] 20.13131432 -45.83800063  68.3557640 -117.774338 231.317244 -736.64961
#> 
#> $T2_123456$eigmin
#> [1] -834.2953
#> 
#> $T2_123456$conv
#> [1] TRUE
#> 
#> 
#> $fit
#> $fit$theta
#> [1]   1.605465 248.033500
#> 
#> $fit$Sigma
#>           [,1]        [,2]
#> [1,]  1.487107    62.69933
#> [2,] 62.699329 26511.04699
#> 
#> $fit$loglik
#> [1] -625.2069
#> 
#> $fit$conv
#> [1] TRUE

Why the bootstrap?

The discrepancy covariance \(\mathbf{K}_d\) is typically ill-conditioned: one eigenvalue is much smaller than the others and is estimated with large relative error. As a result the asymptotic chi-squared reference over-rejects, and the distortion does not vanish as the sample grows. The parametric bootstrap reproduces the ill-conditioning in each replicate, so it cancels in the bootstrap p-value.

T2_bootstrap(x, "Weibull", B = 299, seed = 1)
#> $p_boot
#>     T2_23    T2_123 T2_123456 
#> 0.4180602 0.2541806 0.3779264 
#> 
#> $T2_obs
#>     T2_23    T2_123 T2_123456 
#>  3.205122  4.984082 13.619165 
#> 
#> $B
#> [1] 299
#> 
#> $valid
#> [1] 299 299 299
#> 
#> $obs
#> $obs$T2_23
#> $obs$T2_23$T2
#> [1] 3.205122
#> 
#> $obs$T2_23$df
#> [1] 1
#> 
#> $obs$T2_23$p_chisq
#> [1] 0.07340803
#> 
#> $obs$T2_23$p_F
#> [1] 0.07646484
#> 
#> $obs$T2_23$theta
#> [1]   1.605465 248.033500
#> 
#> $obs$T2_23$d
#> [1] -0.04889879  0.16479398
#> 
#> $obs$T2_23$Kd
#>           [,1]      [,2]
#> [1,] 0.0732021 0.2102838
#> [2,] 0.2102838 0.3415546
#> 
#> $obs$T2_23$eigmin
#> [1] -0.04206613
#> 
#> $obs$T2_23$conv
#> [1] TRUE
#> 
#> 
#> $obs$T2_123
#> $obs$T2_123$T2
#> [1] 4.984082
#> 
#> $obs$T2_123$df
#> [1] 2
#> 
#> $obs$T2_123$p_chisq
#> [1] 0.08274091
#> 
#> $obs$T2_123$p_F
#> [1] 0.09010487
#> 
#> $obs$T2_123$theta
#> [1]   1.605465 248.033500
#> 
#> $obs$T2_123$d
#> [1]  0.009550301 -0.048898794  0.164793978
#> 
#> $obs$T2_123$Kd
#>             [,1]       [,2]       [,3]
#> [1,] -0.02944059 0.04955572 -0.3173502
#> [2,]  0.04955572 0.07320210  0.2102838
#> [3,] -0.31735019 0.21028381  0.3415546
#> 
#> $obs$T2_123$eigmin
#> [1] -0.2793186
#> 
#> $obs$T2_123$conv
#> [1] TRUE
#> 
#> 
#> $obs$T2_123456
#> $obs$T2_123456$T2
#> [1] 13.61917
#> 
#> $obs$T2_123456$df
#> [1] 4
#> 
#> $obs$T2_123456$p_chisq
#> [1] 0.008615166
#> 
#> $obs$T2_123456$p_F
#> [1] 0.01399289
#> 
#> $obs$T2_123456$theta
#> [1]   1.605465 248.033500
#> 
#> $obs$T2_123456$d
#> [1]  0.009550301 -0.048898794  0.164793978 -0.658861256  2.731493015
#> [6] -9.511474432
#> 
#> $obs$T2_123456$Kd
#>             [,1]         [,2]        [,3]        [,4]       [,5]       [,6]
#> [1,] -0.02944059   0.04955572  -0.3173502    1.824940  -6.639026   20.13131
#> [2,]  0.04955572   0.07320210   0.2102838   -2.720260  12.809123  -45.83800
#> [3,] -0.31735019   0.21028381   0.3415546    2.090514 -15.927277   68.35576
#> [4,]  1.82494003  -2.72025951   2.0905143   -4.777864  24.301953 -117.77434
#> [5,] -6.63902601  12.80912265 -15.9272766   24.301953 -55.470330  231.31724
#> [6,] 20.13131432 -45.83800063  68.3557640 -117.774338 231.317244 -736.64961
#> 
#> $obs$T2_123456$eigmin
#> [1] -834.2953
#> 
#> $obs$T2_123456$conv
#> [1] TRUE
#> 
#> 
#> $obs$fit
#> $obs$fit$theta
#> [1]   1.605465 248.033500
#> 
#> $obs$fit$Sigma
#>           [,1]        [,2]
#> [1,]  1.487107    62.69933
#> [2,] 62.699329 26511.04699
#> 
#> $obs$fit$loglik
#> [1] -625.2069
#> 
#> $obs$fit$conv
#> [1] TRUE

In practice, prefer the bootstrap p-values for all sample sizes.

Full model comparison

gof_compare_all() runs the three \(T^2\) tests, the Anderson–Darling and Cramer–von Mises tests, and the AIC across all six families:

gof_compare_all(x, use_bootstrap = TRUE, B = 199, seed = 1)
#>              Dist    T2_23        p_23    T2_123        p_123     T2_full
#> stat      Weibull 3.205122 0.073408033  4.984082 8.274091e-02   13.619165
#> stat1     Frechet 9.283366 0.002312441  7.596404 5.848483e-03 4590.357543
#> stat2       Gamma 1.129038 0.568633637  1.103158 5.760397e-01    6.820438
#> stat3    InvGamma 8.310963 0.003940649  7.003544 8.134853e-03  913.428776
#> stat4   LogNormal 5.455520 0.019506603 29.914648 3.192395e-07   92.386879
#> stat5 LogLogistic 5.963457 0.014605378  8.832689 1.207830e-02   85.779331
#>              p_full        AD        AD_p       CvM       CvM_p      AIC
#> stat   8.615166e-03 0.5410004 0.705401889 0.0947568 0.611194678 1254.414
#> stat1  0.000000e+00 5.5787380 0.001522060 0.9905471 0.002589291 1308.897
#> stat2  1.456870e-01 0.6813406 0.574619957 0.1259204 0.472054154 1254.526
#> stat3 2.051427e-196 5.2662380 0.002139914 1.0097728 0.002334086 1301.964
#> stat4  4.095678e-19 2.0232404 0.089180840 0.3729128 0.085283955 1267.620
#> stat5  1.036615e-17 1.3077253 0.229857896 0.1636898 0.350430282 1263.243
#>            pb_23     pb_123     pb_full
#> stat  0.42713568 0.26633166 0.346733668
#> stat1 0.15577889 0.24120603 0.000000000
#> stat2 0.51758794 0.47738693 0.356783920
#> stat3 0.04020101 0.05025126 0.000000000
#> stat4 0.02512563 0.00000000 0.010050251
#> stat5 0.02010050 0.04522613 0.005025126

The \(T^2\) tests and the EDF tests are complementary: the former are most sensitive to subtle log-shape departures, the latter to tail departures.

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.