IRT without the normality assumption

library(IRTest)
#> Thank you for using IRTest!
#> Please cite the package as:
#> Li, S. (2022). IRTest: Parameter estimation of item response theory with estimation of latent distribution (Version 1.0.0). R package.
#> 
#>  URL: https://CRAN.R-project.org/package=IRTest
library(ggplot2)




1. Dichotomous items

The function DataGeneration can be used for the pre-analysis step. This function returns a set of artificial data and some useful objects for analysis (e.g., theta, data_D, item_D, & initialitem_D).

In the parameter estimation process, initialitem_D can be used as an input to the function IRTest_Dich (i.e., initialitem = initialitem_D). data_D is an artificial item response data set that can be used for analyses such as computer simulation techniques, but it is unnecessary if user-imported item response data are used. theta and item_D are not used in the estimation process, but they can serve as the true parameters if the artificial data (data_D) are used for an analysis.

# Simulate an artificial data set: 500 examinees, 10 dichotomous items and no
# polytomous items. model_D assigns model 1 to items 1-5 and model 2 to
# items 6-10. The latent distribution is "2NM", shaped by d, sd_ratio and
# prob (presumably a two-component normal mixture -- confirm in the
# DataGeneration documentation).
Alldata <- DataGeneration(
  seed = 123456789,
  N = 500,
  nitem_D = 10,
  nitem_P = 0,
  model_D = rep(1:2, each = 5),
  latent_dist = "2NM",
  d = 1.664,
  sd_ratio = 2,
  prob = 0.3
)

# Unpack the pieces used below: true abilities, true item parameters,
# the simulated responses, and starting values for estimation.
theta <- Alldata$theta
item <- Alldata$item_D
data <- Alldata$data_D
initialitem <- Alldata$initialitem_D

If the artificial data (data_D) is used, the true latent distribution looks like:




# Fit the dichotomous items. Only one call in this block is active; the
# others are commented-out alternatives that differ in how the latent
# distribution is handled (latent_dist). Uncomment one to replace Mod1.

# ---- Empirical histogram method (active) ----
Mod1 <- IRTest_Dich(
  initialitem = initialitem,
  data = data,
  model = rep(1:2, each = 5),
  latent_dist = "EHM",
  max_iter = 200,
  threshold = 0.0001
)

# ---- Kernel density estimation method ----
# Mod1 <- IRTest_Dich(
#   initialitem = initialitem,
#   data = data,
#   model = rep(1:2, each = 5),
#   latent_dist = "KDE",
#   bandwidth = "SJ-ste",
#   max_iter = 200,
#   threshold = 0.001
# )

# ---- Normality assumption ----
# Mod1 <- IRTest_Dich(
#   initialitem = initialitem,
#   data = data,
#   model = rep(1:2, each = 5),
#   latent_dist = "Normal",
#   max_iter = 200,
#   threshold = 0.0001
# )

# ---- Two-component Gaussian mixture distribution ----
# Mod1 <- IRTest_Dich(
#   initialitem = initialitem,
#   data = data,
#   model = rep(1:2, each = 5),
#   latent_dist = "Mixture",
#   max_iter = 200,
#   threshold = 0.0001
# )

# ---- Davidian curve (for an arbitrarily chosen case of h = 4) ----
# Mod1 <- IRTest_Dich(
#   initialitem = initialitem,
#   data = data,
#   model = rep(1:2, each = 5),
#   latent_dist = "DC",
#   max_iter = 200,
#   threshold = 0.0001,
#   h = 4
# )




### Summary
# Prints the fit report for Mod1: convergence status, model-fit statistics
# (deviance, AIC, BIC), parameter counts, item counts, and a text sketch of
# the estimated latent distribution.
summary(Mod1)
#> Convergence:  
#> Successfully converged below the threshold of 1e-04 on 54th iterations. 
#> 
#> Model Fit:  
#>    deviance   5763.024 
#>         AIC   6031.024 
#>         BIC   6595.781 
#> 
#> The Number of Parameters:  
#>        item   15 
#>        dist   119 
#>       total   134 
#> 
#> The Number of Items:  
#> dichotomous   10 
#> polyotomous   0 
#> 
#> The Estimated Latent Distribution:  
#> method - EHM 
#> ----------------------------------------
#>                                           
#>                                           
#>             @                             
#>           @ @ @             @ @           
#>           @ @ @           @ @ @ .         
#>         @ @ @ @ @       . @ @ @ @         
#>         @ @ @ @ @ @   . @ @ @ @ @ @       
#>       @ @ @ @ @ @ @ @ @ @ @ @ @ @ @       
#>     . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @     
#>   . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .   
#> +---------+---------+---------+---------+
#> -2        -1        0         1         2

### The estimated item parameters
# Printed as one row per item with columns a, b and c.
# NOTE(review): a is exactly 1 for items 1-5, presumably because those items
# were fitted with model 1 (model = rep(1:2, each=5)) -- confirm.
Mod1$par_est
#>              a           b c
#>  [1,] 1.000000 -0.74508109 0
#>  [2,] 1.000000  0.51116617 0
#>  [3,] 1.000000  0.80336947 0
#>  [4,] 1.000000  0.53158883 0
#>  [5,] 1.000000 -0.39455364 0
#>  [6,] 1.700258  0.01012807 0
#>  [7,] 1.526307  0.89420050 0
#>  [8,] 2.203176 -1.12635131 0
#>  [9,] 1.566285  0.39084407 0
#> [10,] 1.385190  0.76792824 0

### The asymptotic standard errors of item parameters
# Same layout as Mod1$par_est (rows = items; columns a, b, c).
# NOTE(review): NA entries presumably mark parameters that were fixed rather
# than estimated -- confirm against the IRTest documentation.
Mod1$se
#>               a          b  c
#>  [1,]        NA 0.10304406 NA
#>  [2,]        NA 0.10098172 NA
#>  [3,]        NA 0.10355444 NA
#>  [4,]        NA 0.10112153 NA
#>  [5,]        NA 0.10034057 NA
#>  [6,] 0.1446766 0.06711641 NA
#>  [7,] 0.1537574 0.08561540 NA
#>  [8,] 0.2434543 0.07258288 NA
#>  [9,] 0.1404522 0.07275011 NA
#> [10,] 0.1388418 0.08841969 NA

### The estimated ability parameters
# Mod1$theta holds the ability estimates; head() shows only the first six.
head(Mod1$theta)
#> [1] -0.8551470 -0.6567867 -0.7198031 -1.0039922 -1.2714637 -0.7823374

### The estimated latent distribution
# Draw the latent distribution estimated by Mod1 and pin the y-axis to the
# range 0 to 0.5.
plot(Mod1) + ylim(0, 0.5)




2. Polytomous items

As in the case of dichotomous items, the function DataGeneration can be used for the pre-analysis step. This function returns a set of artificial data and some useful objects for analysis (e.g., theta, data_P, item_P, & initialitem_P).

In the parameter estimation process, initialitem_P can be used as an input to the function IRTest_Poly (i.e., initialitem = initialitem_P). data_P is an artificial item response data set that can be used for analyses such as computer simulation techniques, but it is unnecessary if user-imported item response data are used. theta and item_P are not used in the estimation process, but they can serve as the true parameters if the artificial data (data_P) are used for an analysis.

# Simulate an artificial data set: 1000 examinees, 10 polytomous items and
# no dichotomous items, generated under model_P = "GPCM". categ gives the
# number of categories per item: 3 for items 1-5 and 7 for items 6-10.
# The latent distribution is "2NM", shaped by d, sd_ratio and prob
# (presumably a two-component normal mixture -- confirm in the
# DataGeneration documentation).
Alldata <- DataGeneration(
  seed = 123456789,
  N = 1000,
  nitem_D = 0,
  nitem_P = 10,
  model_P = "GPCM",
  categ = rep(c(3, 7), each = 5),
  latent_dist = "2NM",
  d = 1.414,
  sd_ratio = 2,
  prob = 0.5
)

# Unpack the pieces used below: true abilities, true item parameters,
# the simulated responses, and starting values for estimation.
theta <- Alldata$theta
item <- Alldata$item_P
data <- Alldata$data_P
initialitem <- Alldata$initialitem_P

If the artificial data (data_P) is used, the true latent distribution looks like:




# Fit the polytomous items. Only one call in this block is active; the
# others are commented-out alternatives that differ in how the latent
# distribution is handled (latent_dist). Uncomment one to replace Mod1.

# ---- Kernel density estimation method (active) ----
Mod1 <- IRTest_Poly(
  initialitem = initialitem,
  data = data,
  model = "GPCM",
  latent_dist = "KDE",
  bandwidth = "SJ-ste",
  max_iter = 200,
  threshold = 0.001
)

# ---- Normality assumption ----
# Mod1 <- IRTest_Poly(
#   initialitem = initialitem,
#   data = data,
#   model = "GPCM",
#   latent_dist = "Normal",
#   max_iter = 200,
#   threshold = 0.001
# )

# ---- Empirical histogram method ----
# Mod1 <- IRTest_Poly(
#   initialitem = initialitem,
#   data = data,
#   model = "GPCM",
#   latent_dist = "EHM",
#   max_iter = 200,
#   threshold = 0.001
# )

# ---- Two-component Gaussian mixture distribution ----
# Mod1 <- IRTest_Poly(
#   initialitem = initialitem,
#   data = data,
#   model = "GPCM",
#   latent_dist = "Mixture",
#   max_iter = 200,
#   threshold = 0.001
# )

# ---- Davidian curve (for an arbitrarily chosen case of h = 4) ----
# Mod1 <- IRTest_Poly(
#   initialitem = initialitem,
#   data = data,
#   model = "GPCM",
#   latent_dist = "DC",
#   max_iter = 200,
#   threshold = 0.001,
#   h = 4
# )




### Summary
# Prints the fit report for Mod1: convergence status, model-fit statistics
# (deviance, AIC, BIC), parameter counts, item counts, and a text sketch of
# the estimated latent distribution.
summary(Mod1)
#> Convergence:  
#> Successfully converged below the threshold of 0.001 on 39th iterations. 
#> 
#> Model Fit:  
#>    deviance   20359.2 
#>         AIC   20461.2 
#>         BIC   20711.5 
#> 
#> The Number of Parameters:  
#>        item   50 
#>        dist   1 
#>       total   51 
#> 
#> The Number of Items:  
#> dichotomous   0 
#> polyotomous   10 
#> 
#> The Estimated Latent Distribution:  
#> method - KDE 
#> ----------------------------------------
#>                                           
#>               . .                         
#>             @ @ @ .                       
#>           . @ @ @ @ .                     
#>           @ @ @ @ @ @                     
#>         . @ @ @ @ @ @ @ .                 
#>         @ @ @ @ @ @ @ @ @ @ @ . .         
#>       . @ @ @ @ @ @ @ @ @ @ @ @ @ .       
#>       @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .   
#>   . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ 
#> +---------+---------+---------+---------+
#> -2        -1        0         1         2

### The estimated item parameters
# Printed as one row per item with columns a and b_1 ... b_6.
# Items 1-5 have three categories (categ = rep(c(3,7), each = 5)), so only
# b_1 and b_2 are filled for them and the remaining columns are NA.
Mod1$par_est
#>               a          b_1         b_2        b_3         b_4        b_5
#>  [1,] 1.9320419  0.396698909  0.46603651         NA          NA         NA
#>  [2,] 1.6880632 -0.303763927  0.05188864         NA          NA         NA
#>  [3,] 2.0504530 -0.376056390 -0.24941074         NA          NA         NA
#>  [4,] 1.0605873 -0.159282350  0.16815184         NA          NA         NA
#>  [5,] 0.7835049  0.003729855  0.22658723         NA          NA         NA
#>  [6,] 1.8907281  0.253908544  0.29706486  0.5563451  0.56108999  0.7435219
#>  [7,] 1.5439636 -1.603051232 -1.12357477 -1.0013097 -1.06426560 -0.7066684
#>  [8,] 0.9479375 -0.332417438 -0.17196848  0.3086190  0.06595696 -0.2510836
#>  [9,] 1.4230350 -0.784687384 -0.69487372 -0.6851528 -0.46813907 -0.5758896
#> [10,] 2.5827215  0.546921194  0.94939512  0.7099745  1.04467562  1.0309654
#>              b_6
#>  [1,]         NA
#>  [2,]         NA
#>  [3,]         NA
#>  [4,]         NA
#>  [5,]         NA
#>  [6,]  0.8505778
#>  [7,] -0.6593098
#>  [8,]  0.1465100
#>  [9,] -0.4351178
#> [10,]  1.2435975

### The asymptotic standard errors of item parameters
# Same layout as Mod1$par_est (rows = items; columns a, b_1 ... b_6), with NA
# in the positions where an item has no corresponding parameter.
Mod1$se
#>                a        b_1        b_2       b_3        b_4        b_5
#>  [1,] 0.11560425 0.06407208 0.06475432        NA         NA         NA
#>  [2,] 0.09873818 0.05692682 0.05873271        NA         NA         NA
#>  [3,] 0.12254146 0.05222493 0.05290338        NA         NA         NA
#>  [4,] 0.06700339 0.08275252 0.08438107        NA         NA         NA
#>  [5,] 0.05524448 0.10839246 0.10990234        NA         NA         NA
#>  [6,] 0.11338544 0.06983861 0.08428678 0.1076463 0.11530990 0.10459915
#>  [7,] 0.09071435 0.14185389 0.12458750 0.1208955 0.10620807 0.08435877
#>  [8,] 0.05380619 0.11427616 0.13330931 0.1692535 0.18844552 0.17129060
#>  [9,] 0.08124190 0.10135122 0.11385157 0.1183858 0.11737822 0.10510023
#> [10,] 0.16806178 0.05840518 0.08680555 0.1003863 0.09807777 0.09198491
#>              b_6
#>  [1,]         NA
#>  [2,]         NA
#>  [3,]         NA
#>  [4,]         NA
#>  [5,]         NA
#>  [6,] 0.08789139
#>  [7,] 0.07567987
#>  [8,] 0.13258224
#>  [9,] 0.08666445
#> [10,] 0.07509506

### The estimated ability parameters
# Mod1$theta holds the ability estimates; head() shows only the first six.
head(Mod1$theta)
#> [1] -0.5375323 -0.5787214 -0.2605974 -1.0428218 -0.9306040 -1.2750381

### The estimated latent distribution
# Draw the latent distribution estimated by Mod1 and pin the y-axis to the
# range 0 to 0.5.
plot(Mod1) + ylim(0, 0.5)




3. Mixed-format test

As in the case of dichotomous and polytomous items, the function DataGeneration can be used for the pre-analysis step. This function returns artificial data and some useful objects for analysis (i.e., theta, data_D, item_D, initialitem_D, data_P, item_P, & initialitem_P).

In the parameter estimation process, initialitem_D and initialitem_P can be used as inputs to the function IRTest_Mix (i.e., initialitem_D = initialitem_D, & initialitem_P = initialitem_P). data_D and data_P are artificial item response data sets that can be used for analyses such as computer simulation techniques, but they are unnecessary if user-imported item response data are used. theta, item_D, and item_P are not used in the estimation process, but they can serve as the true parameters if the artificial data (data_D & data_P) are used for an analysis.

# Simulate an artificial mixed-format data set: 1000 examinees, 5 dichotomous
# items (all assigned model 2) and 5 polytomous items with 3 categories each,
# generated under model_P = "GPCM". The latent distribution is "2NM", shaped
# by d, sd_ratio and prob (presumably a two-component normal mixture --
# confirm in the DataGeneration documentation).
Alldata <- DataGeneration(
  seed = 123456789,
  N = 1000,
  nitem_D = 5,
  nitem_P = 5,
  model_D = rep(2, 5),
  model_P = "GPCM",
  categ = rep(3, 5),
  latent_dist = "2NM",
  d = 1.664,
  sd_ratio = 1,
  prob = 0.5
)

# Unpack the pieces used below: true abilities, then (per item format) the
# true item parameters, simulated responses, and starting values.
theta <- Alldata$theta
itemD <- Alldata$item_D
itemP <- Alldata$item_P
DataD <- Alldata$data_D
DataP <- Alldata$data_P
initialitemD <- Alldata$initialitem_D
initialitemP <- Alldata$initialitem_P

If the artificial data (data_D & data_P) is used, the true latent distribution looks like:

#> Scale for y is already present.
#> Adding another scale for y, which will replace the existing scale.




# Fit the mixed-format test (dichotomous + polytomous items). Only one call
# in this block is active; the others are commented-out alternatives that
# differ in how the latent distribution is handled (latent_dist).
# Uncomment one to replace Mod1.

# ---- Kernel density estimation method (active) ----
Mod1 <- IRTest_Mix(
  initialitem_D = initialitemD,
  initialitem_P = initialitemP,
  data_D = DataD,
  data_P = DataP,
  model_D = rep(2, 5),
  model_P = "GPCM",
  latent_dist = "KDE",
  bandwidth = "SJ-ste",
  max_iter = 200,
  threshold = 0.001
)

# ---- Normality assumption ----
# Mod1 <- IRTest_Mix(
#   initialitem_D = initialitemD,
#   initialitem_P = initialitemP,
#   data_D = DataD,
#   data_P = DataP,
#   model_D = rep(2, 5),
#   model_P = "GPCM",
#   latent_dist = "Normal",
#   max_iter = 200,
#   threshold = 0.001
# )

# ---- Empirical histogram method ----
# Mod1 <- IRTest_Mix(
#   initialitem_D = initialitemD,
#   initialitem_P = initialitemP,
#   data_D = DataD,
#   data_P = DataP,
#   model_D = rep(2, 5),
#   model_P = "GPCM",
#   latent_dist = "EHM",
#   max_iter = 200,
#   threshold = 0.001
# )

# ---- Two-component Gaussian mixture distribution ----
# Mod1 <- IRTest_Mix(
#   initialitem_D = initialitemD,
#   initialitem_P = initialitemP,
#   data_D = DataD,
#   data_P = DataP,
#   model_D = rep(2, 5),
#   model_P = "GPCM",
#   latent_dist = "Mixture",
#   max_iter = 200,
#   threshold = 0.001
# )

# ---- Davidian curve (for an arbitrarily chosen case of h = 4) ----
# Mod1 <- IRTest_Mix(
#   initialitem_D = initialitemD,
#   initialitem_P = initialitemP,
#   data_D = DataD,
#   data_P = DataP,
#   model_D = rep(2, 5),
#   model_P = "GPCM",
#   latent_dist = "DC",
#   max_iter = 200,
#   threshold = 0.001,
#   h = 4
# )




### Summary
# Prints the fit report for Mod1: convergence status, model-fit statistics
# (deviance, AIC, BIC), parameter counts, item counts, and a text sketch of
# the estimated latent distribution.
summary(Mod1)
#> Convergence:  
#> Successfully converged below the threshold of 0.001 on 31th iterations. 
#> 
#> Model Fit:  
#>    deviance   2854157 
#>         AIC   2854209 
#>         BIC   2854337 
#> 
#> The Number of Parameters:  
#>        item   25 
#>        dist   1 
#>       total   26 
#> 
#> The Number of Items:  
#> dichotomous   5 
#> polyotomous   5 
#> 
#> The Estimated Latent Distribution:  
#> method - KDE 
#> ----------------------------------------
#>                                           
#>                                           
#>                                           
#>                         . .               
#>             . . . . . @ @ @ @             
#>         . @ @ @ @ @ @ @ @ @ @ @           
#>         @ @ @ @ @ @ @ @ @ @ @ @ @         
#>       @ @ @ @ @ @ @ @ @ @ @ @ @ @ @       
#>     @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @     
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ . 
#> +---------+---------+---------+---------+
#> -2        -1        0         1         2

### The estimated item parameters
# For the mixed-format fit this prints two components, $Dichotomous
# (columns a, b, c) and $Polytomous (columns a, b_1 ... b_6), each with one
# row per item of that format.
Mod1$par_est
#> $Dichotomous
#>             a          b c
#> [1,] 2.200954  0.9115074 0
#> [2,] 1.984903 -1.0332999 0
#> [3,] 1.110728  0.4980855 0
#> [4,] 1.258792  0.5392781 0
#> [5,] 2.287480  1.4436252 0
#> 
#> $Polytomous
#>             a        b_1         b_2 b_3 b_4 b_5 b_6
#> [1,] 1.966701  0.3937469  0.42467910  NA  NA  NA  NA
#> [2,] 2.007852 -0.3261131  0.06844343  NA  NA  NA  NA
#> [3,] 2.056885 -0.3897486 -0.22236003  NA  NA  NA  NA
#> [4,] 1.030497 -0.1731280  0.19751324  NA  NA  NA  NA
#> [5,] 0.785141  0.2433940  0.08376259  NA  NA  NA  NA

### The asymptotic standard errors of item parameters
# Mirrors the structure of Mod1$par_est: $Dichotomous and $Polytomous
# components with the same rows and columns.
# NOTE(review): NA entries presumably mark parameters that were fixed or do
# not exist for the item -- confirm against the IRTest documentation.
Mod1$se
#> $Dichotomous
#>               a          b  c
#> [1,] 0.15475720 0.04668629 NA
#> [2,] 0.14171212 0.05355501 NA
#> [3,] 0.08520384 0.06947386 NA
#> [4,] 0.09141200 0.06332382 NA
#> [5,] 0.19665345 0.06471224 NA
#> 
#> $Polytomous
#>               a        b_1        b_2 b_3 b_4 b_5 b_6
#> [1,] 0.11688799 0.05981212 0.05958615  NA  NA  NA  NA
#> [2,] 0.11456065 0.05304505 0.05249768  NA  NA  NA  NA
#> [3,] 0.11996224 0.05626596 0.05611498  NA  NA  NA  NA
#> [4,] 0.06432937 0.08543217 0.08539457  NA  NA  NA  NA
#> [5,] 0.05401889 0.11481480 0.11424988  NA  NA  NA  NA

### The estimated ability parameters
# Mod1$theta holds the ability estimates; head() shows only the first six.
head(Mod1$theta)
#> [1] -0.5775496 -0.7558405  0.6185433 -0.8691925 -1.3624809 -1.5623822

### The estimated latent distribution
# Draw the latent distribution estimated by Mod1 and pin the y-axis to the
# range 0 to 0.5.
plot(Mod1) + ylim(0, 0.5)



—-