library(IRTest)
#> Thank you for using IRTest!
#> Please cite the package as:
#> Li, S. (2022). IRTest: Parameter estimation of item response theory with estimation of latent distribution (Version 1.7.0). R package.
#> URL: https://CRAN.R-project.org/package=IRTest
library(ggplot2)
The function `DataGeneration` can be used for the pre-analysis step.
This function returns a set of artificial data and some useful objects
for analysis (e.g., `theta`, `data_D`, `item_D`, & `initialitem_D`).
In the parameter estimation process, the `initialitem_D` can be used as
an input of the function `IRTest_Dich` (i.e.,
`initialitem = initialitem_D`). The `data_D` is an artificial item
response data set that could be used for some analyses such as computer
simulation techniques, but would be unnecessary if user-imported item
response data is used. The `theta` and `item_D` are not used for the
estimation process, but could play a role as true parameters if the
artificial data (`data_D`) is used for an analysis.
### Generate artificial data: 1000 respondents, 10 dichotomous items and
### no polytomous items.
Alldata <- DataGeneration(seed = 123456789,
                          model_D = rep(1:2, each = 5),
                          N = 1000,
                          nitem_D = 10,
                          nitem_P = 0,
                          latent_dist = "2NM",
                          d = 1.664,
                          sd_ratio = 2,
                          prob = 0.3)

### Pull out the response data, the item parameters, and the ability
### parameters used to generate the data.
data <- Alldata$data_D
item <- Alldata$item_D
theta <- Alldata$theta

### Introduce missing responses: item 1 is missing for respondents 1-500
### and item 2 is missing for respondents 501-1000.
data[1:500, 1] <- NA
data[501:1000, 2] <- NA
If the artificial data (`data_D`) is used, the true latent
distribution looks like this:
### Fit the dichotomous-item model to `data`, estimating the latent
### distribution with the "LLS" method.
Mod1 <- IRTest_Dich(data = data,
                    model = 2,
                    latent_dist = "LLS",
                    h = 4)

### Summary
summary(Mod1)
#> Convergence:
#> Successfully converged below the threshold of 1e-04 on 73rd iterations.
#>
#> Model Fit:
#> deviance 9130.173
#> AIC 9178.173
#> BIC 9295.959
#>
#> The Number of Parameters:
#> item 20
#> dist 4
#> total 24
#>
#> The Number of Items:
#> dichotomous 10
#> polyotomous 0
#>
#> The Estimated Latent Distribution:
#> method - LLS
#> ----------------------------------------
#>
#>
#>
#>
#> . . . . . . @ .
#> @ @ @ @ @ @ @ @ @ @ @ @ @
#> @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .
#> @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> +---------+---------+---------+---------+
#> -2 -1 0 1 2
### The estimated item parameters
Mod1$par_est
#> a b c
#> [1,] 0.6918262 -0.07928281 0
#> [2,] 0.9383418 0.37278883 0
#> [3,] 1.0179276 -1.55407024 0
#> [4,] 1.1022914 -1.05543143 0
#> [5,] 1.0774381 -1.73147120 0
#> [6,] 1.9863217 0.45700688 0
#> [7,] 2.0410894 -1.63161594 0
#> [8,] 2.0560120 -0.49101479 0
#> [9,] 1.9277635 -0.50771321 0
#> [10,] 2.3390683 0.25528127 0
### The asymptotic standard errors of item parameters
Mod1$se
#> a b c
#> [1,] 0.11643444 0.15353537 NA
#> [2,] 0.11073882 0.12959651 NA
#> [3,] 0.09593797 0.12850705 NA
#> [4,] 0.09010128 0.08861957 NA
#> [5,] 0.10475036 0.13844368 NA
#> [6,] 0.12350916 0.04375507 NA
#> [7,] 0.19009790 0.08032865 NA
#> [8,] 0.12700645 0.04341767 NA
#> [9,] 0.12024177 0.04550756 NA
#> [10,] 0.14008630 0.03870131 NA
### The estimated ability parameters, plotted against `theta`
plot(theta, Mod1$theta)
### Reference line with intercept 0 and slope 1
abline(a = 0, b = 1)

### The estimated latent distribution (y-axis limited to 0-0.5)
plot(Mod1) +
  lims(y = c(0, 0.5))

### Item fit statistics
item_fit(Mod1)
#> Insufficient data values to produce 10 bins for Item 1. 9 bins will be used.
#> Insufficient data values to produce 9 bins for Item 1. 8 bins will be used.
#> Insufficient data values to produce 8 bins for Item 1. 7 bins will be used.
#> Insufficient data values to produce 7 bins for Item 1. 6 bins will be used.
#> stat df p.value
#> 1 52.59557 3 0.0000
#> 2 19.33040 7 0.0072
#> 3 33.84115 7 0.0000
#> 4 25.45106 7 0.0006
#> 5 30.08644 7 0.0001
#> 6 76.65091 7 0.0000
#> 7 31.18813 7 0.0001
#> 8 29.19976 7 0.0001
#> 9 42.28145 7 0.0000
#> 10 50.04304 7 0.0000
### The test reliability of the fitted model
reliability(Mod1)
#> test reliability
#> 0.7740163
As in the case of dichotomous items, the function `DataGeneration` can
be used for the pre-analysis step. This function returns a set of
artificial data and some useful objects for analysis (e.g., `theta`,
`data_P`, `item_P`, & `initialitem_P`).
In the parameter estimation process, the `initialitem_P` can be used as
an input of the function `IRTest_Poly` (i.e.,
`initialitem = initialitem_P`). The `data_P` is an artificial item
response data set that could be used for some analyses such as computer
simulation techniques, but would be unnecessary if user-imported item
response data is used. The `theta` and `item_P` are not used for the
estimation process, but could play a role as true parameters if the
artificial data (`data_P`) is used for an analysis.
### Generate artificial data: 1000 respondents, 10 polytomous items
### (five with 3 categories, five with 7) and no dichotomous items.
Alldata <- DataGeneration(seed = 123456789,
                          model_P = "GPCM",
                          categ = rep(c(3, 7), each = 5),
                          N = 1000,
                          nitem_D = 0,
                          nitem_P = 10,
                          latent_dist = "2NM",
                          d = 1.414,
                          sd_ratio = 2,
                          prob = 0.5)

### Pull out the response data, the item parameters, and the ability
### parameters used to generate the data.
data <- Alldata$data_P
item <- Alldata$item_P
theta <- Alldata$theta

### Introduce missing responses: items 1-3 are missing for respondents
### 1-500 and items 4-6 are missing for respondents 501-1000.
data[1:500, 1:3] <- NA
data[501:1000, 4:6] <- NA
If the artificial data (`data_P`) is used, the true latent
distribution looks like this:
### Fit the polytomous-item model ("GPCM") to `data`, estimating the
### latent distribution with the "KDE" method.
Mod1 <- IRTest_Poly(data = data,
                    model = "GPCM",
                    latent_dist = "KDE")

### Summary
summary(Mod1)
#> Convergence:
#> Successfully converged below the threshold of 1e-04 on 29th iterations.
#>
#> Model Fit:
#> deviance 17550.31
#> AIC 17650.31
#> BIC 17895.69
#>
#> The Number of Parameters:
#> item 49
#> dist 1
#> total 50
#>
#> The Number of Items:
#> dichotomous 0
#> polyotomous 10
#>
#> The Estimated Latent Distribution:
#> method - KDE
#> ----------------------------------------
#>
#> . .
#> . @ @ .
#> @ @ @ @ .
#> @ @ @ @ @ @ .
#> . @ @ @ @ @ @ @ .
#> @ @ @ @ @ @ @ @ @ @ .
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ .
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ .
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> +---------+---------+---------+---------+
#> -2 -1 0 1 2
### The estimated item parameters
Mod1$par_est
#> a b_1 b_2 b_3 b_4 b_5
#> [1,] 1.7875216 0.768557011 1.5465558 NA NA NA
#> [2,] 0.7641532 -0.227304122 0.8418435 NA NA NA
#> [3,] 0.6481532 -0.005371191 1.2849516 NA NA NA
#> [4,] 1.6588070 -0.945807752 0.9394254 NA NA NA
#> [5,] 2.2530632 -2.195167615 -2.0151652 NA NA NA
#> [6,] 1.2353060 -0.594188007 0.3089519 0.598282406 2.93402549 0.68958962
#> [7,] 1.4523293 -3.049821033 -1.9487927 -1.202220957 -0.92560041 0.08132396
#> [8,] 0.8574999 -1.390253736 -0.8853679 -0.365115011 -0.04109078 -0.13440845
#> [9,] 1.5337358 -1.016578133 -0.4014641 0.001875338 0.38574134 0.54652582
#> [10,] 1.2721265 -1.026096753 0.3073373 0.388904895 0.70808745 1.43425458
#> b_6
#> [1,] NA
#> [2,] NA
#> [3,] NA
#> [4,] NA
#> [5,] NA
#> [6,] NA
#> [7,] 0.5844565
#> [8,] 0.3924430
#> [9,] 0.8937754
#> [10,] 1.4746911
### The asymptotic standard errors of item parameters
Mod1$se
#> a b_1 b_2 b_3 b_4 b_5
#> [1,] 0.14797998 0.07150794 0.07980719 NA NA NA
#> [2,] 0.08139314 0.15965897 0.14218502 NA NA NA
#> [3,] 0.07631251 0.17392515 0.17304300 NA NA NA
#> [4,] 0.18097518 0.06426531 0.15622793 NA NA NA
#> [5,] 0.34689261 0.20947806 0.16496940 NA NA NA
#> [6,] 0.12884087 0.08725042 0.13210795 0.18805008 0.86173450 1.1727244
#> [7,] 0.07665161 0.42420195 0.14798194 0.09322249 0.07477916 0.0695550
#> [8,] 0.04763529 0.16463904 0.14333983 0.14274591 0.15090036 0.1460057
#> [9,] 0.08091761 0.07218241 0.07169808 0.08186487 0.09609836 0.1002510
#> [10,] 0.06945961 0.07474044 0.08939833 0.10578974 0.11570251 0.1288563
#> b_6
#> [1,] NA
#> [2,] NA
#> [3,] NA
#> [4,] NA
#> [5,] NA
#> [6,] NA
#> [7,] 0.07437509
#> [8,] 0.12931705
#> [9,] 0.09122916
#> [10,] 0.13744175
### The estimated ability parameters, plotted against `theta`
plot(theta, Mod1$theta)
### Reference line with intercept 0 and slope 1
abline(a = 0, b = 1)

### The estimated latent distribution (y-axis limited to 0-0.5)
plot(Mod1) +
  lims(y = c(0, 0.5))

### Item fit statistics
item_fit(Mod1)
#> stat df p.value
#> 1 22.49394 15 0.0955
#> 2 14.15362 15 0.5139
#> 3 17.37764 15 0.2968
#> 4 24.17928 15 0.0621
#> 5 23.89154 15 0.0670
#> 6 39.84661 39 0.4323
#> 7 81.99800 47 0.0012
#> 8 59.11891 47 0.1105
#> 9 58.43121 47 0.1225
#> 10 43.20066 47 0.6307
### The test reliability of the fitted model
reliability(Mod1)
#> test reliability
#> 0.9158039
As in the case of dichotomous and polytomous items, the function
`DataGeneration` can be used for the pre-analysis step. This function
returns artificial data and some useful objects for analysis (i.e.,
`theta`, `data_D`, `item_D`, `initialitem_D`, `data_P`, `item_P`, &
`initialitem_P`).
In the parameter estimation process, the `initialitem_D` and
`initialitem_P` can be used as inputs of the function `IRTest_Mix`
(i.e., `initialitem_D = initialitem_D`, &
`initialitem_P = initialitem_P`). The `data_D` and `data_P` are
artificial item response data sets that could be used for some analyses
such as computer simulation techniques, but would be unnecessary if
user-imported item response data is used. The `theta`, `item_D`, and
`item_P` are not used for the estimation process, but could play a role
as true parameters if the artificial data (`data_D` & `data_P`) are
used for an analysis.
### Generate artificial data: 1000 respondents, 5 dichotomous items and
### 5 polytomous items with 5 categories each.
Alldata <- DataGeneration(seed = 12345678,
                          model_D = rep(2, 5),
                          model_P = "GPCM",
                          categ = rep(5, 5),
                          N = 1000,
                          nitem_D = 5,
                          nitem_P = 5,
                          latent_dist = "2NM",
                          d = 1.664,
                          sd_ratio = 1,
                          prob = 0.5)

### Pull out both response data sets, both sets of item parameters, and
### the ability parameters used to generate the data.
DataD <- Alldata$data_D
DataP <- Alldata$data_P
itemD <- Alldata$item_D
itemP <- Alldata$item_P
theta <- Alldata$theta

### Introduce missing responses in a different block of 250 respondents
### for each of the first two dichotomous and first two polytomous items.
DataD[1:250, 1] <- NA
DataD[251:500, 2] <- NA
DataP[501:750, 1] <- NA
DataP[751:1000, 2] <- NA
If the artificial data (`data`) is used, the true latent
distribution looks like this:
#> Scale for y is already present.
#> Adding another scale for y, which will replace the existing scale.
### Fit the mixed-format model (dichotomous "2PL" items plus polytomous
### "GPCM" items), estimating the latent distribution with the "KDE" method.
Mod1 <- IRTest_Mix(data_D = DataD,
                   data_P = DataP,
                   model_D = "2PL",
                   model_P = "GPCM",
                   latent_dist = "KDE")

### Summary
summary(Mod1)
#> Convergence:
#> Successfully converged below the threshold of 1e-04 on 35th iterations.
#>
#> Model Fit:
#> deviance 1766382
#> AIC 1766454
#> BIC 1766631
#>
#> The Number of Parameters:
#> item 35
#> dist 1
#> total 36
#>
#> The Number of Items:
#> dichotomous 5
#> polyotomous 5
#>
#> The Estimated Latent Distribution:
#> method - KDE
#> ----------------------------------------
#>
#>
#>
#> @ @ .
#> @ @ @ @ @ @ @
#> . @ @ @ @ @ . . @ @ @ @ @
#> @ @ @ @ @ @ @ @ @ @ @ @ @ .
#> @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> . @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
#> +---------+---------+---------+---------+
#> -2 -1 0 1 2
### The estimated item parameters
Mod1$par_est
#> $Dichotomous
#> a b c
#> [1,] 1.5821249 -0.7547435 0
#> [2,] 0.9794597 -1.5943777 0
#> [3,] 0.8501268 0.7680404 0
#> [4,] 0.9688784 -0.9614223 0
#> [5,] 1.4944181 -1.3590145 0
#>
#> $Polytomous
#> a b_1 b_2 b_3 b_4 b_5 b_6
#> [1,] 2.322153 -0.7166374 0.75456780 1.5477052 2.7267412 NA NA
#> [2,] 1.029950 -0.6745269 -0.75486611 0.6870106 1.1195071 NA NA
#> [3,] 2.488694 -0.1582097 0.02776561 1.0515812 1.2225348 NA NA
#> [4,] 1.916094 -1.8030428 -0.34302398 -0.1287233 0.2492093 NA NA
#> [5,] 1.961237 -0.4699263 -0.15012932 0.8388752 1.1759573 NA NA
### The asymptotic standard errors of item parameters
Mod1$se
#> $Dichotomous
#> a b c
#> [1,] 0.12876313 0.07623034 NA
#> [2,] 0.11110379 0.18286320 NA
#> [3,] 0.07721960 0.09743837 NA
#> [4,] 0.08380408 0.09511242 NA
#> [5,] 0.12467429 0.08327815 NA
#>
#> $Polytomous
#> a b_1 b_2 b_3 b_4 b_5 b_6
#> [1,] 0.14781076 0.04331630 0.05904431 0.08494950 0.27118601 NA NA
#> [2,] 0.06825487 0.11285246 0.10874430 0.11949196 0.14430172 NA NA
#> [3,] 0.13384192 0.05041735 0.05173368 0.05111369 0.05795802 NA NA
#> [4,] 0.10416674 0.07999619 0.06179259 0.06573655 0.06047707 NA NA
#> [5,] 0.10426523 0.05592471 0.05795963 0.05861632 0.06500365 NA NA
### The estimated ability parameters, plotted against `theta`
plot(theta, Mod1$theta)
### Reference line with intercept 0 and slope 1
abline(a = 0, b = 1)

### The estimated latent distribution (y-axis limited to 0-0.5)
plot(Mod1) +
  lims(y = c(0, 0.5))

### Item fit statistics
item_fit(Mod1)
#> $Dichotomous
#> stat df p.value
#> 1 11.045242 7 0.1367
#> 2 6.643194 7 0.4670
#> 3 6.371348 7 0.4971
#> 4 11.931981 7 0.1028
#> 5 13.343816 7 0.0642
#>
#> $Polytomous
#> stat df p.value
#> 1 31.47649 31 0.4424
#> 2 38.48105 31 0.1670
#> 3 49.70119 31 0.0179
#> 4 38.54354 31 0.1653
#> 5 41.32231 31 0.1018
### The test reliability of the fitted model
reliability(Mod1)
#> test reliability
#> 0.9156917