################################################################################################
# Sample analysis for the Environment data set.
#
# Description: The data set is extracted from the 1990 British Social Attitudes Survey.
# In total, 291 individuals were asked about their opinions on six environmental issues.
# The response options were 'very concerned', 'slightly concerned' and 'not very concerned',
# thus giving rise to the following six ordinal items:
#   1. 'LeadPetrol': Lead from petrol
#   2. 'RiverSea': River and sea pollution
#   3. 'RadioWaste': Transport and storage of radioactive waste
#   4. 'AirPollution': Air pollution
#   5. 'Chemicals': Transport and disposal of poisonous chemicals
#   6. 'Nuclear': Risks from nuclear power stations
################################################################################################

# install the latest version of the `ltm' package
# (if you do not already have it)
install.packages("ltm", dependencies = TRUE)

# load `ltm'
library(ltm)


##########################
# Descriptive Statistics #
##########################

# check what the first 10 rows of the data set look like
head(Environment, 10)

# frequencies per response category per item
summary(Environment)

# IRT-specific descriptive statistics can be produced
# using the descript() function
descript(Environment)

# Kendall's tau between pairs of items
rcor.test(Environment, method = "kendall")

# you can also adjust for multiple comparisons
rcor.test(Environment, method = "kendall", p.adjust = TRUE)


##########################################
# Rasch Generalized Partial Credit Model #
##########################################

# the Rasch version of the Generalized Partial Credit Model
# assumes a common discrimination parameter across items, fixed at 1
fit1 <- gpcm(Environment, constraint = "rasch")
fit1
summary(fit1)

# you can ask for sandwich standard errors using
# the `robust.se' argument
summary(fit1, robust.se = TRUE)

# check the fit on the margins
margins(fit1)
margins(fit1, type = "three-way")

# the fit can also be assessed using the Pearson chi-squared
# goodness-of-fit measure; this can be done with the GoF.gpcm() function.
# By default the function estimates the p-value using a parametric
# Bootstrap approach. This is safer than using the asymptotic chi-squared
# distribution, but it requires more computing time.
# The first call below uses `simulate.p.value = FALSE', which specifies
# the asymptotic distribution; the second call uses the Bootstrap approach.
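# note: the Bootstrap p-value below is based on random resampling; if you
# want a reproducible result, you can set the random seed beforehand
# (the value 1234 is arbitrary and not part of the original analysis)
set.seed(1234)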
GoF.gpcm(fit1, simulate.p.value = FALSE)
GoF.gpcm(fit1, B = 199) # this will take some time depending on your machine


########################################
# 1PL Generalized Partial Credit Model #
########################################

# the 1PL version of the Generalized Partial Credit Model
# assumes a common discrimination parameter across items
fit2 <- gpcm(Environment, constraint = "1PL")
fit2
summary(fit2)

# check the fit on the margins
margins(fit2)
margins(fit2, type = "three-way")

# you can also use the GoF.gpcm() function
GoF.gpcm(fit2, simulate.p.value = FALSE)


####################################
# Generalized Partial Credit Model #
####################################

# the unconstrained version of the Generalized Partial Credit Model
# assumes a different discrimination parameter per item
fit3 <- gpcm(Environment)
fit3
summary(fit3)

# for each observed response pattern under the model:
# expected frequencies
fitted(fit3)
# expected marginal probabilities
fitted(fit3, type = "marginal")
# expected conditional (on the latent ability) probabilities
fitted(fit3, type = "conditional")

# you can also compute the fitted values for specific patterns, e.g.,
fitted(fit3, resp.patterns = cbind(1, 2, 3, NA, 2, 1))
fitted(fit3, resp.patterns = rbind("A" = c(1, 2, 3, NA, 2, 1), "B" = c(1, 1, 1, 2, 2, 2)))

# the same options are also available for the residuals
residuals(fit3)

# check the fit on the margins
margins(fit3)
margins(fit3, type = "three-way")

# Likelihood Ratio Test between constrained and unconstrained models;
# here we test whether the discrimination parameter should be fixed at 1
anova(fit1, fit2)

# here we test whether the discrimination parameter is equal among the 6 items
anova(fit2, fit3)

# if required, you can use a parametric Bootstrap approach
# to estimate the p-value of the LRT, e.g.,
av <- anova(fit1, fit2, simulate.p.value = TRUE) # this will take some time depending on your machine
av
# the plot shows the simulated distribution of the LRT
# (i.e., we compare with the asymptotic chi-squared distribution under the null)
plot(av)


#################
# Factor Scores #
#################

# the latent abilities can be estimated using the factor.scores()
# function; the default is maximum a posteriori (Empirical Bayes)
fs <- factor.scores(fit3)
fs

# you can also plot a kernel density estimate of the
# latent abilities using the plot method, e.g.,
plot(fs)

# for expected a posteriori scores use
factor.scores(fit3, method = "EAP")

# factor scores can also be computed for specific response patterns;
# to do that use the `resp.patterns' argument, e.g.,
factor.scores(fit3, resp.patterns = rbind("A" = c(1, 2, 3, NA, 2, 1), "B" = c(1, 1, 1, 2, 2, 2)))


##############################################
# Item Characteristic and Information Curves #
##############################################

# by default the Item Response Category Characteristic
# Curves are produced; we include a legend
par(mfrow = c(2, 2))
plot(fit3, lwd = 2, cex = 1.2, legend = TRUE, cx = "left", xlab = "Latent Trait",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)

# if you want them for a specific item, then use the `items'
# argument, e.g., here for the third item
plot(fit3, items = 3, lwd = 2, cex = 1.2, legend = TRUE, cx = "left", xlab = "Latent Trait",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)

# for the GPCM the Item Operation Characteristic Curves can be plotted
# as well by specifying `type = "OCCu"' or `type = "OCCl"'
par(mfrow = c(2, 2))
plot(fit3, type = "OCCu", lwd = 2, cex = 1.2, legend = TRUE, cx = "topleft", xlab = "Latent Trait",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)
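# the `OCCl' version mentioned above can be obtained in the same way;
# a sketch using the same graphical settings (not in the original script):
plot(fit3, type = "OCCl", lwd = 2, cex = 1.2, legend = TRUE, cx = "topleft", xlab = "Latent Trait",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)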
# the Item Information Curves are produced by specifying
# `type = "IIC"'
plot(fit3, type = "IIC", lwd = 2, cex = 1.2, legend = TRUE, cx = "topleft", xlab = "Latent Trait",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)

# for the Test Information Curve you need to specify `items = 0'
plot(fit3, type = "IIC", items = 0, lwd = 2, xlab = "Latent Trait",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)

# compare the Test Information Curves of the three versions of the GPCM
# (check the range of the y-axis in each plot)
par(mfrow = c(2, 2))
plot(fit1, type = "IIC", items = 0, lwd = 2, xlab = "Latent Trait", main = "Rasch Version",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)
plot(fit2, type = "IIC", items = 0, lwd = 2, xlab = "Latent Trait", main = "1PL Version",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)
plot(fit3, type = "IIC", items = 0, lwd = 2, xlab = "Latent Trait", main = "GPCM Version",
     cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)

# to see more clearly the difference in the amount of information provided
# by the three models, use the following code. In addition, we have also
# included the information from the Graded Response Model for comparison:
vals1 <- plot(fit1, type = "IIC", items = 0, plot = FALSE)
vals2 <- plot(fit2, type = "IIC", items = 0, plot = FALSE)
vals3 <- plot(fit3, type = "IIC", items = 0, plot = FALSE)
vals4 <- plot(fit4 <- grm(Environment, constrained = TRUE), type = "IIC", items = 0, plot = FALSE)
vals5 <- plot(fit5 <- grm(Environment), type = "IIC", items = 0, plot = FALSE)
values <- cbind(vals1[, "test.info"], vals2[, "test.info"], vals3[, "test.info"],
                vals4[, "test.info"], vals5[, "test.info"])
matplot(vals1[, "z"], values, type = "l",
        col = c("black", "red", "blue", "green", "goldenrod1"),
        xlab = "Latent Trait", ylab = "Information",
        main = "A Comparison between Different Polytomous IRT Models",
        lty = 1, lwd = 2, cex.main = 1.5, cex.lab = 1.3, cex.axis = 1.1)
lgd <- c("GPCM (discrimination = 1)",
         "GPCM (discrimination equal among items)",
         "GPCM (discrimination different per item)",
         "GRM (discrimination equal among items)",
         "GRM (discrimination different per item)")
legend("topleft", lgd, col = c("black", "red", "blue", "green", "goldenrod1"),
       cex = 1.2, lty = 1, lwd = 2, bty = "n")

# using the function information() you can numerically compare
# the information provided by the different IRT models for
# polytomous data

# discrimination = 1
information(fit1, c(-4, 4))

# discrimination equal among items
information(fit2, c(-4, 4))
information(fit4, c(-4, 4))

# discrimination different per item
information(fit3, c(-4, 4))
information(fit5, c(-4, 4))
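# as a further illustration (not part of the original script): recent versions
# of ltm also provide an `items' argument in information() that restricts the
# computation to a subset of items, e.g., the information provided by items 3
# and 6 alone under the unconstrained GPCM
information(fit3, c(-4, 4), items = c(3, 6))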