I am exploring the ordered_logistic model with a very simple model (to see how it works). I expected the cutpoint means to convert to probabilities that would exactly match the data frequencies. But the model returns frequencies biased toward the center. Here is the code.
library("rstan")
options(mc.cores = parallel::detectCores())
# Round to 2 decimal digits
rd = function(x) {round(x,2)}
# Build data Only 4 categories
N=39 # 39 rubric scores
# Make data frequencies: 3,13,15,8 for scores 1,2,3,4
counts = c(0,0,0,0)
counts[1] = 3
counts[2] = 13
counts[3] = 15
counts[4] = 8
rubric_score=c(rep(1,counts[1]),rep(2,counts[2]),rep(3,counts[3]),rep(4,counts[4]))
n = sum(counts)
prs=c(counts[1]/n,counts[2]/n,counts[3]/n,counts[4]/n) # decimal proportions
print(paste("n: ",n))
print(paste(c("Data Frequencies: ",rd(prs)),collapse = " "))
cum_pr_k=cumsum(prs) # cumulative probabilites
print(paste(c("Cumulative Frequencies: ",rd(cum_pr_k)),collapse = " "))
# set phi to 0. It is a placeholder for later corelations
phi = 0
stancode = "
data {
int<lower=0> n; // number of measurements
int<lower=1,upper = 4> rubric_score[n]; // measurements
real phi; // placeholder
}
parameters {
ordered[3] cutpoints;
}
model {
for (i in 1:n)
rubric_score[i]~ordered_logistic(phi,cutpoints);
cutpoints~normal(0,10);
}
"
writeLines(stancode,"mrubric.stan")
data_list = list(n=n,rubric_score=rubric_score,phi=phi)
mrubricord = stan("mrubric.stan",data = data_list,chains=4,cores=4,iter = 2000)
show(mrubricord)
# Name the logistic function its name
logistic <- plogis
# Extract means of the cutpoints
sumdat = mrubricord_summary$summary[,"mean"]
# Eliminate the mean of the lp__
sumdat = head(sumdat,-1)
# Convert to probabilities
cum_pt=logistic(sumdat)
cum_p=c(0,cum_pt,1)
p=rep(NA,4)
for(i in 1:4) p[i]=cum_p[i+1]-cum_p[i]
print(paste(c("Data Frequencies: ",rd(prs)),collapse = " "))
print(paste(c("Stan Frequencies: ",rd(p)),collapse = " "))