Partial pooling on ordinal response (cumulative probit)

Just a few comments -
Divergent transitions should not be ignored. Your inference can’t be trusted when HMC doesn’t properly explore the posterior.
Michael Betancourt recently released an excellent update to his ordinal modeling case study that builds ordinal models from the ground up https://betanalpha.github.io/assets/chapters_html/ordinal_modeling.html
@Solomon In this case study he provides an example of the situation where one has unobserved categories. https://betanalpha.github.io/assets/chapters_html/ordinal_modeling.html#homogeneous-ordinal-probabilities-ii
This can manifest as model degeneracy as seen in the models above, however, the models can be fit provided that a prior model on the cutpoints is informative for all categories. This can be done in a couple of ways that result in an induced-Dirichlet prior on the cutpoints. Either derive the cutpoints from baseline probabilities and put a Dirichlet prior on those probabilities (this is what rstanarm does), or model the cutpoints directly and put an induced-Dirichlet prior model on them. This allows one to place a prior on the cutpoints using domain expertise about the probability for each ordinal category (something far more intuitive than cutpoints for a cumulative distribution function), and it keeps the prior on each cutpoint from overlapping (as is implied by the normal priors in your example, although maybe this is somehow handled behind the scenes in brms by an ordered type in Stan, not sure).

Anyway, we can fit a Stan model to @epkanol original data with the unobserved categories without any divergent transitions or ESS warnings.

library(rstan)

d1 <- data.frame(
  id=rep(c("A", "B", "C", "D", "E", "F", "G"), each=6),
  gender=c(rep("male", 6*4), rep("female", 6*3)),
  name=rep(c("Q1", "Q2", "Q3", "Q4", "Q5", "Q6"), times=7),
  response=c(rep(6,6), # resp.1, male
             c(rep(7,4), rep(6,2)), # resp.2, male
             rep(7,6), # resp.3, male
             c(7, rep(6,4), 5), # resp.4, male
             rep(7, 6), #resp.5, female
             rep(6, 6), #resp.6, female
             c(7, 6, 6, rep(5,3)) # resp.7, female
             ))

Now let’s view some prior predictive checks for the induced-Dirichlet prior model. Here we will derive the cutpoints from baseline probabilities rather than fooling with the Jacobian correction and modeling cutpoints directly.
For example, we can look at the prior pushforward for a Dirichlet with uniform probabilities on the ordinal outcome scale using Stan’s fixed parameter algorthim and generated quantities block and some plotting. A Dirichlet with alpha equal to a vector of ones.

#View an induced-Dirichlet prior model with uniform probabilities over the categories
stan_data <- list(N = 1000, ncut = 6, alpha = c(1,1,1,1,1,1,1))

stan_model <- "
functions {
  vector make_cutpoints(vector probabilities, int ncuts) {
    vector[ncuts] cutpoints;
    real running_sum = 0;
      for (n in 1:ncuts) {
        running_sum += probabilities[n];
        cutpoints[n] = inv_Phi(running_sum);
      }
    return cutpoints;
  }
}
data {
  int<lower=1> N;
  int ncut;
  vector[ncut+1] alpha;
}
generated quantities {
  vector[ncut + 1] pi;
  vector[ncut] cutpoints;
  vector[N] pred_y;
    pi = dirichlet_rng(alpha);
    cutpoints = make_cutpoints(pi, ncut);
    for (n in 1:N) {
      pred_y[n] = ordered_probit_rng(0, cutpoints);
    }
}
"

fit <- stan(model_code = stan_model, data = stan_data,
             chains = 1, iter = 1000, algorithm="Fixed_param")

pred_y <- extract(fit)$pred_y

c_light <- c("#eff3ff")
c_light_highlight <- c("#c6dbef")
c_mid <- c("#9ecae1")
c_mid_highlight <- c("#6baed6")
c_dark <- c("#3182bd")
c_dark_highlight <- c("#08519c")

B <- stan_data$ncut + 1

idx <- rep(1:B, each=2)
x <- sapply(1:length(idx), function(b) if(b %% 2 == 0) idx[b] + 0.5 else idx[b] - 0.5)

pred_counts <- sapply(1:500, function(n) 
                      hist(pred_y[n,], breaks=(1:(B + 1)) - 0.5, plot=FALSE)$counts)
pred_counts_prob <- pred_counts / 1000
probs = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
cred <- sapply(1:B, function(b) quantile(pred_counts_prob[b,], probs=probs))
pad_cred <- do.call(cbind, lapply(idx, function(n) cred[1:9,n]))

plot(1, type="n", main="Prior Predictive Distribution for pred_y",
     xlim=c(0.5, B + 0.5), xlab="pred_y",
     ylim=c(0, 1), ylab="Probability")

polygon(c(x, rev(x)), c(pad_cred[1,], rev(pad_cred[9,])),
        col = c_light, border = NA)
polygon(c(x, rev(x)), c(pad_cred[2,], rev(pad_cred[8,])),
        col = c_light_highlight, border = NA)
polygon(c(x, rev(x)), c(pad_cred[3,], rev(pad_cred[7,])),
        col = c_mid, border = NA)
polygon(c(x, rev(x)), c(pad_cred[4,], rev(pad_cred[6,])),
        col = c_mid_highlight, border = NA)
lines(x, pad_cred[5,], col=c_dark, lwd=2)

As we can see, the induced-Dirichlet(1,1,1,1,1,1,1) prior model pushes forward to uniform probability over the Likert categories.
If we had domain expertise regarding this, we might use something more informative.

We can now try to fit an ordered probit model to your data with unobserved categories using the induced-Dirichlet prior above for the cutpoints.

d1$gender_b <- 0
d1$gender_b <- ifelse(d1$gender=="female", 1, d1$gender_b)

stan_data <- list(N = length(d1$response), ncut = 6, likert = as.integer(d1$response), gender = d1$gender_b)

stan_model <- "
functions {
  vector make_cutpoints(vector probabilities, int ncuts) {
    vector[ncuts] cutpoints;
    real running_sum = 0;
      for (n in 1:ncuts) {
        running_sum += probabilities[n];
        cutpoints[n] = inv_Phi(running_sum);
      }
    return cutpoints;
  }
}
data {
  int<lower=1> N;
  int likert[N];
  vector[N] gender;
  int ncut;
}
parameters {
  simplex[ncut + 1] pi;
  real alpha;
  real beta;
}
transformed parameters {
  vector[ncut] cutpoints = make_cutpoints(pi, ncut);
}
model {
  // prior
  pi ~ dirichlet(rep_vector(1, ncut + 1));
  alpha ~ normal(0, 5);
  beta ~ normal(0, 5);
  // likelihood
  likert ~ ordered_probit(alpha + beta * gender, cutpoints);
}
generated quantities {
  vector[N] pred_y;
    for (n in 1:N) {
      pred_y[n] = ordered_probit_rng(alpha + beta * gender[n], cutpoints);
    }
}
"

fit_mod <- stan(model_code = stan_model, data = stan_data,
             chains = 4, iter = 2000, cores = 4)

Happily the model fits fine with no diagnostic warnings (at least in the random seed I used, but I don’t suspect any problems). There are no divergent transitions (or any other warning).

Now we can do a retrodictive check to see how well our model fits.

pred_y <- extract(fit_mod)$pred_y

c_light <- c("#eff3ff")
c_light_highlight <- c("#c6dbef")
c_mid <- c("#9ecae1")
c_mid_highlight <- c("#6baed6")
c_dark <- c("#3182bd")
c_dark_highlight <- c("#08519c")

B <- stan_data$ncut + 1

idx <- rep(1:B, each=2)
x <- sapply(1:length(idx), function(b) if(b %% 2 == 0) idx[b] + 0.5 else idx[b] - 0.5)

obs_counts <- hist(stan_data$likert, breaks=(1:(B + 1)) - 0.5, plot=FALSE)$counts
pad_obs_counts <- sapply(idx, function(n) obs_counts[n])

pred_counts <- sapply(1:1000, function(n) 
                      hist(pred_y[n,], breaks=(1:(B + 1)) - 0.5, plot=FALSE)$counts)
probs = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
cred <- sapply(1:B, function(b) quantile(pred_counts[b,], probs=probs))
pad_cred <- do.call(cbind, lapply(idx, function(n) cred[1:9,n]))

plot(1, type="n", main="Posterior Predictive Distribution for pred_y",
     xlim=c(0.5, B + 0.5), xlab="pred_y",
     ylim=c(0, max(c(obs_counts, cred[9,]))), ylab="")

polygon(c(x, rev(x)), c(pad_cred[1,], rev(pad_cred[9,])),
        col = c_light, border = NA)
polygon(c(x, rev(x)), c(pad_cred[2,], rev(pad_cred[8,])),
        col = c_light_highlight, border = NA)
polygon(c(x, rev(x)), c(pad_cred[3,], rev(pad_cred[7,])),
        col = c_mid, border = NA)
polygon(c(x, rev(x)), c(pad_cred[4,], rev(pad_cred[6,])),
        col = c_mid_highlight, border = NA)
lines(x, pad_cred[5,], col=c_dark, lwd=2)

lines(x, pad_obs_counts, col="white", lty=1, lw=2.5)
lines(x, pad_obs_counts, col="black", lty=1, lw=2)


Not too shaby!

2 Likes