Model Posterior Predictions Outside the Possible Range of the Data

I am having a lot of fun right now learning the ropes of modeling in Stan. At the moment I'm wrestling with a model of a mixed between- and within-subjects factorial experimental design. There are three groups of subjects, and each subject indicates how much they expect each of three different beverages (water, decaf, and coffee) to reduce their caffeine withdrawal. The outcome variable, expectancy of withdrawal reduction, was measured on a Visual Analog Scale from 0 to 10, with 0 indicating no expectation of withdrawal reduction and 10 indicating a very high expectation. I want to test whether there are between-group differences in the expected withdrawal-reduction potential of the three beverages.

Here is the data:

df <- data.frame(id = rep(1:46, each = 3),
                 group = c(3,3,3,1,1,1,3,3,3,1,1,1,3,3,3,1,1,1,3,3,3,2,2,2,1,1,1,3,3,3,3,3,3,2,2,2,3,3,3,1,1,1,2,2,2,3,3,3,2,2,2,2,2,2,3,3,3,1,1,1,2,2,2,3,3,3,2,2,2,3,3,3,3,3,3,2,2,2,3,3,3,3,3,3,1,1,1,3,3,3,3,3,3,1,1,1,2,2,2,2,2,2,1,1,1,2,2,2,2,2,2,1,1,1,1,1,1,2,2,2,2,2,2,1,1,1,1,1,1,3,3,3,1,1,1,3,3,3),
                 bevType = rep(c(3,2,1), times = 46),
                 score = c(2.9,1.0,0.0,9.5,5.0,4.5,9.0,3.0,5.0,5.0,0.0,3.0,9.5,2.0,3.0,8.5,0.0,6.0,5.2,3.0,4.0,8.4,7.0,2.0,10.0,0.0,3.0,7.3,1.0,1.8,8.5,2.0,9.0,10.0,5.0,10.0,8.3,2.0,5.0,6.0,0.0,5.0,6.0,0.0,5.0,10.0,0.0,5.0,6.8,1.0,4.8,8.0,1.0,4.0,7.0,4.0,6.0,6.5,1.0,3.1,9.0,1.0,0.0,6.0,0.0,2.0,9.5,4.0,6.0,8.0,1.0,3.8,0.4,0.0,7.0,7.0,0.0,3.0,9.0,2.0,5.0,9.5,2.0,7.0,7.9,5.0,4.9,8.0,1.0,1.0,9.3,5.0,7.9,6.5,2.0,3.0,8.0,2.0,6.0,10.0,0.0,5.0,6.0,0.0,5.0,6.8,0.1,7.0,8.0,3.0,9.1,8.2,0.0,7.9,8.2,5.0,0.0,9.2,1.0,3.1,9.1,3.0,0.6,5.7,2.0,5.1,7.0,0.0,7.4,8.0,1.0,1.5,9.1,4.0,4.3,8.5,8.0,5.0))

Now for the model. The model has a grand-mean parameter a, a categorical predictor representing the groups' deflections from the grand mean (bGroup), a term for the deflections of the different beverage types from the grand mean (bBev), a term for each subject's intercept (bSubj), and a term for the group-by-beverage interaction (bGxB). I also estimated separate noise parameters for each beverage type.
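
Written out, the likelihood I'm aiming for is

$$\text{score}_i \sim \text{Normal}\left(a + bGroup_{g[i]} + bBev_{b[i]} + bSubj_{s[i]} + bGxB_{g[i],\,b[i]},\ \sigma_{b[i]}\right)$$

where $g[i]$, $b[i]$, and $s[i]$ are the group, beverage type, and subject of observation $i$.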

To allow posterior predictive checks I drew from the joint posterior using the generated quantities block and the normal_rng function.

### Step 1: Put data into list
dList <- list(N = 138,
              nSubj = 46,
              nGroup = 3,
              nBev = 3,
              sIndex = df$id,
              gIndex = df$group,
              bIndex = df$bevType,
              score = df$score,
              gMean = 4.718841,
              gSD = 3.17)
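
For reference, the hard-coded gMean and gSD are just the grand mean and standard deviation of the raw scores:

gMean <- mean(df$score)   # ~4.718841, as hard-coded above
gSD   <- sd(df$score)     # ~3.17, as hard-coded above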


### Step 2: Write the model
write("
      data{
      int<lower=1> N;
      int<lower=1> nSubj;
      int<lower=1> nGroup;
      int<lower=1> nBev;
      int<lower=1,upper=nSubj> sIndex[N];
      int<lower=1,upper=nGroup> gIndex[N];
      int<lower=1,upper=nBev> bIndex[N];
      real score[N];
      real gMean;
      real gSD;
      }
      
      parameters{
      real a;
      vector[nSubj] bSubj;
      vector[nGroup] bGroup;
      vector[nBev] bBev;
      vector[nBev] bGxB[nGroup];      // array of vectors: one vector of beverage deflections per group
      vector<lower=0>[nBev] sigma;    // separate noise SD for each beverage type
      real<lower=0> sigma_s;
      real<lower=0> sigma_g;
      real<lower=0> sigma_b;
      real<lower=0> sigma_gb;
      }
      
      model{
      vector[N] mu;
      
      //hyper-priors
      sigma_s ~ normal(0,10);     
      sigma_g ~ normal(0,10);
      sigma_b ~ normal(0,10);
      sigma_gb ~ normal(0,10);
      
      
      //priors
      sigma ~ cauchy(0,1);
      a ~ normal(gMean, gSD);
      bSubj ~ normal(0, sigma_s);
      bGroup ~ normal(0,sigma_g);
      bBev ~ normal(0,sigma_b);
      for (i in 1:nGroup) {               //hierarchical prior on interaction
      bGxB[i] ~ normal(0, sigma_gb);
      }
      
      // likelihood
      for (i in 1:N){
      score[i] ~ normal(a + bGroup[gIndex[i]] + bBev[bIndex[i]] + bSubj[sIndex[i]] + bGxB[gIndex[i]][bIndex[i]], sigma[bIndex[i]]);
      }
      }
      
      generated quantities{
      real y_draw[N];
      for (i in 1:N) {
      y_draw[i] = normal_rng(a + bGroup[gIndex[i]] + bBev[bIndex[i]] + bSubj[sIndex[i]] + bGxB[gIndex[i]][bIndex[i]], sigma[bIndex[i]]);
      }
      }
      ", file = "temp.stan")

### Step 3: Generate the chains
library(rstan)

mod <- stan(file = "temp.stan",
            data = dList,
            iter = 5000,
            warmup = 3000,
            cores = 1,
            chains = 1)
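
With only a single chain, I at least eyeball n_eff and Rhat in the fit summary before trusting the draws, e.g.:

print(mod, pars = c("a", "bGroup", "bBev", "sigma"))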

Now to extract the draws from the joint posterior and generate estimates of the group means with their upper and lower 95% HPDI limits. First we need a function to calculate the HPDI.

### Step 4: Function to calculate the HPDI
HPDIFunct <- function (vector) {
  sortVec <- sort(vector)
  # number of draws spanned by a 95% window
  ninetyFiveVec <- ceiling(.95 * length(sortVec))
  # number of candidate 95% windows to slide across the sorted draws
  fiveVec <- length(sortVec) - ninetyFiveVec
  # width of each candidate window; the HPDI is the narrowest one
  diffVec <- sapply(1:fiveVec, function (i) sortVec[i + ninetyFiveVec] - sortVec[i])
  minVal <- sortVec[which.min(diffVec)]
  maxVal <- sortVec[which.min(diffVec) + ninetyFiveVec]
  return(list(sortVec, minVal, maxVal))
}
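
As a quick sanity check, for draws from a standard normal the function should return limits of roughly ±1.96:

set.seed(1)
testDraws <- rnorm(4000)
HPDIFunct(testDraws)[[2]]   # lower 95% HPDI limit, roughly -1.96
HPDIFunct(testDraws)[[3]]   # upper 95% HPDI limit, roughly  1.96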

Now to extract the draws from the posterior.

### Step 5: Posterior predictive checks
y_draw <- data.frame(extract(mod, pars = "y_draw"))

And plot the mean and the lower and upper HPDI limits of these draws against the actual data.

df$drawMean <- apply(y_draw, 2, mean)
df$HPDI_Low <- apply(y_draw, 2, function(i) HPDIFunct(i)[[2]][1])
df$HPDI_Hi <- apply(y_draw, 2, function(i) HPDIFunct(i)[[3]][1])

### Step 6: Plot posterior draws against actual data
library(ggplot2)

ggplot(df, aes(x = factor(bevType), colour = factor(group))) +
       geom_jitter(aes(y = score), shape = 1, position = position_dodge(width=0.9)) +
       geom_point(aes(y = drawMean), position = position_dodge(width=0.9), stat = "summary", fun.y = "mean", shape = 3, size = 3, stroke = 2) +
       geom_point(aes(y = HPDI_Low), position = position_dodge(width=0.9), stat = "summary", fun.y = "mean", shape = 1, size = 3, stroke = 1) +
       geom_point(aes(y = HPDI_Hi), position = position_dodge(width=0.9), stat = "summary", fun.y = "mean", shape = 1, size = 3, stroke = 1) +
       scale_colour_manual(name = "Experimental Group", labels = c("Group 1", "Group 2", "Group 3"), values = c("#616a6b", "#00AFBB", "#E7B800")) +  
       scale_x_discrete(labels = c("Water", "Decaf", "Coffee")) + 
       labs(x = "Beverage Type", y = "Expectancy of Withdrawal Alleviation") +
       scale_y_continuous(breaks = seq(0,10,2)) +
       theme(axis.text.x = element_text(size = 12),
             axis.title.x = element_text(face = "bold"),
             axis.title.y = element_text(face = "bold"),
             axis.text.y = element_text(size = 12),
             legend.title = element_text(size = 13, face = "bold"))

[Plot: jittered raw scores (small open circles) with posterior predictive means (crosses) and 95% HPDI limits (large open circles), by beverage type and experimental group]

For the Water expectancies the model seems to represent both the centre (crosses) and the spread (open circles) of the data quite well. But this breaks down for the Decaf and Coffee expectancies. For Decaf, the lower HPDI limit sits below the range of possible values (lower limit = 0) and the spread of the posterior draws (the open circles in each group) is too large. For Coffee, the upper HPDI limit sits above the range of the data (upper limit = 10) and the spread is again too large for the actual data.
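
A quick way to quantify the problem is the proportion of posterior predictive draws falling outside the possible 0-10 range, averaged within each beverage type:

outOfRange <- colMeans(y_draw < 0 | y_draw > 10)   # per-observation proportion of out-of-range draws
tapply(outOfRange, df$bevType, mean)               # averaged by beverage type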

How do I constrain the draws from the joint posterior to the actual range of the data?

Is there some sort of brute-force way to constrain the draws from the posterior, or would a more adaptable estimation of the differences in variance across the three beverage conditions be more effective?