Posterior predictive checking logistic binomial with random effects


#1

I would like to know if the block of generating quantities is correct for my model.

[Please include Stan progsurgical.data.hospital.R (1.1 KB)
ram and accompanying data if possible]

obs<-c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131)

n <-c(1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,4,4,4,4,6,6,6,7,7,7,8,8,8,9,9,9,9,9,9,9,10,10,10,10,11,11,12,12,13,13,14,14,15,15,15,15,16,16,16,17,17,17,17,17,17,17,18,18,18,18,19,20,20,20,21,21,22,24,24,25,25,25,26,26,27,27,28,28,32,33,33,33,33,35,35,35,37,38,38,38,40,40,40,41,42,43,43,44,45,45,47,47,48,51,51,56,57,60,61,61,61,68,69,69,73,74,75,75,79,91,99,104,133,152)

r<-c(0,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,1,0,2,1,0,3,2,4,2,0,1,4,0,1,1,0,2,0,0,2,4,0,0,2,1,1,0,0,1,3,0,0,1,0,2,3,0,0,3,1,1,1,1,4,3,3,1,0,2,2,4,4,3,2,4,1,3,0,4,1,2,3,4,4,2,2,4,2,3,0,0,2,5,5,1,1,3,1,1,3,1,2,6,0,2,2,1,2,8,6,1,6,4,1,3,5,2,4,3,4,5,2,6,8,5,0,6,8,7,3,3,9,7,18,17)

N<-131

# BUGS surgical example (Vol 1, Example 4)
# http://www.mrc-bsu.cam.ac.uk/wp-content/uploads/WinBUGS_Vol1.pdf
rm(list=ls(all=TRUE))
library(rstan)
library(shinystan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores(logical = TRUE))
chains = 4
iter = 1000
set.seed(4711)

sourceToList = function(file){
  source(file, local = TRUE)
  d = mget(ls())
  d$file = NULL
  d
}
# Data are the same for all models
data = sourceToList("surgical.data.hospital.R")
init = rep(list(sourceToList("surgical.init.R")), chains)
y_obs=data$r

programa2 <- "
data {
  int<lower=0> N;
  int r[N];
  int n[N];
}
parameters {
   real mu;
   real b[N];
   real<lower=0> sigmasq;
}
transformed parameters {
  real<lower=0> sigma;
  real<lower=0,upper=1> p[N];
  sigma = sqrt(sigmasq); 
  for (i in 1:N)
    p[i] = inv_logit(b[i]);
}

model {
  mu ~ normal(0.0, 1000.0); 
  sigmasq ~ inv_gamma(0.001, 0.001);
  b ~ normal(mu,sigma);
  r ~ binomial_logit(n,b);
}

generated quantities {
  vector[N] r_post;
  vector[N] p_post;
  for (i in 1:N){
    p_post[i] = inv_logit(b[i]); 
    r_post[i] = 1.0 * binomial_rng(n[i],p_post[i]);
  }
}
"
surgical_stanified2 = stan(model_code = programa2, data = data, chains = chains,
                iter = iter)
launch_shinystan(surgical_stanified2)

surgical_stanified_summary2 =
  summary(surgical_stanified,probs = c(.025,0.975),
          pars = "r_post")$summary[,c(1,3,4,5,6)]

(surgical_stanified_summary2)

[edit: escaped source]


#2

Hi,
the generated quantities seem OK at first glance. Note that you can format source code in the posts using backticks (`).

However your priors are quite different from what is usually recommended in Stan (see https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations) and the parametrization through sigma^2 is also probably not needed. Not all good practices for BUGS are also good practices in Stan and direct recoding of BUGS models may not be the best approach.

I am not sure about your use case, but weakly informative priors such as

mu ~ normal(0, 10);
sigma ~ normal(0, 10);

or

mu ~ normal(0, 10);
sigma ~ cauchy(0, 5);

might work better (really depends on what your data are).


#3

thank you very much for the answer and suggestions