BYM2 with unstructured country random effect

Still no luck with this btw - even after following your recommendations above. The spatial effects are just not willing to converge. See below for the attached Rhat. I copy below the latest code as well for reference, in case you spot something strange.

Worth noting that convergence improves if you really jack-up the rho_sp - e.g. with a beta(10,10) prior - but the data is doing far less work then and the posterior mean is ~ 0.45, which is close enough to 0.5 that makes me think the prior is just overwhelming the data.

functions {
  real standard_icar_disconnected_lpdf(vector phi,
				       int[ , ] adjacency,
				       int[ ] comp_size,
				       int[ , ] comp_members) {
				       
    if (size(adjacency) != 2) {
      reject("require 2 rows for adjacency array;"," found rows = ", size(adjacency));
    }
    if (size(comp_size) != size(comp_members)) {
      reject("require ", size(comp_size), " rows for members matrix;", " found rows = ", size(comp_members));
    }
    if (size(phi) != dims(comp_members)[2]) {
      reject("require ", size(phi), " columns for members matrix;", " found columns = ", dims(comp_members)[2]);
    }

    real total = 0;
    for (n in 1:size(comp_size)) {
      if (comp_size[n] > 1)
	      total += -0.5 * dot_self(phi[adjacency[1, comp_members[n, 1:comp_size[n]]]] - phi[adjacency[2, comp_members[n, 1:comp_size[n]]]]) + normal_lpdf(sum(phi[comp_members[n, 1:comp_size[n]]]) | 0, 0.001 * comp_size[n]);
    }
    return total;
    
  }
}
data {
  int<lower = 0> n; // number of observations
  int<lower = 0, upper = 1> Y[n]; // outcome

  // spatial structure
  int<lower = 0> I;  // number of nodes
  int<lower = 0> J;  // number of edges
  int<lower = 1, upper = I> edges[2, J];  // node[1, j] adjacent to node[2, j]

  int<lower=0, upper=I> K;  // number of components in spatial graph
  int<lower=0, upper=I> K_size[K];   // component sizes
  int<lower=0, upper=I> K_members[K, I];  // rows contain per-component areal region indices
  int<lower=0, upper = K> group_id[n]; // small-area id

  vector<lower=0>[K] tau_sp; // per-component scaling factor, 0 for singletons
  
  // random effects idx
  int<lower = 1,upper = I> state_id[n]; // small-area id

}
parameters {
           // intercept 
           real alpha;  
           // spatial effects
           real<lower = 0> sigma_sp;  // scale of spatial effects
  real<lower = 0, upper = 1> rho_sp;  // proportion of spatial effect thats spatially smoothed
                 vector[I] theta_sp;  // standardized heterogeneous spatial effects
                 vector[I] phi_sp;  // standardized spatially smoothed spatial effects
}
transformed parameters {
  vector[I] gamma;
  vector[n] mu;

  // each component has its own spatial smoothing
  for (k in 1:K) {
    if (K_size[k] == 1) {
                 gamma[K_members[k,1]] = theta_sp[K_members[k,1]]*sigma_sp;
    } else {
      gamma[K_members[k, 1:K_size[k]]] = ( sqrt(1 - rho_sp) * theta_sp[K_members[k, 1:K_size[k]]] + sqrt(rho_sp/tau_sp[k]) * phi_sp[K_members[k, 1:K_size[k]]] ) * sigma_sp;
      
    }
  }

  mu = alpha + gamma[state_id];

}
model {
  // global intercept prior
  alpha ~ normal(0,1);
  
  // spatial hyperpriors and priors
  sigma_sp ~ normal(0, 1);
  rho_sp ~ beta(5, 5);
  theta_sp ~ normal(0, 1);
  phi_sp ~ standard_icar_disconnected(edges, K_size, K_members);

  // likelihood
  Y ~ bernoulli_logit(mu);
}

convergence_total.pdf (8.6 KB)