I have it working no problem with the BYM2-a model from @cmcd - there must be something not working in the BYM2-b model, perhaps is application specific, but it’s just a fairly basic logit-BYM2 model for the 51 US areas… for the life of me I do not know what is wrong.
Another area where I might have something wrong is the data input - see if these make sense, they seem sensible to me but I could be wrong:
int<lower = 0> n; // number of observations
data.list.total$n
[1] 4827
int<lower = 0, upper = 1> Y[n]; // outcome
data.list.total$Y
[1] 1 0 0 0 1 0 0 0 0 1 0 1 0 0 …
// spatial structure
int<lower = 0> I; // number of nodes
data.list.total$I
[1] 51
int<lower = 0> J; // number of edges
data.list.total$J
[1] 109
int<lower = 1, upper = I> edges[2, J]; // node[1, j] adjacent to node[2, j]
data.list.total$edges
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] …
[1,] 1 1 1 1 3 3 3 3 3 4 …
[2,] 10 11 25 43 5 6 29 32 45 …
int<lower=0, upper=I> K; // number of components in spatial graph
data.list.total$K
[1] 3
int<lower=0, upper=I> K_size[K]; // component sizes
data.list.total$K_size
[1] 49 1 1
int<lower=0, upper=I> K_members[K, I]; // rows contain per-component areal region indices
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] …
[1,] 1 3 4 5 6 7 8 9 10 11 …
[2,] 2 0 0 0 0 0 0 0 0 0 …
[3,] 12 0 0 0 0 0 0 0 0 0 …
int<lower=0, upper = K> group_id[n]; // connected-chunk id
group_id.temp.empty
[1] 1 1 1 1 1 1 1 1 1 1 …
vector<lower=0>[K] tau_sp; // per-component scaling factor, 0 for singletons
data.list.total$tau_sp
[1] 0.5250207 0.0000000 0.0000000
int<lower = 1,upper = I> state_id[n]; // small-area id
data.list.total$state_id
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 …
I’m going to use BYM2-a for now - I find the indexing in BYM2-b more intuitive somehow, if you get to the bottom of it at some point please do drop a line here. Code below.
functions{
/**
* Log probability of the intrinsic conditional autoregressive (ICAR) prior,
* excluding additive constants.
*
* @param phi Vector of parameters for spatial smoothing (on unit scale, approximately)
* @param node1
* @param node2
* @param k number of groups
* @param group_size number of observational units in each group
* @param group_idx index of observations in order of their group membership
* @param has_theta If the model contains an independent partial pooling term, phi for singletons can be zeroed out; otherwise, they require a standard normal prior. Both BYM and BYM2 have theta.
*
* @return Log probability density of ICAR prior up to additive constant
**/
real icar_normal_lpdf(vector phi,
int[] node1,
int[] node2,
int k,
int[] group_size,
int[] group_idx,
int has_theta) {
real lp;
int pos=1;
lp = -0.5 * dot_self(phi[node1] - phi[node2]);
if (has_theta) {
for (j in 1:k) {
/* sum to zero constraint, for each connected group; singletons zero out */
lp += normal_lpdf(sum(phi[segment(group_idx, pos, group_size[j])]) | 0, 0.001 * group_size[j]);
pos += group_size[j];
}
} else {
/* has no theta */
for (j in 1:k) {
if (group_size[j] > 1) {
/* same as above for non-singletons: sum to zero constraint */
lp += normal_lpdf(sum(phi[segment(group_idx, pos, group_size[j])]) | 0, 0.001 * group_size[j]);
} else {
/* its a singleton: independent */
lp += std_normal_lpdf(phi[segment(group_idx, pos, group_size[j])]);
}
pos += group_size[j];
}
}
return lp;
}
/**
* Combine local and global partial-pooling components into the convolved BYM2 term.
*
* @param phi_tilde local (spatially autocorrelated) component
* @param theta_tilde global component
* @param spatial_scale scale parameter for the convolution term
* @param n number of spatial units
* @param k number of connected groups
* @param group_size number of observational units in each group
* @param group_idx index of observations in order of their group membership
* @param logit_rho proportion of convolution that is spatially autocorrelated, logit transformed
* @param scale_factor The scaling factor for the BYM2 model. See scale_c R function, using R-INLA.
*
* @return BYM2 convolution vector
*/
vector convolve_bym2(vector phi_tilde,
vector theta_tilde,
real spatial_scale,
int n,
int k,
int[] group_size,
int[] group_idx,
real rho,
vector scale_factor
) {
vector[n] convolution;
int pos=1;
for (j in 1:k) {
if (group_size[j] == 1) {
convolution[ segment(group_idx, pos, group_size[j]) ] = spatial_scale * theta_tilde[ segment(group_idx, pos, group_size[j]) ];
} else {
convolution[ segment(group_idx, pos, group_size[j]) ] = spatial_scale * (
sqrt(rho / scale_factor[j]) * phi_tilde[ segment(group_idx, pos, group_size[j]) ] +
sqrt(1 - rho) * theta_tilde[ segment(group_idx, pos, group_size[j]) ]
);
}
pos += group_size[j];
}
return convolution;
}
}
data {
int<lower = 1> n; // total number of observations
int<lower = 0> Y[n]; // vector of labels
int<lower=1> I; // no. of spatial units
int<lower=1> k; // no. of groups
int group_size[k]; // observational units per group
int group_idx[I]; // index of observations, ordered by group
int state_id[n]; // index of states in the data
int<lower=1> n_edges;
int<lower=1, upper=n> node1[n_edges];
int<lower=1, upper=n> node2[n_edges];
int<lower=1, upper=k> comp_id[I];
vector[k] scale_factor; // BYM2 scale factor, with singletons represented by 1
}
transformed data {
int<lower=0,upper=1> has_theta=1;
}
parameters {
real alpha;
vector[I] phi_tilde;
vector[I] theta_tilde;
real<lower=0> spatial_scale;
real<lower=0,upper=1> rho;
}
transformed parameters{
vector[I] convolution;
vector[n] mu;
convolution = convolve_bym2(phi_tilde, theta_tilde, spatial_scale, I, k, group_size, group_idx, rho, scale_factor);
// linear function of the logit-scale propensity to be a recruit
mu = alpha + convolution[state_id];
}
model {
phi_tilde ~ icar_normal(node1, node2, k, group_size, group_idx, has_theta);
theta_tilde ~ std_normal();
spatial_scale ~ std_normal();
rho ~ beta(1,1);
alpha ~ std_normal();
// // // Likelihood
Y ~ bernoulli_logit(mu);
}