Following up on @Jesse’s suggestion, here’s some R and Stan code for getting started with models for observations nested in areal units.
This creates some data with K observations per areal unit:
library(geostan)
data(georgia)
N_geo <- nrow(georgia)
W <- shape2mat(georgia, "W")
mu_geo <- sim_sar(w=W, rho = 0.7)
K <- 15
mu <- rep(mu_geo, each = K)
y <- rnorm(mean = mu,
sd = 0.2,
n = length(mu))
dat <- data.frame(
GEOID = rep(georgia$GEOID, each = K),
y = y)
# converting GEOID to an index
id <- georgia$GEOID
dx <- data.frame(
GEOID = id,
geo_index = 1:N_geo
)
dat <- merge(dat, dx, by = "GEOID")
Here’s a Stan model example with a SAR model for the areal units. The user-defined function is taken from the documentation that Jesse linked to. I added an option to center the spatial model on zero or not (zmp) (i.e., choose non-centered or centered parameterization).
So you want to have a mean parameter alpha
(and you could add covariates or whatever to that) and then add another term for the area, which I’ve coded as geo
. Using the index:
\mu_i = \alpha + geo_i
where geo
is mean-zero (geo ~ mvnormal(0, Sigma)
). Instead of doing a loop like for (i in 1:n) mu[i] = alpha + geo[index[i]]
I used the index like mu = alpha + geo[index]
.
All of the data inputs for the spatial model are provided by geostan, so its less of a hassle than it may look at first (for those who are not familiar with it):
functions {
/**
* Log probability density of the simultaneous autoregressive (SAR) model (spatial error model)
*
* @param y Process to model
* @param mu Mean vector
* @param sigma Scale parameter
* @param rho Spatial dependence parameter
* @param W Sparse representation of W (its non-zero values)
* @param W_v Column indices for values in W
* @param W_u Row starting indices for values in W
* @param lambda Eigenvalues of W
* @param n Length of y
*
* @return Log probability density of SAR model up to additive constant
*/
real sar_normal_lpdf(vector y,
vector mu,
real sigma,
real rho,
vector W_w,
array[] int W_v,
array[] int W_u,
vector lambda,
int n) {
vector[n] z = y - mu;
real tau = 1 / sigma^2;
vector[n] ImrhoWz = z - csr_matrix_times_vector(n, n, rho * W_w, W_v , W_u , z);
real zVz = tau * dot_self(ImrhoWz);
real ldet_V = 2 * sum(log1m(rho * lambda)) - 2 * n * log(sigma);
return 0.5 * ( -n * log(2 * pi()) + ldet_V - zVz );
}
}
data {
int<lower=0,upper=1> zmp; // zero-mean CAR model? (non-centered param.)
int<lower=1> N;
vector[N] y;
// no. geog areas
int N_geo;
// geo identifier
array[N] int geo_index;
// SAR
int nW_w;
vector[nW_w] W_w;
array[nW_w] int W_v;
array[N_geo + 1] int W_u;
vector[N_geo] eigenvalues_w;
}
parameters {
real alpha;
real<lower=0> sigma;
vector[N_geo] geo;
real<lower=0> sigma_geo;
real<lower=1/min(eigenvalues_w), upper=1/max(eigenvalues_w)> rho_geo;
}
transformed parameters {
vector[N] mu;
vector[N_geo] mu_geo;
if (zmp) {
mu = alpha + geo[geo_index];
mu_geo = rep_vector(0, N_geo);
} else {
mu = geo[geo_index];
mu_geo = rep_vector(alpha, N_geo);
}
}
model {
target += normal_lpdf(y | mu, sigma);
// adjust these to have sensible priors for your project
target += normal_lpdf(sigma | 0, 2);
target += normal_lpdf(alpha | 8, 5);
target += student_t_lpdf(sigma_geo | 10, 0, 5);
// spatial model
target += sar_normal_lpdf(geo |
mu_geo,
sigma_geo,
rho_geo,
W_w,
W_v,
W_u,
eigenvalues_w,
N_geo);
}
This shows how to convert the data to the required format:
# start creating list of data for Stan
dl <- prep_sar_data(W)
dl$W <- NULL # might need to remove this W item or cmdstan will error
# add other data
dl$N <- nrow(dat)
dl$N_geo <- N_geo
dl$y <- dat$y
dl$geo_index <- dat$geo_index
dl$zmp <- 0
And draw samples:
library(cmdstanr)
mod <- cmdstan_model("nested_sar.stan")
S <- mod$sample(data = dl,
iter_sampling = 1e3,
parallel_chains = 4)
print(S, c('alpha', 'sigma', 'rho_geo', 'sigma_geo'))
returns:
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
alpha 0.05 0.06 0.28 0.26 -0.40 0.48 1.00 5832 2420
sigma 0.20 0.20 0.00 0.00 0.20 0.21 1.00 6148 2794
rho_geo 0.65 0.66 0.07 0.07 0.54 0.76 1.00 5091 2863
sigma_geo 1.09 1.09 0.06 0.06 0.99 1.20 1.00 7401 2727