The outcome of interest is the number of diseases by medical practice (MP) modeled with covariates recorded by area (say: county level).
In order to solve the misalignment problem, I model the spatial and covariates effect as a weighted average of areas (plus an MP specific unstructured error term).
The weights are deterministic and derive from how many patients of an MP i come from a certain area j.
- Disease by MP: Y_i where i = 1,..., m
- Covariates by areas: X_j where j = 1,...,n
Y_i \sim Poi(E_i \rho_i)
\log(\rho_i) = \sum_{j\in Area_i}(\beta_0 + \beta_1X_{j1} + \beta_{2}X_{j2} + \phi_j) + u_i
\phi \sim CAR
The way I implemented the multiple membership part is with a matrix whose rows contain the weights, even though it works as long as I keep the number of observations low, when I use the full dataset (1368 MPs and 1010 areas) computational time is just too much.
However, as you can guess, the multiple membership matrix is full of zeroes so I am confident there could be room for improvement.
If needed, I can provide the data from a reduced sample.
functions {
real sparse_car_lpdf(vector phi, real tau, real alpha,
int[,] W_sparse, vector D_sparse, vector lambda, int n, int W_n) {
row_vector[n] phit_D; // phi' * D
row_vector[n] phit_W; // phi' * W
vector[n] ldet_terms;
phit_D = (phi .* D_sparse)';
phit_W = rep_row_vector(0, n);
for (i in 1:W_n) {
phit_W[W_sparse[i, 1]] = phit_W[W_sparse[i, 1]]
+ phi[W_sparse[i, 2]];
phit_W[W_sparse[i, 2]] = phit_W[W_sparse[i, 2]]
+ phi[W_sparse[i, 1]];
}
for (i in 1:n) ldet_terms[i] = log1m(alpha * lambda[i]);
return 0.5 * (n * log(tau)
+ sum(ldet_terms)
- tau * (phit_D * phi - alpha * (phit_W * phi)));
}
}
data {
int<lower = 1> n;
int<lower = 1> m;
// Covariates
int<lower=0> k; // number of predictors
matrix[n, k] X; // predictor matrix
// Outcomes
int<lower = 0> y[m];
vector[m] log_offset;
// Spatial matrices
matrix<lower = 0, upper = 1>[n, n] W; // adjacency matrix
int W_n; // number of adjacent region pairs
// Multiple membership
matrix[m,n] M_W;
}
transformed data {
// QR representation for design matrix
matrix[n, k] Q_ast;
matrix[k, k] R_ast;
matrix[k, k] R_ast_inverse;
// Adjacency
int W_sparse[W_n, 2]; // adjacency pairs
vector[n] D_sparse; // diagonal of D
vector[n] lambda; // eigenvalues of invsqrtD * W * invsqrtD
{ // generate sparse representation for W
int counter;
counter = 1;
// loop over upper triangular part of W to identify neighbor pairs
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
if (W[i, j] == 1) {
W_sparse[counter, 1] = i;
W_sparse[counter, 2] = j;
counter = counter + 1;
}
}
}
}
for (i in 1:n) D_sparse[i] = sum(W[i]);
{
vector[n] invsqrtD;
for (i in 1:n) {
invsqrtD[i] = 1 / sqrt(D_sparse[i]);
}
lambda = eigenvalues_sym(quad_form(W, diag_matrix(invsqrtD)));
}
// Thin and scale the QR decomposition
Q_ast = qr_Q(X)[, 1:k] * sqrt(n - 1);
R_ast = qr_R(X)[1:k, ] / sqrt(n - 1);
R_ast_inverse = inverse(R_ast);
}
parameters {
vector[n] phi;
real intercept;
vector[m] u;
real<lower = 0> tau;
real<lower = 0> tau_u;
real<lower = 0, upper = .99> alpha;
vector[k] theta; // coefficients on Q_ast
}
transformed parameters{
vector[m] rho;
vector[m] mult_memb;
mult_memb = M_W*(phi + intercept + Q_ast * theta);
for(i in 1:m) {
rho[i] = mult_memb[i] + u[i];
}
}
model {
phi ~ sparse_car(tau, alpha, W_sparse, D_sparse, lambda, n, W_n);
u ~ normal(0, tau_u);
intercept ~ normal(0, 1E2);
tau ~ gamma(1, 0.1);
tau_u ~ gamma(1, 0.1);
y ~ poisson_log(rho + log_offset);
}
generated quantities{
vector[k] beta;
// Compute actual regression coefficients
beta = R_ast_inverse * theta;
}