Improving estimates for fast CAR parameters

Hi everybody,

I am trying to validate this implementation of the CAR prior (based on Joseph’s case study), but I’m struggling to recover the true parameters of the model
y \sim \text{Poisson}(\exp(\gamma + X\beta + \phi)) where \phi \sim N(0, \left[\tau(D - \alpha W)\right]^{-1})

In particular, \alpha seems to be problematic: for a true value of 0.8, here is what I get:


Inference for Stan model: Univariate_CAR.
1 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=1000.

          mean se_mean    sd    2.5%     25%     50%     75%   97.5% n_eff Rhat
nought   0.61    0.01  0.09    0.44    0.56    0.61    0.66    0.77   177 1.00
alpha    0.53    0.02  0.23    0.04    0.36    0.54    0.71    0.93   163 1.01
tau      1.75    0.04  0.51    1.06    1.41    1.66    2.00    2.91   193 1.00
lp__   3242.40    1.23 14.62 3211.99 3232.60 3243.47 3251.92 3269.97   141 1.00

Samples were drawn using NUTS(diag_e) at Mon Sep 09 17:49:46 2019.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

The other true values are \gamma = 0.6 (nought is \gamma) and \tau = 2.


functions {
  real sparse_car_lpdf(vector phi, real alpha,
                       // real tau, (tau is factored out: phi is scaled by inv_sqrt(tau1) in transformed parameters)
                       int[,] W_sparse, vector D_sparse,
                       vector lambda, int n, int W_n) {
      row_vector[n] phit_D; // phi' * D
      row_vector[n] phit_W; // phi' * W
      vector[n] ldet_terms;
    
      phit_D = (phi .* D_sparse)';
      phit_W = rep_row_vector(0, n);
      for (i in 1:W_n) {
        phit_W[W_sparse[i, 1]] = phit_W[W_sparse[i, 1]] + phi[W_sparse[i, 2]];
        phit_W[W_sparse[i, 2]] = phit_W[W_sparse[i, 2]] + phi[W_sparse[i, 1]];
      }
    
      for (i in 1:n) ldet_terms[i] = log1m(alpha * lambda[i]);
      return 0.5 * (sum(ldet_terms)
                    - (phit_D * phi - alpha*(phit_W * phi)));
  }
}
data {
  int<lower = 1> n;
  // Covariates
  int<lower=0> k;   // number of predictors
  matrix[n, k] X;   // predictor matrix
  // Outcomes
  int<lower = 0> y1[n];
  vector[n] log_offset1;
  // Spatial matrices
  matrix<lower = 0, upper = 1>[n, n] W; // adjacency matrix
  matrix<lower = 0>[n, n] D;
  int W_n; // number of adjacent region pairs
}
transformed data {
  // QR representation for design matrix
  matrix[n, k] Q_ast1;
  matrix[k, k] R_ast1;
  matrix[k, k] R_ast_inverse1;
  // Adjacency
  int W_sparse[W_n, 2];   // adjacency pairs
  vector[n] D_sparse;     // diagonal of D (number of neighbors for each site)
  vector[n] lambda;       // eigenvalues of invsqrtD * W * invsqrtD
  vector[n] zeros;
  { // generate sparse representation for W
  int counter;
  counter = 1;
  // loop over upper triangular part of W to identify neighbor pairs
    for (i in 1:(n - 1)) {
      for (j in (i + 1):n) {
        if (W[i, j] == 1) {
          W_sparse[counter, 1] = i;
          W_sparse[counter, 2] = j;
          counter = counter + 1;
        }
      }
    }
  }
  for (i in 1:n) D_sparse[i] = sum(W[i]);
  {
    vector[n] invsqrtD;  
    for (i in 1:n) {
      invsqrtD[i] = 1 / sqrt(D_sparse[i]);
    }
    lambda = eigenvalues_sym(quad_form(W, diag_matrix(invsqrtD)));
  }
  // Thin and scale the QR decomposition
  Q_ast1 = qr_Q(X)[, 1:k] * sqrt(n - 1);
  R_ast1 = qr_R(X)[1:k, ] / sqrt(n - 1);
  R_ast_inverse1 = inverse(R_ast1);
  zeros = rep_vector(0, n);
}
parameters {
  vector[n] phi1;
  real nought;
  real<lower = 0, upper = 1> alpha1;
  real<lower = 0> tau1;
  vector[k] theta1; // coefficients on Q_ast
}
transformed parameters{
  vector[n] rho_1;
  rho_1 = phi1*inv_sqrt(tau1) + nought + Q_ast1 * theta1;
}
model {
  phi1 ~ sparse_car(alpha1, W_sparse, D_sparse, lambda, n, W_n);
  nought ~ normal(0, 3);
  tau1 ~ cauchy(0, 1);
  y1 ~ poisson_log(rho_1 + log_offset1);
}
generated quantities {
  vector[k] beta1;
  // Compute actual regression coefficients
  beta1 = R_ast_inverse1 * theta1; 
 }

Additionally, here is the code I use to simulate the CAR data:
# Adjacency
x.easting <- 1:side
x.northing <- 1:side
Grid <- expand.grid(x.easting, x.northing)
K <- nrow(Grid)
## set up distance and neighbourhood (W, based on sharing a common border)
## matrices
distance <- as.matrix(dist(Grid))
W <- array(0, c(K, K))
W[distance == 1] <- 1
adj_mat <- tau * (diag(rowSums(W)) - alpha * W)
var_phi_u <- solve(adj_mat)
## Generate the covariate component
set.seed(384)
x1 <- rnorm(K) # ; x1 <- (x1 - min(x1))/diff(range(x1))
x2 <- rnorm(K) # ; x2 <- (x2 - min(x2))/diff(range(x2))
beta1.1 <- 0.4
beta2.1 <- 1.4
int1 <- 0.6
## Jin version
set.seed(450)
phi_tot <- mvrnorm(n = 1, mu = rep(0, K), Sigma = var_phi_u)
## Relative risk
if (int == T) {
  lp <- cbind(rep(1, K), x1, x2) %*% c(int1, beta1.1, beta2.1) + phi_tot
} else {
  lp <- cbind(x1, x2) %*% c(beta1.1, beta2.1) + phi_tot
}

Y_u <- rpois(n = K, lambda = exp(lp))
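
As a sanity check on the simulation, the marginal scale of \phi implied jointly by \tau and \alpha can be inspected directly; a minimal sketch, assuming W, tau, and alpha are defined as above:

# tau and alpha jointly set the marginal variance of phi, so they can
# trade off against each other; inspecting the implied scale helps spot this
marginal_sd <- sqrt(diag(solve(tau * (diag(rowSums(W)) - alpha * W))))
summary(marginal_sd)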


Do you have any ideas on how to approach this and improve the estimates for \alpha and \tau?

https://statmodeling.stat.columbia.edu/2016/09/02/two-weird-tricks-for-fast-conditional-autoregressive-models-in-stan/#comment-302825

Ben’s link to the discussion on the blog provides some good insights, including notes on potential computational issues with the sparse CAR case study. FWIW, I’d use an ICAR model instead: https://mc-stan.org/users/documentation/case-studies/icar_stan.html
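
For a sense of what that looks like, the core of the ICAR prior in that case study is a pairwise-difference formulation over the adjacency edges, roughly along these lines (node1 and node2 hold the two endpoints of each neighbour pair, and the soft sum-to-zero constraint is needed because the ICAR prior on its own is improper):

model {
  // ICAR prior: penalise squared differences between neighbouring phi's
  target += -0.5 * dot_self(phi[node1] - phi[node2]);
  // soft sum-to-zero constraint, since the ICAR prior is improper
  sum(phi) ~ normal(0, 0.001 * n);
}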

However, if you’re sure you need a proper CAR model, I would recommend simplifying your model to debug any issues. For example, remove the effect of the extra covariates and go with the non-sparse version:

data {
  int<lower = 1> n;
  int<lower = 0> y[n];
  matrix<lower = 0, upper = 1>[n, n] W; // adjacency matrix
}

transformed data {
  vector[n] zeros = rep_vector(0, n);
  matrix<lower = 0>[n, n] D;
  {
    vector[n] W_rowsums;
    for (i in 1:n) {
      W_rowsums[i] = sum(W[i, ]);
    }
    D = diag_matrix(W_rowsums);
  }
}

parameters {
  vector[n] phi;
  real intercept;
  real<lower = 0, upper = 1> alpha;
  real<lower = 0> tau;
}

model {
  phi ~ multi_normal_prec(zeros, tau * (D - alpha * W));
  intercept ~ std_normal();
  tau ~ std_normal();
  y ~ poisson_log(phi + intercept);
}

With a correspondingly simple data simulation script in R:

library(MASS)
library(rstan)


# Compute adjacency matrix W ----------------------------------------------

grid_size <- 10
grid <- expand.grid(x = 1:grid_size, y = 1:grid_size)
n <- nrow(grid)

distance <- as.matrix(dist(grid))

W <- array(0, c(n, n))
W[distance == 1] <- 1


# Set parameter values and simulate data ----------------------------------

tau <- 2
alpha <- 0.8
Tau <- tau*(diag(rowSums(W)) - alpha*W)
Sigma <- solve(Tau)

phi <- mvrnorm(n = 1, mu = rep(0, n), Sigma = Sigma)
intercept <- 0.6

y <- rpois(n = n, lambda = exp(intercept + phi))


# Fit model ---------------------------------------------------------------

stan_d <- list(n = n,
               y = y, 
               W = W)

m_init <- stan_model('test.stan')
m_fit <- sampling(m_init, data = stan_d, cores = 2)

pars_to_watch <- c('intercept', 'alpha', 'tau')
print(m_fit, pars = pars_to_watch)
traceplot(m_fit, pars = pars_to_watch)
pairs(m_fit, pars = pars_to_watch)
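
To make the parameter-recovery check itself concrete, something along these lines (a sketch using rstan::extract, with the true values from the simulation above) shows at a glance whether the truth falls inside the posterior intervals:

post <- rstan::extract(m_fit, pars = pars_to_watch)
truth <- c(intercept = 0.6, alpha = 0.8, tau = 2)
for (p in names(truth)) {
  # 95% posterior interval for each parameter vs. the simulated truth
  ci <- quantile(post[[p]], probs = c(0.025, 0.975))
  cat(p, ": 95% interval [", round(ci[1], 2), ",", round(ci[2], 2),
      "] vs true value", truth[p], "\n")
}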

Once (and if) you’re satisfied, you can start incrementally adding back complexity and trying to identify where any issues arise.

In the future, it would be worth providing runnable examples (the code you pasted uses some variables that are never defined, which makes it harder for others to help).
