Ben,

Thanks for pointing me to the QR decomposition. Appreciate it.

I implemented it in the model, chains mix nicely, samples look good, etc. BUT, the coefficients recreated in generated quantities have a median of zero or very close to zero. That seems highly unlikely.

Stan file pasted below. This is a hurdle model for student activity counts, with a campus specific intercept. There are separate coefficients for each portion of the model (the probability of a zero, and the count if non-zero)

```
data {
int <lower=0> N; // Number of rows
int <lower=0> J; // Number of campus
int <lower=0> K; // Number of coef
int <lower=0> Y[N]; // Count of students
int<lower=1, upper=J> campus[N]; // Campus labels
matrix[N,K] X; // Covariates
}
transformed data{
// As per page 123 of stan language reference v15
matrix[N, K] Q_ast;
matrix[K, K] R_ast;
matrix[K, K] R_ast_inverse;
// thin and scale the QR decomposition
Q_ast = qr_Q(X)[, 1:K] * sqrt(N - 1);
R_ast = qr_R(X)[1:K, ] / sqrt(N - 1);
R_ast_inverse = inverse(R_ast);
}
parameters {
// Parameters for campus intercepts
vector[J] theta_z_eta;
vector[J] theta_c_eta;
real<lower=0> theta_z_scale;
real<lower=0> theta_c_scale;
// For non-centered coefficient estimation
vector[K] beta_z_eta;
vector[K] beta_c_eta;
real<lower=0> beta_z_scale;
real<lower=0> beta_c_scale;
}
transformed parameters{
vector[J] theta_z; // Campus intercept for zero prob part
vector[J] theta_c; // Campus intercept for count part
vector[K] beta_z; // Coefficients for zero part
vector[K] beta_c; // Coefficients for count part
theta_z = theta_z_eta * theta_z_scale;
theta_c = theta_c_eta * theta_c_scale;
beta_z = beta_z_eta * beta_z_scale;
beta_c = beta_c_eta * beta_c_scale;
}
model {
real theta;
real lambda;
vector[N] xb_z;
vector[N] xb_c;
theta_z_eta ~ normal(0,1);
theta_z_scale ~ gamma(10,10);
theta_c_eta ~ normal(0,1);
theta_c_scale ~ gamma(10,10);
beta_z_eta ~ normal(0,1);
beta_z_scale ~ gamma(10,10);
beta_c_eta ~ normal(0,1);
beta_c_scale ~ gamma(10,10);
xb_z = Q_ast * beta_z;
xb_c = Q_ast * beta_c;
for(n in 1:N){
theta = theta_z[campus[n]] + xb_z[n];
lambda = exp(theta_c[campus[n]] + xb_c[n]);
if(Y[n]==0){
target += bernoulli_logit_lpmf(1 | theta);
}
else{
target += bernoulli_logit_lpmf(0 | theta) + poisson_lpmf(Y[n] | lambda) - log1m_exp(-lambda);
}
}
}
generated quantities{
vector[K] beta_zero;
vector[K] beta_count;
beta_zero = R_ast_inverse * beta_z;
beta_count = R_ast_inverse * beta_c;
}
```