Hi all,
I am trying to construct a binomial mixed effects modeling following example code that is located here, but I am trying to alter the code so that I can use a model matrix. However, the resulting intercept estimate of the model is different.
‘model 1’ is coded as on the website, and returns results similar to a model using glmer(). ‘Model 2’ intercept estimate is wrong, but I can’t figure out how to fix it. I think that it is an issue with the random effects part of the model, but no sure how to code it correctly. Any help would be appreciated. Here is the code that should run on most computers. Famous last words.
library(rstan)
library(lme4)
### get data
data(cbpp) # built into lme4
### first with ML
m2_lme4 <- glmer( cbind(incidence,size-incidence) ~ period + (1|herd) , data=cbpp , family=binomial )
print(m2_lme4)
N = nrow(cbpp) ## get number of samples
incidence = cbpp$incidence
Xmat = model.matrix(m2_lme4) ### get the model matrix
period2 = Xmat[,2]
period3 = Xmat[,3]
period4 = Xmat[,4]
herd_index = cbpp$herd
bin_total = cbpp$size
N_herd_index = length(levels(cbpp$herd))
data.list = list(N = N, incidence = incidence, period2 = period2, period3 = period3,
period4 = period4,herd_index = as.numeric(herd_index), bin_total = bin_total,
N_herd_index = N_herd_index)
### script 1
Binom_ex1 = 'data{
int N;
int incidence[N];
real period2[N];
real period3[N];
real period4[N];
int herd_index[N];
int bin_total[N];
int N_herd_index;
}
parameters{
real Intercept;
real beta_period2;
real beta_period3;
real beta_period4;
real vary_herd_index[N_herd_index];
real<lower=0> sigma_herd_index;
}
model{
real vary[N];
real glm[N];
// Priors
Intercept ~ normal( 0 , 100 );
beta_period2 ~ normal( 0 , 100 );
beta_period3 ~ normal( 0 , 100 );
beta_period4 ~ normal( 0 , 100 );
sigma_herd_index ~ uniform( 0 , 100 );
// Varying effects
for ( j in 1:N_herd_index ) vary_herd_index[j] ~ normal( 0 , sigma_herd_index );
// Fixed effects
for ( i in 1:N ) {
vary[i] = vary_herd_index[herd_index[i]];
glm[i] = vary[i] + Intercept
+ beta_period2 * period2[i]
+ beta_period3 * period3[i]
+ beta_period4 * period4[i];
glm[i] = inv_logit( glm[i] );
}
incidence ~ binomial( bin_total , glm );
}
'
### script 2
Binom_ex2 = 'data{
int N;
int incidence[N];
int bin_total[N];
int herd_index[N];
int N_herd_index;
matrix [N,4] Xmat;
}
parameters{
vector[4] beta;
real<lower=0> vary_herd_index[N_herd_index];
real<lower=0> sigma_herd_index;
}
model{
vector[N] vary;
vector[N] glm;
// Priors
beta ~ normal( 0 , 100 );
sigma_herd_index ~ uniform( 0 , 100 );
// Varying effects
for ( j in 1:N_herd_index ) vary_herd_index[j] ~ normal( 0 , sigma_herd_index );
// Fixed effects
for ( i in 1:N ) {
vary[i] = vary_herd_index[herd_index[i]];
}
glm = inv_logit(vary + Xmat*beta);
incidence ~ binomial( bin_total , glm );
}
'
fit = stan(model_code = Binom_ex1,model_name = 'model 1',data = data.list,chains = 3,
iter = 5000, warmup = 500, thin = 3)
print(fit)
### Now try it with a model matrix ###
data.list = list(N=N, incidence = incidence, Xmat = Xmat,N_herd_index = N_herd_index,
bin_total = bin_total,herd_index = as.numeric(herd_index))
fit2 = stan(model_code = Binom_ex2, model_name = 'model 2',data = data.list,chains = 3,
iter = 5000, warmup = 500, thin = 3)
print(fit2)