I am attempting to estimate a non-centered model with multiple levels of hierarchy, i.e. I have collected observations of companies over time, companies are nested within states, states are related to each other. My outcomes is binomial and can be thought of as the number of sales out of how many possible clients received a quote. I am trying for a model with an intercept and a coefficient on a (linear) time term. When I use my (partially) centered model below, I get divergences. How do I get a fully non-centered parameterization with multiple layers in my structure?
To try to ensure that this model works as I believe it should, I am using simulated data (code below to create the data).
Thanks!
set.seed(1234)
(state.a <- rnorm(3, 0, 1)) # state-level intercepts
(state.b <- rnorm(3, 0, 1)) # state-level coefficients on time
means <- c(1000, 1000, 1000, 1000, 1000, 1000)
# on average how many quotes are there in each state-company
state.means <- cbind.data.frame(a=rep(state.a, each=2)
, b=rep(state.b, each=2)
, states = rep(1:3, each=2))
data <- data.frame()
for (i in 1:6){ # for each state-company combo
a <- rnorm(1, state.means[i,1], 0.05) # a company-specific intercept
b <- rnorm(1, state.means[i,2], 0.05) # a company-specific coefficient on time
n <- round(rnorm(length(times), means[i], 5)) # the number of quotes in each time period
out <- rep(NA, length(times))
for (t in 1:length(times)){
out[t] <- rbinom(1, n[t], plogis(a + times[t] * b))
# simulate the probability of success at a given point in time, given the company-level intercept and slope
}
out <- cbind.data.frame(y = out
, np = n
, a = a
, state_a = state.means[i,1]
, b = b
, state_b = state.means[i,2]
, id = i
, states = state.means$states[i]
, times = times)
data <- rbind(data, out)
}
stan.data <- list(N = nrow(data)
, L = length(unique(data$id))
, y = data$y
, ll = data$id
, S = 3
, ss = state.means$states
, K = data$np
, times = data$times)
fit <- stan(file = "stan forum.stan"
, data = stan.data
, iter = 2000
, chains = 3
, control = list(max_treedepth = 25
, adapt_delta = 0.95))
data {
int<lower=0> N; // number of observations
int<lower=0> K[N]; // number of trials within a state-company group
int<lower=0> y[N]; // number of "successes" within a state-company group
int<lower=1> L; // number of state-company combinations
int<lower=1, upper=L> ll[N]; // mapping the state-companies to individual observations
int<lower=1> S; // number of states included
int<lower=1, upper=S> ss[L]; // mapping the states to companies
real<lower=0, upper=1> times[N]; // a variable representing time
}
parameters {
real<lower=0> sigma; // population sd of p(success)
vector[L] alpha_std; // used in centering
vector[S] intercept_state; // state-level intercepts
real<lower=0> intercept_sigma_state; // state-level sd of intercepts
vector[S] beta_t_state; // state-level coefficient on time
real<lower=0> beta_t_sigma_state; // state-level sd of coefficient on time
vector[L] intercept; // the resulting company-level intercept
vector[L] beta_t; // the resulting company-level coefficient on time
}
transformed parameters {
vector[N] mu;
// mean of success for every observation
//(impacted by state- and company-level factors)
for (n in 1:N)
mu[n] = intercept[ll[n]] + beta_t[ll[n]] * times[n];
}
model {
intercept_state ~ normal(0, 1); // hyperprior
beta_t_state ~ normal(0, 1);
sigma ~ normal(0.25, 0.1); // hyperprior
alpha_std ~ normal(0, 1); // prior (hierarchical)
for (l in 1:L){
intercept[l] ~ normal(intercept_state[ss[l]], intercept_sigma_state);
beta_t[l] ~ normal(beta_t_state[ss[l]], beta_t_sigma_state);
}
{
vector[N] alpha_std_ll;
for (n in 1:N)
alpha_std_ll[n] = alpha_std[ll[n]];
y ~ binomial_logit(K, mu + sigma * alpha_std_ll); // likelihood
}
}