I am new here and am working on a model of word frequencies (see the model below for reference). For certain words, some R-hat values in the fit object are high. When I inspect the trace plots of the fit object, I see “jumps” occurring (see below). Can anyone explain these jumps? What causes them, how should I interpret them, and do they indicate a poorly specified model?
Thanks!
model:
“”“STAN”"
data {
  // Number of days in the series. The model block indexes daily_lp[2]
  // unconditionally, so at least two days are required; the bound makes
  // invalid input fail at data-load time instead of with an index error
  // during sampling.
  int<lower=2> N_DAYS;
  // Per-day count that multiplies daily_lp[i] in the binomial term of the
  // model block (the number of "successes"; presumably occurrences of the
  // word on that day -- confirm against the data pipeline).
  int<lower=0> cnt[N_DAYS];
  // Per-day total from which cnt[i] is subtracted in the binomial term
  // (the number of trials). NOTE(review): the likelihood assumes
  // cnt[i] <= z[i]; this is not validated here.
  int<lower=0> z[N_DAYS];
}
transformed data {
// No transformed data are defined for this model.
}
// Sampled quantities. daily_lp is a per-day latent value; the remaining
// parameters are shared across all days.
parameters {
real<upper=0> daily_lp[N_DAYS]; // each day's log-probability (<= 0; exponentiated in the binomial term)
real alpha; // shape (skewness) argument of the skew-normal component
real mu; // location of the skew-normal component before the trend term is added
real<lower=0> stdev; // scale of the skew-normal component
real<lower=0> drift; // standard deviation of the normal step centred on the previous day's value
real A; // coefficient of the day-index trend term added to mu
real<lower=0,upper=1> theta; // mixture weight on the skew-normal component; (1 - theta) goes to the random-walk normal component
}
transformed parameters {
// No transformed parameters are defined for this model.
}
model {
  // Priors. stdev and drift are declared with lower=0, so their Cauchy
  // priors are effectively half-Cauchy.
  mu ~ cauchy(0, 1);
  stdev ~ cauchy(0, 1);
  alpha ~ cauchy(0, 1);
  drift ~ cauchy(0, 1);
  theta ~ beta(1, 1);
  A ~ normal(0, 1);

  // The first two days are drawn from the skew-normal component alone,
  // centred on mu with no trend term.
  target += skew_normal_lpdf(daily_lp[1] | mu, stdev, alpha);
  target += skew_normal_lpdf(daily_lp[2] | mu, stdev, alpha);

  // From day 3 on, each day's log-probability is a two-component mixture:
  //   weight theta     : skew-normal around a linearly trending mean
  //   weight 1 - theta : normal random-walk step from the previous day
  for (d in 3:N_DAYS) {
    // d and N_DAYS are both ints; `d / N_DAYS` would be integer division
    // (0 for every d < N_DAYS, 1 for d == N_DAYS), which collapses the
    // trend to a constant offset. Promote to real before dividing.
    real day_frac = (d * 1.0) / N_DAYS;
    real daymean = mu + A * (day_frac - 0.5);
    // log_mix(theta, lp1, lp2) computes
    // log_sum_exp(log(theta) + lp1, log1m(theta) + lp2).
    target += log_mix(theta,
                      skew_normal_lpdf(daily_lp[d] | daymean, stdev, alpha),
                      normal_lpdf(daily_lp[d] | daily_lp[d - 1], drift));
  }

  // Binomial log-likelihood written directly on the log-probability scale
  // (the constant binomial coefficient is omitted): cnt[i] successes out of
  // z[i] trials with success probability exp(daily_lp[i]).
  for (i in 1:N_DAYS) {
    // log1m_exp(x) is the numerically stable form of log(1 - exp(x)).
    target += cnt[i] * daily_lp[i]
              + (z[i] - cnt[i]) * log1m_exp(daily_lp[i]);
  }
}