Hi,
for some time I’ve being trying to estimate an AR(1)+HMM model for cyclical GDP, unsuccessfully.
To make it short I cannot even recover simulated data with 500 observations.
The model is a version of the ‘‘disaster’’ model of Barro (2006). I started from the BUGS code of Nakamura E.; Steinsson, J.; Barro, R. & Ursúa, J.
Crises and recoveries in an empirical model of consumption disasters
American Economic Journal: Macroeconomics, American Economic Association, 2013 , 5 , 35-74 (could not modify that as BUGS is not available any more at least in Linux)
The model is
y=d+z+mu
d=constant
z=rho*z+epsilon; epsilon~N(0,sigma)
mu~HMM(s={s1,s2,s3})
I’ve tried two version. One replaces out the AR(1) component. This makes the likelihood depend on past states. In this case I extend the HMM a’ la Hamilton.
The second – discussed here – models z as a latent AR(1) process.
Here is the Stan code and its call:
Stan code:
functions {
real my_normal_lpdf(real y, real mu, real sigma) {
return normal_lupdf(y | mu, sigma);
}
// No longer needed but kept for completeness
matrix kronecker_product(matrix A, matrix B) {
int m = rows(A);
int n = cols(A);
int p = rows(B);
int q = cols(B);
matrix[m * p, n * q] result;
for (i in 1:m) {
for (j in 1:n) {
for (k in 1:p) {
for (l in 1:q) {
result[(i - 1) * p + k, (j - 1) * q + l] = A[i, j] * B[k, l];
}
}
}
}
return result;
}
}
data {
int<lower=1> nPeriods;
int<lower=1> nStates;
vector[nPeriods] yy;
real quantile_U;
real quantile_L;
real country_std;
real country_mean;
}
parameters {
real d;
simplex[nStates] pi0_local;
simplex[nStates] p1_local_raw;
simplex[nStates] p2_local_raw;
simplex[nStates] p3_local_raw;
real<lower=0.0001> sigma;
real<lower=0.0, upper=0.99> rho;
real<upper=-1.5> mu_1;
real<lower=1.5> mu_3;
vector[nPeriods] z;
}
transformed parameters {
matrix[nStates, nStates] P;
vector[nStates] mu_i;
P[1,] = to_row_vector(p1_local_raw / sum(p1_local_raw));
P[2,] = to_row_vector(p2_local_raw / sum(p2_local_raw));
P[3,] = to_row_vector(p3_local_raw / sum(p3_local_raw));
mu_i = to_vector([mu_1, 0.0, mu_3]);
}
model {
rho ~ beta(1, 1);
d ~ normal(country_mean, abs(country_mean) * 1.2);
mu_1 ~ normal(quantile_L, .3 * abs(quantile_L));
mu_3 ~ normal(quantile_U, .3 * abs(quantile_U));
sigma ~ cauchy(0, 1.2 * country_std);
p1_local_raw ~ dirichlet(to_vector([1, 1, 1]));
p2_local_raw ~ dirichlet(to_vector([1, 1, 1]));
p3_local_raw ~ dirichlet(to_vector([1, 1, 1]));
pi0_local ~ dirichlet(rep_vector(1, nStates));
z[1] ~ normal(0, sigma / sqrt(1 - rho^2));
for (t in 2:nPeriods){
z[t] ~ normal(rho * z[t - 1], sigma);
}
matrix[nStates, nPeriods] log_lik;
for (t in 1:nPeriods) {
for (s in 1:nStates) {
log_lik[s, t] = my_normal_lpdf(yy[t] | d + mu_i[s] + z[t], sigma);
}
log_lik[, t] -= log_sum_exp(log_lik[, t]);
}
target += hmm_marginal(log_lik, P, pi0_local);
}
generated quantities {
matrix[nStates, nPeriods] state_probs;
{ matrix[nStates, nPeriods] log_lik_states;
for (t in 1:nPeriods) {
for (s in 1:nStates) {
log_lik_states[s, t] = my_normal_lpdf(yy[t] | d + mu_i[s] + z[t], sigma);
}
log_lik_states[, t] -= log_sum_exp(log_lik_states[, t]);
}
state_probs = hmm_hidden_state_prob(log_lik_states, P, pi0_local);
}
}
Data simulation:
nPeriods=500
simulate_ARHMM_data <- function(nPeriods= nPeriods, mu_i = c(-2, 0, 2), P = matrix(c(0.9, 0.1, 0.0,
0.1, 0.8, 0.1,
0.0, 0.1, 0.9),
nrow = 3, byrow = TRUE),
rho = 0.85, sigma = 1) {
state <- sample(1:3, 1)
z <- 0
y <- numeric(nPeriods)
for (t in 1:nPeriods) {
mu <- mu_i[state]
eps <- rnorm(1, 0, sigma)
z <- rho * z + eps
y[t] <- mu + z
state <- sample(1:3, 1, prob = P[state, ])
}
return(y)
}
# Example simulation
set.seed(133)
sim_y <- simulate_ARHMM_data(T = nPeriods)
yy <- matrix(sim_y, ncol = 1)
colnames(yy) <- "SIM"
Estimation:
country_mean <- apply(yy, MARGIN = 2, FUN = mean)
country_std <- apply(yy, MARGIN = 2, FUN = sd)
country_L <- apply(yy, MARGIN = 2, FUN = function(x) quantile(x, p = .01))
country_U <- apply(yy, MARGIN = 2, FUN = function(x) quantile(x, p = .99))
init_function <- function(chain_id = 1) {
list(
d = 0,
sigma = 1,
rho = 0.85,
mu_1 = -2,
mu_3 = 2,
p1_local=c(0.9, 0.1, 0.0),
p2_local= c(0.1, 0.8, 0.1),
p3_local= c(0.0, 0.1, 0.9)
)
}
data_list <- list(
nCountries = 1,
nPeriods = nPeriods,
yy = yy[,1],
nStates = nStates,
samp_prior = 0,# 1 if evaluate only priors
country_std = country_std,
country_mean = country_mean,
quantile_L = country_L,
quantile_U = country_U
)
model<-stan_model("test_model.stan")
fit <- sampling(
object = model,
data = data_list,
chains = 4,
init = init_function,
iter = 2000,
warmup = floor(0.2*max_iter),
thin = 2,
seed = 123,
control = list(adapt_delta = 0.99, max_treedepth = 14),
cores =4
)
Recovery test
plot(fit,par=c('rho'))
plot(fit,par=c('sigma'))
plot(fit,par=c('P'))
The estimates are way off.
Does anybody see any issue with this experiment?
Thanks
Gianni