Hi all,
I’ve used Stan for a while, but this is my first time posting here, and I’ve never seen the behavior I’m about to describe. Running the following program with the HMC sampling algorithm, adapt_delta=0.85, and 4000 iterations (2000 of them warmup) collapses the posterior to a point mass for the parameters (\rho, in particular, always ends up at 1), and the log-posterior settles at different values depending on the starting values. The model generates 0 divergent transitions, but the Rhats are on the order of 100 trillion (1e14) for most parameters and 100 million (1e8) for the others.
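In case the math is easier to scan than the Stan code, the moment conditions the program implements (transcribed directly from the code below) are

expect_{t,1} = \frac{1-\rho}{\rho}\,\tilde{q}_t - \frac{\rho-\nu_g}{\rho\,\nu_g}\left(g_{t+1}-g_t\right) + \frac{\rho-\nu_s}{\rho\,\nu_s}\left(s_{t+1}-s_t\right) + \frac{1}{\rho}\,\tilde{p}_t,
expect_{t,j+1} = \widetilde{lw}_{t,j} - \alpha_j\,KL_{t,j}, \quad j \in \{1,2\},

where g_t = \log\left(\omega_g + (1-\omega_g)\left(\tilde{w}_{g,t}\,\omega_g/(1-\omega_g)\right)^{\nu_g/(\nu_g-1)}\right), s_t is the analogous expression in \omega_s, \nu_s, and \tilde{w}_{s,t}, and each expect_t is modeled as mean-zero multivariate normal.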
// DATA BLOCK
data {
int<lower=1> T1; // Total number of periods.
int<lower=1> T0; // Number of periods for differenced values.
vector[T1] wtildeg; // Wage-weighted prices (goods)
vector[T1] wtildes; // Wage-weighted prices (services)
vector[T0] ptilde; // Log price differences
vector[T0] qtilde; // Log-consumption ratios differences
vector[2] lwtilde[T0]; // log(w / p_j) for each j in {g,s}
vector[2] KL[T0]; // log(K/L) for each firm
// Fixed Parameters
real nug_mean; // lognormal location for nnug
real nus_mean; // lognormal location for nnus
real omegag_beta; // second beta shape for omegag
real omegas_beta; // second beta shape for omegas
real alpha_alpha; // first beta shape for both alphas
real alpha_betag; // second beta shape for alpha[1]
real alpha_betas; // second beta shape for alpha[2]
vector[3] zero_vec; // zero mean vector for the likelihood
}
// PARAMETER DECLARATIONS
parameters {
real<lower=0,upper=1> rho; // elasticity of substitution for c's
real<lower=0> nnug;
real<lower=0> nnus;
real<lower=0,upper=1> omegag;
real<lower=0,upper=1> omegas;
vector<lower=0,upper=1>[2] alpha; // elasticities of substitution between K_f and L_f in production
cholesky_factor_corr[3] L_Omega_cov; // Cholesky factor of the shocks' correlation matrix
vector<lower=0>[3] L_sigma_omega; // s.d.'s for the differenced shocks
}
// TRANSFORMATION BLOCK
transformed parameters{
matrix[3,3] L_Omega; // Cholesky factor of the combined variance/covariance matrix for the zeta's
vector[3] expect[T0]; // This term is the centering vector for the likelihood.
real<upper=0> nug;
real<upper=0> nus;
nug = - nnug;
nus = - nnus;
// Variance/covariance matrices
L_Omega = diag_pre_multiply(L_sigma_omega,L_Omega_cov);
for(t in 1:T0){
expect[t][1] = (1-rho)/rho * qtilde[t] - (rho-nug)/rho/nug
* log(omegag + (1-omegag) * (wtildeg[t+1] * omegag / (1-omegag) )^(nug/(nug-1)))
+ (rho-nus)/rho/nus
* log(omegas + (1-omegas) * (wtildes[t+1] * omegas / (1-omegas) )^(nus/(nus-1)))
+ (rho-nug)/rho/nug
* log(omegag + (1-omegag) * (wtildeg[t] * omegag / (1-omegag) )^(nug/(nug-1)))
- (rho-nus)/rho/nus
* log(omegas + (1-omegas) * (wtildes[t] * omegas / (1-omegas) )^(nus/(nus-1)))
+ (1.0/rho) * ptilde[t];
for(j in 1:2){
expect[t][j+1] = lwtilde[t][j] - alpha[j] * KL[t][j];
}
}
}
// MODEL BLOCK
model {
// Structural Parameter Priors
rho ~ beta(1.0,1.0);
omegag ~ beta(1.0,omegag_beta);
omegas ~ beta(1.0,omegas_beta);
nnug ~ lognormal(nug_mean,1.0);
nnus ~ lognormal(nus_mean,1.0);
alpha[1] ~ beta(alpha_alpha,alpha_betag);
alpha[2] ~ beta(alpha_alpha,alpha_betas);
// Variance/covariance draws
L_Omega_cov ~ lkj_corr_cholesky(2.0);
L_sigma_omega ~ cauchy(0.0,2.0);
// Likelihood Error
for(t in 1:T0){
expect[t] ~ multi_normal_cholesky(zero_vec,L_Omega);
}
}
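(A side note on the likelihood: unless I’m misreading Stan’s vectorization rules, the loop over t is equivalent to the single statement below, which broadcasts zero_vec across the array; the same applies to the second program further down.)

// Equivalent vectorized form of the likelihood loop:
expect ~ multi_normal_cholesky(zero_vec, L_Omega);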
Theory restricts \rho to (0,1) or \rho < 0. When I run a separate model with \rho < 0 (identical except for the parameterization of \rho and its prior), everything converges properly, with Rhats near 1 and high effective sample sizes:
// DATA BLOCK
data {
int<lower=1> T1; // Total number of periods.
int<lower=1> T0; // Number of periods for differenced values.
vector[T1] wtildeg; // Wage-weighted prices (goods)
vector[T1] wtildes; // Wage-weighted prices (services)
vector[T0] ptilde; // Log price differences
vector[T0] qtilde; // Log-consumption ratios differences
vector[2] lwtilde[T0]; // log(w / p_j) for each j in {g,s}
vector[2] KL[T0]; // log(K/L) for each firm
// Fixed Parameters
real nug_mean; // lognormal location for nnug
real nus_mean; // lognormal location for nnus
real omegag_beta; // second beta shape for omegag
real omegas_beta; // second beta shape for omegas
real alpha_alpha; // first beta shape for both alphas
real alpha_betag; // second beta shape for alpha[1]
real alpha_betas; // second beta shape for alpha[2]
vector[3] zero_vec; // zero mean vector for the likelihood
}
// PARAMETER DECLARATIONS
parameters {
real<lower=0> nrho; // negative of rho, the elasticity of substitution for c's
real<lower=0> nnug;
real<lower=0> nnus;
real<lower=0,upper=1> omegag;
real<lower=0,upper=1> omegas;
vector<lower=0,upper=1>[2] alpha; // elasticities of substitution between K_f and L_f in production
cholesky_factor_corr[3] L_Omega_cov; // Cholesky factor of the shocks' correlation matrix
vector<lower=0>[3] L_sigma_omega; // s.d.'s for the differenced shocks
}
// TRANSFORMATION BLOCK
transformed parameters{
matrix[3,3] L_Omega; // Cholesky factor of the combined variance/covariance matrix for the zeta's
vector[3] expect[T0]; // This term is the centering vector for the likelihood.
real<upper=0> nug;
real<upper=0> nus;
real<upper=0> rho;
rho = - nrho;
nug = - nnug;
nus = - nnus;
// Variance/covariance matrices
L_Omega = diag_pre_multiply(L_sigma_omega,L_Omega_cov);
for(t in 1:T0){
expect[t][1] = (1-rho)/rho * qtilde[t] - (rho-nug)/rho/nug
* log(omegag + (1-omegag) * (wtildeg[t+1] * omegag / (1-omegag) )^(nug/(nug-1)))
+ (rho-nus)/rho/nus
* log(omegas + (1-omegas) * (wtildes[t+1] * omegas / (1-omegas) )^(nus/(nus-1)))
+ (rho-nug)/rho/nug
* log(omegag + (1-omegag) * (wtildeg[t] * omegag / (1-omegag) )^(nug/(nug-1)))
- (rho-nus)/rho/nus
* log(omegas + (1-omegas) * (wtildes[t] * omegas / (1-omegas) )^(nus/(nus-1)))
+ (1.0/rho) * ptilde[t];
for(j in 1:2){
expect[t][j+1] = lwtilde[t][j] - alpha[j] * KL[t][j];
}
}
}
// MODEL BLOCK
model {
// Structural Parameter Priors
nrho ~ lognormal(-0.5,1);
omegag ~ beta(1.0,omegag_beta);
omegas ~ beta(1.0,omegas_beta);
nnug ~ lognormal(nug_mean,1.0);
nnus ~ lognormal(nus_mean,1.0);
alpha[1] ~ beta(alpha_alpha,alpha_betag);
alpha[2] ~ beta(alpha_alpha,alpha_betas);
// Variance/covariance draws
L_Omega_cov ~ lkj_corr_cholesky(2.0);
L_sigma_omega ~ cauchy(0.0,2.0);
// Likelihood Error
for(t in 1:T0){
expect[t] ~ multi_normal_cholesky(zero_vec,L_Omega);
}
}
Both models above are simultaneous-equations models. If I drop the second and third equations from the likelihood and again restrict \rho to (0,1), the program runs efficiently and converges quickly:
// DATA BLOCK
data {
int<lower=1> T1; // Total number of periods.
int<lower=1> T0; // Number of periods for differenced values.
vector[T1] wtildeg; // Wage-weighted prices (goods)
vector[T1] wtildes; // Wage-weighted prices (services)
vector[T0] ptilde; // Log price differences
vector[T0] qtilde; // Log-consumption ratios differences
// Fixed Parameters
real nug_mean; // lognormal location for nnug
real nus_mean; // lognormal location for nnus
real omegag_beta; // second beta shape for omegag
real omegas_beta; // second beta shape for omegas
}
// PARAMETER DECLARATIONS
parameters {
real<lower=0,upper=1> rho; // elasticity of substitution for c's
real<lower=0> nnug;
real<lower=0> nnus;
real<lower=0,upper=1> omegag;
real<lower=0,upper=1> omegas;
real<lower=0> precision;
}
// TRANSFORMATION BLOCK
transformed parameters{
vector[T0] expect; // This term is the centering vector for the likelihood.
real<upper=0> nug;
real<upper=0> nus;
real<lower=0> sigma;
nug = - nnug;
nus = - nnus;
sigma = sqrt(1.0 / precision);
for(t in 1:T0){
expect[t] = (1-rho)/rho * qtilde[t] - (rho-nug)/rho/nug
* log(omegag + (1-omegag) * (wtildeg[t+1] * omegag / (1-omegag) )^(nug/(nug-1)))
+ (rho-nus)/rho/nus
* log(omegas + (1-omegas) * (wtildes[t+1] * omegas / (1-omegas) )^(nus/(nus-1)))
+ (rho-nug)/rho/nug
* log(omegag + (1-omegag) * (wtildeg[t] * omegag / (1-omegag) )^(nug/(nug-1)))
- (rho-nus)/rho/nus
* log(omegas + (1-omegas) * (wtildes[t] * omegas / (1-omegas) )^(nus/(nus-1)))
+ (1.0/rho) * ptilde[t];
}
}
// MODEL BLOCK
model {
// Structural Parameter Priors
rho ~ beta(1,1);
omegag ~ beta(1.0,omegag_beta);
omegas ~ beta(1.0,omegas_beta);
nnug ~ lognormal(nug_mean,1.0);
nnus ~ lognormal(nus_mean,1.0);
// Variance draw
precision ~ gamma(2.0,4.0);
// Likelihood Error
for(t in 1:T0){
expect[t] ~ normal(0.0,sigma);
}
}
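(Likewise, the univariate likelihood loop here should be equivalent to the vectorized statement below.)

// Equivalent vectorized form of the univariate likelihood:
expect ~ normal(0.0, sigma);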
I find this behavior very curious. I’m attaching two datasets (stan_agg_ge_naive.r and stan_agg_pe_naive.r): the first is for the first two programs above, and the second is for the last program, without the simultaneous equations. Any help is greatly appreciated.
- Nick
stan_agg_ge_naive.r (11.7 KB)
stan_agg_pe_naive.r (5.9 KB)