Hello, I am running into some problems fitting a latent variable model.
The Stan code looks like this:
data {
  int<lower=0> T;   // number of observations (time points)
  int<lower=1> K;   // number of maturities (K different years)
  real y1[T];       // data 1
  matrix[T,K] y2;   // data 2
  matrix[T,K] y3;   // data 3
  vector[K] tau;    // the K different years (maturities)
}
// Model parameters, hyperparameters, and latent state vectors.
parameters {
  // means
  real mu1;   // mean for x1
  real mu2;   // mean for x2
  real mul1;  // mean for l1
  real mul2;  // mean for l2
  real mup;   // mean for p
  real muh;   // mean for h
  // standard deviations
  real<lower=0> sigma1;   // standard deviation (sd) for x1
  real<lower=0> sigma2;   // sd for x2
  real<lower=0> sigmal1;  // sd for l1
  real<lower=0> sigmal2;  // sd for l2
  real<lower=0> sigmap;   // sd for p
  real<lower=0> sigmah;   // sd for h
  // mean-reversion rates (kappa)
  real<lower=0> kappa1;
  real<lower=kappa1> kappa2;  // ordered: kappa2 > kappa1
  real<lower=0> kappal1;
  real<lower=0> kappal2;
  real kappa1q;
  real kappa2q;
  real kappal1q;
  real kappal2q;
  real<lower=0> kappap;
  real<lower=0> sigy;   // measurement sd for y2 and y3
  real<lower=0> sigdr;  // measurement sd for y1
  real<lower=-1,upper=1> phi_h;  // AR(1) coefficient for h
  // intercepts
  real cg;
  real gr;
  real cr;
  // latent state vectors
  vector[T] x1;
  vector[T] x2;
  vector[T] l1;
  vector[T] l2;
  vector[T] p;
  vector[T] h;   // log-volatility
  vector[T] ep;  // noise with stochastic volatility exp(h/2)
}
transformed parameters {
  real F1[K];
  real F2[K];
  real Fa3[K];
  real Fg3[K];
  real Ag[K];
  real Gg[K];
  real Ha[K];
  real Hg[K];
  real<lower=0> sigr1;
  real<lower=0> sigr2;
  real<lower=0> sigl1;
  real<lower=0> sigl2;
  real<lower=0> sigp;
  vector[T] my1;
  matrix[T,K] my2;
  matrix[T,K] my3;
  for (j in 1:K) {  // loop over K, not a hard-coded 5
    F1[j] = 1 / kappa1q * (1 - exp(-kappa1q * tau[j]));
    F2[j] = 1 / kappa2q * (1 - exp(-kappa2q * tau[j]));
    Fa3[j] = 1 / kappal2q * (1 - exp(-kappal2q * tau[j]));
    Ag[j] = cr * tau[j]
            - sigma1^2 * (tau[j] - F1[j]) / (2 * kappa1q^2) + sigma1^2 * F1[j]^2 / (4 * kappa1q)
            - sigma2^2 * (tau[j] - F2[j]) / (2 * kappa2q^2) + sigma2^2 * F2[j]^2 / (4 * kappa2q)
            - sigmal2^2 * (tau[j] - Fa3[j]) / (2 * kappal2q^2) + sigmal2^2 * Fa3[j]^2 / (4 * kappal2q);
    Fg3[j] = 1 / kappal1q * (1 - exp(-kappal1q * tau[j]));
    Gg[j] = (cg + cr * gr) * tau[j]
            - gr^2 * sigma1^2 * (tau[j] - F1[j]) / (2 * kappa1q^2) + gr^2 * sigma1^2 * F1[j]^2 / (4 * kappa1q)
            - gr^2 * sigma2^2 * (tau[j] - F2[j]) / (2 * kappa2q^2) + gr^2 * sigma2^2 * F2[j]^2 / (4 * kappa2q)
            - sigmal1^2 * (tau[j] - Fg3[j]) / (2 * kappal1q^2) + sigmal1^2 * Fg3[j]^2 / (4 * kappal1q);
    Ha[j] = cr * tau[j]
            - sigma1^2 * (tau[j] - F1[j]) / (2 * kappa1q^2) + sigma1^2 * F1[j]^2 / (4 * kappa1q)
            - sigma2^2 * (tau[j] - F2[j]) / (2 * kappa2q^2) + sigma2^2 * F2[j]^2 / (4 * kappa2q);
    Hg[j] = (cg + cr * gr) * tau[j]
            - gr^2 * sigma1^2 * (tau[j] - F1[j]) / (2 * kappa1q^2) + gr^2 * sigma1^2 * F1[j]^2 / (4 * kappa1q)
            - gr^2 * sigma2^2 * (tau[j] - F2[j]) / (2 * kappa2q^2) + gr^2 * sigma2^2 * F2[j]^2 / (4 * kappa2q);
  }
  // conditional sds of the weekly-discretized OU transitions
  sigr1 = sigma1 * sqrt((1 - exp(-2 * kappa1 / 52)) / (2 * kappa1));
  sigr2 = sigma2 * sqrt((1 - exp(-2 * kappa2 / 52)) / (2 * kappa2));
  sigl1 = sigmal1 * sqrt((1 - exp(-2 * kappal1 / 52)) / (2 * kappal1));
  sigl2 = sigmal2 * sqrt((1 - exp(-2 * kappal2 / 52)) / (2 * kappal2));
  sigp = sigmap * sqrt((1 - exp(-2 * kappap / 52)) / (2 * kappap));
  for (t in 1:T) {
    my1[t] = cr + x1[t] + x2[t] + p[t] + ep[t];  // y1 ~ normal(my1, sigdr) below
    for (j in 1:K) {
      // y2 ~ normal(my2, sigy) and y3 ~ normal(my3, sigy) below
      my2[t,j] = F1[j] * x1[t] / tau[j] + F2[j] * x2[t] / tau[j] + Fa3[j] * l2[t] / tau[j] + Ag[j] / tau[j];
      my3[t,j] = gr * F1[j] * x1[t] / tau[j] + gr * F2[j] * x2[t] / tau[j] + Fg3[j] * l1[t] / tau[j] + Gg[j] / tau[j];
    }
  }
}
model {
  // priors
  mu1 ~ cauchy(0, 5);
  mu2 ~ cauchy(0, 5);
  mul1 ~ cauchy(0, 5);
  mul2 ~ cauchy(0, 5);
  muh ~ cauchy(0, 5);
  mup ~ cauchy(0, 5);
  sigma1 ~ cauchy(0, 5);
  sigma2 ~ cauchy(0, 5);
  sigmal1 ~ cauchy(0, 5);
  sigmal2 ~ cauchy(0, 5);
  sigmap ~ cauchy(0, 5);
  sigmah ~ cauchy(0, 5);
  kappa1 ~ cauchy(0, 10);
  kappa2 ~ cauchy(0, 10);
  kappal1 ~ cauchy(0, 10);
  kappal2 ~ cauchy(0, 10);
  kappa1q ~ cauchy(0, 10);
  kappa2q ~ cauchy(0, 10);
  kappal1q ~ cauchy(0, 10);
  kappal2q ~ cauchy(0, 10);
  kappap ~ cauchy(0, 10);
  phi_h ~ uniform(-1, 1);
  cg ~ normal(0, 5);
  cr ~ normal(0, 5);
  gr ~ normal(0, 5);
  sigy ~ cauchy(0, 1);
  sigdr ~ cauchy(0, 1);
  // initial states drawn from the stationary distributions
  x1[1] ~ normal(mu1, sigma1 / sqrt(2 * kappa1));
  x2[1] ~ normal(mu2, sigma2 / sqrt(2 * kappa2));
  l1[1] ~ normal(mul1, sigmal1 / sqrt(2 * kappal1));
  l2[1] ~ normal(mul2, sigmal2 / sqrt(2 * kappal2));
  p[1] ~ normal(mup, sigmap / sqrt(2 * kappap));
  h[1] ~ normal(muh, sigmah / sqrt(1 - phi_h^2));
  ep[1] ~ normal(0, exp(h[1] / 2));
  // state transitions (weekly discretization of the OU processes)
  for (t in 2:T) {
    x1[t] ~ normal((1 - exp(-kappa1/52)) * mu1 + exp(-kappa1/52) * x1[t-1], sigr1);
    x2[t] ~ normal((1 - exp(-kappa2/52)) * mu2 + exp(-kappa2/52) * x2[t-1], sigr2);
    l1[t] ~ normal((1 - exp(-kappal1/52)) * mul1 + exp(-kappal1/52) * l1[t-1], sigl1);
    l2[t] ~ normal((1 - exp(-kappal2/52)) * mul2 + exp(-kappal2/52) * l2[t-1], sigl2);
    p[t] ~ normal((1 - exp(-kappap/52)) * mup + exp(-kappap/52) * p[t-1], sigp);
    h[t] ~ normal(muh + phi_h * (h[t-1] - muh), sigmah);
    ep[t] ~ normal(0, exp(h[t] / 2));
  }
  // measurement equations
  for (t in 1:T) {
    y1[t] ~ normal(my1[t], sigdr);
    for (j in 1:K) {
      y2[t,j] ~ normal(my2[t,j], sigy);
      y3[t,j] ~ normal(my3[t,j], sigy);
    }
  }
}
generated quantities {
  real fit_y1[T];
  real r[T];
  matrix[T,K] fit_y2;
  matrix[T,K] fit_y3;
  matrix[T,K] R_a;
  matrix[T,K] R_g;
  for (t in 1:T) {
    fit_y1[t] = normal_rng(my1[t], sigdr);
    r[t] = cr + x1[t] + x2[t];
    for (j in 1:K) {
      fit_y2[t,j] = normal_rng(my2[t,j], sigy);
      fit_y3[t,j] = normal_rng(my3[t,j], sigy);
      R_a[t,j] = F1[j] * x1[t] / tau[j] + F2[j] * x2[t] / tau[j] + Ha[j] / tau[j];
      R_g[t,j] = gr * F1[j] * x1[t] / tau[j] + gr * F2[j] * x2[t] / tau[j] + Hg[j] / tau[j];
    }
  }
}
And the results look like this:
Warning messages:
1: There were 212 divergent transitions after warmup. See
http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
2: There were 3788 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
3: There were 3 chains where the estimated Bayesian Fraction of Missing Information was low. See
http://mc-stan.org/misc/warnings.html#bfmi-low
4: Examine the pairs() plot to diagnose sampling problems
5: The largest R-hat is 5.51, indicating chains have not mixed.
Running the chains for more iterations may help. See
http://mc-stan.org/misc/warnings.html#r-hat
6: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
http://mc-stan.org/misc/warnings.html#bulk-ess
7: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
http://mc-stan.org/misc/warnings.html#tail-ess
Some parameter estimates:
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu1 -3.00 1.01 1.83 -4.89 -4.59 -3.60 -1.76 1.46 3 1.67
mu2 -5.16 1.25 2.00 -9.75 -6.12 -4.71 -2.94 -2.94 3 3.69
mul1 2.05 0.86 1.40 0.28 0.66 2.39 2.95 4.77 3 2.32
mul2 0.62 1.22 1.78 -1.38 -0.68 0.28 1.79 4.34 2 4.36
mup 0.01 0.38 0.64 -1.06 -0.52 -0.04 0.41 1.33 3 1.87
sigma1 0.32 0.08 0.12 0.16 0.21 0.31 0.41 0.52 2 4.95
sigma2 0.29 0.01 0.03 0.22 0.28 0.29 0.32 0.32 4 2.24
sigmal1 2.36 1.66 2.37 0.29 0.39 0.83 5.21 6.14 2 7.34
sigmal2 1.57 1.37 2.02 0.17 0.25 0.52 1.51 6.29 2 4.24
sigmap 0.68 0.04 0.08 0.52 0.62 0.70 0.72 0.83 4 1.43
sigmah 0.57 0.04 0.14 0.35 0.51 0.51 0.64 0.91 11 1.30
kappa1 0.11 0.02 0.10 0.00 0.05 0.06 0.15 0.36 21 1.11
kappa2 0.25 0.01 0.13 0.02 0.19 0.19 0.32 0.54 86 1.08
kappal1 1.34 0.26 0.79 0.03 0.68 1.37 2.03 2.76 9 1.33
kappal2 2.05 1.41 2.38 0.03 0.04 0.41 3.89 7.35 3 2.06
kappa1q 0.18 0.14 0.21 0.00 0.00 0.12 0.27 0.62 2 6.11
kappa2q 0.12 0.04 0.06 0.04 0.05 0.12 0.19 0.20 2 9.05
kappal1q 7.97 5.45 7.78 0.36 0.38 5.28 17.52 17.52 2 8.32
kappal2q 3.05 3.24 4.68 0.08 0.36 0.50 2.45 13.02 2 5.97
kappap 1.10 0.44 0.87 0.15 0.65 0.70 1.38 3.52 4 1.50
sigy 0.06 0.00 0.00 0.06 0.06 0.06 0.07 0.07 2 4.62
sigdr 0.02 0.00 0.01 0.01 0.01 0.02 0.03 0.06 16 1.34
phi_h 0.94 0.02 0.05 0.83 0.92 0.95 0.98 0.99 6 1.35
muh -5.12 0.43 1.28 -6.39 -6.39 -5.04 -4.51 -2.14 9 1.26
cg -0.10 0.60 0.88 -2.01 -0.01 0.37 0.41 0.55 2 8.74
gr 0.78 0.13 0.19 0.63 0.65 0.69 0.76 1.19 2 10.26
cr 10.90 1.32 1.99 8.07 10.16 10.50 11.39 15.38 2 6.95
x1[1] -3.14 1.20 1.72 -4.78 -4.60 -3.86 -2.21 0.13 2 10.67
x1[2] -3.18 1.19 1.71 -4.80 -4.63 -3.92 -2.38 0.06 2 10.67
x1[3] -3.16 1.18 1.70 -4.77 -4.62 -3.90 -2.36 0.08 2 10.64
x1[4] -3.14 NaN 1.68 -4.74 -4.59 -3.86 -2.34 0.05 NaN 10.74
x1[5] -3.12 1.16 1.67 -4.70 -4.53 -3.83 -2.34 0.04 2 10.63
x1[6] -3.13 1.15 1.65 -4.69 -4.50 -3.82 -2.37 0.02 2 10.62
x1[7] -3.14 1.13 1.62 -4.67 -4.50 -3.84 -2.36 -0.03 2 10.45
...
OK, I have some questions about this:
- There were 212 divergent transitions after warmup. How serious is this for my conclusions if I want to use the estimated parameters and latent variables? (One fix I am considering is sketched after this list.)
- The R-hat values are far too large. Are there ways to bring them down?
- I set the priors fairly casually. Could these prior distributions be causing trouble? (A tighter alternative is also sketched below.)
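
For the divergences and the poor mixing, I am considering a non-centered parameterization of the latent states, which I have read often helps with models like this. Here is a minimal, untested sketch for one state (x1) and the volatility process (h); the other states would be handled the same way, and sigr1 would be computed before this point, as in my current transformed parameters block:

parameters {
  vector[T] x1_raw;  // standard-normal innovations instead of the state itself
  vector[T] h_raw;
  // ... all other parameters as before ...
}
transformed parameters {
  vector[T] x1;
  vector[T] h;
  // initial values from the stationary distributions
  x1[1] = mu1 + sigma1 / sqrt(2 * kappa1) * x1_raw[1];
  h[1] = muh + sigmah / sqrt(1 - phi_h^2) * h_raw[1];
  for (t in 2:T) {
    x1[t] = (1 - exp(-kappa1/52)) * mu1 + exp(-kappa1/52) * x1[t-1] + sigr1 * x1_raw[t];
    h[t] = muh + phi_h * (h[t-1] - muh) + sigmah * h_raw[t];
  }
}
model {
  x1_raw ~ normal(0, 1);  // implies the same distribution for x1 as before
  h_raw ~ normal(0, 1);
  // ... x1 and h no longer get explicit sampling statements ...
}

Together with this I would also raise adapt_delta and max_treedepth via the control argument when calling the sampler.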
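On the priors, I also wonder whether the cauchy(0, 10) priors on the kappas are too heavy-tailed. Something weakly informative like the following is the kind of change I have in mind (again just a sketch; the scales 2, 1, and 0.5 are guesses I have not tested):

// half-normal priors, since these parameters are declared <lower=0>
kappa1 ~ normal(0, 2);
kappa2 ~ normal(0, 2);
kappap ~ normal(0, 2);
sigma1 ~ normal(0, 1);
sigmah ~ normal(0, 1);
sigy ~ normal(0, 0.5);
// phi_h already has <lower=-1,upper=1>, so the uniform(-1,1) statement is redundant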
Any suggestions?
Thanks~