I’m currently working on a Bayesian inference problem with mixture Dirichlet process prior. I’m interested in the ADVI algorithm with Stan. I’m trying to use full rank approximation. However, I’m faced with two issues.
First, I want to get the estimated variance matrix of posterior distribution, but I didn’t find any code can be used to do that.
Second, I first tried mean-field approximation, it works but with poor performance. Then I tried full-rank approximation, but for some cases, there is error with report:
"stan::variational::advi::adapt_eta: All proposed step-sizes failed. Your model may be either severely ill-conditioned or misspecified.”
The code is here
data {
int<lower=0> n; // number of the observation
int<lower=0> N; // number of the components in stick-breaking prior
real y[n]; // observed y
real w[n]; // observed w
real z[n]; // observed x
real mcmat[2,2]; // misclassification matrx
real GammaPar[2]; // hyperparameter for precision parameter
//matrix[3,3] Sigma; // variance matrix for eta
}
parameters {
real eta[3]; // regression parameter for X
real<lower=0> alpha; // precision parameter
vector[2] V[N]; //total V
real <lower=0,upper=1> v[N];
// simplex[N] pi; // weight vector
real<lower=0> stepvalue[N];
real firstvalue;
real<lower=0> sigma[N];
real mueta;
real<lower=0> sigmaeta;
}
transformed parameters{
real<lower=0,upper=1> pi[N];
real mu[N];
mu[1]= firstvalue;
for (i in 2:N){
mu[i]=mu[i-1]+stepvalue[i];
}
pi[1] = v[1];
// stick-break process based on The BUGS book Chapter 11 (p.294)
for(j in 2:(N-1)){
pi[j]= v[j]*(1-v[j-1])*pi[j-1]/v[j-1];
}
pi[N]=1-sum(pi[1:(N-1)]); // to make a simplex.
}
model {
real H0;
real H1;
real Q;
//real mu2=rep_vector(0,2);
//real mu3=rep_vector(0,3);
// int index[n];
// vector[2] xi[n];
real a10;
real a11;
real a00;
real a01;
//prior
real ps[N];
// mu ~ normal(0,3);
sigma ~ lognormal(0,1);
firstvalue ~ normal(0,3);
stepvalue ~ lognormal(0,1);
for (i in 1:N){
V[i][1] ~ normal(mu[i],sqrt(sigma[i]));
V[i][2] ~ normal(firstvalue,sqrt(sigma[i]));
}
alpha ~ gamma(GammaPar[1],GammaPar[2]);
v ~ beta(1,alpha);
// pi ~ dirichlet(rep_vector(alpha/N,N));
// eta[1] ~ normal(-1.5,1);
// eta[2] ~ normal(1,1);
// eta[3] ~ normal(0.5,1);
mueta ~ normal(0,2);
sigmaeta ~ lognormal(0,2);
eta ~ normal(mueta,sqrt(sigmaeta));
a10=mcmat[1,2];
a11=mcmat[1,1];
a00=mcmat[2,2];
a01=mcmat[2,1];
for (i in 1:n){
H0=exp(eta[1]+eta[2]*0+eta[3]*z[i])/(1+exp(eta[1]+eta[2]*0+eta[3]*z[i]));
H1=exp(eta[1]+eta[2]*1+eta[3]*z[i])/(1+exp(eta[1]+eta[2]*1+eta[3]*z[i]));
for (j in 1:N){
Q=exp(V[j][1]+V[j][2]*z[i])/(1+exp(V[j][1]+V[j][2]*z[i]));
ps[j]=log(pi[j])+y[i]*w[i]*log(a10*H0*(1-Q)+a11*H1*Q)+y[i]*(1-w[i])*log(a00*H0*(1-Q)+a01*H1*Q)+
(1-y[i])*w[i]*log(a10*(1-H0)*(1-Q)+a11*(1-H1)*Q)+(1-y[i])*(1-w[i])*log(a00*(1-H0)*(1-Q)+a01*(1-H1)*Q);
}
target+= log_sum_exp(ps);
}
}
generated quantities {
real theta=0;
for(i in 1:n){
theta+=exp(eta[1]+eta[2]*1+eta[3]*z[i])/(1+exp(eta[1]+eta[2]*1+eta[3]*z[i]))-
exp(eta[1]+eta[2]*0+eta[3]*z[i])/(1+exp(eta[1]+eta[2]*0+eta[3]*z[i]));
}
theta=theta/n;
}