Hi,
I am new to Stan. I recently wanted to make sure my model works, so I generated some data points for the Stan model to recover, but it fails. If the fitted parameter values are not similar to the parameters that generated the data, does that matter? Is it because of inappropriate priors, or because of the label-switching degeneracy of mixture models? I am quite confused and hope someone can help me out. Thanks!
My model is a simple Gaussian mixture model (with 4 Gaussian components and 4 mixing weights theta).
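Concretely, each observation y is generated by picking a component k with probability theta_k and then drawing y ~ MultiNormal(mu_k, Sigma_k), so the likelihood is p(y) = sum_{k=1..4} theta_k * MultiNormal(y | mu_k, Sigma_k); in my setup mu_k = s + v_k*t and the standard deviations are sqrt(2*Dif_k*t), as in the code below.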
Below is how I generate the synthetic data (written in Python):
import numpy as np
from scipy import stats

def ff(x, y, mux, muy, Dx, Dy, ro):
    # bivariate normal density with means (mux, muy), standard deviations (Dx, Dy),
    # and correlation ro
    s = 1./(2*np.pi*Dx*Dy*np.sqrt(1-ro**2)) * np.exp(-1/(2*(1-ro**2)) *
        ((x-mux)**2/Dx**2 + (y-muy)**2/Dy**2 - 2*ro*(x-mux)*(y-muy)/(Dx*Dy)))
    return s

def fcont(x, y, vx, vy, Dx, Dy, ro, x0, y0, t):
    # density of one component at time t: the mean drifts with velocity (vx, vy)
    # and the spread grows like sqrt(2*D*t)
    mux = x0 + vx*t
    muy = y0 + vy*t
    sx = np.sqrt(2.0*Dx*t)
    sy = np.sqrt(2.0*Dy*t)
    return ff(x, y, mux, muy, sx, sy, ro)

def covmat(Dx, Dy, ro, t):
    # 2x2 covariance matrix at time t (the small offset keeps it non-singular at t=0)
    sx = np.sqrt(2.0*Dx*t) + 0.0001
    sy = np.sqrt(2.0*Dy*t) + 0.0001
    return [[sx**2, ro*sx*sy], [ro*sx*sy, sy**2]]
if __name__ == '__main__':
    # generate field data: four components, all starting at the origin
    x0, y0 = 0., 0.
    x01, y01 = 0., 0.
    x02, y02 = 0., 0.
    x03, y03 = 0., 0.
    # component 1: velocity, diffusion, correlation
    vxp, vyp = 1., 2.
    Dxp, Dyp = 1., 1.
    rop = 0.3
    # component 2
    vxp2, vyp2 = 2., -0.5
    Dxp2, Dyp2 = 0.3, 0.3
    rop2 = 0.1
    # component 3
    vxp3, vyp3 = -2., -2.
    Dxp3, Dyp3 = 0.2, 0.2
    rop3 = 0.1
    # component 4
    vxp4, vyp4 = -5., 4.
    Dxp4, Dyp4 = 0.3, 0.3
    rop4 = 0.2
    # common observation time
    st = 5.0
    position_est = []
    # component label for each of the 10 observations, equal mixing weights
    np.random.seed(10)
    Urand = np.random.choice([0, 1, 2, 3], size=10, p=[0.25, 0.25, 0.25, 0.25])
    np.random.seed(1)
    rand = np.random.uniform(size=1)  # single uniform draw; only advances the RNG state
    for j in range(len(Urand)):
        # draw one position from the component picked for observation j
        if Urand[j] == 0:
            position = stats.multivariate_normal(mean=[x0 + vxp*st, y0 + vyp*st],
                                                 cov=covmat(Dxp, Dyp, rop, st))
        elif Urand[j] == 1:
            position = stats.multivariate_normal(mean=[x01 + vxp2*st, y01 + vyp2*st],
                                                 cov=covmat(Dxp2, Dyp2, rop2, st))
        elif Urand[j] == 2:
            position = stats.multivariate_normal(mean=[x02 + vxp3*st, y02 + vyp3*st],
                                                 cov=covmat(Dxp3, Dyp3, rop3, st))
        else:
            position = stats.multivariate_normal(mean=[x03 + vxp4*st, y03 + vyp4*st],
                                                 cov=covmat(Dxp4, Dyp4, rop4, st))
        position_est.append(position.rvs())
    position_est = np.array(position_est)
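For reference, these are the values the fit should recover: equal weights of 0.25, means s + v*st of roughly (5, 10), (10, -2.5), (-10, -10) and (-25, 20), standard deviations sqrt(2*D*st), and correlations 0.3, 0.1, 0.1 and 0.2. A small sanity check, appended at the end of the generating script above (it just reuses the variables defined there):
print('true mixture parameters (what the fit should recover):')
for k, (px, py, vx, vy, Dx, Dy, ro) in enumerate([
        (x0,  y0,  vxp,  vyp,  Dxp,  Dyp,  rop),
        (x01, y01, vxp2, vyp2, Dxp2, Dyp2, rop2),
        (x02, y02, vxp3, vyp3, Dxp3, Dyp3, rop3),
        (x03, y03, vxp4, vyp4, Dxp4, Dyp4, rop4)], start=1):
    print('component', k, 'weight 0.25',
          'mean', [px + vx*st, py + vy*st],
          'cov', covmat(Dx, Dy, ro, st))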
My Stan model then uses the position data; the model is below:
model = '''
data {
  int D;           // number of dimensions
  int K;           // number of gaussians
  int N;           // number of data
  int M;           // patch number at time
  vector[D] y[N];  // observation data
  vector[D] s;     // oil spill location
  matrix[N, M] st;
}
parameters {
  simplex[K] theta;              // mixing proportions
  ordered[D] v[K];
  ordered[D] Dif[K];
  cholesky_factor_corr[D] L[K];  // cholesky factor of correlation matrix
}
transformed parameters {
  cholesky_factor_cov[D, D] cov[K*M, N];
  vector<lower=0>[D] sigma[K*M, N];  // standard deviations
  vector[D] mu[K*M, N];
  real<lower=-0.999> ro[K];
  matrix[D, D] Omega[K];
  matrix[D, D] Sigma[K*M, N];
  for (k in 1:K) {
    Omega[k] = multiply_lower_tri_self_transpose(L[k]);
    ro[k] = Omega[k, 2, 1];
    for (m in 1:M) {
      for (n in 1:N) {
        sigma[k + K*(m-1), n] = 0.05 + sqrt(2 * (st[n][m]) * Dif[k]);
        mu[k + K*(m-1), n] = s + v[k] * st[n][m];
        cov[k + K*(m-1), n] = diag_pre_multiply(sigma[k + K*(m-1), n], L[k]);
        Sigma[k + K*(m-1), n] = quad_form_diag(Omega[k], sigma[k + K*(m-1), n]);
      }
    }
  }
}
model {
  real ps[K*M];
  theta ~ dirichlet(rep_vector(2.0, K));
  for (k in 1:K) {
    L[k] ~ lkj_corr_cholesky(2);  // contains rho
  }
  for (n in 1:N) {
    for (k in 1:K) {
      for (m in 1:M) {
        ps[k + K*(m-1)] = log(theta[k])
                          + multi_normal_cholesky_lpdf(y[n] | mu[k + K*(m-1), n], cov[k + K*(m-1), n]);
      }
    }
    target += log_sum_exp(ps);
  }
}
'''
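For completeness, this is roughly how I prepare the data and run the sampler (a minimal sketch using the PyStan 2 interface; here M = 1, every observation shares the single time st = 5.0, s is the common starting point (0, 0), and the iter/chains/seed values are just placeholders):
import pystan

N = position_est.shape[0]
stan_data = {
    'D': 2,                      # dimensions
    'K': 4,                      # mixture components
    'N': N,                      # number of observations
    'M': 1,                      # one time "patch"
    'y': position_est,           # N x 2 observations
    's': np.array([0., 0.]),     # spill location (all components start at the origin)
    'st': np.full((N, 1), 5.0),  # observation time for every point
}

sm = pystan.StanModel(model_code=model)
fit = sm.sampling(data=stan_data, iter=2000, chains=4, seed=1)
print(fit)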
The fit results are completely unrealistic:
             mean   se_mean      sd     2.5%      25%      50%      75%    97.5%  n_eff  Rhat
theta[1]     0.49    0.02       0.03     0.44     0.46     0.49     0.51     0.55    4.0  1.07
theta[2]     0.21    7.9e-3     0.02     0.18     0.2      0.2      0.22     0.23    4.0  1.36
theta[3]     0.09    6.6e-3     0.01     0.06     0.08     0.09     0.1      0.11    5.0  1.12
theta[4]     0.22    5.6e-3     0.01     0.2      0.21     0.22     0.23     0.24    5.0  1.17
v[1,1]      -2.44    0.04       0.06    -2.57    -2.49    -2.42    -2.39    -2.36    3.0  2.32
v[2,1]     -29.56    0.22       0.53   -30.39   -30.02   -29.6    -29.09   -28.51    6.0  1.05
v[3,1]     -41.0     0.51       0.8    -42.11   -41.86   -40.71   -40.36   -39.86    2.0  2.99
v[4,1]     -65.84    0.41       0.82   -67.65   -66.5    -65.78   -65.24   -64.55    4.0  1.67
v[1,2]      -1.36    0.06       0.14    -1.65    -1.46    -1.33    -1.27    -1.01    6.0  1.66
v[2,2]    4.2e296    nan        inf   1.3e295  1.5e296  2.8e296  5.1e296  1.5e297    nan  nan
v[3,2]        inf    nan        inf   1.2e306  9.7e306  1.8e307  2.7e307  3.5e307    nan  nan
v[4,2]        inf    nan        inf   2.1e306  9.3e306  1.8e307  2.6e307  3.5e307    nan  nan
Dif[1,1]    47.09    0.2        0.44    46.22    46.69    47.24    47.44    47.69    5.0  1.02
Dif[2,1]     9.82    0.05       0.14     9.49     9.76     9.83     9.94    10.08    8.0  1.07
Dif[3,1]    22.76    1.34       2.15    19.64    20.65    23.03    24.39    26.35    3.0  2.51
Dif[4,1]    34.96    1.35       2.78    28.69    34.21    35.77    36.93    38.47    4.0  1.19
Dif[1,2]    64.23    0.88       1.91    61.12    62.86    64.01    65.27    68.15    5.0  1.3
Dif[2,2]  2.8e284    nan        inf   7.6e282  4.3e283  7.9e283  3.5e284  1.4e285    nan  nan
Dif[3,2]      inf    nan        inf   1.6e306  5.9e306  9.7e306  1.4e307  1.8e307    nan  nan
Dif[4,2]      inf    nan        inf   2.5e306  5.2e306  8.6e306  1.3e307  1.8e307    nan  nan