Hello!
I am trying to simulate a 3-state Markov chain and the transition probabilities come from a Gaussian process (we call it X) with spatio-temporal covariance matrix and some other variables.
So far, the program is able to estimate the parameters of the the “fake” or the simulated data. But now that I am trying to test it on the real data, it does not converge. I believe the reason is that before, we were able to generate X process and feed it into the program as data, but now we moved it to the parameter block since we do not have it.
Now, my question is: is there another way of considering X in stan but not as a parameter nor data? I tried defining it as a function or in the transformed parameters but was not successful.
AND, if the answer is no :( , is there any suggestion to make the code converge? I have read the stan document regarding divergence error as well as some tips online any I believe I need to change something in my model since it was working before.
Below is the code for the real data which diverges.
Thanks!
functions {
array[] matrix cor_sep(real nu, real c, real a, real alpha,
data array[] matrix u_ar, data array[] matrix h_ar,data int n_var, data int n_block_row) {
array[n_block_row] matrix[n_var, n_var] corr;
for (n in 1:n_block_row) {
corr[n] = (a * u_ar[n] ^ (2 * alpha) + 1) ^ (-1) .* ((1 - nu) * exp(-c * h_ar[n]));
for (j in 1:n_var) {
for (i in 1:n_var) {
if (i == j)
corr[n][i, j] = (a * (u_ar[n][i, j]) ^ (2 * alpha) + 1) ^ (-1);
}
}
}
return corr;
}
matrix cor_joint(real nu, real c, real a, real alpha, array[] matrix corr_ar,
data int n_var, data int n_block_row) {
int ind_i_start;
int ind_i_end;
int ind_j_start;
int ind_j_end;
matrix[n_var * n_block_row, n_var * n_block_row] corr;
for (i in 1:n_block_row) {
ind_i_start = ((i - 1) * n_var + 1);
ind_i_end = (n_var * i);
for (j in 1:n_block_row) {
ind_j_start = ((j - 1) * n_var + 1);
ind_j_end = (n_var * j);
if (j >= i)
corr[ind_i_start:ind_i_end, ind_j_start:ind_j_end] = corr_ar[j - i + 1];
else
corr[ind_i_start:ind_i_end, ind_j_start:ind_j_end] = corr_ar[i - j + 1]';
}
}
return corr;
}
vector repeat_vector(vector input, int reps) {
int N = rows(input);
vector[N*reps] repvec; // stack N-vector reps times
for (k in 1:reps) {
for (i in 1:N) {
repvec[ i + (k - 1) * N] = input[i];
// assign i-th value of input to i+(k-1)*N -th value of repvec
}
}
return repvec;
}
}
data {
int<lower=1> N;
int<lower=1> N_mu;
int<lower=1> lag_u;
int<lower=1> n_ahead;
//number of wind farms
int<lower=1> n_var;
int<lower=1> lag_max;
int<lower=1> n_block_row;
matrix[n_var, n_var] dist;
array[n_block_row] matrix[n_var, n_var] h_ar;
array[n_block_row] matrix[n_var, n_var] u_ar;
//the final y
int<lower = 1, upper = 3> y[N,n_var];
}
transformed data {
vector[3] zeros = rep_vector(0, 3);
}
parameters {
// model parameters
real<lower=0, upper=1> nu;
real<lower=0> c;
real<lower=0> a;
real<lower=0, upper=1> alpha;
//new_parameters
matrix[3, 2] beta_raw;
// mean
vector[n_var] mu;
//cos & sin coeficients
vector[4] B;
//X
vector[N_mu] X[N];
}
transformed parameters {
array[n_block_row] matrix[n_var, n_var] c_sep;
matrix[n_var * n_block_row, n_var * n_block_row] c_joint;
matrix[3, 3] beta;
array[N, n_var] matrix[3, 3] theta;
c_sep = cor_sep(nu, c, a, alpha, u_ar, h_ar, n_var, n_block_row);
c_joint = cor_joint(nu, c, a, alpha, c_sep, n_var, n_block_row);
beta = append_col(beta_raw, zeros);
//mu
vector[N_mu] mu_final;
mu_final = repeat_vector(mu, n_block_row);
for(i in 1:N){
for(j in 1:n_var){
for(t in 1:3){
// periodic trends for ramp downs
theta[i,j,t,1] = beta[t,1] * X[i,j] +B[1]*cos((2*pi()*i*60*12)/(60*60*24))+B[2]*sin((2*pi()*i*60*12)/(60*60*24));
theta[i,j,t,2] = beta[t,2] * X[i,j];
// periodic trends for ramp ups
theta[i,j,t,3] = beta[t,3] * X[i,j] +B[3]*cos((2*pi()*i*60*12)/(60*60*24))+B[4]*sin((2*pi()*i*60*12)/(60*60*24));
}
}
}
}
model {
// flat priors
nu ~ uniform(0, 1);
c ~ uniform(0.0001, 5);
a ~ uniform(0.0001, 5);
alpha ~ uniform(0, 1);
mu ~ uniform(-10,10);
B~ uniform(-10,10);
to_vector(beta) ~ uniform(-10, 10);
for(i in 2:N){
X[i] ~ multi_normal(mu_final, c_joint);
for(j in 1:n_var){
y[i,j] ~ categorical_logit(to_vector(theta[i,j,y[i-1,j],:]));
}
}
}
`