Hi,
I am trying to rewrite the Metropolis-within-Gibbs sampler to a Hamiltonian Monte Carlo sampler by using Rstan. Our model is Y_{n \times r} \sim X1C_{n \times pc} + X1D_{n \times pd} +X2_{n \times p2}, where Y and X1C are continuous, X1D is discrete, X2 may be a mix of continuous and discrete variables. We generated the data from our true model (with some other assumptions omitted here).
Our model assumes randomness in Y and X1C with normal errors, and some priors on parameters. Below are the stan codes I wrote.
functions {
matrix kronecker_prod(matrix A, matrix B) {
matrix[rows(A) * rows(B), cols(A) * cols(B)] C;
int m;
int n;
int p;
int q;
m = rows(A);
n = cols(A);
p = rows(B);
q = cols(B);
for (i in 1:m) {
for (j in 1:n) {
int row_start;
int row_end;
int col_start;
int col_end;
row_start = (i - 1) * p + 1;
row_end = (i - 1) * p + p;
col_start = (j - 1) * q + 1;
col_end = (j - 1) * q + q;
C[row_start:row_end, col_start:col_end] = A[i, j] * B;
}
}
return C;
}
}
data {
// these are dimensions
int<lower=1> pc;
int<lower=1> pd;
int<lower=1> p2;
int<lower=1> r;
int<lower=1> n;
int<lower=1, upper=pc-1> dx;
int<lower=1, upper=r-1> dy;
// these are data matrices
array[n] vector[pc] X1C;
array[n] vector[pd] X1D;
array[n] vector[p2] X2;
array[n] vector[r] Y;
// these are hyper-parameters
matrix[pc - dx, dx] A0;
matrix[r - dy, dy] B0;
matrix[p2, r] Z;
matrix[pd, pc] FF;
matrix[dy, dx] W;
matrix[dy, pd] TT;
cov_matrix[p2] M;
cov_matrix[pd] Lambda;
cov_matrix[pd] Q;
cov_matrix[dx] PsiX;
cov_matrix[dx] SigmaA;
cov_matrix[dx] E;
cov_matrix[dy] PsiY;
cov_matrix[dy] SigmaB;
cov_matrix[pc - dx] PsiX0;
cov_matrix[pc - dx] KA;
cov_matrix[r - dy] PsiY0;
cov_matrix[r - dy] KB;
int<lower = dx - 1> wX;
int<lower = pc - dx - 1> wX0;
int<lower = dy - 1> wY;
int<lower = r - dy - 1> wY0;
}
parameters {
vector[pc] mu1C;
vector[r] muY;
matrix[p2, r] beta2;
matrix[pd, pc] gamma;
cov_matrix[dx] Omega;
cov_matrix[pc - dx] Omega0;
cov_matrix[dy] Phii;
cov_matrix[r - dy] Phii0;
matrix[pc - dx, dx] A;
matrix[r - dy, dy] B;
matrix[dy, dx] etaC;
matrix[dy, pd] etaD;
}
transformed parameters {
matrix[pc, dx] CA;
matrix[pc, dx] L;
matrix[pc, pc - dx] DA;
matrix[pc, pc - dx] L0;
matrix[r, dy] CB;
matrix[r, dy] R;
matrix[r, r - dy] DB;
matrix[r, r - dy] R0;
matrix[pc, pc] SigmaCD;
// cov_matrix[pc] SigmaCD;
matrix[r, r] SigmaYX;
// cov_matrix[r] SigmaYX;
matrix[pc ,r] beta1C;
matrix[pd ,r] beta1D;
CA = append_row(identity_matrix(dx), A);
L = CA * inv_sqrt( crossprod(CA) );
DA = append_row(-A', identity_matrix(pc - dx));
L0 = DA * inv_sqrt( crossprod(DA) );
CB = append_row(identity_matrix(dy), B);
R = CB * inv_sqrt( crossprod(CB) );
DB = append_row(-B', identity_matrix(r - dy));
R0 = DB * inv_sqrt( crossprod(DB) );
SigmaCD = quad_form(Omega, L') + quad_form(Omega0, L0');
SigmaYX = quad_form(Phii, R') + quad_form(Phii0, R0');
beta1C = L * etaC' * R';
beta1D = etaD' * R';
}
model {
array[n] vector[pc] meanCD;
array[n] vector[r] meanYX;
for (i in 1:n){
meanCD[i] = mu1C + gamma' * X1D[i];
meanYX[i] = muY + beta1C' * X1C[i] + beta1D' * X1D[i] + beta2' * X2[i];
}
for (i in 1:n) {
X1C[i] ~ multi_normal(meanCD[i], SigmaCD);
Y[i] ~ multi_normal(meanYX[i], SigmaYX);
}
to_vector(beta2) ~ multi_normal(to_vector(inverse_spd(M) * Z), kronecker_prod(SigmaYX, inverse_spd(M)));
to_vector(gamma) ~ multi_normal(to_vector(inverse_spd(Lambda) * FF), kronecker_prod(SigmaCD, inverse_spd(Lambda)));
to_vector(A) ~ multi_normal(to_vector(A0), kronecker_prod(SigmaA, KA));
to_vector(B) ~ multi_normal(to_vector(B0), kronecker_prod(SigmaB, KB));
to_vector(etaC) ~ multi_normal(to_vector(W * inverse_spd(E)), kronecker_prod(inverse_spd(E), Phii));
to_vector(etaD) ~ multi_normal(to_vector(TT * inverse_spd(Q)), kronecker_prod(inverse_spd(Q), Phii));
Omega ~ inv_wishart(wX, PsiX);
Omega0 ~ inv_wishart(wX0, PsiX0);
Phii ~ inv_wishart(wY, PsiY);
Phii0 ~ inv_wishart(wY0, PsiY0);
}
When I run this model, there is always an error with initialization that could not be solved as follows:
"Chain 1: Rejecting initial value:
Chain 1: Error evaluating the log probability at the initial value.
Chain 1: Exception: multi_normal_lpdf: Covariance matrix is not symmetric. Covariance matrix[1,2] = nan, but Covariance matrix[2,1] = nan (in ‘anon_model’, line 127, column 4 to column 46)
Chain 1:
Chain 1: Initialization between (-2, 2) failed after 100 attempts.
Chain 1: Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model.
[1] “Error in sampler$call_sampler(args_list[[i]]) : Initialization failed.”
I’ve also tried to give some good initial values by the “init” argument, but it cannot work. I really appreciate if someone could give me any suggestions. Thanks.