Hi,
I am new to stan. As suggested by a Professor, I intend to rewrite my Metropolis-within-Gibbs sampler developed for our new model to Hamiltonian Monte Carlo/NUTS by Rstan. Our model could be formulated as Y_{n \times r} \sim X1C_{n \times pc} + X1D_{n \times pd} + X2_{n \times p2}, where Y and X1C are continuous variables, X1D are discrete variables, and X2 could be a mix of continuous and discrete variables. We also put some other assumptions and priors, but will be omitted here for simplicity. Below is the Rstan code (“SIMP.stan”) I wrote.
functions {
matrix kronecker_prod(matrix A, matrix B) {
matrix[rows(A) * rows(B), cols(A) * cols(B)] C;
int m;
int n;
int p;
int q;
m = rows(A);
n = cols(A);
p = rows(B);
q = cols(B);
for (i in 1:m) {
for (j in 1:n) {
int row_start;
int row_end;
int col_start;
int col_end;
row_start = (i - 1) * p + 1;
row_end = (i - 1) * p + p;
col_start = (j - 1) * q + 1;
col_end = (j - 1) * q + q;
C[row_start:row_end, col_start:col_end] = A[i, j] * B;
}
}
return C;
}
}
data {
// these are dimensions
int<lower=1> pc;
int<lower=1> pd;
int<lower=1> p2;
int<lower=1> r;
int<lower=1> n;
int<lower=1, upper=pc-1> dx;
int<lower=1, upper=r-1> dy;
// these are data matrices
array[n] vector[pc] X1C;
array[n] vector[pd] X1D;
array[n] vector[p2] X2;
array[n] vector[r] Y;
}
parameters {
vector[pc] mu1C;
vector[r] muY;
matrix[p2, r] beta2;
matrix[pd, pc] gamma;
cov_matrix[dx] Omega;
cov_matrix[pc - dx] Omega0;
cov_matrix[dy] Phii;
cov_matrix[r - dy] Phii0;
matrix[pc - dx, dx] A;
matrix[r - dy, dy] B;
matrix[dy, dx] etaC;
matrix[dy, pd] etaD;
}
transformed parameters {
matrix[pc, dx] CA;
matrix[pc, dx] L;
matrix[pc, pc - dx] DA;
matrix[pc, pc - dx] L0;
matrix[r, dy] CB;
matrix[r, dy] R;
matrix[r, r - dy] DB;
matrix[r, r - dy] R0;
// matrix[pc, pc] SigmaCD;
cov_matrix[pc] SigmaCD;
// matrix[r, r] SigmaYX;
cov_matrix[r] SigmaYX;
matrix[pc ,r] beta1C;
matrix[pd ,r] beta1D;
CA = append_row(identity_matrix(dx), A);
L = CA * inv_sqrt( crossprod(CA) );
DA = append_row(-A', identity_matrix(pc - dx));
L0 = DA * inv_sqrt( crossprod(DA) );
CB = append_row(identity_matrix(dy), B);
R = CB * inv_sqrt( crossprod(CB) );
DB = append_row(-B', identity_matrix(r - dy));
R0 = DB * inv_sqrt( crossprod(DB) );
SigmaCD = quad_form(Omega, L') + quad_form(Omega0, L0');
SigmaYX = quad_form(Phii, R') + quad_form(Phii0, R0');
beta1C = L * etaC' * R';
beta1D = etaD' * R';
}
model {
array[n] vector[pc] meanCD;
array[n] vector[r] meanYX;
for (i in 1:n){
meanCD[i] = mu1C + gamma' * X1D[i];
meanYX[i] = muY + beta1C' * X1C[i] + beta1D' * X1D[i] + beta2' * X2[i];
}
for (i in 1:n) {
X1C[i] ~ multi_normal(meanCD[i], SigmaCD);
Y[i] ~ multi_normal(meanYX[i], SigmaYX);
}
to_vector(beta2) ~ multi_normal(to_vector(inverse_spd(1e-6 * identity_matrix(p2)) * rep_matrix(0, p2, r)), kronecker_prod(SigmaYX, inverse_spd(1e-6 * identity_matrix(p2))));
to_vector(gamma) ~ multi_normal(to_vector(inverse_spd(1e-6 * identity_matrix(pd)) * rep_matrix(0, pd, pc)), kronecker_prod(SigmaCD, inverse_spd(1e-6 * identity_matrix(pd))));
to_vector(A) ~ multi_normal(to_vector(rep_matrix(0, pc - dx, dx)), kronecker_prod(1e6 * identity_matrix(dx), 1e6 * identity_matrix(pc - dx)));
to_vector(B) ~ multi_normal(to_vector(rep_matrix(0, r - dy, dy)), kronecker_prod(1e6 * identity_matrix(dy), 1e6 * identity_matrix(r - dy)));
to_vector(etaC) ~ multi_normal(to_vector(rep_matrix(0, dy, dx) * inverse_spd(1e-6 * identity_matrix(dx))), kronecker_prod(inverse_spd(1e-6 * identity_matrix(dx)), Phii));
to_vector(etaD) ~ multi_normal(to_vector(rep_matrix(0, dy, pd) * inverse_spd(1e-6 * identity_matrix(pd))), kronecker_prod(inverse_spd(1e-6 * identity_matrix(pd)), Phii));
Omega ~ inv_wishart(dx, 1e-6 * dx * identity_matrix(dx));
Omega0 ~ inv_wishart(pc - dx, 1e-6 * (pc - dx) * identity_matrix(pc - dx) );
Phii ~ inv_wishart(dy, 1e-6 * dy * identity_matrix(dy));
Phii0 ~ inv_wishart(r - dy, 1e-6 * (r - dy) * identity_matrix(r - dy) );
}
And the R codes I execute after generating all data matrices of valid dimensions and variable types, and initial parameters by a R function get_init() for our Metropolis-within-Gibbs sampler.
params_init <- get_init(X1C, X1D_ctr, X2_ctr, Y, dx, dy, method.idx = 1)
initf <- function() {
list( mu1C = params_init$muX1C, muY = params_init$muY,
beta2 = params_init$beta2, gamma = params_init$gamma,
Omega = params_init$Omega, Omega0 = params_init$Omega0,
Phii = params_init$Phi, Phii0 = params_init$Phi0,
A = params_init$A, B = params_init$B,
etaC = params_init$etaC, etaD = params_init$etaD)
}
SIMP.fit <- stan_model("SIMP.stan")
options(mc.cores = parallel::detectCores())
fit <- sampling(SIMP.fit,
data = list(pc = pc, pd = pd, p2 = p2, r = r, n = n, dx = dx, dy = dy,
X1C = X1C, X1D = X1D, X2 = X2, Y = Y),
init = initf,
iter = 20000, chains = 1)
However, although compiled correctly, I consistently got error for the initializations like
"Chain 1: Rejecting initial value:
Chain 1: Error evaluating the log probability at the initial value.
Chain 1: Exception: model4557f3ec824__namespace::log_prob: SigmaCD is not symmetric. SigmaCD[1,2] = nan, but SigmaCD[2,1] = nan (in ‘anon_model’, line 72, column 2 to column 25)
Chain 1:
Chain 1: Initialization between (-2, 2) failed after 100 attempts.
Chain 1: Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model.
[1] “Error in sampler$call_sampler(args_list[[i]]) : Initialization failed.”
I am pretty sure the initial values I give are good enough (works for the Metropolis-within-Gibbs sampler and \sqrt{n}-consistent). I really appreciate it if anyone could let me know where goes wrong. I am stuck in debugging for few days. Thank you so much.