My model contains 2 Cholesky factors (K=2; each a 4 by 4 cholesky_factor_corr[D]), like this:
parameters {
cholesky_factor_corr[D] L[K]; // Cholesky factor of ranef corr matrix
}
How do I manually set initiation values for L[K], where k=1 and k=2?
If I manually set
Corr <- matrix(0.3, ncol = Dim, nrow = Dim); diag(Corr) <- 1.0
LCorr <- t(chol(Corr))
Arr <- array(rep(LCorr, 2), dim = c(Dim, Dim, dat$K))
initfun <- function(chain_id = 1) {
list(mu = array(c(runif(dat$K, min = -1.0, max = -0.5),
runif(dat$K, -0.5, 0)),
dim = c(dat$K, Dim)),
L = Arr )
}
I got this error about L: “mismatch in dimension declared and found in context; processing stage=initialization; variable name=L; position=0; dims declared=(2,4,4); dims found=(4,4,2)”. In R, the custom init values are in a 4x4x2 array (two slices of 4x4 matrices). It seems that stan is expecting a 2x4x4 array for L[K]. But I have no idea on how to represent this array in R. I can use aperm(Arr, c(3, 1, 2)) to turn Arr into a 2x4x4 array, but the values are all misaligned. Any pointer on how to do this is greatly appreciated.
Below is a simplified version of my model to make it easier to follow. The R script is also listed. I should mention that stan runs fine if init=“random” by default. This simplified model is obviously flawed. But I am not concerned about the flaws, but about how to manually set initiation values on Cholesky L[K].
data {
int<lower=1> D; // multivariate normal of dimension D
int<lower=1> K; // number of clusters
int<lower=1> N; // number of data entries
int<lower=1> fem1[N]; // females == 1
vector[D] y[N]; // N entries of column vector of length D
}
parameters {
vector[D] mu[K]; // D dimensions, each a vector of length K
cholesky_factor_corr[D] L[K]; // Cholesky factor of ranef corr matrix
}
model {
mu[1] ~ normal(-3, 1);
mu[2] ~ normal(0, 1);
for (k in 1:K) {
L[k] ~ lkj_corr_cholesky(2.0);
}
for (n in 1:N) {
target += multi_normal_cholesky_lpdf(y[n] | mu[fem1[n]], L[fem1[n]]);
}
}
R script:
library(mvtnorm)
### 4-dimensional multivariate normal data
set.seed(23)
mu1 <- c(0,0,0,0)
#mu1 <- c(0,-1,1,0)
sigma1 <- diag(4) * 0.1
sigma1[row(sigma1) != col(sigma1)] <- 0.03
norm1 <- rmvnorm(30, mean = mu1, sigma=sigma1)
mu2 <- c(7,7,7,7)
sigma2 <- diag(4) * 0.1
sigma2[row(sigma2) != col(sigma2)] <- 0.01
norm2 <- rmvnorm(30, mean=mu2, sigma=sigma2)
norms <- rbind(norm1, norm2)
N <- nrow(norms)
Dim <- ncol(norms)
y <- array(as.vector(norms), dim = c(N, Dim))
fem.1 <- rep(c(1, 2), each = 30)
dat <- list(N = N, D = Dim, y = y, K = 2, fem1 = fem.1)
library("rstan")
options(mc.cores = parallel::detectCores())
# Cholesky factor from a correlation matrix
Corr <- matrix(0.3, ncol = Dim, nrow = Dim); diag(Corr) <- 1.0
LCorr <- t(chol(Corr))
Arr <- array(rep(LCorr, 2), dim = c(Dim, Dim, dat$K))
initfun <- function(chain_id = 1) {
list(mu = array(c(runif(dat$K, min = -1.0, max = -0.5),
runif(dat$K, -0.5, 0)),
dim = c(dat$K, Dim)),
L = Arr )
}
n_chains <- 2
init_ll <- lapply(1:n_chains, function(id) initfun(chain_id = id))
fit.1 <- stan(model_code = model, model_name ="norm2", data = dat,
iter=1500, chains = n_chains, warmup=500, seed=11,
init = init_ll,
control = list(adapt_delta = 0.95))