Dear All,
My first time posting; appreciation in advance for any pointers, and apologies if I’m being silly.
There are various ways to simulate and specify inference for multivariate normals in Stan.
I’m failing to get one I would expect to work for inference to work. If anyone could explain why this doesn’t work, and ideally suggest a correction or alternative, I would be very grateful.
Examples of alternative ways to simulate a mean zero multivariate normal, which all give similar results:
data{
int NV;
int Nobs;
matrix[NV,NV] Sig;
}
transformed data{
matrix[NV,NV] L = cholesky_decompose(Sig);
row_vector[NV] zeros = rep_row_vector(0,NV);
}
generated quantities{
matrix[Nobs,NV] X0;
matrix[Nobs,NV] X1;
matrix[Nobs,NV] X2;
matrix[Nobs,NV] Z;
for(n in 1:Nobs){
X0[n] = multi_normal_rng(zeros,Sig)';
X1[n] = multi_normal_cholesky_rng(zeros,L)';
for(j in 1:NV){
Z[n,j] = normal_rng(0,1);
}
}
X2 = Z * L';
}
Following Stan guidance, a standard approach to inference would look like the below, which works OK:
data{
int Nobs; // number of observations
int NV; // number of variates
matrix[Nobs,NV] Y; // outcomes
real tau_prior_sd; // prior for tau, def=2.5
real lkj_prior_scale; //prior for cor, def=2
}
transformed data{
row_vector[NV] zeros = rep_row_vector(0,NV);
}
parameters{
cholesky_factor_corr[NV] L_Omega; //cholesky factor for variance
vector<lower=0>[NV] tau;
}
transformed parameters{
matrix[NV, NV] L = diag_pre_multiply(tau, L_Omega);// chol factor L for Sigma
}
model{
//priors
L_Omega ~ lkj_corr_cholesky(lkj_prior_scale);
tau ~ cauchy(0,tau_prior_sd);
//likelihood
for(n in 1:Nobs){
Y[n] ~ multi_normal_cholesky(zeros,L);
}
}
I would have expected that transforming the data with the cholesky factor would allow one to write the likelihood in terms of standard normals, inverting the simulation approach for variable X2 above. I’m interested in this as a way to facilitate specification of hierarchical models and because it is faster. However, this does not give sensible results:
data{
int Nobs; // number of observations
int NV; // number of variates
matrix[Nobs,NV] Y; // outcomes
real tau_prior_sd; // prior for tau, def=2.5
real lkj_prior_scale; //prior for cor, def=2
}
parameters{
cholesky_factor_corr[NV] L_Omega; //cholesky factor for variance
vector<lower=0>[NV] tau;
}
transformed parameters{
matrix[NV, NV] L = diag_pre_multiply(tau, L_Omega);// chol factor L for Sigma
matrix[NV,Nobs] z = mdivide_left_tri_low(L,Y'); //should be std normal: same as X2 = Z * L' in cholsim.stan
}
model{
//priors
L_Omega ~ lkj_corr_cholesky(lkj_prior_scale);
tau ~ normal(0,tau_prior_sd);
//likelihood
to_vector(z) ~ std_normal();
}
The below R code compiles the above Stan code and generates some comparisons:
library(rstan)
set.seed(1234)
## comparing approaches in simulation:
sim_model <- stan_model(file = "cholsim.stan") #simulation program
## create data:
Nobs <- 1e3 #number of observations to simulate
N <- 5 #outcome dimensions
S <- diag(1, N)
S[1, 2] <- S[2, 1] <- 0.5 #a simple covariance matrix with some correlation
SIN <- list(Nobs=Nobs,NV=N,Sig=S)
## simulate
stout <- sampling(sim_model, data = SIN, iter = 1, algorithm = "Fixed_param", chains = 1)
## compare outputs
out <- summary(stout, par = "X0")$summary[,1]
X0 <- matrix(out, nrow = Nobs, ncol = N, byrow = TRUE)
out <- summary(stout, par = "X1")$summary[, 1]
X1 <- matrix(out, nrow = Nobs, ncol = N, byrow = TRUE)
out <- summary(stout, par = "X2")$summary[, 1]
X2 <- matrix(out, nrow = Nobs, ncol = N, byrow = TRUE)
## compare empirical covariance with input covariance
S
cov(X0)
cov(X1)
cov(X2)
## seemingly OK
## now apply this in inference
## === less efficient version
sm1 <- stan_model(file = "cholmulti.stan") #version with mult_normal_cholesky
sm2 <- stan_model(file = "cholwithZ.stan") #version trying to set the likelihood using z
## first file for stan inference:
sdata <- list(
Nobs = nrow(X2), # number of observations
NV = ncol(X2), # number of variates
Y = X2, # data
tau_prior_sd = 2.5, # prior for tau
lkj_prior_scale = 2 # prior for cor
)
## sampling
smps1 <- sampling(sm1, data = sdata, iter = 2e3, chains = 1)
smps2 <- sampling(sm2, data = sdata, iter = 2e3, chains = 1)
## function to look at cors:
get_ests <- function(smps){
## tau
out <- summary(smps,par="tau")$summary
sel <- rownames(out) %in% paste0("tau[",1:N, "]")
tauest <- out[sel,1]
## L
out <- summary(smps,par="L")$summary
selnames <- c(paste0("L[", 1:3, ",", 1:3, "]"),
paste0("L[", c(1,1,2,2,3,3), ",", c(2,3,3,1,1,2), "]"))
sel <- rownames(out) %in% selnames
Lest <- out[sel,1]
LM <- diag(Lest[paste0("L[", 1:3, ",", 1:3, "]")])
LM[1,2] <- Lest["L[1,2]"]
LM[1,3] <- Lest["L[1,3]"]
LM[2,3] <- Lest["L[2,3]"]
LM[2,1] <- Lest["L[2,1]"]
LM[3,1] <- Lest["L[3,1]"]
LM[3,2] <- Lest["L[3,2]"]
## return
list(tau = tauest,Sig = LM %*% t(LM))
}
## expect this:
S[1:3,1:3]
cov(X2)[1:3,1:3]
## version 1:
get_ests(smps1) #looks good
## version 2:
get_ests(smps2) #total nonsense with very high tau