For anyone finding their way here in the future, the approach discussed by myself and @spinkney gives consistent priors across indices of the correlation matrix and works well for generating data, and I had some success with it hierarchically, but in some circumstances – I think when the correlation signal is relatively strong – this approach seems to give difficulty with sampling and optimization, with lots of multimodality. I’ve updated the code, it performs much better and copes with higher correlations with less bias (much better than the stan lkj approach too it seems) but the downside is priors on the bivariate correlations are not exactly the same across indices of the matrix. It should be possible to correct this behaviour, but for the time being I’ll post the update with the ‘rough correction’ (divide by column index) I guesshacked. The following code compares my version to the LKJ approach, it doesn’t show the hierarchical form but that is hopefully clear enough – just recompute the correlation matrix for whatever vector of unconstrained correlation parameters are relevant. I guess @mike-lawrence will be pretty interested here.
Also, the inability to edit old posts to point them to updated info is a bit annoying…
require(rstan)
smt <- '
functions{
matrix constraincorsqrt(vector rawcor, int d){ //converts from unconstrained lower tri matrix to cor
matrix[d,d] o;
real cormax=0;
real rowsum;
int counter = 0;
for(i in 1:d){ //constrain and set upper tri to lower
for(j in 1:d){
if(j > i){
counter += 1;
o[j,i] = rawcor[counter] / i;
o[i,j] = 0;//o[j,i];
}
}
o[i,i] = 1; // smaller gives more prior weight to larger correlations, larger gives less
o[i,] = o[i,] / sqrt(sum(square(o[i,])+1e-8));
}
return (o);
}
}
data{
int d;
int n;
vector[d] dat[n];
real corpriorscale;
}
parameters{
vector[(d * d - d) / 2] rawcor;
vector[d] logsdvec;
vector[d] mu;
}
transformed parameters{
matrix[d,d] mcor;
matrix[d, d] mcov;
matrix[d, d] mcovchol;
mcor = tcrossprod(constraincorsqrt(rawcor,d));
mcov = quad_form_diag(mcor, exp(logsdvec) +1e-8);
mcovchol = cholesky_decompose(mcov);
}
model{
rawcor ~ normal(0,corpriorscale);
logsdvec ~ normal(0,10);
mu ~ normal(0,10);
dat ~ multi_normal_cholesky(mu,mcovchol);
}
'
sm <- stan_model(model_code = smt)
smtlkj <- '
data{
int d;
int n;
vector[d] dat[n];
real corpriorscale;
}
parameters{
cholesky_factor_corr[d] cholcor;
vector[d] logsdvec;
vector[d] mu;
}
transformed parameters{
matrix[d,d] mcor = tcrossprod(cholcor);
matrix[d, d] mcov = quad_form_diag(mcor,exp(logsdvec));
matrix[d, d] mcovchol = cholesky_decompose(mcov);
}
model{
cholcor ~ lkj_corr_cholesky(corpriorscale);
logsdvec ~ normal(0,10);
mu ~ normal(0,10);
dat ~ multi_normal_cholesky(mu,mcovchol);
}
'
smlkj <- stan_model(model_code = smtlkj)
d=5
n=30
mcov <- diag(1,d)
#some high some low
mcov[lower.tri(mcov)][seq(1,(d^2-d)/2,d)] <- .8
# mcov[,1] <- mcov[,1]+3
#random
# mcov[lower.tri(mcov)]<- rnorm((d^2-d)/2)#.8
mcov[upper.tri(mcov)] <- t(mcov)[upper.tri(mcov)]
mcov <- cov2cor(mcov %*% t(mcov))
mcovchol <- t(chol(mcov))
mcor=cov2cor(mcov)
dat <- matrix(rnorm(d*n,0,1),n,d)
dat <- t(mcovchol %*% t(dat))
dat <- scale(dat)
round(cor(dat),3)
round(mcor,3)
sdat <- list(n=n,d=d, dat=dat,corpriorscale=10)
# sf <- ctsem:::stanWplot(sm,sdat,chains=4,cores=4,iter=2000)
ta<-Sys.time()
sf <- sampling(sm,sdat,chains=4,cores=4,iter=2000)
ta <- Sys.time()-ta
print(ta)
tb<-Sys.time()
# sflkj <- ctsem:::stanWplot(smlkj,sdat,chains=4,cores=4,iter=2000)
sflkj <- sampling(smlkj,sdat,chains=4,cores=4,iter=2000)
tb <- Sys.time()-tb
print(tb)
s=round(summary(sf)$summary,3)
s[grep('mcor',rownames(s)),]
slkj=round(summary(sflkj)$summary,3)
slkj[grep('mcor',rownames(slkj)),]
plot(s[grep('mcor',rownames(s)),1],pch=16,col=2,ylab='Corr.')
points(c(cor(dat)),pch=1,col='black')
# points(c(mcor),pch=16,col='blue')
points(slkj[grep('mcor',rownames(slkj)),1],pch=16,col='green')
points(s[grep('mcor',rownames(s)),4],col=2,pch=3)
points(s[grep('mcor',rownames(s)),8],col=2,pch=3)
points(slkj[grep('mcor',rownames(slkj)),4],pch=3,col='green')
points(slkj[grep('mcor',rownames(slkj)),8],pch=3,col='green')
legend('topright',c('True','CD','LKJ'),text.col=c(1,2,3),bty='n')
and in case anyone wants to do me a favour and figure out a proper solution, here is a simple script for testing:
ndim=5
mat=matrix(1,ndim,ndim)
corfunc=function(mat, invert){
o=mat;
d=nrow(o)
for(i in 1:nrow(o)){
for(j in 1:nrow(o)){
if(j > i){
o[j,i] = o[j,i] / (i) #divide by i was just a rough guess that semi works
o[i,j] = 0;
}
}
o[i,i]=1 #adjust this value to modify propensity for high correlation strengths
o[i,] = o[i,] / sqrt(sum((o[i,]^2)))
}
return(o)
}
m=tcrossprod(corfunc(mat,F))
mold
m
message('old mean = ',mean(mold[lower.tri(mold)]))
message('new mean = ',mean(m[lower.tri(m)]))
message('old diff = ',
sqrt(mean(( mold[lower.tri(mold)]-mean(mold[lower.tri(mold)]))^2)))
message('new diff = ',
sqrt(mean(( m[lower.tri(m)]-mean(m[lower.tri(m)]))^2)))
mold=m