@bgoodri Thank you very much for all your help with this. I have now coded up the example for a multivariate distribution with K dimensions (see below). All seems to work – the model is actually excellent at returning back the parameters that I use to generate the data (R code also below). Many thanks again for your help here, and include the code below in case others find it useful. I am, in particular, looking forward to extending this model to encompass a variety of discrete marginals: bernoulli, binomial, and negative binomial for starters. Best, Ben
R code to generate data from a Gaussian Copula with Poisson marginals (using a covariance matrix aSigma, and with marginal means given by the vector lMeans):
fMultivariateGaussianCopulaPoisson <- function(aSigma,lMeans){
K <- length(lMeans)
vX <- mvrnorm(1,rep(0,K),aSigma)
vU <- pnorm(vX)
lY <- vector(length=K)
for(i in 1:K)
lY[i] <- qpois(vU[i],lMeans[i])
return(lY)
}
// For N observations from the above
fGenerateMultivariateCopulaPoisson <- function(N,aSigma,lMeans){
K <- nrow(aSigma)
mCopula <- matrix(nrow = N,ncol=K)
for(i in 1:N){
mCopula[i,] <- fMultivariateGaussianCopulaPoisson(aSigma,lMeans)
}
return(mCopula)
}
Stan code (note I had to constrain the Poisson marginal parameters to avoid initialising these parameters with too large values that caused the log probability to go to -inf):
functions{
vector [] fFindBounds(row_vector vU, matrix L, int[] vY, vector vLambda){
int K = cols(vU);
matrix[K,2] mBounds;
vector[K] lLogProbU;
vector[K] u_star_lower;
vector[K] u_star_upper;
real z_star_lower;
real z_star_upper;
vector[K] z;
vector[K] v;
vector[K] out[2];
for(k in 1:K){
int km1 = k - 1;
if(vY[k]<0.001){ ## upper bound only
mBounds[k,1] = -1;
mBounds[k,2] = inv_Phi(poisson_cdf(vY[k],vLambda[k]));
z_star_lower = -1;
z_star_upper = (mBounds[k,2] - ((k > 1) ? L[k,1:km1] * head(z,km1) : 0)) / L[k,k];
u_star_lower[k] = 0;
u_star_upper[k] = Phi(z_star_upper);
}else{
mBounds[k,1] = inv_Phi(poisson_cdf(vY[k]-1,vLambda[k]));
mBounds[k,2] = inv_Phi(poisson_cdf(vY[k],vLambda[k]));
z_star_lower = (mBounds[k,1] - ((k > 1) ? L[k,1:km1] * head(z,km1) : 0)) / L[k,k];
z_star_upper = (mBounds[k,2] - ((k > 1) ? L[k,1:km1] * head(z,km1) : 0)) / L[k,k];
u_star_lower[k] = Phi(z_star_lower);
u_star_upper[k] = Phi(z_star_upper);
}
v[k] = u_star_lower[k] + (u_star_upper[k]-u_star_lower[k]) * vU[k];
z[k] = inv_Phi(v[k]);
lLogProbU[k] = u_star_upper[k] - u_star_lower[k];
}
out[1] = lLogProbU;
out[2] = v;
return(out);
}
}
data{
int N;
int K;
int Y[N,K];
}
transformed data{
vector[K] vZeros;
for(i in 1:K)
vZeros[i] = 0;
}
parameters{
cholesky_factor_corr[K] L; // correlation between Xs
vector<lower=0,upper=30>[K] lambda; // mean of each marginal poisson
vector<lower=0>[K] sigma;
// augmented likelihood parameters
matrix<lower=0,upper=1>[N,K] u;
}
transformed parameters{
matrix[N,K] x;
vector[N] lLogProb;
matrix[N,K] mLogProbU;
matrix[N,K] z;
vector[K] lOut[N,2];
matrix[N,K] mv;
cholesky_factor_cov[K] L_cov;
L_cov = diag_pre_multiply(sigma,L);
for(i in 1:N){
lOut[i] = fFindBounds(u[i],L_cov,Y[i],lambda);
mv[i] = to_row_vector(lOut[i,2]);
mLogProbU[i] = to_row_vector(lOut[i,1]);
}
for(i in 1:N){
for(j in 1:K){
z[i,j] = inv_Phi(mv[i,j]);
}
}
for(i in 1:N){
x[i] = to_row_vector(L_cov * to_vector(z[i]));
}
for(i in 1:N){
lLogProb[i] = multi_normal_cholesky_lpdf(to_vector(x[i])|vZeros,L_cov);
}
}
model{
for(i in 1:N){
target += lLogProb[i];
for(j in 1:K)
target += log(mLogProbU[i,j]);
}
L ~ lkj_corr_cholesky(2.0);
lambda ~ normal(5,10);
sigma ~ lognormal(0,1);
}
generated quantities{
cov_matrix[K] Omega;
Omega = L_cov * L_cov';
}