Gaussian copula for discrete marginals

@bgoodri Thank you very much for all your help with this. I have now coded up the example for a multivariate distribution with K dimensions (see below). All seems to work – the model is actually excellent at returning back the parameters that I use to generate the data (R code also below). Many thanks again for your help here, and include the code below in case others find it useful. I am, in particular, looking forward to extending this model to encompass a variety of discrete marginals: bernoulli, binomial, and negative binomial for starters. Best, Ben

R code to generate data from a Gaussian Copula with Poisson marginals (using a covariance matrix aSigma, and with marginal means given by the vector lMeans):

fMultivariateGaussianCopulaPoisson <- function(aSigma,lMeans){
  K <- length(lMeans)
  vX <- mvrnorm(1,rep(0,K),aSigma)
  vU <- pnorm(vX)
  lY <- vector(length=K)
  for(i in 1:K)
    lY[i] <- qpois(vU[i],lMeans[i])
  return(lY)
}

// For N observations from the above
fGenerateMultivariateCopulaPoisson <- function(N,aSigma,lMeans){
  K <- nrow(aSigma)
  mCopula <- matrix(nrow = N,ncol=K)
  for(i in 1:N){
    mCopula[i,] <- fMultivariateGaussianCopulaPoisson(aSigma,lMeans) 
  }
  return(mCopula)
}

Stan code (note I had to constrain the Poisson marginal parameters to avoid initialising these parameters with too large values that caused the log probability to go to -inf):

functions{
  vector [] fFindBounds(row_vector vU, matrix L, int[] vY, vector vLambda){
    int K = cols(vU);
    matrix[K,2] mBounds;
    vector[K] lLogProbU;
    vector[K] u_star_lower;
    vector[K] u_star_upper;
    real z_star_lower;
    real z_star_upper;
    vector[K] z;
    vector[K] v;
    vector[K] out[2];
    for(k in 1:K){
      int km1 = k - 1;
      if(vY[k]<0.001){ ## upper bound only
        mBounds[k,1] = -1;
        mBounds[k,2] = inv_Phi(poisson_cdf(vY[k],vLambda[k]));
        z_star_lower = -1;
        z_star_upper = (mBounds[k,2] - ((k > 1) ? L[k,1:km1] * head(z,km1) : 0)) / L[k,k];
        u_star_lower[k] = 0;
        u_star_upper[k] = Phi(z_star_upper);
      }else{
        mBounds[k,1] = inv_Phi(poisson_cdf(vY[k]-1,vLambda[k]));
        mBounds[k,2] = inv_Phi(poisson_cdf(vY[k],vLambda[k]));
        z_star_lower = (mBounds[k,1] - ((k > 1) ? L[k,1:km1] * head(z,km1) : 0)) / L[k,k];
        z_star_upper = (mBounds[k,2] - ((k > 1) ? L[k,1:km1] * head(z,km1) : 0)) / L[k,k];
        u_star_lower[k] = Phi(z_star_lower); 
        u_star_upper[k] = Phi(z_star_upper);
      }
      v[k] = u_star_lower[k] + (u_star_upper[k]-u_star_lower[k]) * vU[k];
      z[k] = inv_Phi(v[k]);
      lLogProbU[k] = u_star_upper[k] - u_star_lower[k];
    }
    
    out[1] = lLogProbU;
    out[2] = v;
    
    return(out);
  }
}

data{
  int N;
  int K;
  int Y[N,K];
}

transformed data{
  vector[K] vZeros;
  for(i in 1:K)
    vZeros[i] = 0;
}

parameters{
  cholesky_factor_corr[K] L; // correlation between Xs
  vector<lower=0,upper=30>[K] lambda; // mean of each marginal poisson
  vector<lower=0>[K] sigma;
  
  // augmented likelihood parameters
  matrix<lower=0,upper=1>[N,K] u;
}


transformed parameters{
  matrix[N,K] x;
  vector[N] lLogProb;
  matrix[N,K] mLogProbU;
  matrix[N,K] z;
  vector[K] lOut[N,2];
  matrix[N,K] mv;
  cholesky_factor_cov[K] L_cov;
  
  L_cov = diag_pre_multiply(sigma,L);
  
  for(i in 1:N){
    lOut[i] = fFindBounds(u[i],L_cov,Y[i],lambda);
    mv[i] = to_row_vector(lOut[i,2]);
    mLogProbU[i] = to_row_vector(lOut[i,1]);
  }
    
  for(i in 1:N){
    for(j in 1:K){
      z[i,j] = inv_Phi(mv[i,j]);
    }
  }
      
  for(i in 1:N){
       x[i] =  to_row_vector(L_cov * to_vector(z[i]));
  }
    
  for(i in 1:N){
    lLogProb[i] = multi_normal_cholesky_lpdf(to_vector(x[i])|vZeros,L_cov);
  }
}

model{
  for(i in 1:N){
    target += lLogProb[i];
    for(j in 1:K)
      target += log(mLogProbU[i,j]);
  }
  
  L ~ lkj_corr_cholesky(2.0);
  lambda ~ normal(5,10);
  sigma ~ lognormal(0,1);
}

generated quantities{
  cov_matrix[K] Omega;
  Omega = L_cov * L_cov';
}
1 Like