Bivariate-beta mixture model with copula

Hi,

I am trying to implement a hierarchical beta mixture model with fixed alphas and betas and prior on the components weights. I based my code off another post here and I added the copula on top. My aim is to get the cluster estimates for each observation. Could someone tell me whether this makes sense and whether there are smarter ways to go about it?


data {
  int<lower=1> K; // number of mixture components
  int<lower=1> N; // number of data points
  int<lower=1> I;//number of individuals
  real<lower=0,upper=1> y[N]; // observations
  real<lower=0,upper=1> x[N]; // Observations 
  real<lower = 0> alphax[I,K];
  real<lower = 0> betax[I,K];
  real<lower = 0> alphay[I,K];
  real<lower = 0> betay[I,K];
  real<lower = 0, upper = 1> theta[I,K];
  real<lower = -1, upper = 1> rho[I,K]
  int<lower=0,upper=1> prior_only; // =1 to get data from prior predictive
}
parameters {
  simplex[K] w[I];
}

transformed parameters {
  vector[I] log_w[K];
  for (i in 1:I) {
    for (k in 1:K) {
      log_w[k][i] = log(w[i][k]);
    }
  }
}

model {
  
  // priors
  for(i in 1:I){
    w[i] ~ dirichlet(theta[i]);
  }

  // individual likelihoods, as sum of component contributions
  for (n in 1:N) {
    // contributions from each component
    // these get overwritten for each observation
    vector[K] lps;  // contributions from each component
    int i = participant[n]; // Retrieve participant index for nth data point
  
    for(k in 1:K){
      real beta_x = beta_lpdf(x[n] | alphax[i,k], betax[i,k]);
      real beta_y = beta_lpdf(y[n] | alphay[i,k], betay[i,k]);
      real u_x = beta_cdf(x[n] | alphax[i,k], betax[i,k]);
      real u_y = beta_cdf(y[n] | alphay[i,k], betay[i,k]);
      real copula_density = gaussian_copula_density(u_x, u_y, rho[i,k]);
      
      lps[k] = log_w[i][k]+beta_x + beta_y+ log(copula_density);
      }
      target += log_sum_exp(lps);
    }
  }
generated quantities {
  real x_rep[N];
  real y_rep[N];
   
  for (n in 1:N) {
    int i = participant[n]; // Retrieve participant index for nth data point
    vector[K] ps;
    
    for (k in 1:K) {
      real u_x = beta_cdf(x[n] | alphax[i, k], betax[i, k]);
      real u_y = beta_cdf(y[n] | alphay[i, k], betay[i, k]);
      real copula_density = gaussian_copula_density(u_x, u_y, rho[i, k]);
      
      ps[k] = w[i][k] * copula_density;
    }
    
    int comp = categorical_rng(ps); // Sample a component
    x_rep[n] = beta_rng(alphax[i, comp], betax[i, comp]); // Generate new x data
    y_rep[n] = beta_rng(alphay[i, comp], betay[i, comp]); // Generate new y data
  }
}

// Function to compute the Gaussian copula density
real gaussian_copula_density(real u_x, real u_y, real rho) {
  real z_x = quantile(normal_cdf(u_x), 0.5); // Convert to normal scale
  real z_y = quantile(normal_cdf(u_y), 0.5); // Convert to normal scale
  real copula_density = normal_lpdf(z_x | 0, 1) + normal_lpdf(z_y | 0, 1)
                       - normal_lpdf(z_x, z_y | 0, rho); // Gaussian copula density
  return exp(copula_density);
}
    

Changed the code a little bit

functions{
  // Function to compute the Gaussian copula density
  real gaussian_copula_density(real u_x, real u_y, real rho) {
    real rho_sq = square(rho);
    real inv_rho_sq = 1 - rho_sq;
    real z_x = inv_Phi(u_x); // inverted normal cdf
    real z_y = inv_Phi(u_y); // inverted normal cdf
    real term1 = 1 / (2 * pi() * sqrt(inv_rho_sq));
    real term2 = exp(-0.5 * (square(z_x) + square(z_y) - 2 * rho * z_x * z_y) / inv_rho_sq);

  return term1 * term2;
  }
}

data {
  int<lower=1> K; // number of mixture components
  int<lower=1> N; // number of data points
  int<lower=1> I;//number of individuals
  int<lower=1, upper=I> participant[N]; // Participant index for each observation
  real<lower=0,upper=1> y[N]; // observations
  real<lower=0,upper=1> x[N]; // Observations 
  real<lower = 0> alphax[I,K];
  real<lower = 0> betax[I,K];
  real<lower = 0> alphay[I,K];
  real<lower = 0> betay[I,K];
  row_vector<lower=0>[K] theta[I]; // Dirichlet prior parameters for each participant
  real<lower = -1, upper = 1> rho[I,K];
  int<lower=0,upper=1> prior_only; // =1 to get data from prior predictive
}
parameters {
  simplex[K] w[I];
}

transformed parameters {
  vector[I] log_w[K];
  for (i in 1:I) {
    for (k in 1:K) {
      log_w[k][i] = log(w[i][k]);
    }
  }
}

model {
  
  // priors
  for(i in 1:I){
    w[i] ~ dirichlet(theta[i]);
  }

  // individual likelihoods, as sum of component contributions
  for (n in 1:N) {
    // contributions from each component
    // these get overwritten for each observation
    vector[K] lps;  // contributions from each component
    int i = participant[n]; // Retrieve participant index for nth data point
  
    for(k in 1:K){
      real beta_x = beta_lpdf(x[n] | alphax[i,k], betax[i,k]);
      real beta_y = beta_lpdf(y[n] | alphay[i,k], betay[i,k]);
      real u_x = beta_cdf(x[n] | alphax[i,k], betax[i,k]);
      real u_y = beta_cdf(y[n] | alphay[i,k], betay[i,k]);
      real copula_density = gaussian_copula_density(u_x, u_y, rho[i,k]);
      
      lps[k] = log_w[i][k]+beta_x + beta_y+ log(copula_density);
      }
      target += log_sum_exp(lps);
    }
  }
generated quantities {
  real x_rep[N];
  real y_rep[N];
   
  for (n in 1:N) {
    int i = participant[n]; // Retrieve participant index for nth data point
    vector[K] ps;
    
    for (k in 1:K) {
      real u_x = beta_cdf(x[n] | alphax[i, k], betax[i, k]);
      real u_y = beta_cdf(y[n] | alphay[i, k], betay[i, k]);
      real copula_density = gaussian_copula_density(u_x, u_y, rho[i, k]);
      
      ps[k] = w[i][k] * copula_density;
    }
    
    int comp = categorical_rng(ps); // Sample a component
    x_rep[n] = beta_rng(alphax[i, comp], betax[i, comp]); // Generate new x data
    y_rep[n] = beta_rng(alphay[i, comp], betay[i, comp]); // Generate new y data
  }
}

In generated_quantities can you proof ps (sums) integrates to 1? In

why there is no beta_x, beta_y, but in the model there is? As far as I remember in one paper of copula these come from the differentiation of the probability function of the copula, usually C.

This can just be

array[K] vector[I] log_w = log(w);

The array syntax has been deprecated and the new version is as above. Here, you need to replace this:

with this:

array[I, K] real<lower=0> alphax;

For efficiency, theta should be an array of vectors, in which case this can be written as

w ~ dirichlet(theta);

I find the use of betax and beta_x together in one model to be confusing.

I don’t know copulas well enough to comment, but if you have the weights right, then this looks right in terms of the mixture. You can simplify a bit by doing this:

vector[K] lps = log_w[i];
...
lps[k] += beta_x + beta_y + log(copula_density);

If at all possible defining the copula_density on the log scale directly should be more robust. Densities in high dimensions otherwise have a tendency to underflow double-precision floating point. We don’t have an inv_Phi on the log scale, but you could return log(term1) + log(term2) in a gaussian_copula_log_density() function. That way, you don’t need to round-trip a log/exp for term2, which is really prone to overflow/underflow. I also found it confusing that inv_rho_sq is defined to be 1 - rho_sq rather than 1 / rho_sq. I’d call that one_m_rho_sq.

Then log(term1) = -log(2 * pi()) - 0.5 * log1m(rho_sq), which should also be more stable if rho_sq is very close to 1.

1 Like

Hi, sorry, could you elaborate on your last point? I am extremely new to probabilistic modelling so I am not to sure of you to use the copula in the generated quantities

Hi Bob, Thank you for your comment!

Re

For efficiency, theta should be an array of vectors, in which case this can be written as

w ~ dirichlet(theta);

I have it indexed by I because I will have a different set of thetas for each participant. In this case, it would be compulsory to do it the way I coded it no?{ w[i]~dirichlet(theta[i]) }

Finally, here is the implemented code with your suggestions. Is this what you meant?

functions{
  // Function to compute the Gaussian copula density
  real gaussian_copula_density(real u_x, real u_y, real rho) {
    real rho_sq = square(rho);
    real one_m_rho_sq = 1 - rho_sq;
    real z_x = inv_Phi(u_x); // inverted normal cdf
    real z_y = inv_Phi(u_y); // inverted normal cdf
    real log_term1 = -log(2 * pi()) - 0.5 * log1m(rho_sq);
    real log_term2 = -0.5 * (square(z_x) + square(z_y) - 2 * rho * z_x * z_y) / one_m_rho_sq;

  return log_term1 * log_term2;
  }
}

data {
  int<lower=1> K; // number of mixture components
  int<lower=1> N; // number of data points
  int<lower=1> I;//number of individuals
  int<lower=1, upper=I> participant[N]; // Participant index for each observation
  real<lower=0,upper=1> y[N]; // observations
  real<lower=0,upper=1> x[N]; // Observations 
  array[I, K] real<lower=0> alphax;
  array[I, K] real<lower = 0> betax;
  array[I, K] real<lower = 0> alphay;
  array[I, K] real<lower = 0> betay;
  row_vector<lower=0>[K] theta[I]; // Dirichlet prior parameters for each participant
  real<lower = -1, upper = 1> rho[I,K];
  int<lower=0,upper=1> prior_only; // =1 to get data from prior predictive
}
parameters {
  simplex[K] w[I];
}

transformed parameters {
  array[K] vector[I] log_w = log(w);
}

model {
  
  // priors
  for(i in 1:I){
    w[i] ~ dirichlet(theta[i]);
  }

  // individual likelihoods, as sum of component contributions
  for (n in 1:N) {
    vector[K] lps = log_w[participant[n]];  // Initialize with log weights
    int i = participant[n]; // Retrieve participant index for nth data point
  
    for(k in 1:K){
      real beta_x = beta_lpdf(x[n] | alphax[i,k], betax[i,k]);
      real beta_y = beta_lpdf(y[n] | alphay[i,k], betay[i,k]);
      real u_x = beta_cdf(x[n] | alphax[i,k], betax[i,k]);
      real u_y = beta_cdf(y[n] | alphay[i,k], betay[i,k]);
      real copula_density = gaussian_copula_density(u_x, u_y, rho[i,k]);
      
      lps[k] = beta_x + beta_y+ log(copula_density);
      }
      target += log_sum_exp(lps);
    }
  }
parameters {
  simplex[K] w[I];
}

transformed parameters {
  array[K] vector[I] log_w = log(w);
}

Should be:

parameters {
  array[I] simplex[K] w;
}

transformed parameters {
  array[I] vector[K] log_w  = log(w);
}
      lps[k] = beta_x + beta_y + log(copula_density);

should be

      lps[k] += beta_x + beta_y + log(copula_density);
1 Like