Non-centered parametrisation for hierarchical log_mix (mixed) distribution

Hello,

I have a mixture distribution of this kind (they are two parallel lines)

for (a in 1:A) for(m in 1:M)
      target += log_mix(p,
          normal_lpdf(alpha[a,m] | beta[a,m] * gamma[2] + theta[1], sigma),
          normal_lpdf(alpha[a,m] | beta[a,m] * gamma[2] + 1, sigma)  
      );

I noticed that there is a funnel shape of the posterior. Is there a way I can do a centered parameterisation for this log_mix?

Thanks!

Funnel between which parameters? And can you post more of your model so I can see where you might be having trouble applying the standard non-centering formula?

Just a guess, but I would think a mixture model like this will have difficulty when p β†’ 0, because theta[1] can be almost anything. Conceptually, I think that if p β†’ 0, it would be good if theta[1] β†’ 1, i.e. the first component collapses to the second component of the mixture model.

Thanks for the replies,

I tried to build better priors with some prior-predictive checks. I guess I don’t see anymore a strong funnel

Maybe I could skip this non-centered framework all together

prec_coeff[1] is an intercept
prec_coeff[2] is a slope

The model, quite over complicated for the purpose of this specific issue, is here

functions{
  vector Q_sum_to_zero_QR(int N) {
    vector [2*N] Q_r;

    for(i in 1:N) {
      Q_r[i] = -sqrt((N-i)/(N-i+1.0));
      Q_r[i+N] = inv_sqrt((N-i) * (N-i+1));
    }
    return Q_r;
  }

  row_vector sum_to_zero_QR(row_vector x_raw, vector Q_r) {
    int N = num_elements(x_raw) + 1;
    row_vector [N] x;
    real x_aux = 0;

    for(i in 1:N-1){
      x[i] = x_aux + x_raw[i] * Q_r[i];
      x_aux = x_aux + x_raw[i] * Q_r[i+N];
    }
    x[N] = x_aux;
    return x;
  }

  int[] rep_each(int[] x, int K) {
    int N = size(x);
    int y[N * K];
    int pos = 1;
    for (n in 1:N) {
      for (k in 1:K) {
        y[pos] = x[n];
        pos += 1;
      }
    }
    return y;
  }

  real partial_sum_lpmf(int[] slice_y,
                        int start,
                        int end,
                        int[] exposure_array,
                        vector mu_array,
                        vector precision_array
                        ) {

return beta_binomial_lupmf(
    slice_y |
    exposure_array[start:end],
    (mu_array[start:end] .* precision_array[start:end]),
    (1.0 - mu_array[start:end]) .* precision_array[start:end]
  ) ;

}


}
data{
  int<lower=1> N;
  int<lower=1> M;
  int<lower=1> C;
  int<lower=1> A;
  int<lower=1> Ar; // Rows of unique variability design
  int exposure[N];
  int y[N,M];
  matrix[N, C] X;
  matrix[Ar, A] XA; // The unique variability design
  matrix[N, A] Xa; // The variability design

  // Truncation
  int is_truncated;
  int truncation_up[N,M];
  int truncation_down[N,M];
  int<lower=1, upper=N*M> TNS; // truncation_not_size
  int<lower=1, upper=N*M> truncation_not_idx[TNS];

  int<lower=0, upper=1> is_vb;

  // Prior info
  real prior_prec_intercept[2] ;
  real prior_prec_slope[2] ;
  real prior_prec_sd[2] ;

  // Exclude priors for testing purposes
  int<lower=0, upper=1> exclude_priors;
  int<lower=0, upper=1> bimodal_mean_variability_association;
  int<lower=0, upper=1> use_data;

  // Parallel chain
  int<lower=1> grainsize;

}
transformed data{
  vector[2*M] Q_r = Q_sum_to_zero_QR(M);
  real x_raw_sigma = inv_sqrt(1 - inv(M));

  matrix[N, C] Q_ast;
  matrix[C, C] R_ast;
  matrix[C, C] R_ast_inverse;

  int y_array[N*M];
  int exposure_array[N*M];

  // thin and scale the QR decomposition
  Q_ast = qr_thin_Q(X) * sqrt(N - 1);
  R_ast_inverse = inverse(qr_thin_R(X) / sqrt(N - 1));

  // If I get crazy diagonal matrix omit it
  if(max(R_ast_inverse)>1000){
    print("sccomp says: The QR deconposition resulted in extreme values, probably for the correlation structure of your design matrix. Omitting QR decomposition.");
    Q_ast = X;
    R_ast_inverse = diag_matrix(rep_vector(1.0, C));
  }

  // Data vectorised
  y_array =  to_array_1d(y);
  exposure_array = rep_each(exposure, M);
}
parameters{
	matrix[C, M-1] beta_raw_raw;
	matrix[A, M] alpha;

	// To exclude
  real prec_coeff[2];
  real<lower=0> prec_sd;

  real<lower=0, upper=1> mix_p;
}
transformed parameters{
		matrix[C,M] beta_raw;
		matrix[A,M] beta_intercept_slope;
		matrix[A,M] alpha_intercept_slope;
    matrix[C,M] beta;

    for(c in 1:C)	beta_raw[c,] =  sum_to_zero_QR(beta_raw_raw[c,], Q_r);

    // Beta
    beta = R_ast_inverse * beta_raw; // coefficients on x

    // All this because if A ==1 we have ocnversion problems
    // This works only with two discrete groups
    if(A == 1) beta_intercept_slope = to_matrix(beta[A,], A, M, 0);
    else beta_intercept_slope = (XA * beta[1:A,]);
    if(A == 1)  alpha_intercept_slope = alpha;
    else alpha_intercept_slope = (XA * alpha);

}
model{


  // Calculate MU
  matrix[M, N] precision = (Xa * alpha)';
  matrix[M, N] mu = (Q_ast * beta_raw)';
  for(n in 1:N) { mu[,n] = softmax(mu[,n]); }

  // Convert the matrix m to a column vector in column-major order.
  vector[N*M] mu_array = to_vector(mu);
  vector[N*M] precision_array = to_vector(exp(precision));



  if(use_data == 1){
      // target += beta_binomial_lpmf(
      //   y_array[truncation_not_idx] |
      //   exposure_array[truncation_not_idx],
      //   (mu_array[truncation_not_idx] .* precision_array[truncation_not_idx]),
      //   ((1.0 - mu_array[truncation_not_idx]) .* precision_array[truncation_not_idx])
      // ) ;

    target +=  reduce_sum(
      partial_sum_lupmf,
      y_array[truncation_not_idx],
      grainsize,
      exposure_array[truncation_not_idx],
      mu_array[truncation_not_idx],
      precision_array[truncation_not_idx]
    );
  }




  // Priors
  if(exclude_priors == 0){

    for(i in 1:C) to_vector(beta_raw_raw[i]) ~ normal ( 0, x_raw_sigma );


    // If mean-variability association is bimodal such as for single-cell RNA use mixed model
    if(bimodal_mean_variability_association == 1){
      for (a in 1:A)
      for(m in 1:M)
        target += log_mix(mix_p,
                        normal_lpdf(alpha_intercept_slope[a,m] | beta_intercept_slope[a,m] * prec_coeff[2] + prec_coeff[1], prec_sd ),
                        normal_lpdf(alpha_intercept_slope[a,m] | beta_intercept_slope[a,m] * prec_coeff[2] + 1, prec_sd)  // -0.73074903 is what we observe in single-cell dataset Therefore it is safe to fix it for this mixture model as it just want to capture few possible outlier in the association
                      );

    // If no bimodal
    } else {
      to_vector(alpha_intercept_slope) ~ normal(to_vector(beta_intercept_slope) * prec_coeff[2] + prec_coeff[1], prec_sd );
    }

  // If no priors
  } else {
    for(i in 1:C) to_vector(beta_raw_raw[i]) ~ normal ( 0, 2 );
    for (a in 1:A) alpha[a]  ~ normal( 5, 2 );
  }

  // Hyper priors
  mix_p ~ beta(1,5);
  prec_coeff[1] ~ normal(prior_prec_intercept[1], prior_prec_intercept[2]);
  prec_coeff[2] ~ normal(prior_prec_slope[1], prior_prec_slope[2]);
  prec_sd ~ gamma(prior_prec_sd[1],prior_prec_sd[2]);

}

generated quantities {
  matrix[A, M] alpha_normalised = alpha;


  if(A > 1) for(a in 2:A) alpha_normalised[a] = alpha[a] - (beta[a] * prec_coeff[2] );

}

2 Likes

If prec_sd is not regularised by prior or data and approaches 0, then the funnel will appear.