Joint Modeling of Mixed Outcome Types

@spinkney @andrjohns

Quick syntax question as I manipulate some of your copula code. I paste the full model below. I have not gotten the ordinal probit coded up yet, but in the meantime, I am just testing on continuous data to make sure I have the code up and running there first. Note, this is not ‘efficient’ or anything. Just testing some code.

That said, I am just adjusting small parts for my own use. This includes my own mean function \mu (instead of the linear model) and I also want to customize the sd \sigma - basically I want sigma to vary as a function of x. I’ve gotten this to work in general models like here . My attempt at this is seen in the code below in the model block (I even write HELP to stand out :)). I know I need to manipulate this area AND manipulate the normal_marginal function from above because as written it only takes a vector of 2 SDs (assumes constant noise) - essentially I am trying to model heteroscedasticity here.

You’ll see in some of my commented areas I’ve attempted a few things including declaring a new matrix. I always get an error related to incorrect syntax either in the normal_marinal function or during my initial declaration in the model block.

I hope all of this makes sense!

functions{
  matrix chol2inv2(matrix L) {
    int K = rows(L);
    matrix[K, K] K_identity = diag_matrix(rep_vector(1.0, K));
    matrix[K, K] L_inv = mdivide_left_tri_low(L, K_identity);
    matrix[K, K] rtn;

    if (K == 0) {
      return L;
    }

    if (K == 1) {
      rtn[1, 1] = inv_square(L[1, 1]);
    } else {
      for (k in 1:K) {
        rtn[k, k] = dot_self(tail(L_inv[ : , k], K + 1 - k));
        for (j in (k + 1):K) {
          int Kmj = K - j;
          rtn[k, j] = dot_product(tail(L_inv[ : , k], Kmj + 1),
                                  tail(L_inv[ : , j], Kmj + 1));
          rtn[j, k] = rtn[k, j];
        }
      }
    }

    return rtn;
  }
  
  real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
    int N = rows(U);
    int J = cols(U);
    matrix[J, J] Gammainv = chol2inv2(L);
    return -N * sum(log(diagonal(L))) - 0.5 * sum(add_diag(Gammainv, -1.0) .* crossprod(U));
  }
  
  real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
    // Extract dimensions of final outcome matrix
    int N = rows(marginals[1][1]);
    int J = rows(L);
    matrix[N, J] U;

    // Iterate through marginal arrays, concatenating the outcome matrices by column
    // and aggregating the log-likelihoods (from continuous marginals) and jacobian
    // adjustments (from discrete marginals)
    real adj = 0;
    int pos = 1;
    for (m in 1 : size(marginals)) {
      int curr_cols = cols(marginals[m][1]);

      U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

      adj += sum(marginals[m][2]);
      pos += curr_cols;
    }

    // Return the sum of the log-probability for copula outcomes and jacobian adjustments
    return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
  }
  
  matrix[] normal_marginal(matrix y, matrix mu_glm, vector sigma) {
    int N = rows(mu_glm);
    int J = cols(mu_glm);
    matrix[N, J] rtn[2];
    //matrix[N, J] sigma2; HELP
    // Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
    rtn[2] = rep_matrix(0, N, J);

    for (j in 1 : J) {
      rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) / sigma[1];
      rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[2]);
    }

    return rtn;
}
}
data{
  int<lower=1> N; // total number of obs
  vector[N] y1; // first continuous response
  vector[N] y2; // second continuous response
  vector[N] x;
  int<lower=1> nresp; // number of responses
}
transformed data{
  int Outcome_Order[2]; //response array
  matrix[N,2] Y_gaussian; //response array
  Outcome_Order[1]=1;
  Y_gaussian[ : ,1] = y1;
  Outcome_Order[2] = 2;
  Y_gaussian[ : ,2] = y2;
}
parameters{
  real <lower=0> a1;
  real <lower=0> a2;
  real <lower=0> r1;
  real <lower=0> r2;
  real b1;
  real b2;
  real <lower=0> kappa1;
  real <lower=0> kappa2;
  real <lower=0> sigma_y1;
  real <lower=0> sigma_y2;
  cholesky_factor_corr[nresp] Lrescor;
}
model{
  // priors
  a1 ~ normal(0,10); // half-normal
	r1 ~ normal(0,1); //half-normal
	b1 ~ normal(0,10);
	a2 ~ normal(0,10); // half-normal
	r2 ~ normal(0,1); //half-normal
	b2 ~ normal(0,10);
	kappa1 ~ normal(0,1); //half-normal
	kappa2 ~ normal(0,1); //half-normal
  sigma_y1 ~ exponential(2);
  sigma_y2 ~  exponential(2);
  Lrescor ~ lkj_corr_cholesky(1);
  
  // likelihood 
  vector[N] mu1 = a1*x^r1+b1;
  vector[N] mu2 = a2*x^r2+b2;
  //vector[N] s1 = sigma_y1*(1+kappa1*x); HELP
  //vector[N] s2 = sigma_y2*(1+kappa2*x); HELP
  
  matrix[N,2] Mu_gaussian;
  
  vector[2] sigma = transpose([sigma_y1, sigma_y2]);
  Mu_gaussian[ : , 1] = mu1;
  Mu_gaussian[ : , 2] = mu2;
  //matrix[N,2] sigma; HELP
  //sigma[ : , 1] = s1; HELP
  //sigma[ : , 2] = s2; HELP
  
  target += centered_gaussian_copula_cholesky_({normal_marginal(Y_gaussian, Mu_gaussian, sigma)}, Lrescor, Outcome_Order);
}
generated quantities{
  corr_matrix[nresp] Rescor = multiply_lower_tri_self_transpose(Lrescor);
}