In @betanalpha 's post on ordinal regression here, the induced Dirichlet prior has an anchor point, \phi, which I thought was arbitrary, so I have been using 0 for all my models. However, I recently changed it to different values to see what would happen, and I get very different estimates. For example, when changing the anchor point from 0 to 1, the sensitivities (cumulative probabilities) in my model change:
For \phi = 0:
For \phi = 1:
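For reference, the construction implemented in induced_dirichlet_lpdf below (using the 1.702 logistic approximation to the standard normal CDF) is

\sigma_k = \mathrm{logit}^{-1}\big(1.702\,(c_k - \phi)\big), \qquad p_1 = \sigma_1, \quad p_k = \sigma_k - \sigma_{k-1}, \quad p_K = 1 - \sigma_{K-1},

with a Dirichlet(\alpha) prior placed on (p_1, \dots, p_K), so the anchor \phi only enters through the differences c_k - \phi.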
Stan code:
functions {
// Function to do partial pooling on correlation matrices
// See https://discourse.mc-stan.org/t/hierarchical-prior-for-partial-pooling-on-correlation-matrices/4852/27
real phi_logit_approx(real b) {
real a;
a = inv_logit( 1.702*b );
return(a);
}
vector phi_logit_approx_vec(vector b) {
int N = num_elements(b);
vector[N] a;
a = inv_logit( 1.702*b );
return(a);
}
real inv_phi_logit_approx(real b) {
real a;
a = (1/1.702)*logit(b);
return(a);
}
// Induced dirichlet from Betancourt, 2019.
// See https://betanalpha.github.io/assets/case_studies/ordinal_regression.html#3_cut_to_the_chase
real induced_dirichlet_lpdf(vector c, vector alpha, real phi) {
int K = num_elements(c) + 1;
vector[K - 1] anchoredcutoffs = c - phi;
vector[K] sigma;
vector[K] p;
matrix[K, K] J = rep_matrix(0, K, K);
sigma[1:(K-1)] = phi_logit_approx_vec(anchoredcutoffs);
sigma[K] = 1;
p[1] = sigma[1];
for (k in 2:(K - 1))
p[k] = sigma[k] - sigma[k - 1];
p[K] = 1 - sigma[K - 1];
// Baseline column of Jacobian
for (k in 1:K) J[k, 1] = 1;
// Diagonal entries of Jacobian
for (k in 2:K) {
// rho is the PDF of the latent distribution
real rho = 1.702 * sigma[k - 1] * (1 - sigma[k - 1]);
J[k, k] = - rho;
J[k - 1, k] = + rho;
}
return dirichlet_lpdf(p | alpha)
+ log_determinant(J);
}
}
data {
int n_binary_tests; // number of binary tests
int n_ordinal_tests; // number of ordinal tests
int<lower=1> n_tests; // number of tests
int<lower=1> N; // number of individuals
int<lower=1> Thr[n_tests]; // number of thresholds for each test
int<lower=0> y[N, n_tests]; // test results for N individuals and n_tests tests
// int r[choose(n_tests,2), 4];
// int numg;
real se_bin_mu;
real se_bin_sd;
real sp_bin_mu;
real sp_bin_sd;
}
parameters {
vector[n_tests] mu[2];
real<lower=0,upper=1> u_d[N,n_tests]; // nuisance that absorbs inequality constraints
real<lower=0,upper=1> u_nd[N,n_tests]; // nuisance that absorbs inequality constraints
cholesky_factor_corr[n_tests] L_Omega_nd;
cholesky_factor_corr[n_tests] L_Omega_d;
real<lower=0,upper=1> p; // disease prevalence
// need to manually declare n_ordinal_tests sets of C_d_i, C_nd_i, phi_d_i, phi_nd_i (i = test index),
// as unfortunately there is no way to generate them automatically (I think...)
// e.g. with the 4 ordinal tests here:
ordered[Thr[n_binary_tests+1]] C_d_1;
ordered[Thr[n_binary_tests+1]] C_nd_1;
ordered[Thr[n_binary_tests+2]] C_d_2;
ordered[Thr[n_binary_tests+2]] C_nd_2;
ordered[Thr[n_binary_tests+3]] C_d_3;
ordered[Thr[n_binary_tests+3]] C_nd_3;
ordered[Thr[n_binary_tests+4]] C_d_4;
ordered[Thr[n_binary_tests+4]] C_nd_4;
}
transformed parameters {
vector[N] log_lik;
vector[max(Thr)] C_d_vec[n_ordinal_tests];
vector[max(Thr)] C_nd_vec[n_ordinal_tests];
for (k in 1:Thr[n_binary_tests+1]) {
C_d_vec[1,k] = C_d_1[k];
C_nd_vec[1,k] = C_nd_1[k];
}
for (k in 1:Thr[n_binary_tests+2]) {
C_d_vec[2,k] = C_d_2[k];
C_nd_vec[2,k] = C_nd_2[k];
}
for (k in 1:Thr[n_binary_tests+3]) {
C_d_vec[3,k] = C_d_3[k];
C_nd_vec[3,k] = C_nd_3[k];
}
for (k in 1:Thr[n_binary_tests+4]) {
C_d_vec[4,k] = C_d_4[k];
C_nd_vec[4,k] = C_nd_4[k];
}
{
// Parameters for likelihood function. Based on code uploaded by Ben Goodrich, which uses the
// GHK algorithm for generating TruncMVN. See: https://github.com/stan-dev/example-models/blob/master/misc/multivariate-probit/probit-multi-good.stan#L11
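// My reading of the recursion below: u_d[n,t] / u_nd[n,t] are uniform(0,1) nuisance
// parameters; for each test t the region of the latent variable allowed by y[n,t] is
// computed on the CDF scale ("bound_*"), u maps uniformly into that region, and
// inv_phi_logit_approx turns it back into a truncated latent draw z[t]. prev_d / prev_nd
// then carry the conditional mean L_Omega[t+1, 1:t] * z[1:t] forward to the next test,
// and the log region widths (y1d / y1nd) accumulate each class's log-likelihood contribution.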
for (n in 1:N) {
real lp1;
real lp0;
vector[n_tests] z_d;
vector[n_tests] z_nd;
vector[n_tests] y1d;
vector[n_tests] y1nd;
real prev_d = 0;
real prev_nd = 0;
for (t in 1:n_tests) {
if (Thr[t] == 1) { //// Binary Likelihoods (tests w/ 1 threshold) - make sure binary tests are at the start
real u_d1 = u_d[n,t];
real u_nd1 = u_nd[n,t];
real bound_d_bin;
real bound_nd_bin;
bound_d_bin = phi_logit_approx( -( (mu[1,t]) + prev_d ) / L_Omega_d[t,t] );
bound_nd_bin = phi_logit_approx( -( (mu[2,t]) + prev_nd ) / L_Omega_nd[t,t] );
if (y[n,t] == 1) {
z_d[t] = inv_phi_logit_approx(bound_d_bin + (1 - bound_d_bin)*u_d1);
z_nd[t] = inv_phi_logit_approx(bound_nd_bin + (1 - bound_nd_bin)*u_nd1);
y1d[t] = log1m(bound_d_bin); // Jacobian adjustment
y1nd[t] = log1m(bound_nd_bin); // Jacobian adjustment
}
else { // y == 0
z_d[t] = inv_phi_logit_approx(bound_d_bin*u_d1);
z_nd[t] = inv_phi_logit_approx(bound_nd_bin*u_nd1);
y1d[t] = log(bound_d_bin); // Jacobian adjustment
y1nd[t] = log(bound_nd_bin); // Jacobian adjustment
}
// if (t < n_binary_tests) prev_d = L_Omega_d[t+1,1:t] * head(z_d,t);
// if (t < n_binary_tests) prev_nd = L_Omega_nd[t+1,1:t] * head(z_nd,t);
}
else { // ordinal likelihoods (tests w/ > 1 threshold)
real u_d1 = u_d[n,t];
real u_nd1 = u_nd[n,t];
vector[Thr[t]] bound_d;
vector[Thr[t]] bound_nd;
bound_d[1:Thr[t]] = phi_logit_approx_vec( ( C_d_vec[t-n_binary_tests,1:Thr[t]] - ( mu[1,t] + prev_d ) ) / L_Omega_d[t,t] ); // diseased
bound_nd[1:Thr[t]] = phi_logit_approx_vec( ( C_nd_vec[t-n_binary_tests,1:Thr[t]] - ( mu[2,t] + prev_nd ) ) / L_Omega_nd[t,t] ); // non-diseased
{
int K = (Thr[t] + 1) ;
if (y[n,t] == K) {
z_d[t] = inv_phi_logit_approx(bound_d[K-1] + (1 - bound_d[K-1])*u_d1);
z_nd[t] = inv_phi_logit_approx(bound_nd[K-1] + (1 - bound_nd[K-1])*u_nd1);
y1d[t] = log1m(bound_d[K-1]); // Jacobian adjustment
y1nd[t] = log1m(bound_nd[K-1]); // Jacobian adjustment
}
for (J in 2:(K-1)) {
if (y[n,t] == J) {
z_d[t] = inv_phi_logit_approx(bound_d[J-1] + (bound_d[J] - bound_d[J-1])*u_d1);
z_nd[t] = inv_phi_logit_approx(bound_nd[J-1] + (bound_nd[J] - bound_nd[J-1])*u_nd1);
y1d[t] = log( (bound_d[J] - bound_d[J-1]) ); // Jacobian adjustment
y1nd[t] = log( (bound_nd[J] - bound_nd[J-1]) ); // Jacobian adjustment
}
}
if (y[n,t] == 1) {
z_d[t] = inv_phi_logit_approx(bound_d[1]* u_d1);
z_nd[t] = inv_phi_logit_approx(bound_nd[1]* u_nd1);
y1d[t] = log(bound_d[1]); // Jacobian adjustment
y1nd[t] = log(bound_nd[1]); // Jacobian adjustment
}
}
}
if (t < n_tests) prev_d = L_Omega_d[t+1,1:t] * head(z_d,t);
if (t < n_tests) prev_nd = L_Omega_nd[t+1,1:t] * head(z_nd,t);
}
// Jacobian adjustments imply z is truncated standard normal
// thus utility --- mu + L_Omega * z --- is truncated multivariate normal
lp1 = sum(y1d) + bernoulli_lpmf(1 | p);
lp0 = sum(y1nd) + bernoulli_lpmf(0 | p);
log_lik[n] = log_sum_exp(lp1 , lp0);
}
}
}
model {
C_d_1 ~ induced_dirichlet(rep_vector(1, Thr[2]+1), 1);
C_d_2 ~ induced_dirichlet(rep_vector(1, Thr[3]+1), 1);
C_d_3 ~ induced_dirichlet(rep_vector(1, Thr[4]+1), 1);
C_d_4 ~ induced_dirichlet(rep_vector(1, Thr[5]+1), 1);
C_nd_1 ~ induced_dirichlet(rep_vector(1, Thr[2]+1), 0);
C_nd_2 ~ induced_dirichlet(rep_vector(1, Thr[3]+1), 0);
C_nd_3 ~ induced_dirichlet(rep_vector(1, Thr[4]+1), 0);
C_nd_4 ~ induced_dirichlet(rep_vector(1, Thr[5]+1), 0);
mu[1,1] ~ normal(se_bin_mu, se_bin_sd);
mu[2,1] ~ normal(sp_bin_mu, sp_bin_sd);
for (t in 2:n_tests)
mu[, t] ~ std_normal();
L_Omega_d ~ lkj_corr_cholesky(4);
L_Omega_nd ~ lkj_corr_cholesky(4);
for (n in 1:N)
target += log_lik[n];
}
generated quantities {
vector[n_binary_tests] Se_bin;
vector[n_binary_tests] Sp_bin;
vector[max(Thr)] Se_ord[n_ordinal_tests];
vector[max(Thr)] Sp_ord[n_ordinal_tests];
corr_matrix[n_tests] Omega_nd;
corr_matrix[n_tests] Omega_d;
Omega_d = multiply_lower_tri_self_transpose(L_Omega_d);
Omega_nd = multiply_lower_tri_self_transpose(L_Omega_nd);
// Estimates for binary tests
for (t in 1:n_binary_tests) {
Se_bin[t] = phi_logit_approx( mu[1,t] );
Sp_bin[t] = 1 - phi_logit_approx( mu[2,t] );
}
// Estimates for ordinal tests
for (t in 1:n_ordinal_tests) {
for (k in 1:Thr[t+ n_binary_tests]) {
Se_ord[t,k] = 1- phi_logit_approx( C_d_vec[t,k] - mu[1,t + n_binary_tests] );
Sp_ord[t,k] = phi_logit_approx( C_nd_vec[t,k] - mu[2,t + n_binary_tests] );
}
}
}
R code (data file attached):
library(cmdstanr)
# compile the Stan model above (file name assumed here)
mod <- cmdstan_model("induced_dirichlet_model.stan")
data <- readRDS("data.rdata")
m = t(matrix(data = c(1, 1, 1, 1, 1,
0, 1, 1, 1, 1,
0, 0, 1, 1, 1,
0, 0, 0, 1, 1,
0, 0, 0, 0, 1), nrow = 5, ncol = 5))
init = list(
mu = t(matrix(data = c(2,2,2,2,2,
-2,-2,-2,-2,-2), ncol = 2)),
C_d_1 = seq(-2,2, length=6),
C_d_2 = seq(-2,2, length=5),
C_d_3 = seq(-2,2, length=28),
C_d_4 = seq(-2,2, length=15),
C_nd_1 = seq(-2,2, length=6),
C_nd_2 = seq(-2,2, length=5),
C_nd_3 = seq(-2,2, length=28),
C_nd_4 = seq(-2,2, length=15),
L_Omega_d = m,
L_Omega_nd = m
)
meta_model2 <- mod$sample(
data = data,
seed = 123,
chains = 4,
parallel_chains = 4,
iter_warmup = 200,
iter_sampling = 200,
refresh = 50,
init = list(init,init,init,init),
adapt_delta = 0.90,
max_treedepth = 9)
meta_model2$cmdstan_diagnose() # view sampler diagnostics
meta_model2r <- rstan::read_stan_csv(meta_model2$output_files()) # read CmdStan CSV output into an rstan object
print(meta_model2r, pars= c("Se_bin", "Sp_bin", "Se_ord", "Sp_ord",
"mu", "C_d_vec", "C_nd_vec","p"))
data.rdata (582 Bytes)
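To sanity-check what the anchor does to the prior on its own, here is a quick R sketch (not part of the model above; the helper functions rdirichlet1 and r_induced_cutpoints are mine, and it uses the same 1.702 logistic approximation as the Stan code). It draws cutpoints implied by the induced Dirichlet prior for \phi = 0 and \phi = 1:

# Draw cutpoints implied by the induced Dirichlet prior:
# p ~ Dirichlet(alpha), sigma_k = cumsum(p)[k], c_k = phi + logit(sigma_k) / 1.702
rdirichlet1 <- function(alpha) {
  g <- rgamma(length(alpha), shape = alpha)
  g / sum(g)
}

r_induced_cutpoints <- function(n_draws, K, phi, alpha = rep(1, K)) {
  t(replicate(n_draws, {
    p <- rdirichlet1(alpha)
    phi + qlogis(cumsum(p)[1:(K - 1)]) / 1.702
  }))
}

set.seed(1)
c_phi0 <- r_induced_cutpoints(5000, K = 6, phi = 0)  # e.g. a test with 5 thresholds
c_phi1 <- r_induced_cutpoints(5000, K = 6, phi = 1)
colMeans(c_phi1) - colMeans(c_phi0)  # roughly 1 at every threshold

On the prior alone, the only effect of the anchor is to translate the cutpoints by \phi; whether that fully explains the change in the posterior sensitivities is what I'm trying to understand.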