My current model is given below, along with the same plots I've been generating to diagnose problems. I'm clearly stuck…

I've been running some tests using optimize(). I started with the simplest model (population parameters only). That converged, so I progressively added hierarchy back into the model. It runs with b_a1, b_a2, b_a3, b_a4, and b_bc as hierarchical parameters, but when I add b_b1tt, etc., it gives a line search failure error. I also tried adding hierarchy for only one of the b_b#tt, plus a few other combinations. In theory the model should run, if it is specified the way I believe it to be. I'm working from a published model with the same structure: all parameters are random/hierarchical and are multiplied by a common hierarchical parameter (b_bc in my model), which captures scale heterogeneity/cost. The cost parameter cannot be separated from the scale, so its median should be fixed to 1.0 for identification. Others also use lognormal priors for these parameters.
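For reference, here is the structure I believe I'm implementing (my notation, a sketch of what the code below is meant to compute; alternative 1 is the base, and the cost term only enters the alternatives that have a cost attribute):

$$
V_{ni} = \beta^{c}_{n}\,\alpha_{in} \;-\; \beta^{c}_{n}\,\beta^{tt}_{in}\,\mathrm{time}_{ni} \;-\; \beta^{c}_{n}\,\mathrm{cost}_{ni} \;+\; \log(\mathrm{avail}_{ni})
$$

$$
\alpha_{1n} = 0, \qquad \log\beta^{c}_{n} \sim \mathrm{N}(0,\sigma_{c}), \qquad \log\beta^{tt}_{in} \sim \mathrm{N}(\mu_{i},\sigma_{i})
$$

so the median of $\beta^{c}_{n}$ is 1 (identification) and all time/cost effects are strictly negative.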
Current data and R script:
mtc_equity-v13.R (2.3 KB)
time_cost_calculated.CSV (2.4 MB)
functions {
array[] int sequence(int start, int end) {
array[end - start + 1] int seq;
for (n in 1:num_elements(seq)) {
seq[n] = n + start - 1;
}
return seq;
}
real partial_log_lik_lpmf(array[] int seq, int start, int end,
data int ncat, data int nvar, data int ntotvar, data array[] int Y, data matrix x1, data matrix x2, data matrix x3, data matrix x4, data array[] int J_1, data array[] int J_2,
vector b_a2, vector b_a3, vector b_a4, vector b_bc, vector b_b1tt, vector b_b2tt, vector b_b3tt, vector b_b4tt, matrix avail) {
real ptarget = 0;
int N = end - start + 1;
// Define matrices/vector for x, alpha, beta
matrix[ncat,nvar] x;
vector[ncat] alpha;
matrix[nvar,ncat] beta;
array[N] row_vector[ncat] a = rep_array(rep_row_vector(0,ncat), N);
// initialize linear predictor term
// Terms are btc, b1tt, b2tt, b3tt, b4tt
array[N] row_vector[ntotvar] b = rep_array(rep_row_vector(0,ntotvar), N);
for (n in 1:N) {
int nn = n + start - 1;
// add to linear predictor term
a[n] += [0,b_a2[J_2[nn]], b_a3[J_2[nn]], b_a4[J_2[nn]]];
// add to linear predictor term
// Terms are btc, b1tt, b2tt, b3tt, b4tt
// Median btc parameter fixed to 1.0 for identification
b[n] += [b_bc[J_1[nn]], b_b1tt[J_1[nn]], b_b2tt[J_1[nn]], b_b3tt[J_1[nn]], b_b4tt[J_1[nn]]];
// Each x and beta is a matrix with dimensions alts x variables
// Our x will be the time/cost coming in as inputs
x = to_matrix([x1[nn],x2[nn],x3[nn],x4[nn]]);
// Our betas will be the hierarchical slope parameters (2 x 4)
// Time and cost parameters multiplied by -1 because all have negative effect (positive lognormal transformed to negative lognormal)
beta = -1*[b[n,1] * b[n,2:5], [b[n,1],0,0,b[n,1]]];
// Our alphas will be the hierarchical intercept parameters. log(avail[nn,i]) gives an availability measure
alpha = [a[n,1]+log(avail[nn,1]), b[n,1] * a[n,2]+log(avail[nn,2]), b[n,1] * a[n,3]+log(avail[nn,3]), b[n,1] * a[n,4]+log(avail[nn,4])]';
// Y ~ categorical_logit(alpha + x * beta)
ptarget += categorical_logit_glm_lupmf(Y[nn] | x, alpha, beta);
}
return ptarget;
}
}
data {
int<lower=1> N; // total number of observations
int<lower=2> ncat; // number of categories
int<lower=2> nvar; // number of variables for each alternative
int<lower=2> ntotvar; // number of total variables
int grainsize; // grainsize for threading
array[N] int Y; // response variable
// covariate matrix for alt1
matrix[N,nvar] x1;
// covariate matrix for alt2
matrix[N,nvar] x2;
// covariate matrix for alt3
matrix[N,nvar] x3;
// covariate matrix for alt4
matrix[N,nvar] x4;
// data for group-level effects of ID 1
int<lower=1> N_1; // number of grouping levels
int<lower=1> M_1; // number of coefficients per level
array[N] int<lower=1> J_1; // grouping indicator per observation
// group-level predictor values
int<lower=1> NC_1; // number of group-level correlations
// data for group-level effects of ID 2
int<lower=1> N_2; // number of grouping levels
int<lower=1> M_2; // number of coefficients per level
array[N] int<lower=1> J_2; // grouping indicator per observation
matrix<lower=0,upper=1>[N,ncat] avail; // whether an alternative is available
int prior_only; // should the likelihood be ignored?
}
transformed data {
array[N] int seq = sequence(1, N);
}
parameters {
real mu_b1tt; // population-level effects
real mu_b2tt; // population-level effects
real mu_b3tt; // population-level effects
real mu_b4tt; // population-level effects
real mu_a2; // population-level effects
real mu_a3; // population-level effects
real mu_a4; // population-level effects
real<lower=0> sd1_bc; // group-level standard deviations
real<lower=0> sd1_b1tt; // group-level standard deviations
real<lower=0> sd1_b2tt; // group-level standard deviations
real<lower=0> sd1_b3tt; // group-level standard deviations
real<lower=0> sd1_b4tt; // group-level standard deviations
real<lower=0> sd2_a2; // group-level standard deviations
real<lower=0> sd2_a3; // group-level standard deviations
real<lower=0> sd2_a4; // group-level standard deviations
vector<offset=0, multiplier = sd1_bc>[N_1] log_b_bc;
vector<offset=mu_b1tt, multiplier = sd1_b1tt>[N_1] log_b_b1tt;
vector<offset=mu_b2tt, multiplier = sd1_b2tt>[N_1] log_b_b2tt;
vector<offset=mu_b3tt, multiplier = sd1_b3tt>[N_1] log_b_b3tt;
vector<offset=mu_b4tt, multiplier = sd1_b4tt>[N_1] log_b_b4tt;
vector<offset=mu_a2, multiplier = sd2_a2>[N_2] b_a2;
vector<offset=mu_a3, multiplier = sd2_a3>[N_2] b_a3;
vector<offset=mu_a4, multiplier = sd2_a4>[N_2] b_a4;
}
// Transform normal to lognormal (all time and cost parameters should have a single sign)
transformed parameters{
vector<lower=0>[N_1] b_bc = exp(log_b_bc);
vector<lower=0>[N_1] b_b1tt = exp(log_b_b1tt);
vector<lower=0>[N_1] b_b2tt = exp(log_b_b2tt);
vector<lower=0>[N_1] b_b3tt = exp(log_b_b3tt);
vector<lower=0>[N_1] b_b4tt = exp(log_b_b4tt);
}
model {
// likelihood including constants
if (!prior_only) {
target += reduce_sum(partial_log_lik_lpmf, seq,
grainsize, ncat, nvar, ntotvar, Y, x1, x2, x3, x4, J_1, J_2,
b_a2, b_a3, b_a4, b_bc, b_b1tt, b_b2tt, b_b3tt, b_b4tt, avail);
}
// priors
mu_b1tt ~ normal(3,1); // Gives reasonable values of time (~$5-$75)
mu_b2tt ~ normal(3,1);
mu_b3tt ~ normal(3,1);
mu_b4tt ~ normal(2,1);
mu_a2 ~ normal(3,1);
mu_a3 ~ normal(3,1);
mu_a4 ~ normal(3,1);
sd1_bc ~ std_normal();
sd1_b1tt ~ normal(2,1);
sd1_b2tt ~ normal(2,1);
sd1_b3tt ~ normal(2,1);
sd1_b4tt ~ normal(2,1);
sd2_a2 ~ normal(1,5);
sd2_a3 ~ normal(1,5);
sd2_a4 ~ normal(1,5);
b_a2 ~ normal(mu_a2, sd2_a2);
b_a3 ~ normal(mu_a3, sd2_a3);
b_a4 ~ normal(mu_a4, sd2_a4);
log_b_bc ~ normal(0, sd1_bc);
log_b_b1tt ~ normal(mu_b1tt, sd1_b1tt);
log_b_b2tt ~ normal(mu_b2tt, sd1_b2tt);
log_b_b3tt ~ normal(mu_b3tt, sd1_b3tt);
log_b_b4tt ~ normal(mu_b4tt, sd1_b4tt);
}
Values of mu_b#tt around 4.0 are OK (a bit high) when combined with an sd of about 0-3.
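As a rough sanity check on those numbers, here is a quick R sketch (not part of the attached script). It conditions on fixed values of mu_b#tt and sd1_b#tt and assumes that, because b_bc multiplies both the time and cost terms, the implied individual-level value of time is just exp(log_b_b#tt) (units depend on how time and cost are scaled in the data):

# Prior check for the implied value-of-time distribution, conditional on
# fixed hyperparameter values (mu, sd) of the lognormal time coefficients.
set.seed(1)

vot_prior <- function(mu, sd, n = 1e5) {
  quantile(exp(rnorm(n, mu, sd)), probs = c(0.05, 0.5, 0.95))
}

vot_prior(3, 1)  # current prior: median ~20, 90% interval roughly 4-105
vot_prior(4, 1)  # mu near 4: median ~55, noticeably heavier upper tail
vot_prior(4, 3)  # mu near 4 with a large sd: upper tail blows up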