Dear Stan community,

I’m modelling some outcome data from a moderately successfully conducted randomized trial of psychotherapy. The goal is to evaluate a moderator hypothesis - that the effect of treatment allocation is conditional on the level of a proposed moderator variable. I have a baseline and an outcome score, with the outcome scores having a substantial proportion of missing data. There are two treatment conditions being compared, and a baseline measure of a proposed moderator. I have done this as a series of nested models with hierarchical intercepts, and was planning to compare the moderator model with the simpler models using loo to evaluate the moderator hypothesis. I do imputation for the missing outcome data as part of the model. There is also an IRT measurement model for the moderator variable with some imputation as well, and a fair bit of index fiddling.

My question concerns using loo for comparing the nested models. Should I calculate the log-likelihood of each observation conditional on the model, or the joint log-likelihood of the two observations for each patient? If the latter, what should I do about the observations for those who have only one? As I am doing it now, I take the log-likelihood of each observation and use those for loo. From what I understand from https://avehtari.github.io/modelselection/rats_kcv.html, that might be valid for evaluating model fit. At the same time, the prediction task is really whether the moderator is useful for predicting differential response to treatment, for a new patient, so that might suggest leaving one patient out, and take the joint log-likelihood of the two observations. But what to do then about those patients having only one observation (at baseline)?

```
data {
int N;
int nmis_oc_dep;
int id[N*2];
vector[N*2] treat;
vector[N*2] time;
vector[N*2-nmis_oc_dep] dep;
int<lower=1, upper=6> mod;
int<lower=1, upper=4> informant;
vector[5] pred_levels;
int ind_obs_cbq[N,4];
int ind_mis_cbq[N,4];
int nmis_cbq[4];
int nobs_cbq[4];
int cbq_items;
int nn[4];
int cbq[N*cbq_items,4];
int rs_cbq[N*cbq_items,4]; //case index of cbq
int qs_cbq[N*cbq_items,4]; //question index of cbq
}
transformed data{
int mis[4] = {1, nmis_cbq[1]+1, sum(nmis_cbq[1:2])+1, sum(nmis_cbq[1:3])+1};
int obs[4] = {1, nobs_cbq[1]+1, sum(nobs_cbq[1:2])+1, sum(nobs_cbq[1:3])+1};
}
parameters{
// IRT parameters
vector[cbq_items] cbq_beta_raw;
real cbq_beta_mu;
real<lower=0> cbq_beta_sigma;
vector<lower=0>[cbq_items] cbq_alpha;
vector[sum(nobs_cbq)] cbq_obs_theta;
// Imputation parameters
vector[sum(nmis_cbq)] cbq_mis_theta;
vector[nmis_oc_dep] oc_imp;
cholesky_factor_corr[4] conflict_corr;
//Regression parameters
vector[mod+1] beta;
vector[N] alpha_id_raw;
real<lower=0> alpha_id_sigma;
real<lower=0> sigma;
real<lower=1> nu;
}
transformed parameters{
vector[cbq_items] cbq_beta = cbq_beta_raw * cbq_beta_sigma + cbq_beta_mu;
vector[4] conflict_all[N];
for(j in 1:4){
for(i in 1:N){
if (ind_obs_cbq[i,j] > 0) {conflict_all[i,j] = segment(cbq_obs_theta, obs[j], nobs_cbq[j])[ind_obs_cbq[i,j]];}
else if (ind_mis_cbq[i,j] > 0) {conflict_all[i,j] = segment(cbq_mis_theta, mis[j], nmis_cbq[j])[ind_mis_cbq[i,j]];}
}
}
}
model{
// Regression priors
beta ~ normal(0, 1);
sigma ~ student_t(3,0,1);
nu ~ gamma(2,0.1);
alpha_id_raw ~ std_normal();
alpha_id_sigma ~ student_t(3,0,1);
// IRT priors
cbq_beta_raw ~ std_normal();
cbq_beta_mu ~ normal(0,3);
cbq_beta_sigma ~ student_t(3,0,1);
cbq_alpha ~ gamma(2,.5);
conflict_corr ~ lkj_corr_cholesky(2); // weakly informative prior on the correlation matrix
conflict_all ~ multi_normal_cholesky(rep_vector(0,4), conflict_corr); // implicit scale of 1 on the latent trait parameters as the cholesky factored correlation matrix is supplied directly as the covariance matrix
// IRT model
for(j in 1:4){ // [1:nn[j],j] extracts the correct index from the array containing indexes for all informants (actually ragged, but filled out with 99s to rectangular).
cbq[1:nn[j],j] ~ bernoulli_logit(cbq_alpha[qs_cbq[1:nn[j],j]] .* (segment(cbq_obs_theta, obs[j], nobs_cbq[j])[rs_cbq[1:nn[j],j]] - cbq_beta[qs_cbq[1:nn[j],j]]));
}
{ // Mixed regression model for treatment outcome, with models for prediction and moderation by conflict
vector[N*2] mu;
vector[N*2] alpha_id = alpha_id_raw[id] * alpha_id_sigma;
vector[N*2] intercept = rep_vector(1.0, N*2);
vector[N*2] conflict = to_vector(conflict_all[,informant])[id]; //extracts the conflict variable of the informant models are fitted to and multiplies it up
if(mod==6) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + treat .* time * beta[4] + conflict * beta[5] + time .* conflict * beta[6] + treat .* time .* conflict * beta[7] + alpha_id;} //conflict as moderator
if(mod==5) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + treat .* time * beta[4] + conflict * beta[5] + time .* conflict * beta[6] + alpha_id;} //conflict as predictor
if(mod==4) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + treat .* time * beta[4] + conflict * beta[5] + alpha_id;} //constant effect of conflict
if(mod==3) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + treat .* time * beta[4] + alpha_id;} //treatment outcome
if(mod==2) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + alpha_id;} //treatment group difference
if(mod==1) {
mu = intercept * beta[1] + time * beta[2] + alpha_id;} //effect of time
append_row(dep, oc_imp) ~ student_t(nu, mu, sigma);
}
}
generated quantities{
vector[N*2-nmis_oc_dep] log_lik;
{
vector[N*2] mu;
vector[N*2] alpha_id = alpha_id_raw[id] * alpha_id_sigma;
vector[N*2] intercept = rep_vector(1.0, N*2);
vector[N*2] conflict = to_vector(conflict_all[,informant])[id];
if(mod==6) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + treat .* time * beta[4] + conflict * beta[5] + time .* conflict * beta[6] + treat .* time .* conflict * beta[7] + alpha_id;} //conflict as moderator
if(mod==5) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + treat .* time * beta[4] + conflict * beta[5] + time .* conflict * beta[6] + alpha_id;} //conflict as predictor
if(mod==4) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + treat .* time * beta[4] + conflict * beta[5] + alpha_id;} //constant effect of conflict
if(mod==3) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + treat .* time * beta[4] + alpha_id;} //treatment outcome
if(mod==2) {
mu = intercept * beta[1] + time * beta[2] + treat * beta[3] + alpha_id;} //treatment group difference
if(mod==1) {
mu = intercept * beta[1] + time * beta[2] + alpha_id;} //effect of time
for(i in 1:N*2-nmis_oc_dep){
log_lik[i] = student_t_lpdf(dep[i]|nu, mu[i], sigma);
}
} //end generated quantities
```