I’d like to use projpred to answer a substantial question about predictor variable importance, in line with https://avehtari.github.io/modelselection/collinear.html. The dependent variable in my regression model is a latent variable obtained through an IRT model of responses to a questionnaire, and so I couldn’t figure out how to apply the init_refmodel function of projpred. The y argument of init_refmodel only accepts a vector of values, but I only have posterior draws.
Some minor points about the model in itself: The multilevel structure is due to parents being nested within adolescents. There is some imputation of the outcome variable going on as well.
Loo gives a lot of high pareto-k values, which I think is due to the very small cluster size for the varying intercepts, so I have been using K-fold CV in stead, and planned to use that for the projpred. I previously got some useful feedback on that here K-fold CV in linear regression model with varying intercepts and very small clusters
data{
int<lower=1> N;
int n_id;
int<lower=1> id[N];
int<lower=1> K;
matrix[K,N] x;
int cbq_items;
//index vector for cases with no missing data on CBQ
int ind_obs_cbq[N];
int nobs_cbq;
//index vector for cases with single items missing on CBQ
int ind_mis_cbq[N];
int nmis_cbq;
//index vector for cases with no response to CBQ
int ind_no_cbq[N];
int nno_cbq;
int<lower=0, upper=1> cbq_obs[nobs_cbq,cbq_items];
int<lower=-1, upper=1> cbq_mis[nmis_cbq,cbq_items];
}
transformed data{
matrix[N, K] Q = qr_Q(x')[,1:K] * N;
matrix[K, K] R = qr_R(x')[1:K, ] / N;
matrix[K, K] R_inv = inverse(R);
}
parameters{
vector[n_id] id_a_var;
real<lower=0> sigma_a_var;
vector[K] beta_tilde; // coefficients on Q
real<lower=1> nu;
real<lower=0> sigma;
vector[cbq_items] cbqbeta;
vector<lower=0>[cbq_items] cbqalpha;
vector[nobs_cbq] cbqtheta_obs;
vector[nmis_cbq] cbqtheta_mis;
vector[nno_cbq] cbqtheta_no;
}
transformed parameters{
vector[K] beta = R_inv * beta_tilde;
//collected parameter vectors for outcome and varying intercept
vector[N] y;
vector[N] a_var;
for(i in 1:N){
a_var[i] = id_a_var [id][i] * sigma_a_var;
if (ind_obs_cbq[i]>0)
y[i] = cbqtheta_obs[ind_obs_cbq[i]];
else if (ind_mis_cbq[i]>0)
y[i] = cbqtheta_mis[ind_mis_cbq[i]];
else
y[i] = cbqtheta_no[ind_no_cbq[i]];}
}
model{
//priors
id_a_var ~ std_normal();
sigma_a_var ~ normal(0,5);
beta ~ normal(0,1);
sigma ~ normal(0,1);
cbqtheta_obs ~ std_normal();
cbqtheta_mis ~ std_normal();
cbqbeta ~ normal(0,3);
cbqalpha ~ gamma(2,0.5);
nu ~ gamma(2,0.1);
//irt model
for(i in 1:nobs_cbq){
for (j in 1:cbq_items){
cbq_obs[i,j] ~ bernoulli_logit(cbqalpha[j]*(cbqtheta_obs[i] - cbqbeta[j]));
}}
for(i in 1:nmis_cbq){
for(j in 1:cbq_items){
if(cbq_mis[i,j]>-1){
cbq_mis[i,j] ~ bernoulli_logit(cbqalpha[j]*(cbqtheta_mis[i] - cbqbeta[j]));
}}}
//likelihood
y ~ student_t(nu, Q * beta_tilde + a_var, sigma);
}
generated quantities{
vector[N] log_lik;
vector[N] ppc;
for (n in 1:N){
log_lik[n] = student_t_lpdf(y[n] | nu, a_var[n] + x'[n,] * beta, sigma);
ppc[n] = student_t_rng(nu, a_var[n] + x'[n,] * beta, sigma);}
}