Greetings–
I am checking the difference in scores on tests within one group. There is considerable missingness, so I’ve been using a model to accommodate missing data (below). I want to do a posterior predictive check, using simulated data in the generated quantities section but I’m not sure which standard deviation parameter to use within that random number generator requires a real number (my transformed sd (coef_sds) is a vector). I’ve used the sd of the present values (Y_sd), but this can’t be right, since I should be checking the observed data against the simulated sds. Any suggestions on how to simulate data in generated quantities given my model?
Many thanks!
model to accommodate missingness
functions{
//a function to turn the long data to wide coefficients
matrix get_coefs(matrix X, vector Y, int [,] Z) {
matrix[size(Z),cols(X)] coefs ;
for(i in 1:size(Z)){
coefs[i] = transpose(
inverse(
transpose( X[Z[i,1]:Z[i,2]] )
* X[Z[i,1]:Z[i,2]]
)
* transpose( X[Z[i,1]:Z[i,2]] )
* Y[Z[i,1]:Z[i,2]]
);
}
return coefs ;
}
}
data {
// nSubj: number of subjects
int nSubj ;
// nY: total number of observations
int nY ;
// nX: number of predictors
int nX ;
// Y: vector of observations
vector[nY] Y ;
// X: predictor matrix
matrix[nY,nX] X ;
// subjIndices: list of start and end indices in Y corresponding to data from
// each subject
int subjIndices[nSubj,2] ;
// n_missing: number of missing values total
int n_missing ;
// indices_of_missing: indices in Y of missing values
int indices_of_missing[n_missing] ;
// indices_of_present: indices in Y of present values
int indices_of_present[nY-n_missing] ;
}
transformed data{
// Y_mean: mean of present values in Y
real Y_mean ;
// Y_sd: sd of present values in Y
real Y_sd ;
// scaled_Y: z-scaled Y values, using Y_mean & Y_sd
vector[nY] scaled_Y ;
Y_mean = mean( Y[indices_of_present] ) ;
Y_sd = sd( Y[indices_of_present] ) ;
scaled_Y = ( Y - Y_mean ) / Y_sd ;
}
parameters {
// scaled_coef_means: z-scale coefficient means
vector[nX] scaled_coef_means ;
// scaled_coef_sds: z-scale coefficient sds
vector<lower=0>[nX] scaled_coef_sds ;
// cor_mat: correlations amongst coefficients
corr_matrix[nX] cor_mat ;
// imputed_for_missing: values imputed in place of
// missing values
vector[n_missing] imputed_for_missing ;
}
transformed parameters{
// scaled_Y_with_imputed: vector combining non-missing & imputed values
vector[nY] scaled_Y_with_imputed ;
// scaled_coefs: matrix of wide-format coefficients for each subject given
// their data and predictors
matrix[nSubj,nX] scaled_coefs ;
// add the present values to scaled_Y_with_imputed
scaled_Y_with_imputed[indices_of_present] = scaled_Y[indices_of_present] ;
// add the imputed values to scaled_Y_with_imputed
scaled_Y_with_imputed[indices_of_missing] = imputed_for_missing ;
// use the get_coefs() function
scaled_coefs = get_coefs(X,scaled_Y_with_imputed,subjIndices) ;
}
model {
// weakly informed priors
scaled_coef_means ~ normal(0,1) ;
scaled_coef_sds ~ weibull(2,1) ;
cor_mat ~ lkj_corr(1) ;
imputed_for_missing ~ normal(0,1) ;
// Each subject’s coefficients as multivariate normal
for(s in 1:nSubj){
scaled_coefs[s,] ~ multi_normal(
scaled_coef_means
, quad_form_diag(cor_mat, scaled_coef_sds)
) ;
}
}
generated quantities{
// coef_means: coefficient means on the original scale of Y
vector[nX] coef_means ;
// coef_sds: coefficient sds on the original scale of Y
vector[nX] coef_sds ;
coef_sds = scaled_coef_sds * Y_sd ;
coef_means = scaled_coef_means * Y_sd ;
// adding back the mean to the intercept only
coef_means[1] = coef_means[1] + Y_mean ;
}
generated quantities for model 2:
generated quantities{
//posterior predictive check variable
vector[nY] y2 ;
// coef_means: coefficient means on the original scale of Y
vector[nX] coef_means ;
// coef_means: coefficient sds on the original scale of Y
vector[nX] coef_sds ;
coef_sds = scaled_coef_sds * Y_sd ;
coef_means = scaled_coef_means * Y_sd ;
// adding back the mean to the intercept only
coef_means[1] = coef_means[1] + Y_mean ;
for(i in 1:nY){
y2[i] = normal_rng( (coef_means)[i], Y_sd) ;
}
}