Greetings–

I am checking the difference in scores on tests **within** one group. There is considerable missingness, so I’ve been using a model to accommodate missing data (below). I want to do a posterior predictive check, using simulated data in the generated quantities section but I’m not sure which standard deviation parameter to use within that random number generator requires a real number (my transformed sd (coef_sds) is a vector). I’ve used the sd of the present values (Y_sd), but this can’t be right, since I should be checking the observed data against the simulated sds. Any suggestions on how to simulate data in generated quantities given my model?

Many thanks!

model to accommodate missingness

functions{

//a function to turn the long data to wide coefficients

matrix get_coefs(matrix X, vector Y, int [,] Z) {

matrix[size(Z),cols(X)] coefs ;

for(i in 1:size(Z)){

coefs[i] = transpose(

inverse(

transpose( X[Z[i,1]:Z[i,2]] )

* X[Z[i,1]:Z[i,2]]

)

* transpose( X[Z[i,1]:Z[i,2]] )

* Y[Z[i,1]:Z[i,2]]

);

}

return coefs ;

}

}

data {

// nSubj: number of subjects

int nSubj ;

// nY: total number of observations

int nY ;

// nX: number of predictors

int nX ;

// Y: vector of observations

vector[nY] Y ;

// X: predictor matrix

matrix[nY,nX] X ;

// subjIndices: list of start and end indices in Y corresponding to data from

// each subject

int subjIndices[nSubj,2] ;

// n_missing: number of missing values total

int n_missing ;

// indices_of_missing: indices in Y of missing values

int indices_of_missing[n_missing] ;

// indices_of_present: indices in Y of present values

int indices_of_present[nY-n_missing] ;

}

transformed data{

// Y_mean: mean of present values in Y

real Y_mean ;

// Y_sd: sd of present values in Y

real Y_sd ;

// scaled_Y: z-scaled Y values, using Y_mean & Y_sd

vector[nY] scaled_Y ;

Y_mean = mean( Y[indices_of_present] ) ;

Y_sd = sd( Y[indices_of_present] ) ;

scaled_Y = ( Y - Y_mean ) / Y_sd ;

}

parameters {

// scaled_coef_means: z-scale coefficient means

vector[nX] scaled_coef_means ;

// scaled_coef_sds: z-scale coefficient sds

vector<lower=0>[nX] scaled_coef_sds ;

// cor_mat: correlations amongst coefficients

corr_matrix[nX] cor_mat ;

// imputed_for_missing: values imputed in place of

// missing values

vector[n_missing] imputed_for_missing ;

}

transformed parameters{

// scaled_Y_with_imputed: vector combining non-missing & imputed values

vector[nY] scaled_Y_with_imputed ;

// scaled_coefs: matrix of wide-format coefficients for each subject given

// their data and predictors

matrix[nSubj,nX] scaled_coefs ;

// add the present values to scaled_Y_with_imputed

scaled_Y_with_imputed[indices_of_present] = scaled_Y[indices_of_present] ;

// add the imputed values to scaled_Y_with_imputed

scaled_Y_with_imputed[indices_of_missing] = imputed_for_missing ;

// use the get_coefs() function

scaled_coefs = get_coefs(X,scaled_Y_with_imputed,subjIndices) ;

}

model {

// weakly informed priors

scaled_coef_means ~ normal(0,1) ;

scaled_coef_sds ~ weibull(2,1) ;

cor_mat ~ lkj_corr(1) ;

imputed_for_missing ~ normal(0,1) ;

// Each subject’s coefficients as multivariate normal

for(s in 1:nSubj){

scaled_coefs[s,] ~ multi_normal(

scaled_coef_means

, quad_form_diag(cor_mat, scaled_coef_sds)

) ;

}

}

generated quantities{

// coef_means: coefficient means on the original scale of Y

vector[nX] coef_means ;

// coef_sds: coefficient sds on the original scale of Y

vector[nX] coef_sds ;

coef_sds = scaled_coef_sds * Y_sd ;

coef_means = scaled_coef_means * Y_sd ;

// adding back the mean to the intercept only

coef_means[1] = coef_means[1] + Y_mean ;

}

generated quantities for model 2:

generated quantities{

//posterior predictive check variable

vector[nY] y2 ;

// coef_means: coefficient means on the original scale of Y

vector[nX] coef_means ;

// coef_means: coefficient sds on the original scale of Y

vector[nX] coef_sds ;

coef_sds = scaled_coef_sds * Y_sd ;

coef_means = scaled_coef_means * Y_sd ;

// adding back the mean to the intercept only

coef_means[1] = coef_means[1] + Y_mean ;

for(i in 1:nY){

y2[i] = normal_rng( (coef_means)[i], Y_sd) ;

}

}