Non-centering an augmented multivariate normal

mike-lawrence · January 15, 2021, 4:45pm

So I have a standard hierarchical data set where each of multiple “subjects” is each observed many times in each of multiple conditions. A “centered” version of this would be:

data{
	// num_obs: total number of observations
	int<lower=1> num_obs ;
	// obs: observation on each trial
	vector[num_obs] obs ;
	// num_subj: number of subj
	int<lower=1> num_subj ;
	// num_rows_uW: num rows in uW
	int<lower=1> num_rows_uW ;
	// num_cols_uW: num cols in uW
	int<lower=1> num_cols_uW ;
	// uW: unique entries in the within predictor matrix
	matrix[num_rows_uW,num_cols_uW] uW ;
	// index: index of each trial in flattened subject-by-condition value matrix
	int obs_index[num_obs] ;
}
parameters{
	// chol_corr: population-level correlations (on cholesky factor scale) amongst within-subject predictors
	cholesky_factor_corr[num_cols_uW+1] chol_corr ;
	// mean_coef: mean (across subj) for each coefficient
	row_vector[num_cols_uW] mean_coef ;
	// sd_coef: sd (across subj) for each coefficient
	vector<lower=0>[num_cols_uW] sd_coef ;
	// subj_coef: coefficients for each subj
	vector[num_cols_uW] subj_coef[num_subj] ;
	// noise: measurement noise
	real<lower=0> noise ;
}
model{
	// priors
	noise ~ weibull(2,1) ;
	mean_coef ~ std_normal() ;
	sd_coef ~ std_normal() ;
	chol_corr ~ lkj_corr_cholesky(1) ;
	// subj values as mvn
	subj_coef ~ multi_normal_cholesky(
		mean_coef
		, diag_pre_multiply(sd_coef, chol_corr)
	) ;
	// Loop over subj and conditions to compute unique entries in design matrix
	matrix[num_rows_uW,num_subj] value_for_subj_cond ;
	for(this_subj in 1:num_subj){
		for(this_condition in 1:num_rows_uW){
			value_for_subj_cond[this_condition,this_subj] = dot_product(
				subj_coef[this_subj,1:num_cols_uW]
				, uW[this_condition]
			) ;
		}
	}
	// Likelihood
	obs ~ normal(
		to_vector(value_for_subj_cond)[obs_index]
		, noise
	) ;
}

And the non-centered version is:

data{
	// num_obs: number of trials
	int<lower=1> num_obs ;
	// obs: observation on each trial
	vector[num_obs] obs ;
	// num_subj: number of subj
	int<lower=1> num_subj ;
	// num_rows_uW: num rows in uW
	int<lower=1> num_rows_uW ;
	// num_cols_uW: num cols in uW
	int<lower=1> num_cols_uW ;
	// uW: unique entries in the within predictor matrix
	matrix[num_rows_uW,num_cols_uW] uW ;
	// index: index of each trial in flattened subject-by-condition value matrix
	int obs_index[num_obs] ;
}
parameters{
	// chol_corr: population-level correlations (on cholesky factor scale) amongst within-subject predictors
	cholesky_factor_corr[num_cols_uW+1] chol_corr ;
	// mean_coef: mean (across subj) for each coefficient
	row_vector[num_cols_uW] mean_coef ;
	// sd_coef: sd (across subj) for each coefficient
	vector<lower=0>[num_cols_uW] sd_coef ;
	// transformed_subj_coef: transformed coefficients for each subj
	matrix[num_cols_uW,num_subj] transformed_subj_coef;
	// noise: measurement noise
	real<lower=0> noise ;
}
transformed parameters{
	// achieved non-centered subj_coef ~ mvn()
	matrix[num_subj,num_cols_uW] subj_coef = (
		rep_matrix(mean_coef,num_subj)
		+ transpose(
			diag_pre_multiply(sd_coef,chol_corr)
			* transformed_subj_coef
		)
	) ;
}
model{
	// priors
	noise ~ weibull(2,1) ;
	mean_coef ~ std_normal() ;
	sd_coef ~ std_normal() ;
	chol_corr ~ lkj_corr_cholesky(1) ;
	// implies subj values as mvn
	to_vector(transformed_subj_coef) ~ std_normal() ;
	// Loop over subj and conditions to compute unique entries in design matrix
	matrix[num_rows_uW,num_subj] value_for_subj_cond ;
	for(this_subj in 1:num_subj){
		for(this_condition in 1:num_rows_uW){
			value_for_subj_cond[this_condition,this_subj] = dot_product(
				subj_coef[this_subj,1:num_cols_uW]
				, uW[this_condition]
			) ;
		}
	}
	// Likelihood
	obs ~ normal(
		to_vector(value_for_subj_cond)[obs_index]
		, noise
	) ;
}

Now, I actually have a variable that I want to model as correlated to the subj_coefs. For the centered parameterization, I can achieve this via:

data{
	// num_obs: number of trials
	int<lower=1> num_obs ;
	// obs: observation on each trial
	vector[num_obs] obs ;
	// num_subj: number of subj
	int<lower=1> num_subj ;
	// num_rows_uW: num rows in uW
	int<lower=1> num_rows_uW ;
	// num_cols_uW: num cols in uW
	int<lower=1> num_cols_uW ;
	// uW: unique entries in the within predictor matrix
	matrix[num_rows_uW,num_cols_uW] uW ;
	// index: index of each trial in flattened subject-by-condition value matrix
	int obs_index[num_obs] ;
	vector[num_subj] covariate ;
}
transformed data{
	real covariate_mean = mean(covariate) ;
	real covariate_sd = sd(covariate) ;
}
parameters{
	// chol_corr: population-level correlations (on cholesky factor scale) amongst within-subject predictors
	cholesky_factor_corr[num_cols_uW+1] chol_corr ;
	// mean_coef: mean (across subj) for each coefficient
	row_vector[num_cols_uW] mean_coef ;
	// sd_coef: sd (across subj) for each coefficient
	vector<lower=0>[num_cols_uW] sd_coef ;
	// subj_coef: coefficients for each subj
	vector[num_cols_uW] subj_coef[num_subj] ;
	// noise: measurement noise
	real<lower=0> noise ;
}
transformed parameters{
	//extend the means
	row_vector[num_cols_uW+1] mean_coef_extended;
	mean_coef_extended[1:num_cols_uW] = mean_coef ;
	mean_coef_extended[num_cols_uW+1] = covariate_mean ;
	//extend the sds
	vector[num_cols_uW+1] sd_coef_extended;
	sd_coef_extended[1:num_cols_uW] = sd_coef ;
	sd_coef_extended[num_cols_uW+1] = covariate_sd ;
	//extend the subject values
	vector[num_cols_uW+1] subj_coef_extended[num_subj] ;
	for(i in 1:num_subj){
		subj_coef_extended[i][1:num_cols_uW] = subj_coef[i] ;
		subj_coef_extended[i][num_cols_uW+1] = covariate[i] ;
	}
}
model{
	// priors
	noise ~ weibull(2,1) ;
	mean_coef ~ std_normal() ;
	sd_coef ~ std_normal() ;
	chol_corr ~ lkj_corr_cholesky(1) ;
	// subj values as mvn
	subj_coef_extended ~ multi_normal_cholesky(
		mean_coef_extended
		, diag_pre_multiply(sd_coef_extended, chol_corr)
	) ;
	// Loop over subj and conditions to compute unique entries in design matrix
	matrix[num_rows_uW,num_subj] value_for_subj_cond ;
	for(this_subj in 1:num_subj){
		for(this_condition in 1:num_rows_uW){
			value_for_subj_cond[this_condition,this_subj] = dot_product(
				subj_coef_extended[this_subj,1:num_cols_uW]
				, uW[this_condition]
			) ;
		}
	}
	// Likelihood
	obs ~ normal(
		to_vector(value_for_subj_cond)[obs_index]
		, noise
	) ;
}

But I’m having trouble working out how to implement the non-centered version. I think I have to apply the reverse of the contents in transformed parameters from the second model above to work out new values to add to the transformed_subj_coefs, but I’m getting a bit lost. Any guidance would be greatly appreciated!

martinmodrak · January 30, 2021, 8:48am

Hi, did you manage to resolve this? I don’t think I am able to completely disentangle what is going on, and I also know that these transformations are usually hard for me to get my head around, so hopefully bumping this up will catch somebody’s attention.

The only thing that feels somewhat relevant is that you can actually relatively easily compute the conditional distribution of some components of a multivariate normal given some other components, so maybe using that to compute the conditional distribution of subj_coef given covariate could work?

Best of luck with the model!

Topic		Replies	Views
Non-centred parameterisation for multivatiate normal Modeling	1	648	March 19, 2019
Non-centered parametrization with irregularly missing data Modeling reparametrization	1	237	December 8, 2023
Non-centered parameterization of a hierarchical linear model Modeling	3	1199	May 20, 2020
Working out an SEM-style alternative to the multivariate normal Modeling techniques , specification	3	787	May 28, 2021
Non-centered hierarchical skew normal implementation Modeling fitting-issues	0	526	January 14, 2019

Non-centering an augmented multivariate normal

Related topics