tl;dr: I want to modify the Stan code generated by brms so that the correlation between two random effects can be conditioned on another observed variable.
Imagine a situation in which two latent variables are correlated, but the correlation depends on another variable. For example, cognitive ability in the child and in the parent are very similar if the parent spends time with the child, and the more time they spend together, the more similar they are (a made-up example). Furthermore, these cognitive abilities are not observed directly, but estimated from observable performance on some test.
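In other words, the data-generating process I have in mind is roughly the following (using the names from the simulation code below, with $\alpha_\rho$ = InterceptRho and $\beta_\rho$ = SlopeRho):

$$
\rho_i = \operatorname{inv\_logit}_{(-1,1)}\big(\alpha_\rho + \beta_\rho \,\text{Time}_i\big), \qquad
\begin{pmatrix} \text{RateC}_i \\ \text{RateP}_i \end{pmatrix}
\sim \mathcal{N}\!\left(
\begin{pmatrix} \mu_C \\ \mu_P \end{pmatrix},
\begin{pmatrix} \sigma_C^2 & \rho_i \sigma_C \sigma_P \\ \rho_i \sigma_C \sigma_P & \sigma_P^2 \end{pmatrix}
\right),
$$

$$
\text{ScoreC}_i \sim \operatorname{Binomial}(\text{trials}, \text{RateC}_i), \qquad
\text{ScoreP}_i \sim \operatorname{Binomial}(\text{trials}, \text{RateP}_i),
$$

where $\operatorname{inv\_logit}_{(-1,1)}$ is the inverse logit rescaled to the interval $(-1, 1)$.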
Here is the code to generate such data:
library(MASS)
library(brms)
library(tidyverse)
n <- 200 # n of participants
trials <- 30 # n of trials
muC <- 0.4 # pop level mean of child underlying performance rate
muP <- 0.8 # pop level mean of parent underlying performance rate
sigmaC <- 0.1 # pop level sd of child underlying performance rate
sigmaP <- 0.12 # pop level sd of parent underlying performance rate
## a 0-4 integer parameter representing some abstract way of measuring time spent together
Time <- round(runif(n, 0, 4), 0)
## Intercept and slope of the relation between time and rho
InterceptRho <- 0.2
SlopeRho <- 0.3
## simulate, for each dyad, correlated child and parent rates,
## with the correlation rho depending on Time
for (i in seq(n)) {
  rho <- inv_logit_scaled(InterceptRho + SlopeRho * Time[i], -1, 1)
  VarCovarM <- matrix(NA, nrow = 2, ncol = 2)
  VarCovarM[1, 1] <- sigmaC^2
  VarCovarM[1, 2] <- rho * sigmaC * sigmaP
  VarCovarM[2, 1] <- rho * sigmaC * sigmaP
  VarCovarM[2, 2] <- sigmaP^2
  vars <- mvrnorm(n = 1, mu = c(muC, muP), Sigma = VarCovarM)
  temp <- tibble(
    ## clamp the rates to [0, 1] so rbinom() below cannot receive an invalid prob
    RateC = min(max(vars[1], 0), 1),
    RateP = min(max(vars[2], 0), 1),
    rho,
    Time = Time[i])
  if (i == 1) {df <- temp} else {df <- rbind(df, temp)}
}
## simulate the observed binomial scores; column names must match the brms formulas below
for (i in seq(n)) {
  temp <- tibble(
    ID = i,
    ScoreC = rbinom(1, size = trials, prob = df$RateC[i]),
    ScoreP = rbinom(1, size = trials, prob = df$RateP[i]),
    trials = trials,
    Time = Time[i]
  )
  if (i == 1) {d <- temp} else {d <- rbind(d, temp)}
}
I can model the basic structure (but not the effect of Time on the correlation) using brms. Code below.
library(brms)
fC <- bf(ScoreC | trials(trials) ~ 1 + (1 | p | ID))
fP <- bf(ScoreP | trials(trials) ~ 1 + (1 | p | ID))
make_stancode(fC + fP + set_rescor(rescor = FALSE),
              data = d,
              family = binomial)
# The model runs fine when fitted
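If I read the generated code correctly, this estimates a single correlation matrix for the ID-level effects, i.e. roughly

$$
\operatorname{logit}(p_{C,i}) = \beta_C + u_{C,i}, \qquad
\operatorname{logit}(p_{P,i}) = \beta_P + u_{P,i}, \qquad
\begin{pmatrix} u_{C,i} \\ u_{P,i} \end{pmatrix} \sim \mathcal{N}\!\big(0,\ \operatorname{diag}(\sigma_u)\,\Omega\,\operatorname{diag}(\sigma_u)\big),
$$

with one correlation matrix $\Omega$ shared by all IDs, whereas I would like its off-diagonal element $\rho_i$ to be a function of Time.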
Now, I have extracted the generated Stan code (via make_stancode), but I am not sure how to modify it in order to condition the correlation on Time:
// generated with brms 2.16.2
functions {
  /* compute correlated group-level effects
   * Args:
   *   z: matrix of unscaled group-level effects
   *   SD: vector of standard deviation parameters
   *   L: cholesky factor correlation matrix
   * Returns:
   *   matrix of scaled group-level effects
   */
  matrix scale_r_cor(matrix z, vector SD, matrix L) {
    // r is stored in another dimension order than z
    return transpose(diag_pre_multiply(SD, L) * z);
  }
}
data {
  int<lower=1> N;  // total number of observations
  int<lower=1> N_ScoreC;  // number of observations
  int Y_ScoreC[N_ScoreC];  // response variable
  int trials_ScoreC[N_ScoreC];  // number of trials
  int<lower=1> N_ScoreP;  // number of observations
  int Y_ScoreP[N_ScoreP];  // response variable
  int trials_ScoreP[N_ScoreP];  // number of trials
  // data for group-level effects of ID 1
  int<lower=1> N_1;  // number of grouping levels
  int<lower=1> M_1;  // number of coefficients per level
  int<lower=1> J_1_ScoreC[N_ScoreC];  // grouping indicator per observation
  int<lower=1> J_1_ScoreP[N_ScoreP];  // grouping indicator per observation
  // group-level predictor values
  vector[N_ScoreC] Z_1_ScoreC_1;
  vector[N_ScoreP] Z_1_ScoreP_2;
  int<lower=1> NC_1;  // number of group-level correlations
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
}
parameters {
  real Intercept_ScoreC;  // temporary intercept for centered predictors
  real Intercept_ScoreP;  // temporary intercept for centered predictors
  vector<lower=0>[M_1] sd_1;  // group-level standard deviations
  matrix[M_1, N_1] z_1;  // standardized group-level effects
  cholesky_factor_corr[M_1] L_1;  // cholesky factor of correlation matrix
}
transformed parameters {
  matrix[N_1, M_1] r_1;  // actual group-level effects
  // using vectors speeds up indexing in loops
  vector[N_1] r_1_ScoreC_1;
  vector[N_1] r_1_ScoreP_2;
  // compute actual group-level effects
  r_1 = scale_r_cor(z_1, sd_1, L_1);
  r_1_ScoreC_1 = r_1[, 1];
  r_1_ScoreP_2 = r_1[, 2];
}
model {
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N_ScoreC] mu_ScoreC = Intercept_ScoreC + rep_vector(0.0, N_ScoreC);
    // initialize linear predictor term
    vector[N_ScoreP] mu_ScoreP = Intercept_ScoreP + rep_vector(0.0, N_ScoreP);
    for (n in 1:N_ScoreC) {
      // add more terms to the linear predictor
      mu_ScoreC[n] += r_1_ScoreC_1[J_1_ScoreC[n]] * Z_1_ScoreC_1[n];
    }
    for (n in 1:N_ScoreP) {
      // add more terms to the linear predictor
      mu_ScoreP[n] += r_1_ScoreP_2[J_1_ScoreP[n]] * Z_1_ScoreP_2[n];
    }
    target += binomial_logit_lpmf(Y_ScoreC | trials_ScoreC, mu_ScoreC);
    target += binomial_logit_lpmf(Y_ScoreP | trials_ScoreP, mu_ScoreP);
  }
  // priors including constants
  target += student_t_lpdf(Intercept_ScoreC | 3, 0, 2.5);
  target += student_t_lpdf(Intercept_ScoreP | 3, 0, 2.5);
  target += student_t_lpdf(sd_1 | 3, 0, 2.5)
    - 2 * student_t_lccdf(0 | 3, 0, 2.5);
  target += std_normal_lpdf(to_vector(z_1));
  target += lkj_corr_cholesky_lpdf(L_1 | 1);
}
generated quantities {
  // actual population-level intercept
  real b_ScoreC_Intercept = Intercept_ScoreC;
  // actual population-level intercept
  real b_ScoreP_Intercept = Intercept_ScoreP;
  // compute group-level correlations
  corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
  vector<lower=-1,upper=1>[NC_1] cor_1;
  // extract upper diagonal of correlation matrix
  for (k in 1:M_1) {
    for (j in 1:(k - 1)) {
      cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
    }
  }
}
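To make the question more concrete, here is a rough sketch of the kind of change I imagine, mostly in the transformed parameters block. This is only my guess, not working code: Time_1, Intercept_rho, and slope_rho are names I made up, and I am not sure the per-ID Cholesky factor or the handling of priors is right.

data {
  // ... everything brms already generates, plus (I assume this could be
  // passed in via brms::stanvars or a hand-edited data list):
  vector[N_1] Time_1;  // Time value for each ID (one per grouping level)
}
parameters {
  // L_1 would be dropped and replaced by a regression for the correlation
  real Intercept_rho;  // intercept of the correlation on the unconstrained scale
  real slope_rho;      // effect of Time on the correlation
  // Intercept_ScoreC, Intercept_ScoreP, sd_1 and z_1 stay as they are
}
transformed parameters {
  matrix[N_1, M_1] r_1;  // actual group-level effects
  vector[N_1] r_1_ScoreC_1;
  vector[N_1] r_1_ScoreP_2;
  // assuming M_1 == 2 (one child and one parent effect per ID)
  for (i in 1:N_1) {
    real rho_i;
    matrix[2, 2] L_i;
    // map the linear predictor to (-1, 1), mirroring inv_logit_scaled(..., -1, 1)
    rho_i = 2 * inv_logit(Intercept_rho + slope_rho * Time_1[i]) - 1;
    // Cholesky factor of the 2x2 correlation matrix for this ID
    L_i[1, 1] = 1;
    L_i[1, 2] = 0;
    L_i[2, 1] = rho_i;
    L_i[2, 2] = sqrt(1 - square(rho_i));
    // scale the standardized effects of this ID, as scale_r_cor() does globally
    r_1[i] = (diag_pre_multiply(sd_1, L_i) * z_1[, i])';
  }
  r_1_ScoreC_1 = r_1[, 1];
  r_1_ScoreP_2 = r_1[, 2];
}

If something like this is on the right track, I assume the lkj_corr_cholesky_lpdf(L_1 | 1) prior and the Cor_1 / cor_1 computations in generated quantities would also have to be removed or replaced (e.g. by priors on Intercept_rho and slope_rho), but that is exactly where I am unsure.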
Any suggestions?
P.S. I had also tried building the code from scratch to avoid some of the additional complexity of the brms code, but I got stuck there as well (Conditioning a correlation on another variable), so I'm trying this different route.