Linear mixed effects model with varying intercept and slope in matrix notation

Hi all,

I am trying to write a varying slope and intercepts model using matrix notation in Stan so that I can extend when I add more predictors, but am having problems. I am trying to use a model matrix that includes the intercept, but that may be the problem. Here is the code that works without matrix notation and code that sort of works with matrix notation. It returns intercept and slope estimates, but the error estimates are off and the Rhats are way too high. I think that I am not providing proper priors, but to get it to work, I had to do a lot of funky coding.

Here is the code. If I could find a useful example of how to code up a general linear mixed model with varying intercept/slopes using matrix notation, I would use it, but I just can’t find it. I posted here with a generalized linear example, but thought it might be easier to start here. Any help would be appreciated.

library(rstan)
library(lme4)


rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

## set parameters
## MCMC settings
ni <- 5000
nt <- 1
nb <- 1000
nc <- 4

n.groups = 56
n.sample = 100
n = n.groups * n.sample
pop = gl(n = n.groups,k = n.sample)

original.length = runif(n,45,70) # body length
mn = mean(original.length)
sd = sd(original.length)
length = (original.length - mn) / sd
hist(length,col="grey")

Xmat = model.matrix(~ pop * length-1-length) ### design matrix without intercept

intercept.mean = 230 # mu_alpha
intercept.sd = 20 # sigma alpha
slope.mean = 60 # mu_beta
slope.sd = 30 # sigma_beta

intercept.effects = rnorm(n = n.groups, mean = intercept.mean, sd = intercept.sd)
slope.effects = rnorm(n = n.groups, mean = slope.mean, sd = slope.sd)
all.effects = c(intercept.effects, slope.effects)

lin.pred = Xmat[,] %*% all.effects
eps = rnorm(n = n,mean = 0,sd=30)
mass = as.vector(lin.pred + eps)
hist(mass,col="grey")

### put data together
data = data.frame(pop=pop,length = length,mass = mass)

data.list = list(y = as.vector(mass),x = Xmat,pop = as.numeric(pop),length = length,Nobs = length(as.vector(mass)),
                 Ngroups = n.groups)
LME_randomSlopeAndIntercept.stan = '
data {
  int<lower=0> Nobs; // Number of observed data
  int<lower=0> Ngroups; 
  int pop[Nobs]; // populations
  vector[Nobs] y;
  vector[Nobs] length; // new code
}

parameters {
  real beta; // common slope
  real alpha; // population intercept, a real number

// random effect
  vector[Ngroups] alpha2; // random intercept
  vector[Ngroups] beta2; // random slope. Just added this
  real<lower=0> sigmaint; // corresponds to mu.int, random intercept sd
  real<lower=0> sigmaslope; // corresponds to sigma.slope
  real<lower=0> sigmaeps; // estimate error
}

transformed parameters {
  vector[Nobs] yhat;

  // varying intercepts
  real alpha3[Ngroups];
  real beta3[Ngroups];
  
  // varying intercepts definition
  for (k in 1:Ngroups)
    alpha3[k] = alpha + alpha2[k];
  
  for (k in 1:Ngroups)
    beta3[k] = beta + beta2[k];	
  
  for (i in 1:Nobs)
    yhat[i] = alpha3[pop[i]]+ beta3[pop[i]]*length[i];

}
model {
  // random effects distribution
  alpha2 ~ normal(0,sigmaint);
  beta2 ~ normal(0,sigmaslope);
  y ~ normal(yhat, sigmaeps);
}
'
fit = stan(file = "LME_randomSlopeAndIntercept.stan",data = data.list,chains = nc,
           iter = ni, warmup = nb, thin = nt)

print(fit,pars=c("alpha","beta","sigmaint","sigmaslope","sigmaeps"))

# fit with lmer
lme.fit = lmer(mass ~ length + (1|pop) + (0+ length|pop)) ## (0+length|pop) adds correlation of fixed effects
summary(lme.fit)

#################
# matrix notation
#################
#Xmat = model.matrix(lme.fit)
Xmat = model.matrix(~ length) ### design matrix without intercept

data.list = list(N = nrow(Xmat), Npreds = ncol(Xmat), Ngroups = n.groups,y = as.vector(mass),
                 x = Xmat,group = as.numeric(pop))

LME_randomSlopeAndIntercept_matrix.stan = '
data {
  int<lower=0> N; // Number of observed data
  int<lower=1> Npreds; // number of columns in design matrix
  int<lower=0> Ngroups; // number of random groups
  int group[N]; // populations
  vector[N] y; // response
  vector[N] length; // may not be need now
  matrix [N,Npreds] Xmat; // model matrix;
}

parameters {
  vector [Npreds] beta; // betas
  
  // random effect
  matrix[Npreds,Ngroups] beta2; // random effects. Not sure beta 3 is necessary
  
  real<lower=0> sigmaint; // corresponds to mu.int, random intercept sd
  real<lower=0> sigmaslope; // corresponds to sigma.slope
  real<lower=0> sigmaeps; // estimate error
}

transformed parameters {
  vector[N] yhat;

  // varying intercepts
  matrix[Npreds,Ngroups] beta3; // not sure this is necessary
  
  for (i in 1:Npreds)
    beta3[i] = beta[i] + beta2[i];
  
  for (j in 1:N)
    yhat[j] = Xmat[j]*beta3[:,group[j]];


}
model {
  // random effects distribution
  //beta2 ~ normal(0,sigmaslope);
  y ~ normal(yhat, sigmaeps);
}


'

fit_matrix = stan(file = "LME_randomSlopeAndIntercept_matrix.stan",data = data.list,chains = 4,
            iter = ni, warmup = nb, thin = nt)

print(fit_matrix,pars=c("beta","sigmaint","sigmaslope","sigmaeps"))

If your model can be fit using lme4 syntax, then you can just get the X and Z matrices via

X <- getME(lme.fit, name = "X")
Z <- getME(lme.fit, name = "Z")

Then again, if you model can be fit using lme4 syntax, then you can fit the same model with rstanarm or brms. If you really want to work with the Stan code for some reason, use brms::make_stancode and edit it from there.

1 Like

Thanks for the reply. I do have to work directly with Stan code for several reasons.

I have modified the code found using brms and from the following paper

Here is the code:

Xmat = model.matrix(~ 1 + length)

data.list = list(N=nrow(Xmat),Npreds = ncol(Xmat),Nlev1 = nlevels(pop),
                 n_u = ncol(Xmat),lev1 = as.numeric(pop),Xmat = Xmat,Z_u = Xmat,y = as.vector(mass))

LME_randomSlopeAndInterceptCor.stan = '
data {
  int<lower=0> N;               //no trials
  int<lower=1> Npreds;               //no fixefs
  int<lower=0> Nlev1;               //no subjects
  int<lower=1> n_u;             //no subj ranefs
   int<lower=1,upper=Nlev1> lev1[N]; //subject indicator
   row_vector[Npreds] Xmat[N];           //fixef design matrix
  row_vector[n_u] Z_u[N];       //subj ranef design matrix
  vector[N] y;                 // response
}

parameters {
  vector[Npreds] beta;               //fixef coefs
  cholesky_factor_corr[n_u] L_u;  //cholesky factor of subj ranef corr matrix
  
  vector<lower=0>[n_u] sigma_u; //subj ranef std
  real<lower=0> sigma_e;        //residual std
  vector[n_u] z_u[Nlev1];           //spherical subj ranef
}

transformed parameters {
  vector[n_u] u[Nlev1];             //subj ranefs
 
  {
    matrix[n_u,n_u] Sigma_u;    //subj ranef cov matrix
    Sigma_u = diag_pre_multiply(sigma_u,L_u);
    for(j in 1:Nlev1)
      u[j] = Sigma_u * z_u[j];
   }
}

model {
  //priors
  L_u ~ lkj_corr_cholesky(2.0);
 for (j in 1:Nlev1)
    z_u[j] ~ normal(0,1);
 
  //likelihood
  for (i in 1:N)
  	y[i] ~ normal(Xmat[i] * beta + Z_u[i] * u[lev1[i]], sigma_e);
}
'

fit_matrix = stan(file = "LME_randomSlopeAndInterceptCor.stan",data = data.list,chains = 4,
           iter = ni, warmup = nb, thin = nt)

print(fit_matrix,pars=c("beta", "sigma_e", "sigma_u"),probs = c(0.025, 0.5, 0.975))

It return the expected parameters.