Multivariate animal model

Hey guys,

I’m trying to set up a multivariate animal model, which is just a random intercept model with a known covariance matrix between individuals (which is derived from a pedigree), and an unknown covariance matrix (G) between traits that gets Kronecker multiplied by the known between-individual matrix.

I’ve got a working simulated data that I can successfully fit in MCMglmm. I lifted some parts from a phylogenetic model in brms, but I can’t seem to get the Stan model to work. The G-matrix is the main parameter of interest here.

Here is the code so far, and the R code for the simulated data:

functions {
  matrix as_matrix(vector X, int N, int K) { 
    matrix[N, K] Y; 
    for (i in 1:N) {
      Y[i] = to_row_vector(X[((i - 1) * K + 1):(i * K)]); 
    }
    return Y; 
  }
  matrix kronecker(matrix A, matrix B) {
    matrix[rows(A)*rows(B), cols(A)*cols(B)] kron;
    for (i in 1:cols(A)) {
      for (j in 1:rows(A)) {
        kron[((j-1)*rows(B)+1):(j*rows(B)), ((i-1)*cols(B)+1):(i*cols(B))] = A[j,i] * B;
      }
    }
    return kron;
  }
}
data {
  int<lower=1>    K;  // number of traits
  int<lower=1>    J;  // number of fixed effects
  int<lower=0>    N;  // number of individuals
  vector[J]    X[N];  // Fixed effects design matrix
  vector[K]    Y[N];  // response variable
  int          Z[N];  // Random effect design matrix
  matrix[N, N]    A;  // cholesky factor of known covariance matrix
}

parameters {
  matrix[K,J] beta; // fixed effects
  vector[N*K]    a; // breeding values
  
  # G matrix
  cholesky_factor_corr[K] L_Omega_G;
  vector<lower=0>[K] L_sigma_G;
  
  # R matrix
  cholesky_factor_corr[K] L_Omega_R;
  vector<lower=0>[K] L_sigma_R;
  
}
transformed parameters {
  matrix[N, K] aM;
  
  // Get a vector "a" that is MVN with a "A⊗G" covariance matrix, 
  // then convert to a NxK matrix of individual random intercepts
  aM = as_matrix(kronecker(A, diag_pre_multiply(L_sigma_G, L_Omega_G)) * a, N, K);
}
model {
  vector[K] mu[N];
  matrix[K,K] L_Sigma_R;
  
  L_Sigma_R = diag_pre_multiply(L_sigma_R, L_Omega_R);
  
  for(n in 1:N)
    mu[n] = beta * X[n] + to_vector(aM[Z[n]]);
  
  Y ~ multi_normal_cholesky(mu, L_Sigma_R);
  
  to_vector(beta) ~ normal(0, 1);
  a ~ normal(0, 1);
  L_Omega_G ~ lkj_corr_cholesky(4);
  L_sigma_G ~ cauchy(0, 2.5);
  L_Omega_R ~ lkj_corr_cholesky(4);
  L_sigma_R ~ cauchy(0, 2.5);
}
generated quantities {
  matrix[K, K] P;
  matrix[K, K] G;
  matrix[K, K] E;
  corr_matrix[K] corrG;
  corr_matrix[K] corrE;
  
  G = multiply_lower_tri_self_transpose(diag_pre_multiply(L_sigma_G, L_Omega_G));
  E = multiply_lower_tri_self_transpose(diag_pre_multiply(L_sigma_R, L_Omega_R));
  P = G + E;
  
  corrG = multiply_lower_tri_self_transpose(L_Omega_G);
  corrE = multiply_lower_tri_self_transpose(L_Omega_R);
}

R code to generate the input:

library(MCMCglmm)
library(rstan)
library(mvtnorm)

set.seed(17)

# Pedigree
ped <- read.table("https://raw.githubusercontent.com/diogro/QGcourse/master/tutorials/volesPED.txt",header=T)

# G matrix
G <- matrix(c(1, 0.7, 0.7, 2), 2, 2)

# Residual matrix
E <- matrix(c(1, 0.2, 0.2, 2), 2, 2)

# Simulated breeding values
a = rbv(ped, G)

# Fixed effects
beta = matrix(c(1, 2,
                0.1, 0.2,
                0.05, 0.1), 3, 2, byrow = TRUE)
colnames(beta) = c("x", "y")
rownames(beta) = c("Intercept", "sex", "Z")

sex = numeric(nrow(a))
sex = sample(c(0, 1), length(sex), replace = TRUE)
sex[rownames(a) %in% ped$SIRE] <- 1
sex[rownames(a) %in% ped$DAM] <- 0
Z = rnorm(nrow(a))
Intercept = rep(1, nrow(a))
X = cbind(Intercept, sex, Z)
rownames(X) = rownames(a)

# Correlated noise
e = rmvnorm(nrow(a), sigma = E)

# Simulated data
Y = X %*% beta + a + e
colnames(Y) = c("x", "y")

# Create relationship matrix
inv.phylo <- MCMCglmm::inverseA(ped, scale = TRUE)
A <- solve(inv.phylo$Ainv)
A = (A + t(A))/2 # Not always symmetric after inversion
rownames(A) <- rownames(inv.phylo$Ainv)

# Relate individuals to columns of the A matrix
pos = unlist(lapply(rownames(Y), function(x) which(x == rownames(A))))

stan_data = list(K = ncol(Y),
                 J = ncol(X),
                 N = nrow(Y),
                 X = X,
                 Y = Y,
                 Z = pos,
                 A = as.matrix(chol(A)))
stan_model = stan(file = "./animalModel.stan", data = stan_data)
model = rstan::extract(stan_model)
rstan::summary(stan_model, pars = "G")[[1]]
rstan::traceplot(stan_model, pars = "G")
colMeans(model$G)
1 Like

Sorry nobody replied earlier. I’m still getting used to dealing with Discourse and trying not to get the first reply in on everything.

You probably don’t want to fully blow out this Kronecker result (this came up recently on our old mailing list). We should have examples of that. It won’t not work, just slow everything down.

What problem are you haivng with the model? Usually, we recommend starting simpler and building up to more complex models. Or at the very least, testing things like your component functions (e.g., to make sure your functions are doing what you think they’re doing (these things are very hard to debug by eye).

Hey Bob,

Thanks for the reply. This was the simplest multivariate model I could think of, but I’ve moved to an univariate model to work things out.

The idea is to model a continuous response in N individuals as a regression with random intercepts, given a known covariance between random intercepts. Something like:

Y = X*\beta + a + e

Where the “a” vector comes from a multivariate normal with zero mean and a covariance matrix of the form g*A. A is a known N x N relationship matrix, and g is an unknown variance. \beta is the fixed effects term, and X the design matrix. The “e” term is an uncorrelated error. The main parameter of interest, in this case, is the “g” variance.

I tried generating the correlated random intercepts by multiplying an uncorrelated vector by G and the Cholesky factor of the A matrix and adding this in the regression. This doesn’t seem to be correct.

Here is the code for the univariate case:

data {
  int<lower=1>    J; // number of fixed effects
  int<lower=0>    N; // number of individuals
  vector[J]    X[N]; // Fixed effects design matrix
  real         Y[N]; // response variable
  matrix[N, N]    A; // cholesky factor of known covariance matrix
}
parameters {
  row_vector[J] beta; // fixed effects
  vector[N]    a; // breeding values

# Genetic variance
  real<lower=0> sigma_G;

# Residual variance
  real<lower=0> sigma_R;

}
transformed parameters {
  vector[N] aM;
  // Correlated random intercepts
  aM = sigma_G * (A * a);
}
model {
    vector[N] mu;
 
    for(n in 1:N)
      mu[n] = beta * X[n] + aM[n];

    Y ~ normal(mu, sigma_R);

    to_vector(beta) ~ normal(0, 1);
    a ~ normal(0, 1);
    sigma_G ~ normal(0, 1);
    sigma_R ~ normal(0, 1);
}

And the simulated data:

list_pkgs <- c("MCMCglmm","pedigreemm", "rstan", "mvtnorm", "bayesplot")
new_pkgs <- list_pkgs[!(list_pkgs %in% installed.packages()[,"Package"])]
if(length(new_pkgs) > 0){ install.packages(new_pkgs) }
library(MCMCglmm)
library(pedigreemm)
library(rstan)
library(mvtnorm)
library(bayesplot)

rstan_options(auto_write = TRUE)
options(mc.cores = 10)

set.seed(17)

# Pedigree
ped <- read.table("https://raw.githubusercontent.com/diogro/QGcourse/master/tutorials/volesPED.txt",header=T)

# G variance
G <- 1.

# Residual variance
E <- 0.3

# Create relationship matrix
inv.phylo <- MCMCglmm::inverseA(ped, scale = TRUE)
A <- solve(inv.phylo$Ainv)
A = (A + t(A))/2 # Not always symmetric after inversion
rownames(A) <- rownames(inv.phylo$Ainv)

# Simulated correlated random intercepts
a = t(rmvnorm(1, sigma = G*as.matrix(A)))

# Fixed effects
beta = matrix(c(1, 0.3), 2, 1, byrow = TRUE)
rownames(beta) = c("Intercept", "sex")
sex = sample(c(0, 1), nrow(a), replace = TRUE)
sex[rownames(a) %in% ped$SIRE] <- 1
sex[rownames(a) %in% ped$DAM] <- 0
Intercept = rep(1, nrow(a))
X = cbind(Intercept, sex)

# Uncorrelated noise
e = rnorm(nrow(a), sd = sqrt(E))

# Simulated data
Y = X %*% beta + a + e

## Fitting the model via REML just to be safe
ped2 <- pedigree(ped$SIRE, ped$DAM, ped$ID)  #restructure ped file
data = data.frame(Y = Y, sex = sex, ID = rownames(A))
mod_animalREML<-pedigreemm(Y ~ sex + (1|ID), pedigree=list(ID=ped2), 
                           data = data, REML=TRUE, 
                           control = lmerControl(check.nobs.vs.nlev="ignore",
                                                 check.nobs.vs.nRE="ignore"))
summary(mod_animalREML)

stan_data = list(J = ncol(X),
                 N = nrow(Y),
                 X = X,
                 Y = as.numeric(Y),
                 A = as.matrix(chol(A)))

I think the default chol in R returns an upper triangular Cholesky factor. cholesky_decompose in Stan returns a lower triangular one.

I think the way you’ve written the Stan code you want to use a lower triangular one (so just transpose it before you pass it in, or do the Cholesky factorization inside your model).

1 Like

Thanks, Ben, good catch. I moved the Cholesky decomposition to the Stan code and now I’m getting sensible estimates. However, for some combinations of random intercept variance and residual variance, I get a warning on Bayesian Fraction of Missing Information being low in all chains. The estimates all seem fine, so should I be worried about this? I attached an example pairs plot where I got this warning. Could this be related to the inevitable negative correlation between the variances?

I honestly don’t really know what that diagnostic is.

I Googled it, and came across this discussion: https://groups.google.com/forum/#!topic/stan-dev/uJhsapVwlk8

Seems relevant :D

1 Like

That diagnostic is interesting (and Michael’s arXiv papers have good stuff on it) but I don’t think he has disseminated clear recommendations on interpretation (turns out to be hard).

1 Like

Just reread this – for some combinations of random intercept variance and residual variance, is there any indication what combinations of parameters are causing this? Might give you an idea what part of the model is being weird.

I get the warnings when the proportion of variance associated with the random intercepts is higher than the residual variance. That’s not really common in practice, but is there any alternative way to parametrize this?

The E-BFMI quantifies how effective the momentum resampling step of Hamiltonian Monte Carlo is – the smaller the number the harder it is to explore all of the energy level sets with nontrivial probability. In particular, for sufficiently small values this step loses some important guarantees which then compromises the accuracy of the entire MCMC chain.

The problem is identifying how small is too small. The current warning is set at 0.3 as that was the E-BFMI for typically pathological models. More recent studies, however, have shown that HMC is more robust than we thought so the threshold should probably be lowered. On the other hand, we don’t yet have a good idea as to just how far it should be lowered.

So the easiest thing to do would be to look at the actual value of the E-BFMI that you’re getting. Sadly the warning doesn’t display it but it can be calculated easily enough. If it’s close to 0.3 then you’re probably okay.

Otherwise you’ll have to fall back on the other means of validation. For example, if you’re seeing the E-BFMI warning in simulated data then you can follow the simulation validation procedure (sample parameters from prior, sample data from parameters, sample from corresponding posterior; compare prior samples to posterior samples). If that looks good then the E-BFMI is likely a false positive.

Intuitively, low E-BFMI often comes from heavy-tailed target distributions. That could be a manifestation of a non-identified model which would also be consistent with seeing the random intercepts variance be much larger than the measurement variance. Do you have sufficiently informative priors to identify everything?

4 Likes

I just did a bunch of chains with random intercepts at around 0.7 of the total variance, scaling so the random intercept variance and residual variance was around 1, put N(0, 1) priors on everything and got E-BMFI between 0.24 and 0.34. All the estimates are consistent with the simulated values, so I think this is a false positive. Also, it’s rare to see such a high proportion of variance in the random intercepts, so I think I’m ok.

2 Likes

Awesome, with these more specialized models it’s great to have these kinds of contributions. Animal models would be a great place to apply Stan. Unless something has changed in the last two years R’s MCMCglmm was still the gold standard but Stan could do much better. If you get some good implementations going that you trust please consider submitting a case study (you can see mc-stan.org for the format).

Great! I’ll be sure to do that after I test this more. Just to have these available, here are the univariate and multivariate code I came up with. Both give reasonable results on simulated data and run in similar time to MCMCglmm.

Univariate animal model:

data {
  int<lower=1>    J; // number of fixed effects
  int<lower=0>    N; // number of individuals
  vector[J]    X[N]; // Fixed effects design matrix
  real         Y[N]; // response variable
  cov_matrix[N]   A; // relationship matrix
}
transformed data{
  matrix[N, N] LA;
  LA = cholesky_decompose(A);
}
parameters {
  vector[N]  a_tilde; // breeding values
  row_vector[J] beta; // fixed effects

# Genetic variance
  real<lower=0> sigma_G;

# Residual variance
  real<lower=0> sigma_R;
}
model {
    vector[N] mu;
    vector[N] a;
    
    a_tilde ~ normal(0, 1);
    a = sqrt(sigma_G) * (LA * a_tilde);
 
    for(n in 1:N)
      mu[n] = beta * X[n] + a[n];

    Y ~ normal(mu, sigma_R);

    to_vector(beta) ~ normal(0, 1);
    
    sigma_G ~ cauchy(0, 5);
    sigma_R ~ cauchy(0, 5);
}
generated quantities{
  real sigma_E;
  sigma_E = sigma_R * sigma_R;
}

And the multivariate model. I used a custom function for the large Kronecker product that works well, but I’m sure there are better options.

functions {
  matrix as_matrix(vector X, int N, int K) { 
    matrix[N, K] Y; 
    for (i in 1:N) {
      Y[i] = to_row_vector(X[((i - 1) * K + 1):(i * K)]); 
    }
    return Y; 
  }
  vector chol_kronecker_product(matrix LA, matrix LG, vector a) {
    vector[num_elements(a)] new_a;
    new_a = rep_vector(0, num_elements(a));
    for(iA in 1:cols(LA)){
      for(jA in 1:iA){
        if(LA[iA, jA] > 1e-10){ // avoid calculating products between unrelated individuals
          for(iG in 1:cols(LG)){
            for(jG in 1:iG){
              new_a[(cols(LG)*(iA-1))+iG] = new_a[(cols(LG)*(iA-1))+iG] + 
                                            LA[iA, jA] * LG[iG, jG] * a[(cols(LG)*(jA-1))+jG];
            }
          }
        }
      }
    }
    return new_a;
  }
}
data {
  int<lower=1>    K; // number of traits
  int<lower=1>    J; // number of fixed effects
  int<lower=0>    N; // number of individuals
  vector[J]    X[N]; // Fixed effects design matrix
  vector[K]    Y[N]; // response variable
  matrix[N, N]    A; // relationship matrix
}
transformed data{
  matrix[N, N] LA;
  LA = cholesky_decompose(A);
}
parameters {
  matrix[K,J]    beta; // fixed effects
  vector[N*K] a_tilde; // breeding values

# G matrix
  cholesky_factor_corr[K] L_Omega_G;
  vector<lower=0>[K] L_sigma_G;

# R matrix
  cholesky_factor_corr[K] L_Omega_R;
  vector<lower=0>[K] L_sigma_R;
}
transformed parameters {
  matrix[N, K] a;
  a = as_matrix(chol_kronecker_product(LA, diag_pre_multiply(L_sigma_G, L_Omega_G), a_tilde), N, K);
}
model {
    vector[K] mu[N];
    matrix[K, K] L_Sigma_R;

    L_Sigma_R = diag_pre_multiply(L_sigma_R, L_Omega_R);

    for(n in 1:N)
      mu[n] = beta * X[n] + to_vector(a[n]);

    Y ~ multi_normal_cholesky(mu, L_Sigma_R);

    to_vector(beta) ~ normal(0, 1);
    a_tilde   ~ normal(0, 1);
    L_Omega_G ~ lkj_corr_cholesky(4);
    L_sigma_G ~ cauchy(0, 5);
    L_Omega_R ~ lkj_corr_cholesky(4);
    L_sigma_R ~ cauchy(0, 5);
}
generated quantities {
    cov_matrix[K] P;
    cov_matrix[K] G;
    cov_matrix[K] E;
    corr_matrix[K] corrG;
    corr_matrix[K] corrE;

    G = multiply_lower_tri_self_transpose(diag_pre_multiply(L_sigma_G, L_Omega_G));
    E = multiply_lower_tri_self_transpose(diag_pre_multiply(L_sigma_R, L_Omega_R));
    P = G + E;

    corrG = multiply_lower_tri_self_transpose(L_Omega_G);
    corrE = multiply_lower_tri_self_transpose(L_Omega_R);
}
8 Likes

Hi @diogro, I am not sure if you are still working on this topic, and I have a solution at here that runs faster than your code. I much appreciate you providing the stan code. It is inspiring. Thank you.

3 Likes

Fantastic! Thanks!

Ok, so I came back to this model this week and finally found a solution that is fast and well behaved enough to be useful.

Looking into the code from this paper I found a formulation of the A⊗G Kronecker product that honestly had me banging my head against a wall for not thinking of it before. My initial solution was doing the full product and using a long (individuals x traits) vector for the product with the Cholesky factor of the kronecker product, which was then reshaped to the get the matrix of breeding values. Instead of doing this large and slow multiplication, I now use two matrix products and an initial N(0, 1) matrix of effects that is wide (individuals in the rows and traits in the columns.

So the a terms, which are distributed according to N(0, A⊗G), are obtained with:

ã ~ N(0, 1)
cholesky(A) * ã * cholesky(G)’

matrix[N, K] a;
vector<lower=0>[K] L_sigma_G;

// a ~ N(0, A x G)
  a = (LA * a_tilde) * diag_pre_multiply(L_sigma_G, L_Omega_G)'; 

The intuition here is that the A matrix gives the covariance between individuals, and the G matrix the covariance between traits.

I also added an explicit estimate of the heritabilities (the variance partitioning between G and R in the model), which seems to have helped with the E-BFMI warnings.

With some other improvements, the full code is below. I’m also working on an R interface here.

data {
  int<lower=1>    K; // number of traits
  int<lower=1>    J; // number of fixed effects
  int<lower=0>    N; // number of individuals
  vector[J]    X[N]; // Fixed effects design matrix
  vector[K]    Y[N]; // response variable
  matrix[N, N]    A; // known covariance matrix
}
transformed data{
  matrix[N, N] LA;
  vector[K] y_sd;
  vector[K] y_var;
  vector[K] y_mean;
  vector[K] Y_std[N]; // response variable
  LA = cholesky_decompose(A);
  
  for(k in 1:K){
    y_sd[k] = sd(Y[,k]);
    y_mean[k] = mean(Y[,k]);
    for(n in 1:N)
      Y_std[n,k] = (Y[n,k] - y_mean[k]) / y_sd[k];
  }
}
parameters {
  matrix[K, J]    beta; // fixed effects
  matrix[N, K] a_tilde; // breeding values precursor
  vector<lower=0, upper = 1>[K] h2; //variance partition

// total variances
  vector<lower=0>[K] L_sigma;

// G matrix correlations
  cholesky_factor_corr[K] L_Omega_G;

// R matrix correlations
  cholesky_factor_corr[K] L_Omega_R;

}
transformed parameters {
  matrix[N, K] a;
  vector<lower=0>[K] L_sigma_G;
  vector<lower=0>[K] L_sigma_R;

  for(k in 1:K){
    L_sigma_G[k] = L_sigma[k] * h2[k];
    L_sigma_R[k] = L_sigma[k] * (1-h2[k]);
  }
  a = (LA * a_tilde) * diag_pre_multiply(L_sigma_G, L_Omega_G)'; // a ~ N(0, A x G)
}
model {
    vector[K] mu[N];
    matrix[K, K] L_Sigma_R;

    L_Sigma_R = diag_pre_multiply(L_sigma_R, L_Omega_R);

    for(n in 1:N)
      mu[n] = beta * X[n] + to_vector(a[n]);

    Y_std ~ multi_normal_cholesky(mu, L_Sigma_R);

    to_vector(beta) ~ normal(0, 1);
    to_vector(a_tilde) ~ normal(0, 1);
    h2 ~ beta(2, 4);
    L_sigma ~ normal(0, 1);
    L_Omega_G ~ lkj_corr_cholesky(4);
    L_Omega_R ~ lkj_corr_cholesky(4);
}
generated quantities {
    vector[K] sigma_G;
    vector[K] sigma_R;
    cov_matrix[K] P;
    cov_matrix[K] G;
    cov_matrix[K] E;
    corr_matrix[K] corrG;
    corr_matrix[K] corrE;
  
    sigma_G = y_sd .* L_sigma_G;
    sigma_R = y_sd .* L_sigma_R;

    G = multiply_lower_tri_self_transpose(diag_pre_multiply(sigma_G, L_Omega_G));
    E = multiply_lower_tri_self_transpose(diag_pre_multiply(sigma_R, L_Omega_R));
    P = G + E;

    corrG = multiply_lower_tri_self_transpose(L_Omega_G);
    corrE = multiply_lower_tri_self_transpose(L_Omega_R);
}
2 Likes