Hey guys,
I’m trying to set up a multivariate animal model, which is just a random intercept model with a known covariance matrix between individuals (which is derived from a pedigree), and an unknown covariance matrix (G) between traits that gets Kronecker multiplied by the known between-individual matrix.
I’ve got a working simulated data that I can successfully fit in MCMglmm. I lifted some parts from a phylogenetic model in brms, but I can’t seem to get the Stan model to work. The G-matrix is the main parameter of interest here.
Here is the code so far, and the R code for the simulated data:
functions {
matrix as_matrix(vector X, int N, int K) {
matrix[N, K] Y;
for (i in 1:N) {
Y[i] = to_row_vector(X[((i - 1) * K + 1):(i * K)]);
}
return Y;
}
matrix kronecker(matrix A, matrix B) {
matrix[rows(A)*rows(B), cols(A)*cols(B)] kron;
for (i in 1:cols(A)) {
for (j in 1:rows(A)) {
kron[((j-1)*rows(B)+1):(j*rows(B)), ((i-1)*cols(B)+1):(i*cols(B))] = A[j,i] * B;
}
}
return kron;
}
}
data {
int<lower=1> K; // number of traits
int<lower=1> J; // number of fixed effects
int<lower=0> N; // number of individuals
vector[J] X[N]; // Fixed effects design matrix
vector[K] Y[N]; // response variable
int Z[N]; // Random effect design matrix
matrix[N, N] A; // cholesky factor of known covariance matrix
}
parameters {
matrix[K,J] beta; // fixed effects
vector[N*K] a; // breeding values
# G matrix
cholesky_factor_corr[K] L_Omega_G;
vector<lower=0>[K] L_sigma_G;
# R matrix
cholesky_factor_corr[K] L_Omega_R;
vector<lower=0>[K] L_sigma_R;
}
transformed parameters {
matrix[N, K] aM;
// Get a vector "a" that is MVN with a "A⊗G" covariance matrix,
// then convert to a NxK matrix of individual random intercepts
aM = as_matrix(kronecker(A, diag_pre_multiply(L_sigma_G, L_Omega_G)) * a, N, K);
}
model {
vector[K] mu[N];
matrix[K,K] L_Sigma_R;
L_Sigma_R = diag_pre_multiply(L_sigma_R, L_Omega_R);
for(n in 1:N)
mu[n] = beta * X[n] + to_vector(aM[Z[n]]);
Y ~ multi_normal_cholesky(mu, L_Sigma_R);
to_vector(beta) ~ normal(0, 1);
a ~ normal(0, 1);
L_Omega_G ~ lkj_corr_cholesky(4);
L_sigma_G ~ cauchy(0, 2.5);
L_Omega_R ~ lkj_corr_cholesky(4);
L_sigma_R ~ cauchy(0, 2.5);
}
generated quantities {
matrix[K, K] P;
matrix[K, K] G;
matrix[K, K] E;
corr_matrix[K] corrG;
corr_matrix[K] corrE;
G = multiply_lower_tri_self_transpose(diag_pre_multiply(L_sigma_G, L_Omega_G));
E = multiply_lower_tri_self_transpose(diag_pre_multiply(L_sigma_R, L_Omega_R));
P = G + E;
corrG = multiply_lower_tri_self_transpose(L_Omega_G);
corrE = multiply_lower_tri_self_transpose(L_Omega_R);
}
R code to generate the input:
library(MCMCglmm)
library(rstan)
library(mvtnorm)
set.seed(17)
# Pedigree
ped <- read.table("https://raw.githubusercontent.com/diogro/QGcourse/master/tutorials/volesPED.txt",header=T)
# G matrix
G <- matrix(c(1, 0.7, 0.7, 2), 2, 2)
# Residual matrix
E <- matrix(c(1, 0.2, 0.2, 2), 2, 2)
# Simulated breeding values
a = rbv(ped, G)
# Fixed effects
beta = matrix(c(1, 2,
0.1, 0.2,
0.05, 0.1), 3, 2, byrow = TRUE)
colnames(beta) = c("x", "y")
rownames(beta) = c("Intercept", "sex", "Z")
sex = numeric(nrow(a))
sex = sample(c(0, 1), length(sex), replace = TRUE)
sex[rownames(a) %in% ped$SIRE] <- 1
sex[rownames(a) %in% ped$DAM] <- 0
Z = rnorm(nrow(a))
Intercept = rep(1, nrow(a))
X = cbind(Intercept, sex, Z)
rownames(X) = rownames(a)
# Correlated noise
e = rmvnorm(nrow(a), sigma = E)
# Simulated data
Y = X %*% beta + a + e
colnames(Y) = c("x", "y")
# Create relationship matrix
inv.phylo <- MCMCglmm::inverseA(ped, scale = TRUE)
A <- solve(inv.phylo$Ainv)
A = (A + t(A))/2 # Not always symmetric after inversion
rownames(A) <- rownames(inv.phylo$Ainv)
# Relate individuals to columns of the A matrix
pos = unlist(lapply(rownames(Y), function(x) which(x == rownames(A))))
stan_data = list(K = ncol(Y),
J = ncol(X),
N = nrow(Y),
X = X,
Y = Y,
Z = pos,
A = as.matrix(chol(A)))
stan_model = stan(file = "./animalModel.stan", data = stan_data)
model = rstan::extract(stan_model)
rstan::summary(stan_model, pars = "G")[[1]]
rstan::traceplot(stan_model, pars = "G")
colMeans(model$G)