I’m wondering if there’s a better way to specify the following model. I’d like to model the variance-covariance matrices based on the model matrix. Right now, the only way I’m able to get this to work is to sort the matrix so it’s ordered by factor combinations and use if statements to determine which matrix to effect.
I’m sure there must be a better way to do this. Any suggested improvements or resources to look at would be appreciated.
Here is some R code to create an example.
library(rstan)
# Create design, remove and order factor level combinations
x1 <- letters[1:3]
x2 <- letters[24:26]
df <- replicate(20*11, expand.grid(x1, x2), FALSE)
df <- do.call(rbind, df)
df <- df[order(df$Var1, df$Var2), ]
remove <- with(df, Var1 == x1[1] & Var2 == x2[1] | Var1 == x1[3] & Var2 == x2[3] )
df <- df[!remove,]
# Initialize responses and extract model matrix
df$x <- rnorm(nrow(df))
df$y <- rnorm(nrow(df))
mm <- model.matrix(lm(y ~ Var1 + Var2, data = df))
# Define coefficients for "true" standard deviations
x_betas <- c(12, 4, 7, -3, -1)
y_betas <- c(6, 8, 13, -3, -1)
# Calculate "true" standard deviations
sx_true <- as.vector(mm %*% x_betas)
sy_true <- as.vector(mm %*% y_betas)
# Simulate data
combins <- unique(df[, c("Var1", "Var2")])
for(i in 1:nrow(combins)){
rows <- which(df$Var1 == combins$Var1[i] & df$Var2 == combins$Var2[i])
df$x[rows] <- rnorm(length(rows), 0, unique(sx_true[rows]))
df$y[rows] <- rnorm(length(rows), 0, unique(sy_true[rows]))
}
# Data to be passed to Stan
dataList <- list(
N = nrow(df), # Number of observation
# K = ncol(mm), # Number of predictors el, ch (could add interaction)
J = 2, # Number of responses (x and y)
# x = mm, # model matrix (extracted from lm earlier)
y = as.matrix(df[,c("x", "y")]) # matrix of responses
)
The following is my Stan model for the variance-covariance matrices of the x
and y
responses based on the different factor level combinations.
# Write Stan model
stringModel <- "
data {
int<lower=0> N; // number of observations
int<lower=1> J; // number of responses
vector[J] y[N]; // an N x J matrix of responses
}
parameters {
// 7 different covariance matrices to correspond with 7 different factor level combinations
cov_matrix[J] sigma1; // covariance matrix for J responses
cov_matrix[J] sigma2; // covariance matrix for J responses
cov_matrix[J] sigma3; // covariance matrix for J responses
cov_matrix[J] sigma4; // covariance matrix for J responses
cov_matrix[J] sigma5; // covariance matrix for J responses
cov_matrix[J] sigma6; // covariance matrix for J responses
cov_matrix[J] sigma7; // covariance matrix for J responses
}
model {
for (i in 1:N){
if (i < 221)
y[i] ~ multi_normal([ 0, 0 ]', sigma1);
else if (i < 441)
y[i] ~ multi_normal([ 0, 0 ]', sigma2);
else if (i < 661)
y[i] ~ multi_normal([ 0, 0 ]', sigma3);
else if (i < 881)
y[i] ~ multi_normal([ 0, 0 ]', sigma4);
else if (i < 1101)
y[i] ~ multi_normal([ 0, 0 ]', sigma5);
else if (i < 1321)
y[i] ~ multi_normal([ 0, 0 ]', sigma6);
else
y[i] ~ multi_normal([ 0, 0 ]', sigma7);
}
}"