I am working on a joint model that includes a hierarchical linear model that partitions variation across multiple factors (species, and study, and residual variation). The species-level estimate of the traits are then used as predictors in the second part of the model. My aim is to develop a model that allows the traits to effect the cue, while also allowing us to estimate species-level variation in cues not explained by the trait.
But my model is struggling to estimate the study level variation (sigma_study) for my test data and I am not sure why. Specifically the model appears to struggle to differentiate the study sigma from the general variance.
It is unclear to me whether there is an issue with the specification of the model or how to improve it. Any insights or help would be much appreciated.
Below is the code for the Stan model:
data {
int<lower = 1> n_spec; // number of random effect level
// Trait model
int<lower = 1> N; // Sample size
int<lower = 1, upper = n_spec> trait_species[N]; // id of random effect (species)
int<lower = 1> n_study; // number of random effect levels (study)
int<lower = 1, upper = n_study> study[N]; // id of random effect (study)
vector[N] yTraiti; // Observed trait
real prior_mu_grand_mu;
real prior_mu_grand_sigma;
real prior_sigma_sp_mu;
real prior_sigma_sp_sigma;
real prior_sigma_study_mu;
real prior_sigma_study_sigma;
real prior_sigma_traity_mu;
real prior_sigma_traity_sigma;
// Cue model
int<lower = 1> Ncue; // Sample size for second part
int<lower = 1, upper = n_spec> cue_species[Ncue]; // id of random effect (species)
vector[Ncue] yRespi; // Outcome phenology
vector[Ncue] cuei; // predictor forcing
real prior_muCueSp_mu;
real prior_muCueSp_sigma;
real prior_muRespSp_mu;
real prior_muRespSp_sigma;
real prior_sigmaCueSp_mu;
real prior_sigmaCueSp_sigma;
real prior_sigmaRespSp_mu;
real prior_sigmaRespSp_sigma;
real prior_betaTraitxCue_mu;
real prior_betaTraitxCue_sigma;
real prior_sigmaRespy_mu;
real prior_sigmaRespy_sigma;
}
parameters{
// Traits
real mu_grand; // grand mean for trait value
vector[n_spec] muSp; // species offsets
vector[n_study] muStudy; // study offsets
real<lower = 0> sigma_traity; // sd general
real<lower = 0> sigma_sp; // sd species
real<lower = 0> sigma_study; // sd study
real alphaCueSp[n_spec];
real muCueSp;
real<lower = 0> sigmaCueSp;
real alphaRespSp[n_spec];
real muRespSp;
real<lower = 0> sigmaRespSp;
real betaTraitxCue;
real<lower = 0> sigmaResp_y;
}
transformed parameters{
vector[N] y_hat;
vector[n_spec] mu_grand_sp;
real betaCueSp[n_spec]; //species level beta forcing
for(i in 1:n_spec){
mu_grand_sp[i] = mu_grand + muSp[i];
}
for (i in 1:N){
y_hat[i] = mu_grand + muSp[trait_species[i]] + muStudy[study[i]];
}
for (isp in 1:n_spec){
betaCueSp[isp] = alphaCueSp[isp] + betaTraitxCue * (mu_grand_sp[isp]);
}
}
model{
// Traits
//// likelihood
yTraiti ~ normal(y_hat, sigma_traity);
muSp ~ normal(0, sigma_sp);
muStudy ~ normal(0, sigma_study);
//// priors
mu_grand ~ normal(prior_mu_grand_mu, prior_mu_grand_sigma);
sigma_sp ~ normal(prior_sigma_sp_mu, prior_sigma_sp_sigma);
sigma_study ~ normal(prior_sigma_study_mu, prior_sigma_study_sigma);
sigma_traity ~ normal(prior_sigma_traity_mu, prior_sigma_traity_sigma);
//// likelihood
for (i in 1:Ncue){
yRespi[i] ~ normal(alphaRespSp[cue_species[i]] + betaCueSp[cue_species[i]] * cuei[i], sigmaResp_y);
}
alphaRespSp ~ normal(muRespSp, sigmaRespSp);
alphaCueSp ~ normal(muCueSp, sigmaCueSp);
//// priors
muRespSp ~ normal(prior_muRespSp_mu, prior_muRespSp_sigma);
sigmaRespSp ~ normal(prior_sigmaRespSp_mu, prior_sigmaRespSp_sigma);
sigmaResp_y ~ normal(prior_sigmaRespy_mu, prior_sigmaRespy_sigma);
muCueSp ~ normal(prior_muCueSp_mu, prior_muCueSp_sigma);
sigmaCueSp ~ normal(prior_sigmaCueSp_mu, prior_sigmaCueSp_sigma);
betaTraitxCue ~ normal(prior_betaTraitxCue_mu, prior_betaTraitxCue_sigma);
}
We are using test data in which every study contains every species, using the below code:
# Part 1
Nrep <- 10 # rep per trait
Nstudy <- 20 # number of studies w/ traits
Nspp <- 20 # number of species
Ntrt <- Nspp * Nstudy * Nrep
mu.grand <- 15
sigma.species <- 5
sigma.study <- 2
sigmaTrait_y <- 2
trt.dat <- data.frame(matrix(NA, Ntrt, 1))
names(trt.dat) <- c("rep")
trt.dat$rep <- c(1:Nrep)
trt.dat$study <- rep(rep(c(1:Nstudy), each = Nspp), each = Nrep)
trt.dat$species <- rep(rep(c(1:Nspp), times = Nstudy), each = Nrep)
alphaTraitSp <- rnorm(Nspp, 0, sigma.species)
trt.dat$alphaTraitSp <- rep(rep(alphaTraitSp, times = Nstudy), each = Nrep)
muStudy <- rnorm(Nstudy, 0, sigma.study)
trt.dat$muStudy <- rep(muStudy, each = Nspp)
sigmaTraity <- 5
trt.dat$sigmaTraity <- rnorm(Ntrt, 0, sigmaTraity)
for (i in 1:Ntrt){
trt.dat$mu_grand_sp[i] <- trt.dat$alphaTraitSp[i] + mu.grand
}
for (i in 1:Ntrt){
trt.dat$yTraiti[i] <- trt.dat$alphaTraitSp[i] + trt.dat$muStudy[i] + mu.grand + trt.dat$sigmaTraity[i]
}
#################################################################
# Part 2
n_spec <-Nspp
nRep <- 20
Ncue <- n_spec * nRep
cue.dat <- data.frame(matrix(NA, Ncue, 2))
names(cue.dat) <- c("rep","species")
cue.dat$rep <- c(1:Ncue)
cue.dat$species <- rep(c(1:n_spec), each = nRep)
cue.dat$cuei <- rnorm(Ncue, 1, 1)
sigmaRespSp <- 20
muRespSp <- 80
alphaRespSp <- rnorm(n_spec, muRespSp, sigmaRespSp)
cue.dat$alphaRespSp <- rep(alphaRespSp, each = nRep)
betaTraitxCue <- 0.3 #Cue effects
muCueSp <- -15
sigmaCueSp <- 4
alphaCueSp <- rnorm(n_spec, muCueSp, sigmaCueSp)
cue.dat$alphaCueSp <- rep(alphaCueSp, each = nRep)
sigmaResp_y <- 5
cue.dat$eResp <- rnorm(Ncue, 0, sigmaResp_y)
cueTrtDat <- merge(cue.dat, unique(trt.dat[,c("species","mu_grand_sp")]), by = "species")
for (i in 1:Ncue){
cueTrtDat$betaCueSp[i] <- cueTrtDat$alphaCueSp[i] + (betaTraitxCue * cueTrtDat$mu_grand_sp[i])
}
for (i in 1:Ncue){
cueTrtDat$yMu[i] <- cueTrtDat$alphaRespSp[i] + cueTrtDat$betaCueSp[i] * cueTrtDat$cuei[i]
}
cueTrtDat$yRespi <- cueTrtDat$yMu + cueTrtDat$eResp
sim.data <- list(yTraiti = trt.dat$yTraiti,
N = Ntrt,
n_spec = Nspp,
trait_species = as.numeric(as.factor(trt.dat$species)),
n_study = Nstudy,
study = as.numeric(as.factor(trt.dat$study )),
prior_mu_grand_mu = 20,
prior_mu_grand_sigma = 10,
prior_sigma_sp_mu = 4,
prior_sigma_sp_sigma = 5,
prior_sigma_study_mu = 2,
prior_sigma_study_sigma = 5,
prior_sigma_traity_mu = 3,
prior_sigma_traity_sigma = 5,
Ncue = nrow(cue.dat),
cue_species = as.numeric(as.factor(cueTrtDat$species )),
yRespi = cueTrtDat$yRespi,
cuei = cueTrtDat$cuei,
prior_muCueSp_mu = -15,
prior_muCueSp_sigma = 10,
prior_muRespSp_mu = 40,
prior_muRespSp_sigma = 10,
prior_sigmaCueSp_mu = 5,
prior_sigmaCueSp_sigma = 5,
prior_sigmaRespSp_mu = 5,
prior_sigmaRespSp_sigma = 5,
prior_betaTraitxCue_mu = 0,
prior_betaTraitxCue_sigma = 1,
prior_sigmaRespy_mu = 10,
prior_sigmaRespy_sigma = 5
)
The pairs plot using this test data and model is as follows:
Thanks!