Thank you @avehtari for this useful suggestion!
We received similar advice from Ben Goodrich and have been working to incorporate it. This approach does shorten the run time substantially, but it generates ~4000 transitions that exceeded the max tree depth. Running the model on the below test data produces good estimates for the slope and intercept, but poor estimates for the lambda and sigma of both the intercept and slope (see summary output below). Is there an issue with how I am computing the Cholesky for Vphy?
It is also unclear to me how I would code a Woodbury matrix identity in Stan. Would you have any sample code you would be able to share? Although, this addition might not be necessary if I could get the current code to generate better estimates, since it is sufficiently fast.
Here is my revised Stan code to compute the Vphy matrix once:
functions {
matrix lambda_vcv(matrix vcv, real lambda, real sigma){
matrix[rows(vcv),cols(vcv)] local_vcv;
matrix[rows(vcv),cols(vcv)] sigma_mat;
local_vcv = vcv * lambda;
for(i in 1:rows(local_vcv))
local_vcv[i,i] = vcv[i,i];
sigma_mat = diag_matrix(rep_vector(sigma, rows(vcv)));
return(sigma_mat * local_vcv * sigma_mat);
}
}
data {
int<lower=1> N;
int<lower=1> n_sp;
int<lower=1, upper= n_sp> sp[N];
vector[N] y; // response
vector[N] x1; // predictor
matrix[n_sp,n_sp]Vphy; //matrix
// Priors
real a_z_prior_mu;
real a_z_prior_sigma;
real lam_interceptsa_prior_alpha;
real lam_interceptsa_prior_beta;
real sigma_interceptsa_prior_mu;
real sigma_interceptsa_prior_sigma;
real b_zf_prior_mu;
real b_zf_prior_sigma;
real lam_interceptsbf_prior_alpha;
real lam_interceptsbf_prior_beta;
real sigma_interceptsbf_prior_mu;
real sigma_interceptsbf_prior_sigma;
real sigma_y_mu_prior;
}
transformed data {
matrix[n_sp, n_sp] L = cholesky_decompose(Vphy);
}
parameters {
real<lower=0> sigma_y;
real<lower=0, upper=1> lam_interceptsa;
real<lower=0> sigma_interceptsa;
real<lower=0, upper=1> lam_interceptsbf;
real<lower=0> sigma_interceptsbf;
real b_zf;
real a_z;
vector[n_sp] a_;
vector[n_sp] b_;
}
transformed parameters {
vector[n_sp] a = a_z + sqrt(lam_interceptsa) * sigma_interceptsa * (L * a_);
vector[n_sp] b = b_zf + sqrt(lam_interceptsbf) * sigma_interceptsbf * (L * b_);
}
model {
real yhat[N];
for(i in 1:N){
yhat[i] =
a[sp[i]] + b[sp[i]] * x1[i];
}
y ~ normal(yhat, sigma_y);
// Priors
a_z ~ normal(a_z_prior_mu, a_z_prior_sigma);
lam_interceptsa ~ beta(lam_interceptsa_prior_alpha, lam_interceptsa_prior_beta);
sigma_interceptsa ~ normal(sigma_interceptsa_prior_mu, sigma_interceptsa_prior_sigma);
b_zf ~ normal(b_zf_prior_mu, b_zf_prior_sigma);
lam_interceptsbf ~ beta(lam_interceptsbf_prior_alpha, lam_interceptsbf_prior_beta);
sigma_interceptsbf ~ normal(sigma_interceptsbf_prior_mu, sigma_interceptsbf_prior_sigma);
sigma_y ~ normal(sigma_y_mu_prior, sigma_y_mu_prior);
a_ ~ normal(0, 1);
b_ ~ normal(0, 1);
}
And the accompanying test code and summary output:
require(ape)
require(geiger)
require(phytools)
require(rstan)
nspecies = 200
nind = 10
spetree <- pbtree(n=nspecies, nsim=1, b=1, complete=FALSE,scale=1)
spetree$tip.label <- paste("s", 1:nspecies, sep="")
# Set up the trait parameters
param <- list(a_z = 4, # root value intercept
lam_interceptsa = 0.4,
sigma_interceptsa = 0.2,
b_zf = 0.6,
lam_interceptsbf = 0.7,
sigma_interceptsbf = 0.1,
sigma_y = 0.01
)
# Set priors
phypriors <- list(
a_z_prior_mu = 4, # true value
a_z_prior_sigma = 1,
lam_interceptsa_prior_alpha = 4, #
lam_interceptsa_prior_beta = 6, #
sigma_interceptsa_prior_mu = 0.2, # true value
sigma_interceptsa_prior_sigma = 0.2,
b_zf_prior_mu = 0.6, # true value
b_zf_prior_sigma = 1,
lam_interceptsbf_prior_alpha = 7, #
lam_interceptsbf_prior_beta = 3, #
sigma_interceptsbf_prior_mu = 0.1, # true value
sigma_interceptsbf_prior_sigma = 0.1,
sigma_y_mu_prior = 0.01
)
# Generate intercept
scaledtree_intercept <- rescale(spetree, model = "lambda", param[["lam_interceptsa"]])
intercepts <- fastBM(scaledtree_intercept, a = param[["a_z"]], mu = 0, sig2 = param[["sigma_interceptsa"]] ^ 2)
# Generate bf slope
scaledtree_bf <- rescale(spetree, model = "lambda", param[["lam_interceptsbf"]])
slopes_bf <- fastBM(scaledtree_bf, a = param[["b_zf"]], mu = 0, sig2 = param[["sigma_interceptsbf"]] ^ 2)
dfhere <- data.frame(sp = c(), intercept = c(), x1=numeric(), trait1=numeric())
for (i in 1:nspecies){
temp <- data.frame(sp = rep(i, nind),
intercept = rep(intercepts[i], nind),
x1 = rnorm(n = nind, mean = 10, sd = 3),
trait1 = rep(slopes_bf[i], nind))
dfhere <- rbind(dfhere, temp)
}
dfhere$mu <- dfhere$intercept + dfhere$x1 * dfhere$trait1
dfhere$y <- rnorm(n = nrow(dfhere), mean = dfhere$mu, sd = param[["sigma_y"]])
# Function for generating "good" initial values
simu_inits <- function(chain_id) {
a_z.temp <- rnorm(n = nspecies, mean = param[["a_z"]], sd = 1)
b_z.temp <- rnorm(n = nspecies, mean = param[["b_zf"]], sd = 1)
return(append(list(a = a_z.temp, b_force = b_z.temp),
param))
}
The model estimates are on the first two rows of the below screenshot and the test data values in the last two rows.
I appreciate your help with this!
Deirdre