# LFO-CV for multivariate autoregressive model

Hi everyone,

I am trying to implement the approximate LFO-CV from this vignette using Stan directly instead of brms. The model I am using is a multivariate autoregressive model with latent variables to infer the variance-covariance matrix. Also, I defined the log_lik in the generated quantities block.

I am bit stuck translating some of the parts of the vignette to make it work without brms. In the vignette, the line

``````fit_past <- update(fit, newdata = df_past, recompile = FALSE, cores = 4)
``````

refits the model to the data up to L, and the line

``````loglik <- log_lik(fit_past, newdata = df_oos, oos = oos)
``````

obtains the the log-likfor the data up to L+1. Is that right?

What would be the equivalent using Stan directly? Should I just refit the model to the time series going from 1 to L+1 and extract the log_lik for L+1?

Here is the Stan model:

``````data {
int<lower = 0> N;                   // Number of observations (rows in the data)
int<lower=1> S;                     // Number of Secies
int<lower=1> T;                     // Numer of years
// as the N, indicating which reef each row corresponds to
int<lower = 1, upper = T> time[N];  // integer vector with reef-time
// (not absolute time). Each reef starts at 1
int <lower=1> D;        // Number of latent dimensions
int Y[N,S];    // The outcome matrix
}
transformed data{
int<lower=1> M;
}
parameters {
real beta_mu;
real alpha_mu;
vector[S] log_lambda[N];  // latent poisson parameters

// top level, covariance matrix with latent factors
vector[M] L_t;   // lower diagonal elements of L
vector[D] L_d;   // diagonal elements of L
vector<lower=0>[S] psi;         // vector of variances

// species level
real<lower=0> beta_sigma;
real<lower=0> alpha_sigma;
vector[S] z_alpha;
vector[S] z_beta;

}
transformed parameters{
cov_matrix[S] Omega;   // Covariance matrix
matrix[S,S] L_Omega;
vector[S] alpha;   // Vector of intrinsic growth rates
vector[S] beta;   // matrix with competitive parameter

alpha = alpha_mu + alpha_sigma * z_alpha;
beta = beta_mu + beta_sigma * z_beta;

{
int idx;
idx = 0;
for(i in 1:(D-1)){
for(j in (i+1):D){
L[i,j] = 0; //constrain the upper triangular elements to zero
}
}
for(j in 1:D) {
L[j,j] = L_d[j];
for(i in (j+1):S) {
idx = idx + 1;
L[i,j] = L_t[idx];
}
}
}

Omega = L*L';
for (i in 1:rows(Omega))
Omega[i,i] = Omega[i,i] + psi[i];
L_Omega = cholesky_decompose(Omega);

}
model {

// The hyperpriors
alpha_mu ~ normal(0,1);
alpha_sigma ~ exponential(1);
z_alpha ~ normal(0,1);

beta_mu ~ normal(0,1);
beta_sigma ~ exponential(1);
z_beta ~ normal(0,1);

// the priors
L_d ~ normal(0,10);
L_t ~ normal(0,10);
psi ~ exponential(5);

for(n in 1:N){
if(time[n]==1){
log_lambda[n] ~ normal(0,5);
}
if(time[n]>1){
log_lambda[n] ~ multi_normal_cholesky(alpha + diag_matrix(beta)*log_lambda[n-1], L_Omega);
}
}

//The likelihood
for(s in 1:S){
target += poisson_log_lpmf(Y[,s] | log_lambda[1:N,s]);
}
}
generated quantities{
int<lower = 0> Y_pred[N,S];
vector[N] log_lik;
vector[S] log_lambda_pred[N];
matrix[S,S] Rho; // Correlation matrix
vector[S] sde;   // species' sd

for(d in 1:D){
if(L[d,d]<0){
L_invar[,d] = -1 * L[,d];
}
}
for(i in 1:S)
for(j in 1:S)
Rho[i,j] = Omega[i,j]/sqrt(Omega[i,i] * Omega[j,j]);

sde = sqrt(diagonal(Omega));

for(n in 1:N){
if(time[n]==1){
log_lambda_pred[n] = log_lambda[n];
}
if(time[n]>1){
log_lambda_pred[n] = multi_normal_cholesky_rng(alpha + diag_matrix(beta)*log_lambda_pred[n-1], L_Omega);
}
}

for(s in 1:S){
for(n in 1:N){
if(log_lambda_pred[n,s]>20){
Y_pred[n,s] = poisson_log_rng(20);
}else{
Y_pred[n,s] = poisson_log_rng(log_lambda_pred[n,s]);
}
log_lik[n] = poisson_log_lpmf(Y[n] | log_lambda[n]);
}
}

}

``````

And here is R code generating toy data and fitting the model (NB for the data simulated below, the model takes around 20 minutes to fit using my pc)

``````# simulate time series
Gompertz.sim <- function(init.pop, alpha, beta, time, sp, Rho, sd_noise){
sim_data <- matrix(NA, nrow = time, ncol = sp+1)
colnames(sim_data)<-c(c("Year"),as.character(1:sp))
sim_data<- as.data.frame(sim_data)
sim_data\$Year <- 1:time
community <- matrix(NA, nrow = time, ncol = sp)
community[1,] <- init.pop
log_lambda <- matrix(NA, nrow = time, ncol = sp)
log_lambda[1,] <- init.pop
for (t in 2:time) {
log_lambda[t,]<- alpha + beta*log_lambda[t-1,] +
t(rmvnorm2(1, Mu = rep(0, sp),
sigma = diag(sd_noise),
Rho = Rho))
for(s in 1:sp){
community[t,s] <- rpois(1,lambda = exp(log_lambda[t,s]))
}
}

for(index in 1:nrow(sim_data)){
sim_data[index,-(1)]<-community[index,]
}

return(results = list(abundance.array = community, Simulation = sim_data))
}

set.seed(2021)

P0<- 1
sp <- 20
t <- 50
alpha <- rnorm(sp, 0.4, 0.1)

beta <- rtruncnorm(sp, a = -Inf, b = 0.95, mean = 0.9, sd = 0.1)

sd_noise <- matrix(0, nrow = sp, ncol = sp)
diag(sd_noise) <- rlnorm(sp, -1.5, 0.1)

Rho <- rlkjcorr(1, sp, eta=0.5)
# Rho <- matrix(0, ncol = sp, nrow = sp)
# diag(Rho) <- 1

Sigma <- sd_noise%*%Rho%*%sd_noise
Sim.data <- Gompertz.sim(init.pop = P0, alpha = alpha, beta = beta, time = t, sp = sp, sd_noise = sd_noise, Rho = Rho)
matplot(matrix(rep(1:t, sp), nrow = t, ncol = sp), log(Sim.data\$abundance.array), type = rep("l",sp), lty = 1,lwd = 2, col = 1:sp)

sim.data_2 <- Sim.data\$Simulation

sim.data_2 <- sim.data_2 %>%
mutate(time = 1:n())

dat<-list(time = sim.data_2\$time,
T = max(sim.data_2\$time),
N = nrow(sim.data_2),
Y = sim.data_2[,-(c(1,ncol(sim.data_2)))],
S = ncol(sim.data_2[,-(c(1,ncol(sim.data_2)))]),
D = 9
)

MAR_model <- stan(file = "poi_MVLN_model_ppc.stan",
data = dat,
chains = 4,
cores = 4,
iter = 3000,
control = list(adapt_delta = 0.9, max_treedepth = 10))

``````