Hi everyone,

I am trying to implement a MAR/VAR model by Ovaskainen et al. 2017. I have simplified the model by assuming species only interact with individuals of their own species (i.e. no interspecific interactions). I have focused on estimating the variance-covariance matrix assuming a multiplicative gamma prior on the factor loadings (Bhattacharya and Dunson 2011). I have used some code from the Stan forum, mainly from here.

Here is the stan code:

```
data {
int<lower=0> sp; // Number of species
int<lower=0> T; // Numer of time steps
int <lower=0> nf; // Number of latent variables
vector[sp] Y[T]; // The outcome matrix
}
parameters {
real<lower=0> a1;
real<lower=0> a2;
vector[sp] r;
vector[sp] a;
vector<lower=0>[nf] delta;
matrix<lower=0>[sp,nf] phi;
matrix[sp,nf] lam;
vector<lower=0>[sp] s2i;
}
transformed parameters{
vector<lower=0>[nf] theta;
vector<lower=0>[sp] s2;
matrix[sp,sp] omega;
theta[1] = delta[1];
for(l in 2:nf)
theta[l] = theta[l-1] * delta[l];
s2 = 1.0 ./ s2i;
omega = diag_matrix(s2) + lam *lam';
}
model {
// priors
a1 ~ gamma(2.0, 1.0);
a2 ~ gamma(2.0, 1.0);
r ~ normal(0,1);
a ~ normal(0,1);
delta[1] ~ gamma(a1, 1.0);
for (h in 2:nf)
delta[h] ~ gamma(a2, 1.0);
for (s in 1:sp)
for (l in 1:nf)
phi[s, l] ~ gamma(3.0 / 2.0, 3.0 / 2.0);
for (s in 1:sp)
for (l in 1:nf)
lam[s, l] ~ normal(0.0, 1.0 / phi[s, l] / theta[l]);
for (s in 1:sp)
s2i[s] ~ gamma(1.0, 0.3);
//Likelihood
for (t in 2:T)
Y[t] ~ multi_normal(r + diag_matrix(a) * Y[t-1], omega);
}
generated quantities{
vector[sp] sde;
matrix[sp, sp]Rho;
sde = sqrt(s2);
for(i in 1:sp)
for(j in 1:sp)
Rho[i,j] = omega[i,j]/sqrt(omega[i,i] * omega[j,j]);
}
```

And here is the R code simulating the data and fitting the model

```
# Clean the environment
rm(list = ls())
# Load libraries
library(rstan)
library(mvtnorm)
library(rethinking)
library(truncnorm)
# simulate time series following a gompertz curve. Simulates species biomass on a log scale
Gompertz.sim <- function(init.pop, r, alpha, time, sp, Rho, sd_noise){
metacommunity <- matrix(NA, nrow = time, ncol = sp)
metacommunity[1,] <- init.pop
for (t in 2:time) {
metacommunity[t,]<- r + alpha*metacommunity[t-1,] + t(rmvnorm2(1, Mu = rep(0, sp), sigma = diag(sd_noise), Rho = Rho))
}
return(metacommunity)
}
P0<- 1 # Initial population biomass for all species on log scale
sp <- 10 # Number of species
t <- 200 # Number of timesteps
r <- rnorm(sp, 0.65, 0) # intrinsict grwoth rate for all species
alpha <- rtruncnorm(sp, a = -Inf, b = 0.95, mean = 0.94, sd = 0.1) # Density dependence for all sp
# Process error standard deviation for all sp
sd_noise <- matrix(0, nrow = sp, ncol = sp)
diag(sd_noise) <- rlnorm(sp, -1.5, 0.1)
Rho <- rlkjcorr(1, sp, 2) # Correlation matrix
Sigma <- sd_noise%*%Rho%*%sd_noise # variance-covariance matrix
# Simulate the community dynamics
Sim.data <- Gompertz.sim(init.pop = P0, r = r, alpha = alpha, time = t, sp = sp, sd_noise = sd_noise, Rho = Rho)
# Prepare the stan data
dat<-list(Y = Sim.data,
sp = ncol(Sim.data),
T=nrow(Sim.data),
nf = 2)
# Estimate the posterior
MAR_model <- stan(file = "MAR_Ovas.stan",
data = dat,
chains = 4,
cores = 4,
iter = 2000,
control = list(adapt_delta =0.9))
```

The model recovers the true parameter fairly well. All the parameters seem to converge (good Rhat, n_eff and traceplots) except for the factor ladings `lam`

, which do fine if I increase the number of iterations from 2000 to 15000.

I am new to Stan and latent variable models. Any ideas how to make my model specification more efficient? Or, any other suggestion on how to estimate the factor loadings using other priors?

Thanks,

Alfonso