Hello,

I am trying to calculate the log-likelihoods for a MARSS (multivariate autoregressive state space) model in Stan. Because the timeseries being fed to the model are different lengths, I am using a nested for loop that iterates through each timeseries, and then through the number of observations within a given timeseries in the model section. The model works, however, when I try to re-use the same nested for loop structure to calculate the log-likelihoods in the generated quantities section, I only receive log likelihoods for the first timeseries, and not the remaining 9 other timeseries. Is there something different about the generated quantities section that prevents it from using nested for loops?

Code below:

// 12 timeseries and 3 states MARSS model

data {

int<lower=0> obs; //total number of real observations across all timeseries

int<lower=0> N; //number of years in the longest timeseries - max-min year + 1

int<lower=0> K; //number of timeseries in the observation portion of the model

int<lower=0> J; //number of states

int<lower=0> P[K]; //number of real data points in each timeseries of observations

vector[J] p_st; //mean of first timestep in the timeseries by state grouping

vector[obs] tsx; //vector of observations

int<lower=0> ii[obs]; //vector of integers describing the indexed locations of each observation relative to the state timeseries

}

parameters {

matrix[J,N] x; //estimated true underlying data (the states)

vector<lower=0>[J] sigma; //standard deviation of process

vector<lower=0>[2] sigma_os; //standard deviation of observation

cholesky_factor_corr[J] L; //lower triangular of a positive definite correlation matrix for the process

}

transformed parameters {

//temporal autocorrelation function for the state portion of the model

matrix[J,N] mu;

matrix[K,N] x_m;

vector<lower=0>[K] sigma_o; //standard deviation of observation

mu[,1] = p_st;

for (i in 2:N){

mu[,i] = x[,i-1];

}

//matching states to observed timeseries (12 timeseries, 3 states)

x_m[1,] = x[1,];

x_m[2,] = x[1,];

x_m[3,] = x[1,];

x_m[4,] = x[1,];

x_m[5,] = x[2,];

x_m[6,] = x[2,];

x_m[7,] = x[1,];

x_m[8,] = x[1,];

x_m[9,] = x[2,];

x_m[10,] = x[3,];

x_m[11,] = x[3,];

x_m[12,] = x[2,];

sigma_o[1] = sigma_os[1];

sigma_o[2] = sigma_os[1];

sigma_o[3] = sigma_os[1];

sigma_o[4] = sigma_os[1];

sigma_o[5] = sigma_os[2];

sigma_o[6] = sigma_os[1];

sigma_o[7] = sigma_os[1];

sigma_o[8] = sigma_os[1];

sigma_o[9] = sigma_os[2];

sigma_o[10] = sigma_os[2];

sigma_o[11] = sigma_os[2];

sigma_o[12] = sigma_os[1];

}

model { //this section and nested for loop works

int pos;

pos = 1;

for(k in 1:K){

for(i in 1:P[k]){

segment(tsx, pos, P[k])[i] ~ normal(x_m[k, segment(ii, pos, P[k])[i]], sigma_o[k]); //observation portion of the model (no intercept)

}

pos = pos + P[k]; //here we chop the observations and the positions of the observations relative to the state time-series

} //beta is a categorical variable that allows observations to be higher or lower than average (i.e., allows for persistent bias)

//beta2 is the effect of whether the calculation of productivity/survival uses smolts or fry

//note it is SUPER IMPORTANT that the recursive “pos” calculation occurs in the k loop, but NOT inside the i loop (the model blows up)

x[,1] ~ multi_normal_cholesky(p_st, diag_pre_multiply(sigma, L));

for(n in 2:N){

x[,n] ~ multi_normal_cholesky(mu[,n], diag_pre_multiply(sigma, L));

}

sigma ~ cauchy(1.0, 0.5); //SD of process

sigma_os ~ cauchy(1.0, 0.5); //SD of observations

L ~ lkj_corr_cholesky(2); //prior for lower triangular of process

}

generated quantities {//this section with the same nested for loop system does not work

vector[obs] log_lik;

int post;

post = 1;

for(k in 1:K){

for(i in 1:P[k]){

log_lik[i] = normal_lpdf(segment(tsx, post, P[k])[i] | x_m[k, segment(ii, post, P[k])[i]], sigma_o[k]);

}

post = post + P[k];

}

}