Hi everyone,
I’m a huge fan of Stan and have used it to fit relatively simple statistical models, but I’m still relatively new to the world of state-space modeling. I’ve been trying to build a three-stage fisheries state-space model, and ran into some issues with biased estimation. The process model statements assume a log-normal distribution and look like this:
\ln(A_t) \sim N\big(\ln(S_{t-1}e^{-(F_{2t-1} + M_S)} + J_{t-1}e^{-(F_{2t-1} + M_J)}), \sigma^2_A \big)
\ln(J_t) \sim N\big(\ln(\alpha S_{t-1}e^{-\beta S_{t-1}}),\sigma^2_J \big)
\ln(S_t) \sim N\big( \ln(A_te^{-(F_{1t-1} + M_A)}, \sigma^2_S \big)
where A_t, S_t, and J_t are latent states for adults, spawners, and juveniles, respectively. Meanwhile, M_A, M_S, and M_J are natural mortality rates for the respective states and F_{1t} and F_{2t} refer to fishing mortality terms for the first (F_{1t}) and second (F_{2t}) halves of the year. Finally, \alpha and \beta are stock-recruit parameters. These terms are related to both observations of state abundances (A_{obs_t}, S_{obs_t}, and J_{obs_t}) and two catch equations so that the parameters are estimable.
Catch equations:
C_{1t} = \frac{F_{1t}}{F_{1t} + M_A}\big(1-e^{-(F_{1t} + M_A)} \big)A_{t}
C_{2t} = \frac{F_{2t}}{F_{2t} + M_S}\big(1-e^{-(F_{2t} + M_S)} \big)S_{t} + \frac{F_{2t}}{F_{2t} + M_J}\big(1-e^{-(F_{2t} + M_J)} \big)J_{t}
Observations:
A_{obs_t} \sim N(A_t/Area, \sigma^2_{A_{obs_t}})
S_{obs_t} \sim N(S_t/Area, \sigma^2_{S_{obs_t}})
J_{obs_t} \sim N(J_t/Area, \sigma^2_{J_{obs_t}})
Area is just a scalar which is supplied externally to the model. In addition, both catches and observation errors are supplied externally to the model.
Here is the r code that generates simulated data I used to evaluate the model:
## Packages
library(ggplot2)
library(rstan)
Process_model <- function(A_0 = 1.7e8, ## Adult starting state
J_0 = 2e8, ## Juvenile starting state
a = 10, ## Ricker productivity parameter
MA = 0.45, ## Adult natural mortality
MS = 0.45, ## Spawner natural mortality
MJ = 0.9, ## Juvenile natural mortality
beta = 1e-8, ## Ricker density dependence
C = sort(abs(rnorm(33, 50e6, 10e6)), decreasing = T), ## Catch at time t
Ntime = 33, ## Number of timesteps
Area = 1.16e10, ## Area of waterbody to scale up observations
OA_SE = 0.001, ## Adult observation error
OS_SE = 0.001, ## Spawner observation error,
OJ_SE = 0.001, ## Juvenile observation error
sigma_e_A = 0.1, ## Adult process error
sigma_e_J = 0.1, ## Juvenile process error
sigma_e_S = 0.1, ## Spawner process error
CF = 0.1 ## Catch fraction for first and second half of the year
){
## Vector for fishing mortality
F1 <- rep(NA, Ntime)
F2 <- rep(NA, Ntime)
## Divide catch into first and second half of year
C1 <- C*(CF)
C2 <- C*(1-CF)
## Simulate process errors
epsilon_A <- rnorm(Ntime, 0, sigma_e_A)
epsilon_J <- rnorm(Ntime, 0, sigma_e_J)
epsilon_S <- rnorm(Ntime, 0, sigma_e_S)
## State matrix
N <- matrix(0, nrow = Ntime, ncol = 3)
## Baranov's catch equation for Adults (C1)
Baranov_1 <- function(F., ## Fishing mortality
C, ## Catch
M, ## Natural mortality
A ## Adult abundance
){
(F./(F.+M))*(1-exp(-(F.+M)))*(A) - C
}
## Baranov's catch equation for spawners and juveniles (C2)
Baranov_2 <- function(F., ## Fishing mortality
C, ## Catch
MS, ## Spawner natural mortality
MJ, ## Juvenile natural mortality
S, ## Spawner abundance
J ## Juvenile abundance
){
(F./(F.+MS))*(1-exp(-(F.+MS)))*(S) + (F./(F.+MJ))*(1-exp(-(F.+MJ)))*(J)- C
}
## Starting values
### For each catch, first try to numerically approximate F, if C1/A or C2/(S+J) >1, increase F to 10000 to crash population
F1_test <- try(uniroot(Baranov_1, lower = 0, upper = 2, M= MA, A = A_0, C = C1[1])$root, silent = T)
F1[1] <- ifelse(class(F1_test) == "try-error", 10000, F1_test)
S_0 <- A_0*exp(-(F1[1]+MA))
F2_test <- try(uniroot(Baranov_2, lower = 0, upper = 2, MS= MS, MJ = MJ, S = S_0, J = J_0, C = C2[1])$root, silent = T)
F2[1] <- ifelse(class(F2_test) == "try-error", 10000, F2_test)
N[1,] <- c(J_0, A_0, S_0)
## Population dynamics loop
for (i in 2:Ntime){
N[i,1] <- a*N[i-1,3]*exp(-beta*N[i-1,3])*exp(epsilon_J[i-1])
N[i,2] <- (N[i-1,3]*exp(-(MS + F2[i-1]))+ N[i-1,1]*exp(-(MJ + F2[i-1])))*exp(epsilon_A[i-1])
F1_test <- try(uniroot(Baranov_1, lower = 0, upper = 2, M= MA, A = N[i,2], C = C1[i])$root, silent = T)
F1[i] <- ifelse(class(F1_test) == "try-error", 10000, F1_test)
N[i,3] <- N[i,2]*exp(-(F1[i]+MA))*exp(epsilon_S[i])
F2_test <- try(uniroot(Baranov_2, lower = 0, upper = 2, MS= MS, MJ = MJ, S = N[i,3], J = N[i,1], C = C2[i])$root, silent = T)
F2[i] <- ifelse(class(F2_test) == "try-error", 10000, F2_test)
}
## Sample latent states for each stage of population with error
OA <- rnorm(Ntime, N[,2]/Area, OA_SE)
OS <- rnorm(Ntime, N[,3]/Area, OS_SE)
OJ <- rnorm(Ntime, N[,1]/Area, OJ_SE)
Endyear <- 1990 + Ntime-1
Simulated_Data <- data.frame(
Year = c(1990:Endyear),
J_state = N[,1],
A_state = N[,2],
S_state = N[,3],
OA = OA,
OS = OS,
OJ = OJ,
OA_SE = OA_SE,
OS_SE = OS_SE,
OJ_SE = OJ_SE,
C1 = C1,
C2 = C2,
F1 = F1,
F2 = F2,
epsilon_A = epsilon_A,
epsilon_J = epsilon_J,
epsilon_S = epsilon_S
)
return(Simulated_Data)
}
Simulated_data <- Process_model()
dataList <- list('A' = Simulated_data$OA,
'J' = Simulated_data$OJ,
'S' = Simulated_data$OJ,
'J_obs' = Simulated_data$DJ_SE,
'S_obs' = Simulated_data$OJ_SE,
'A_obs' = Simulated_data$DA_SE,
'T' = nrow(Simulated_data),
'C1' = Simulated_data$C1,
'C2' = Simulated_data$C2,
'Area' = 1.16e10
)
Finally, here is the model I wrote in Stan. After doing some reading from the forums here, I had the model estimate the F1 and F2 values and tried to tie it to observed catch by adding a small amount of observation error for each catch likelihood. I’ve tried multiple re-parameterizations such as non-centering (example below) and centering (in the model block). I’ve also tried estimating all states as transformed parameters multiplied by individual residuals for each state/time step. Essentially, unless I highly constrain the parameters, the model converges on estimates which are very, very different from the true values. When I do constrain the parameters, it results in divergences and warnings about the max treedepth. It could well be that I am asking too much of the data, but these are relatively common models in fisheries population dynamics and commonly implemented in AD Model Builder. I was wondering if anyone could point out where I am going wrong with respect to the code, or just list a set of suggestions for how to re-parameterize things to facilitate unbiased estimation.
data {
int<lower=0> T; // Timesteps
vector[T] A; // Adults observations
vector[T] J; // Juvenile observations
vector[T] S; // Spawner observations
vector[T] J_obs; // Juvenile observation error
vector[T] A_obs; // Adult observation error
vector[T] S_obs; // Spawner observation error
vector[T] C1; // Observed catch (first half of year)
vector[T] C2; // Observed catch (second half of year)
real Area; // Area of water body to scale observations at m^2 resolution
}
parameters {
real L_MA; // Log adult natural mortality
real L_MS; // Log spawner natural mortality
real L_MJ; // Log juvenile natural mortality
vector[T] L_F1; // Log adult fishing mortality vector
vector[T] L_F2; // Log juvenile/spawner fishing mortality vector
real L_alpha; // Log Ricker productivity parameter
real L_beta; // Log Ricker density dependence
real<lower = 0> sigma_epsilon_A;// Process error for adults
real<lower = 0> sigma_epsilon_J;// Process error for juveniles
real<lower = 0> sigma_epsilon_S;// Process error for spawners
// Use non-centered values to facilitate convergence
vector[T] J_raw;
vector[T] A_raw;
vector[T] S_raw;
}
transformed parameters {
vector[T] L_J_est; // Log Juvenile states
vector[T] L_S_est; // Log Spawner states
vector[T] L_A_est; // Log Adult states
// Below are exponentiated parameters back transformed from log-space
vector[T] J_est;
vector[T] A_est;
vector[T] S_est;
vector[T] F1;
vector[T] F2;
real MA;
real MS;
real MJ;
real<lower = 0> alpha;
real beta;
F1 = exp(L_F1);
F2 = exp(L_F2);
MA = exp(L_MA);
MJ = exp(L_MJ);
MS = exp(L_MS);
beta = exp(L_beta);
alpha = exp(L_alpha);
//Evaluate adult states with observed values and catchability estimates
// Supply starting states and estimate in log-space
L_J_est[1] = 19.11383 + sigma_epsilon_J*J_raw[1];
L_A_est[1] = 18.95131 + sigma_epsilon_A*A_raw[1];
L_S_est[1] = L_A_est[1]-F1[1]-MA + sigma_epsilon_S*S_raw[1];
A_est[1] = exp(L_A_est[1]);
J_est[1] = exp(L_J_est[1]);
S_est[1] = exp(L_S_est[1]);
// Run model to estimate states and catch
for (t in 2:T){
L_J_est[t] = L_alpha + L_S_est[t-1] -beta*S_est[t-1] + sigma_epsilon_J*J_raw[t];
L_A_est[t] = log(S_est[t-1]*exp(-F2[t]-MS) + J_est[t-1]*exp(-F2[t]-MJ)) + sigma_epsilon_A*A_raw[t];
L_S_est[t] = L_A_est[t]-F1[t]-MA + sigma_epsilon_S*S_raw[t];
A_est[t] = exp(L_A_est[t]);
J_est[t] = exp(L_J_est[t]);
S_est[t] = exp(L_S_est[t]);
}
}
model {
for (t in 1:T){
A[t] ~ normal(A_est[t]/Area, A_obs[t]);
S[t] ~ normal(A_est[t]/Area, S_obs[t]);
J[t] ~ normal(J_est[t]/Area, J_obs[t]);
C1[t] ~ normal((F1[t]/(F1[t]+MA))*(1-exp(-(F1[t]+MA)))*(A_est[t]), 1e5);
C2[t] ~ normal((F2[t]/(F2[t]+MS))*(1-exp(-(F2[t]+MS)))*(S_est[t]) + (F2[t]/(F2[t]+MJ))*(1-exp(-(F2[t]+MJ)))*(J_est[t]), 1e6);
}
// priors (in log space)
A_raw ~std_normal(); //non-centered values for each state
J_raw ~std_normal();
S_raw ~std_normal();
L_alpha ~ normal(1.538439,1);
L_beta ~ normal(-18.392383, 1);
L_F1 ~ normal(-0.1, 0.3);
L_F2 ~ normal(-0.1, 0.3);
L_MA ~ normal(-0.8, 0.1);
L_MS ~ normal(-0.8, 0.1);
L_MJ ~ normal(-0.1, 0.1);
sigma_epsilon_A ~ exponential(10);
sigma_epsilon_J ~ exponential(10);
sigma_epsilon_S ~ exponential(10);
}
generated quantities{
//for plotting states and indices
vector[T] J_index;
vector[T] S_index;
vector[T] A_index;
vector[T] C1_est;
vector[T] C2_est;
for (t in 1:T){
J_index[t] = J_est[t]/Area;
S_index[t] = S_est[t]/Area;
A_index[t] = A_est[t]/Area;
C1_est[t] = (F1[t]/(F1[t]+MA))*(1-exp(-(F1[t]+MA)))*(A_est[t]);
C2_est[t] = (F2[t]/(F2[t]+MS))*(1-exp(-(F2[t]+MS)))*(S_est[t]) + (F2[t]/(F2[t]+MJ))*(1-exp(-(F2[t]+MJ)))*(J_est[t]);
}
}
Thanks in advance for your time and help. I’ve seen very little on Fisheries SSM’s on Stan, and hope that Stan becomes increasingly used in this domain.
EDIT: I have re-parameterized the model so that M_S, M_A and M_J are now M/2, M/2 and M to simplify and improve identifiability to the model. I’m still getting bad values.