# Multistate model does not always initialize

Hi all,

I am hoping to get some ideas from folks - I am working on a multistate model to estimate bird mortality rates in different behavioral states (Following this paper: https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/1365-2656.13902). I was able to fit a simple model with my data, but when I make the transition rates time-dependent, it does not initialize. After some troubleshooting with colleagues we got it to work on some simulated data, but not the real data.

There are no differences in general structure between the simulated and real data and we have thoroughly checked to make sure the real data set is correct. In the full data sets, the distributions of each parameter are fairly similar between the simulated and real data.

I am wondering if anyone might have suggestions about what could be going wrong. Thinking maybe some of the parameters need to be constrained differently, or maybe there is a different way to think about incorporating time into the model. The Stan model and R code to simulate data and run the model are below, and I can also provide a subset of the real data. Thank you for your thoughts!

Stan Model:

``````data{
int<lower = 0> N;     //Number of individuals
int<lower = 0> P; // number of time intervals
int<lower = 1> l; // length of each time interval
real<lower = 0> c_p[P]; //midpoint of each time interval
int<lower = 0> max_U; // Maximum number of detections
int<lower = 0> max_U_plus_one; // max number of detections including last detection to TT
//int<lower = 0> TT;      // length of study
int<lower = 0> U[N];   // number of detections for each individual
int<lower = 0> d[N, max_U]; // interval within which each obs occurred
int<lower = 0> state[N, max_U];// states
real<lower = 0> delta[N, max_U_plus_one]; // time difference between each observation and the previous one
real<lower = 0> delta_int[N, max_U]; //number of whole intervals between each observation
real<lower = 0> b_r[N, max_U]; // time between detection and start of the interval it's in
real<lower = 0> b_g[N, max_U]; // time between each detection and end of the interval it's in
matrix<lower = 0>[1,4] f;//initial states
matrix<lower = 0>[4,1] ones;// a 1 for each state
}

parameters{
real<lower = 0> h[3];// hazard rate
real<lower = 0> k[3]; //shape parameter
real<lower = 0> g[3]; // scale parameter
real<lower = 0> mu[3];// detection rate
}

transformed parameters{
matrix[4,4] Q[P]; //transition rate matrix
vector<lower = 0>[4] lambda = rep_vector(0,4); // detection rate vector
real<lower = 0> eta12[P];   // Transition rate from state 1 to state 2
real<lower = 0> eta13[P];   // Transition rate from state 1 to state 3
real<lower = 0> eta23[P];   // Transition rate from state 2 to state 3

lambda[1]=mu[1]; // detection rates
lambda[2]=mu[2];
lambda[3]=mu[3];

for (p in 1:P){
eta12[p] = k[1]* c_p[p] ^ (k[1]-1)/g[1]^k[1];
eta13[p] = k[2]* c_p[p] ^ (k[2]-1)/g[2]^k[2];
eta23[p] = k[3]* c_p[p] ^ (k[3]-1)/g[3]^k[3];

// transition rate matrix - rates not listed are 0
Q[p] = rep_matrix(0,4,4);
Q[p,1,1] = -(eta12[p]+eta13[p]+h[1]); //stay in 1
Q[p,1,2] = eta12[p];// 1 to 2
Q[p,1,3] = eta13[p]; //1 to 3
Q[p,1,4] = h[1]; // hazard 1
Q[p,2,2] = -(eta23[p]+h[2]);
Q[p,2,3] = eta23[p]; //2 to 3
Q[p,2,4] = h[2]; // hazard 2
Q[p,3,3] = -(h[3]);  // stay in 3
Q[p,3,4] = h[3]; // hazard 3
}

}

model{
matrix[1,4] acc; // temp variable, holds intermediate values for calculating likelihood
matrix[4,4] Gamma; // temp variable, holds likelihood of each detection
matrix[4,4] Gamma_temp;

//priors (following Rushing 2023 appendix 3)
h ~ gamma(1,4); // hazard rate
k ~ gamma(1,1); // shape parameter
g ~ gamma(1,1); // scale parameter
mu ~ gamma(1,4); // detection rates

//likelihood
for (i in 1:N){
matrix[4,4] Omega[U[i]+1];
if (U[i]>0){ // this section is the equation Gamma[i,u] = exp{(Q-Lambda)* Deltat[i,u]} from paper
for (j in 1:U[i]){
if(j==1){
if(delta_int[i,1]==0){ // if detections are in the same interval
Gamma = diag_post_multiply(matrix_exp((Q[1]-diag_matrix(lambda))*delta[i,1]), lambda);
Omega[1] = rep_matrix(0,4,4);
Omega[1,1,state[i,1]] = Gamma[1, state[i,1]];
}else{
Gamma_temp = matrix_exp((Q[1] - diag_matrix(lambda))*l);
if (delta_int[i,1]>1){
for (p in 1:(d[i,1]-1)){
Gamma_temp*= matrix_exp((Q[p]- diag_matrix(lambda))*l);
} //p
} //if
Gamma_temp *= matrix_exp((Q[d[i,1]]- diag_matrix(lambda)) * b_r[i,1]);
Gamma = diag_post_multiply(Gamma_temp, lambda);
Omega[1] = rep_matrix(0,4,4);
Omega[1,1,state[i,1]] = Gamma[1, state[i,1]];
}
}else{
if(delta_int[i,j]==0){
//if detections are within the same interval
Gamma=diag_post_multiply(matrix_exp((Q[d[i, j]] - diag_matrix(lambda))* delta[i,j]), lambda);
Omega[j]=rep_matrix(0,4,4);
Omega[j, state[i, j-1], state[i,j]] = Gamma[state[i,j-1], state[i,j]];
}else{
Gamma_temp = matrix_exp((Q[d[i, j-1]] - diag_matrix(lambda))* b_g[i, j-1]);

if(delta_int[i,j]>1){
for (p in (d[i, j-1]+1):d[i,j]-1){
Gamma_temp *= matrix_exp((Q[p]-diag_matrix(lambda))*l);
}//p
}//if
Gamma = diag_post_multiply(Gamma_temp, lambda);
Omega[j] = rep_matrix(0,4,4);
Omega[j, state[i, j-1], state[i,j]] = Gamma[state[i, j-1], state[i, j]];
} //else
}//else
} //j

// last detection to end of study
Gamma = matrix_exp((Q[d[i, U[i]]]- diag_matrix(lambda))* b_g[i, U[i]]);

if(d[i, U[i]]<P){
for (p in (d[i, U[i]]+1):P){
Gamma *= matrix_exp((Q[p]-diag_matrix(lambda))*l);
}
Omega[U[i]+1] = rep_matrix(0,4,4);
Omega[U[i]+1, state[i, U[i]], state[i, U[i]]:4] = Gamma[state[i, U[i]], state[i, U[i]]:4];

// likelihood for individual i
acc = f * Omega[1];
for (j in 1: U[i]+1){
acc *= Omega[j];
} //j

target += log(acc*ones);

}

}//i

}//N
}//end
``````

R Code to simulate data and run Stan Model

``````library(rstan)
library(expm)

# Runs with simulated data, but not real data
#Errors when running with real data:
#Chain 1: Rejecting initial value:
#Chain 1:   Log probability evaluates to log(0), i.e. negative infinity.
#Chain 1:   Stan can't start sampling from this initial value.
#Chain 1:
#Chain 1: Initialization between (-2, 2) failed after 100 attempts.
#Chain 1:  Try specifying initial values, reducing ranges of constrained values, or #reparameterizing the model.
#[1] "Error : Initialization failed."

# data simulation (adapted from Rushing 2023)
set.seed(4457)
## Set parameters
N <- 20 # Number of individuals
T <- 100 # Length of study
h <- c(0.0025, 0.01, 0.001) # Hazard rates
gamma <- c(6, 4, 8) # Weibull scale parameter
kappa <- c(8, 10, 8) # Weibull shape parameter
lambda <- c(0.2, 0.1, 0.3, 0) # Detection rates

b <- seq(from = 0, to = T, by = 5)
# Interval mid-points, assuming 5-day intervals
c_p <- seq(from = 2.5, to = 97.5, by = 5)/10
# Number of intervals
P <- length(c_p)
# Indicator of what interval each day is in
tau <- rep(1:length(c_p), each = 5)

# Transition rates

eta12 <- kappa[1] * c_p ^ (kappa[1] - 1)/gamma[1] ^ kappa[1]
eta13 <- kappa[2] * c_p ^ (kappa[2] - 1)/gamma[2] ^ kappa[2]
eta23 <- kappa[3] * c_p ^ (kappa[3] - 1)/gamma[3] ^ kappa[3]

## Define transition rate and detection matrices
Q <- array(0, dim = c(T, 4, 4))

for(t in 1:T){
Q[t, 1, 1] <- -(eta12[tau[t]] + eta13[tau[t]] + h[1])
Q[t, 1, 2] <- eta12[tau[t]]
Q[t, 1, 3] <- eta13[tau[t]]
Q[t, 1, 4] <- h[1]
Q[t, 2, 2] <- -(eta23[tau[t]] + h[2])
Q[t, 2, 3] <- eta23[tau[t]]
Q[t, 2, 4] <- h[2]
Q[t, 3, 3] <- -(h[3])
Q[t, 3, 4] <- h[3]
}

## Simulate true states
s <- matrix(NA, nrow = N, ncol = T)
s[,1] <- 1
for(i in 1:N){ for(t in 2:T){
s[i, t] <- which(rmultinom(1, 1, prob = expm(Q[t,,])[s[i, t - 1],]) == 1)
}
}
# Change state 3 to 0 (dead)
s[s == 4] <- 0

## Simulate encounter histories as a list because
## history length differs among individuals
det_list <- state_list <- vector(mode = "list", length = N)
U1 <- U2 <- U3 <- vector(length = N)

# How many days was each individual in each state?
s1 <- apply(s, 1, function(x) sum(x == 1))
s2 <- apply(s, 1, function(x) sum(x == 2))
s3 <- apply(s, 1, function(x) sum(x == 3))

for(i in 1:N){
U1[i] <- rpois(1, lambda[1] * s1[i]) # Number of detections
dets1 <- sort(runif(U1[i], 0, s1[i])) # Time of detections
state1 <- rep(1, U1[i])
U2[i] <- rpois(1, lambda[2] * s2[i]) # Number of detections
dets2 <- sort(runif(U2[i], 0, s2[i])) + s1[i] # Time of detections
state2 <- rep(2, U2[i])
U3[i] <- rpois(1, lambda[3] * s3[i]) # Number of detections
dets3 <- sort(runif(U3[i], 0, s3[i])) + s1[i] + s2[i] # Time of detections
state3 <- rep(3, U3[i])
det_list[[i]] <- c(dets1, dets2, dets3)
state_list[[i]] <- c(state1, state2, state3)
}

# Number of detections for each individual
U <- U1 + U2 + U3

# Convert detections to matrix
det <- state <- matrix(0, nrow = N, ncol = max(U))
for(i in 1:N){ if(U[i] > 0){
det[i, 1:U[i]] <- det_list[[i]]
state[i, 1:U[i]] <- state_list[[i]]
}
}

# Calculate time difference between detections (including last detection to end of study)
delta <- matrix(0, nrow = N, ncol = max(U) + 1)
delta[,1] <- det[,1]

for(i in 1:N){ if(U[i] > 1){
for(j in 2:U[i]){
delta[i, j] <- det[i, j] - det[i, j - 1]
} }
delta[i, U[i] + 1] <- T - max(det[i,])
}

# Number of occasions between detections; upper/lower intervals
d <- delta_int <- b_g <-  b_r <- matrix(0, nrow = N, ncol = max(U))

for(i in 1:N){
if(U[i] > 0){
d[i, 1:U[i]] <- as.numeric(cut(det[i, 1:U[i]], breaks = b))
delta_int[i, 1] <- d[i, 1] - 1
b_r[i, 1] <- det[i, 1] - b[d[i, 1]]
b_g[i, 1] <- b[d[i, 1] + 1] - det[i, 1]
if(U[i] > 1){
for(j in 2:U[i]){
delta_int[i, j] <- d[i, j] - d[i, j - 1]
b_r[i, j] <- det[i, j] - b[d[i, j]]
b_g[i, j] <- b[d[i, j] + 1] - det[i, j]
}
}
}
}

stan_data <- list(N = N,
max_U = max(U),
max_U_plus_one = max(U) + 1,
P = length(c_p),
l = 5,
U = U,
delta = delta,
d = d,
state = state,
delta_int = delta_int,
b_r = b_r,
b_g = b_g,
c_p = c_p,
f = matrix(c(1,0,0,0),1,4),
ones = matrix(c(1,1,1,1), 4, 1))

# Initial values
initf1 <- function() {
list(h = runif(3, 0, 0.015),
g = runif(3, 5, 15),
k = runif(3, 7, 10),
mu = runif(3, 0, 0.25))
}

# run model - few iterations/1 chain for troubleshooting
fit <- stan(file = "./model.stan",
pars = c("h", "g", "k", "mu"),
data = stan_data,
init = initf1,
warmup = 50,
iter = 100,
chains = 1,
cores = 1,
thin = 1)

``````