Hi all,
I am trying to fit a mark-recapture model with a single explanatory covariate using simulated data to make sure that I can recover parameters when using real data. I have fit two different models, one with varying intercepts only and a second model with varying intercepts and slopes. Both “recapture” (pun intended) the simulated parameter estimates, but the standard deviation increases in the varying intercepts/slopes model from 0.05 in the varying intercepts only to .38 in the varying intercepts/slopes model. I am not sure why this is happening and would appreciate if anyone could help me with the answer.
Here is the R code for simulating the data and fitting the models.
library(rstan)
library(brms)
### functions
# Define function to simulate a capture-history (CH) matrix
simul.cjs <- function(PHI, P, marked){
n.occasions <- dim(PHI)[2] + 1
CH <- matrix(0, ncol = n.occasions, nrow = sum(marked))
# Define a vector with the occasion of marking
mark.occ <- rep(1:length(marked), marked[1:length(marked)])
# Fill the CH matrix
for (i in 1:sum(marked)){
CH[i, mark.occ[i]] <- 1 # Write an 1 at the release occasion
if (mark.occ[i]==n.occasions) next
for (t in (mark.occ[i]+1):n.occasions){
# Bernoulli trial: does individual survive occasion?
sur <- rbinom(1, 1, PHI[i,t-1])
if (sur==0) break # If dead, move to next individual
# Bernoulli trial: is individual recaptured?
rp <- rbinom(1, 1, P[i,t-1])
if (rp==1) CH[i,t] <- 1
} #t
} #i
return(CH)
}
createX = function(MarkTot,n.occasions){
out = NULL
init = runif(MarkTot,-1,1) ## initial values
for (i in 1:n.occasions){
if(i==1){out = init}
else {
tmp = init + rnorm(MarkTot,0,.1)
out = cbind(out,tmp)
}
}
return(out)
}
f_createPHI = function(b0,b1,X,v.ind){
out = NULL
for (i in 1:ncol(X)){
if (i == 1){
out = inv.logit(b0 + b1*X[,i] + rnorm(nrow(X),0,v.ind^.5))
}
else {
tmp = inv.logit(b0 + b1*X[,i] + rnorm(nrow(X),0,v.ind^.5))
out = cbind(out,tmp)
}
}
return(out)
}
f_createPHI_group = function(b0_rand,b1_rand,X,v.ind,groups){
outPHI = NULL
for (i in 1:nlevels(as.factor(groups))){
tmpRow = which(groups %in% i) ### get position
tmpX = X[tmpRow,]
if (i ==1){
outPHI = f_createPHI(b0_rand[i],b1_rand[i],tmpX,v.ind)
}
else {
tmpPHI = f_createPHI(b0_rand[i],b1_rand[i],tmpX,v.ind)
outPHI = rbind(outPHI,tmpPHI)
}
}
return(outPHI)
}
logit2prob <- function(logit){
odds <- exp(logit)
prob <- odds / (1 + odds)
return(prob)
}
inv.logit = function(x) {
out = exp(x)/(1 + exp(x))
return(out)
}
#### parameters for stan ####
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
## MCMC settings
ni <- 5000
nt <- 1
nb <- 1000
nc <- 3
### scripts
## data generation
N = 100 ## number of marked individuals
n.occasions <- 20 # Number of capture occasions
marked <- rep(N, n.occasions-1) # Annual number of newly marked individuals
MarkTot = sum(marked)
v.ind = 0
n_groups = 10 ### random groups
p = .95 ### detection error
# covariates
b0 = .2
b1 = .5
b0_rand = rnorm(n_groups,b0,0.1) ### add randomness to intercept
b1_rand = rnorm(n_groups,b1,0) ### add randomness to slope
b0_rand = c(0.351005087878791, 0.13998088305613, 0.0977846443122724, 0.337444275403668,
0.133204648339682, 0.120816725735267, 0.210225911307954, 0.243564746300285,
0.103536857740947, 0.181705097704071)
b1_rand = c(0.457302607240773, 0.435317530603031, 0.685770666151242, 0.410756965622094,
0.481328199989342, 0.528539544895223, 0.429505336385031, 0.366877589483642,
0.56611632776759, 0.379166507209119)
plot(-1:1,-1:1)
for(i in 1:length(b0_rand)){
abline(b0_rand[i],b1_rand[i])
}
X = createX(MarkTot,n.occasions)
groups = sort(rep(1:n_groups,(N*(n.occasions - 1))/n_groups)) ### make group covariate
PHI = f_createPHI_group(b0_rand,b1_rand,X,v.ind,groups)
P = matrix(p,ncol=n.occasions,nrow = MarkTot)
# Simulate capture-histories
CH <- simul.cjs(PHI, P, marked)
stan_data = list(y=CH,n_occasions = ncol(CH),nind = nrow(CH),
X = X, n_groups = nlevels(as.factor(groups)),group = groups,
n_occ_minus_1 = ncol(CH) - 1)
### Random intercepts only
randInt.fit <- stan(file = "cjs_randInt.stan",
data = stan_data,chains = nc, iter = ni,
warmup = nb, thin = nt,control = list(adapt_delta = 0.90))
stan_trace(randInt.fit, pars= c("alpha","beta","mean_phi","mean_p")) ## diagnostics
print(randInt.fit,pars= c("alpha","beta","mean_phi","mean_p")) # Summarize posteriors
### Random intercepts, slopes, no correlation
randIntSlp.fit = stan(file = "cjs_randIntSlpNoCor.stan",
data = stan_data,chains = nc, iter = ni,
warmup = nb, thin = nt,control = list(adapt_delta = 0.90))
stan_trace(randIntSlp.fit, pars= c("beta","mean_phi","mean_p")) ## diagnostics
print(randIntSlp.fit,pars=c("beta","mean_phi","mean_p")) # Summarize posteriors
Here is the Stan code for the varying intercept model.
functions {
int first_capture(int[] y_i) {
for (k in 1:size(y_i))
if (y_i[k])
return k;
return 0;
}
int last_capture(int[] y_i) {
for (k_rev in 0:(size(y_i) - 1)) {
int k = size(y_i) - k_rev;
if (y_i[k])
return k;
}
return 0;
}
matrix prob_uncaptured(int nind, int n_occasions,
matrix p, matrix phi) {
matrix[nind, n_occasions] chi;
for (i in 1:nind) {
chi[i, n_occasions] = 1.0;
for (t in 1:(n_occasions - 1)) {
// Compoud declaration was enabled in Stan 2.13
int t_curr = n_occasions - t;
int t_next = t_curr + 1;
chi[i, t_curr] = (1 - phi[i, t_curr])
+ phi[i, t_curr] * (1 - p[i, t_next - 1]) * chi[i, t_next];
}
}
return chi;
}
}
data {
int<lower=0> nind; // Number of individuals
int<lower=2> n_occasions; // Number of capture occasions
int<lower=0,upper=1> y[nind, n_occasions]; // Capture-history
int n_occ_minus_1; //number of capture occasions minus 1
row_vector[n_occ_minus_1] X[nind]; // covariate
int<lower=1> n_groups; // number of groups, same as number of subjects
int<lower=1,upper=n_groups> group[nind]; // group ID
}
transformed data {
int<lower=0,upper=n_occasions> first[nind];
int<lower=0,upper=n_occasions> last[nind];
for (i in 1:nind)
first[i] = first_capture(y[i]);
for (i in 1:nind)
last[i] = last_capture(y[i]);
}
parameters {
real<lower=0,upper=1> mean_p; // Mean recapture
real alpha;
real beta;
vector[n_groups] u; // group intercept
real<lower = 0> sigma_u; // group sd
}
transformed parameters {
matrix<lower=0,upper=1>[nind, n_occ_minus_1] phi;
matrix<lower=0,upper=1>[nind, n_occ_minus_1] p;
matrix<lower=0,upper=1>[nind, n_occasions] chi;
real mu;
// Constraints
for (i in 1:nind) {
for (t in 1:(first[i] - 1)) {
phi[i, t] = 0;
p[i, t] = 0;
}
for (t in first[i]:n_occ_minus_1) {
mu = inv_logit(alpha + u[group[i]] + beta*X[i,t]);
phi[i, t] = mu;
p[i, t] = mean_p;
}
}
chi = prob_uncaptured(nind, n_occasions, p, phi);
}
model {
// Priors
alpha ~ normal(0,10);
beta ~ normal(0, 10);
u ~ normal(0,sigma_u);
// Likelihood
for (i in 1:nind) {
if (first[i] > 0) {
for (t in (first[i] + 1):last[i]) {
1 ~ bernoulli(phi[i, t - 1]);
y[i, t] ~ bernoulli(p[i, t - 1]);
}
1 ~ bernoulli(chi[i, last[i]]);
}
}
}
generated quantities {
real<lower=0,upper=1> mean_phi = inv_logit(mu);
}
Finally, here is the code for the varying intercepts/slopes model:
functions {
int first_capture(int[] y_i) {
for (k in 1:size(y_i))
if (y_i[k])
return k;
return 0;
}
int last_capture(int[] y_i) {
for (k_rev in 0:(size(y_i) - 1)) {
int k = size(y_i) - k_rev;
if (y_i[k])
return k;
}
return 0;
}
matrix prob_uncaptured(int nind, int n_occasions,
matrix p, matrix phi) {
matrix[nind, n_occasions] chi;
for (i in 1:nind) {
chi[i, n_occasions] = 1.0;
for (t in 1:(n_occasions - 1)) {
// Compoud declaration was enabled in Stan 2.13
int t_curr = n_occasions - t;
int t_next = t_curr + 1;
chi[i, t_curr] = (1 - phi[i, t_curr])
+ phi[i, t_curr] * (1 - p[i, t_next - 1]) * chi[i, t_next];
}
}
return chi;
}
}
data {
int<lower=0> nind; // Number of individuals
int<lower=2> n_occasions; // Number of capture occasions
int<lower=0,upper=1> y[nind, n_occasions]; // Capture-history
int n_occ_minus_1; //number of capture occasions minus 1
row_vector[n_occ_minus_1] X[nind]; // covariate
int<lower=1> n_groups; // number of groups, same as number of subjects
int<lower=1,upper=n_groups> group[nind]; // group ID
}
transformed data {
int<lower=0,upper=n_occasions> first[nind];
int<lower=0,upper=n_occasions> last[nind];
for (i in 1:nind)
first[i] = first_capture(y[i]);
for (i in 1:nind)
last[i] = last_capture(y[i]);
}
parameters {
real<lower=0,upper=1> mean_p; // Mean recapture
vector[2] beta; //intercept and slope
//vector[n_groups] u; // group intercept
//real<lower = 0> sigma_u; // group sd
matrix[2,n_groups] u; //subj intercepts, slopes I think the '2' would have to be replaced with extended
vector<lower=1>[2] sigma_u; //subj sd
}
transformed parameters {
matrix<lower=0,upper=1>[nind, n_occ_minus_1] phi;
matrix<lower=0,upper=1>[nind, n_occ_minus_1] p;
matrix<lower=0,upper=1>[nind, n_occasions] chi;
real mu;
// Constraints
for (i in 1:nind) {
for (t in 1:(first[i] - 1)) {
phi[i, t] = 0;
p[i, t] = 0;
}
for (t in first[i]:n_occ_minus_1) {
mu = inv_logit((beta[1] + u[1,group[i]]) + (beta[2] + u[2,group[i]]) * X[i,t]);
phi[i, t] = mu;
p[i, t] = mean_p;
}
}
chi = prob_uncaptured(nind, n_occasions, p, phi);
}
model {
// Priors
beta[1] ~ normal(0, 10);
beta[2] ~ normal(0, 10);
mean_p ~ uniform(0,1);
sigma_u ~ cauchy(0,1);
for (i in 1:2){
u[i] ~ normal(0,sigma_u[i]);
}
// Likelihood
for (i in 1:nind) {
if (first[i] > 0) {
for (t in (first[i] + 1):last[i]) {
1 ~ bernoulli(phi[i, t - 1]);
y[i, t] ~ bernoulli(p[i, t - 1]);
}
1 ~ bernoulli(chi[i, last[i]]);
}
}
}
generated quantities {
real<lower=0,upper=1> mean_phi = inv_logit(mu);
}