Hi all,
This is probably a bit embarassing, but I cannot for the life of me figure out why the posterior preditions from a brmsfit object are different (better) than my custom negative binomial model. The model example is a simple regression with two harmonic terms. I have tried generating posterior predictions with both neg_binomial_2_rng()
with the inverse-link function and neg_binomial_2_log_rng()
with the linear predictor. The posterior distributions for the parameters are almost identical, so the model (despite being extremely inefficient) is fine. Itâs just the predictions that are confusing me a bit. The code for my custom model is below, and I am attaching my R code and a sample of the data. Pardon the clunkiness of the stan code, the real model I am working is much more complex, and I wanted to firgure out the issues of the toy example before moving to correct the real one, so I just stripped down the more complex versions so it was easier to fix the other model structures. Iâm 99% positive itâs a user error issue, I just donât know what it is.
Thanks!
data {
int<lower=0> M_Y; // # Months x # Years of data
int<lower = 0> Trips[M_Y]; // Response: Trips in month m in year y
matrix[M_Y,2] X; // Predictors of max effort: column 1 = sine, 2 = cosine
vector[M_Y] Pop; // Florida population
vector[M_Y] CPUE; // CPUE for each species s in year y
}
parameters {
real<lower = 0, upper = 100> phi; // Dispersion parameters
vector[2] beta; // Effects of seasonality (harmonic terms)
real eta; // Minimum effort (intercept)
real xi; // Biomass
real tau; // Florida population effect
}
transformed parameters {
vector[M_Y] mu; // Mean structure for counts (log scale)
vector[M_Y] alpha;
alpha = X[,1]*beta[1] + X[,2]*beta[2] +xi*CPUE + tau*Pop;
mu = alpha+eta;
}
model {
for (my in 1:M_Y){
Trips[my] ~ neg_binomial_2_log(mu[my], phi);
}
}
generated quantities {
vector[M_Y] pred;
vector[M_Y] log_lik;
for (my in 1:M_Y){
log_lik[my] = neg_binomial_2_log_lpmf(Trips[my] | mu[my], phi);
pred[my] = neg_binomial_2_rng(exp(mu[my]), phi);
}
}
R code:
#------------------------------------------------------------------------------#
##################################### Libraries ################################
#------------------------------------------------------------------------------#
## Syntax packages
suppressMessages(library(dplyr))
suppressMessages(library(tidyr))
suppressMessages(library(tidyverse))
suppressMessages(library(lubridate))
## Visualization packages
suppressMessages(library(ggplot2))
options(width = 80)
### Set ggplot theme for plotting
theme_set(theme_bw(base_family = 'serif'))
## Modeling
suppressMessages(library(rstan))
suppressMessages(library(brms))
#------------------------------------------------------------------------------#
##################################### Housekeeping #############################
#------------------------------------------------------------------------------#
## Clear out old files in R
rm(list=ls(all=TRUE))
## Set working directory
setwd("C:\\Users\\ichal\\Documents\\Hyman gag grouper models")
## User-defined functions
`%nin%` <- Negate(`%in%`)
#------------------------------------------------------------------------------#
##################################### Formatting ###############################
#------------------------------------------------------------------------------#
## Load data
Data <- read.csv("Snapper.csv")
#Datalist for model
DataList <- list(
"M_Y" = nrow(Data),
"Pop" = Data$pop,
"Trips" = ceiling(Data$VS),
"X" = Data[,2:3],
"CPUE" = Data$SSB
)
## My model
My_Model <- stan('Example.stan', data = DataList, chains = 1, iter = 2000, control = list(adapt_delta = 0.99))
BRM_Model <- brm(ceiling(VS)~sin1+cos1+pop + SSB, data = Data, family = negbinomial())
BRM_preds <- predict(BRM_Model, Data, type = 'response')
Data$Median_BRM <- BRM_preds[,1]
Data$Upper_BRM <- BRM_preds[,4]
Data$Lower_BRM <- BRM_preds[,3]
Model_pred <- extract(My_Model)
Data$Median_custom <- apply(Model_pred$pred, 2, function(x){quantile(x, 0.5)})
Data$Lower_custom <- apply(Model_pred$pred, 2, function(x){quantile(x, 0.025)})
Data$Upper_custom <- apply(Model_pred$pred, 2, function(x){quantile(x, 0.975)})
Data$Time <- 1:276
ggplot(Data)+
geom_point(aes(x = Time, y = VS), col = 1)+
geom_line(aes(x = Time, y = Median_BRM, color = 'a'))+
geom_line(aes(x = Time, y = Median_custom, color = 'b'))+
scale_colour_manual(name = 'Model',
values = c('a'='forestgreen','b'=2),
labels = c('brms','custom'))