For a larger project, I’m interested doing some benchmark comparisons between pure Stan and Stan models generated through a development platform called TMB ( TMB Documentation: Introduction ). TMB performs fast marginal likelihood estimation and Cole Monahan (monnahc) has a fantastic package called tmbstan that allows TMB models (or RTMB) to be sampled via Stan (paper here).
TMB uses the Laplace approximation to integrate out random effects, so a model including random components will only have fixed effects sampled via Stan. I would expect the speed and efficiency of these approaches to be fairly similar, but am generally finding pure Stan to be 1.5-2x faster.
As a reproducible example, I’m including an AR(1) versions of both RTMB and Stan models below. These models have been written to
- produce the same likelihood with the same data inputs
- use the same priors
- use the same init values, with same initial seeds
I’m hoping to understand where the speed differences are coming from, whether it’s differences in how the gradient calculation is done, compiler flags, or something entirely different. Thanks for any help.
library(RTMB)
library(rstan)
library(tictoc)
# Set cores to 1 -- to avoid tmbstan errors
rstan_options(auto_write = TRUE)
options(mc.cores = 1)
## Simulation - AR process
n_sim <- 20 # number of datasets
T <- 100 # time series length
n_chains <- 4
iter_hmc <- 2000
adapt_delta <- 0.99 # otherwise divergent transitions
true_phi <- 0.7
true_sdAR <- 0.4
true_sdO <- 0.3
innov_sd <- true_sdAR * sqrt(1 - true_phi^2)
set.seed(1234)
sim_data <- lapply(1:n_sim, function(i) {
mu_sim <- numeric(T)
mu_sim[1] <- rnorm(1, 0, true_sdAR)
for (t in 2:T) {
mu_sim[t] <- true_phi * mu_sim[t - 1] + rnorm(1, 0, innov_sd)
}
return(rnorm(T, mu_sim, true_sdO))
})
# RTMB model definition
nll_ar1 <- function(par) {
getAll(par)
sdAR <- exp(logSdR)
sdO <- exp(logSdO)
phi <- 2 * plogis(tphiR) - 1
ans <- -sum(dnorm(x_raw, 0, 1, log = TRUE))
ans <- ans - dnorm(z1, 0, 1, log = TRUE)
ans <- ans - dnorm(logSdR, 0, 1, log = TRUE)
ans <- ans - dnorm(logSdO, 0, 1, log = TRUE)
mu_vec <- advector(numeric(T))
mu_vec[1] <- z1 * sdAR
step_sd <- sdAR * sqrt(1 - phi^2)
for (t in 2:T) {
mu_vec[t] <- phi * mu_vec[t - 1] + step_sd * x_raw[t - 1]
}
# likelihood
ans <- ans - sum(dnorm(y, mu_vec, sdO, log = TRUE))
return(ans)
}
stan_code <- "
data {
int<lower=1> T;
vector[T] y;
}
parameters {
real logSdR; // log process sd
real logSdO; // log obs sd
real tphiR; // transformed ar1 coef
real z1; // standardized init state
vector[T-1] x_raw; // scaled devs
}
transformed parameters {
real phi = 2 * inv_logit(tphiR) - 1;
real sdAR = exp(logSdR);
real sdO = exp(logSdO);
vector[T] mu;
mu[1] = z1 * sdAR;
for (t in 2:T)
mu[t] = phi * mu[t-1] + sdAR * sqrt(1 - phi^2) * x_raw[t-1];
}
model {
z1 ~ std_normal();
x_raw ~ std_normal();
logSdR ~ normal(0, 1);
logSdO ~ normal(0, 1);
y ~ normal(mu, sdO);
}
"
stan_mod <- stan_model(model_code = stan_code)
par_init_base <- list(logSdR = 0, logSdO = 0, tphiR = 0, z1 = 0, x_raw = rep(0, T - 1))
## results dataframe -- store approx times
results <- data.frame(
s = 1:n_sim,
tmb_time = NA,
stan_time = NA,
min_tmb_ess = NA,
min_stan_ess = NA
)
for (s in 1:n_sim) {
y_curr <- sim_data[[s]]
obj <- MakeADFun(
function(p) {
p$y <- y_curr
nll_ar1(p)
},
par_init_base,
random = c("z1", "x_raw"), silent = TRUE
)
# build inits from the tmb object
all_inits_tmb <- lapply(1:n_chains, function(i) {
set.seed(s * 100 + i)
# jitter init vals
jittered_flat <- obj$par + rnorm(length(obj$par), 0, 0.01)
# convert back to a named list
# handles random effects correctly for tmbstan
inits <- obj$env$parList(jittered_flat)
return(inits)
})
all_inits_stan <- lapply(1:n_chains, function(i) {
# copy TMB list
inits <- all_inits_tmb[[i]]
# Stan also has to have latent vector of x
inits$x_raw <- rep(0, T - 1)
return(inits)
})
tic()
fit_tmb <- try(tmbstan(obj,
laplace = FALSE,
chains = n_chains, iter = iter_hmc,
init = all_inits_tmb, seed = 123, refresh = 0,
control = list(
adapt_delta = adapt_delta,
max_treedepth = 12
)
))
t_tmb <- toc(quiet = TRUE)
tic()
fit_stan <- try(sampling(stan_mod,
data = list(T = T, y = y_curr),
chains = n_chains, iter = iter_hmc,
init = all_inits_stan, seed = 123, refresh = 0,
control = list(
adapt_delta = adapt_delta,
max_treedepth = 12
)
))
t_stan <- toc(quiet = TRUE)
results$tmb_time[s] <- t_tmb$toc - t_tmb$tic
results$stan_time[s] <- t_stan$toc - t_stan$tic
results$min_tmb_ess[s] <- min(summary(fit_tmb)$summary[, "n_eff"])
results$min_stan_ess[s] <- min(summary(fit_stan)$summary[, "n_eff"])
print(s)
}
print(results)
#> s tmb_time stan_time min_tmb_ess min_stan_ess
#> 1 1 10.699 8.596 68.153829 78.620330
#> 2 2 9.101 6.755 54.996436 90.410796
#> 3 3 11.158 5.557 45.373532 94.318515
#> 4 4 9.840 8.613 201.947948 201.897162
#> 5 5 14.880 8.355 17.038681 299.347833
#> 6 6 10.121 7.241 49.239454 95.837324
#> 7 7 9.482 6.282 22.907155 12.994246
#> 8 8 13.695 7.664 4.476378 30.200596
#> 9 9 10.500 6.566 102.943055 176.717573
#> 10 10 7.372 6.114 15.604389 78.066006
#> 11 11 16.058 7.822 116.796181 63.250657
#> 12 12 10.739 6.551 127.002852 87.128375
#> 13 13 10.238 7.620 110.456115 36.094930
#> 14 14 8.452 5.912 166.838706 149.042957
#> 15 15 7.764 5.121 123.110769 68.472132
#> 16 16 18.909 18.065 74.388787 76.333303
#> 17 17 10.181 7.672 153.084062 103.536185
#> 18 18 12.081 11.808 10.783403 8.916516
#> 19 19 21.421 10.153 381.027111 79.080998
#> 20 20 8.834 6.072 136.713875 106.807922
Created on 2026-04-23 with reprex v2.1.1