Hi, I’m doing research on implementing Stochastic Volatility models in Stan. I’m using the reparameterised model as outlined in Stan user guide and cmdstanr.
I noticed some unexpected behaviour when sampling the model when switching save_warmup=FALSE
to save_warmup=TRUE
.
When save_warmup=FALSE
the number of divegences I get is 209 out of 4000. When I switch this to save_warmup=TRUE
the number of divergences becomes 10. The seed is set to 123 in both instances.
Why does changing the save_warmup parameter from FALSE to TRUE impact the MCMC samples from the model?
I initially thought that choosing to save the warmup or not impacted the initialised values for MCMC which affected the sampled parameters (and therefore, the number of divergences). However, when I set the same initial values I still get different number of divergences when I change the save_warmup input.
I also ran the sampler twice for the same hyperparameters and seed and I was able to reproduce the same sample values - so something about changing the warmup has altered the sampling behaviour. Shouldn’t saving the warmup only just save the warmup samples and not impact anything else?
Reproducible example (including data) below:
sv_model.stan
file code:
data {
int<lower=0> T;
vector[T] y;
int<lower = 0, upper = 1> sample_prior;
}
parameters {
real mu; // mean log volatility
real<lower=0, upper=1> p; // p parameter of beta. starts at 0.5
real<lower=0> sigma; // white noise shock scale
vector[T] h_std; // std log volatility time t
}
transformed parameters {
real<lower=-1, upper=1> phi = 2*p-1;
vector[T] h = h_std * sigma;
h[1] /= sqrt(1 - phi * phi);
h += mu;
for (t in 2:T) {
h[t] += phi * (h[t - 1] - mu);
}
}
model {
p ~ beta(20, 1.5);
sigma ~ inv_gamma(5./2., (0.01*5.)/2.);
mu ~ normal(0, 10);
h_std ~ std_normal();
if(sample_prior==0){
y ~ normal(0, exp(h / 2));
}
}
generated quantities{
vector[T] y_rep;
vector[T] log_yrep_squared;
vector[T] log_y_squared;
for (t in 1:T){
y_rep[t] = normal_rng(0, exp(h[t]/2));
log_yrep_squared[t] = log(y_rep[t] * y_rep[t]);
log_y_squared[t] = log(y[t] * y[t]);
}
}
R code to produce data and run model:
library(cmdstanr)
set.seed(323651)
# Generate data
gen.AR1 <- function(phi, mu, sig, T) {
# simulate AR(1) state vector 'h' having length 'T'
# using equation (1) and subsequent equation for initial state
h <- rep(0, T)
h[1] <- rnorm(1, mean = mu, sd = sig / sqrt(1 - phi^2))
for (i in 2:T) {
h[i] <- rnorm(1, mean = (mu + phi * (h[(i - 1)] - mu)), sd = sig)
}
return(h)
}
simulate_ksc <- function(T = 1000,
phi.true = 0.97779,
sig.true = 0.15850,
beta.true = 0.64733) {
mu.true <- log(beta.true^2)
htrue <- gen.AR1(phi.true, mu.true, sig.true, T)
yobs <- exp(htrue / 2) * rnorm(T, 0, 1)
df <- data.frame(cbind(yobs))
return(df)
}
data <- simulate_ksc()
returns <- data[, "yobs"]
data_list <- list(T = length(returns),
y = returns,
sample_prior = FALSE)
mod <- cmdstan_model("sv_model.stan")
# 10 divergences
model_fit_true <- mod$sample(
data = data_list,
seed = 123,
chains = 4,
parallel_chains = 4,
refresh = 500,
adapt_delta = 0.95,
save_warmup = TRUE
)
# 209 divergences
model_fit_false <- mod$sample(
data = data_list,
seed = 123,
chains = 4,
parallel_chains = 4,
refresh = 500,
adapt_delta = 0.95,
save_warmup = FALSE
)
Setting initial values:
init_list <- list(
list(mu = 0.2, p = 0.2, sigma = 0.2, h_std = 0.2),
list(mu = 0.4, p = 0.4, sigma = 0.4, h_std = 0.4),
list(mu = 0.6, p = 0.6, sigma = 0.6, h_std = 0.6),
list(mu = 0.8, p = 0.8, sigma = 0.8, h_std = 0.8)
)
model_fit <- mod$sample(
data = data_list,
seed = 123,
chains = 4,
parallel_chains = 4,
refresh = 500,
adapt_delta = 0.95,
save_warmup = TRUE, #switch this to FALSE to see different divergnce values
init=init_list
)
Environment info:
R version 4.2.3 (2023-03-15)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.2
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices datasets utils methods base
other attached packages:
[1] cmdstanr_0.5.3
loaded via a namespace (and not attached):
[1] pillar_1.9.0 compiler_4.2.3 tools_4.2.3 jsonlite_1.8.4 lifecycle_1.0.3 tibble_3.2.1 gtable_0.3.3 checkmate_2.1.0 pkgconfig_2.0.3
[10] rlang_1.1.0 cli_3.6.1 DBI_1.1.3 xfun_0.37 withr_2.5.0 dplyr_1.0.10 knitr_1.42 generics_0.1.3 vctrs_0.6.1
[19] grid_4.2.3 tidyselect_1.2.0 glue_1.6.2 data.table_1.14.8 R6_2.5.1 processx_3.8.0 fansi_1.0.4 distributional_0.3.2 tensorA_0.36.2
[28] ggplot2_3.4.1 farver_2.1.1 posterior_1.4.1 magrittr_2.0.3 backports_1.4.1 scales_1.2.1 ps_1.7.3 abind_1.4-5 assertthat_0.2.1
[37] colorspace_2.1-0 renv_0.16.0 utf8_1.2.3 munsell_0.5.0