Hello,
I use the function read_stan_csv to create a stanfit object from the CSV file saved by CmdStanR. However, for a model with many parameters the saved CSV file can reach several gigabytes, and read_stan_csv can then take several hours to load it.
Is there a way to keep the results in memory instead of saving them to CSV and reading them back?
Below is a small code example; the line that needs to be improved is stanfit <- rstan::read_stan_csv(fit_vb$output_files()). Since I already have the fit_vb object in memory, could I perhaps build the stanfit object from it directly, without loading the CSV at all?
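For clarity, the draws themselves can already be pulled from the fit object through CmdStanR's $draws() method; what is missing is the conversion of those draws into a stanfit object. A minimal sketch of what I mean (the posterior package provides the draws format that CmdStanR returns):
draws <- fit_vb$draws()                   # draws as a posterior draws_matrix
draws_df <- posterior::as_draws_df(draws) # the same draws as a data frame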
Code example:
rm(list = ls())
library(tidyverse)
library(cmdstanr)
library(rstan)
library(brms)
tryCatch({cmdstanr::set_cmdstan_path('/home/cmdstanr/cmdstan-2.28.2')})
# ------------------------------------------------------------------------------
# Generate data
# ------------------------------------------------------------------------------
N <- 1000
nchains <- 3
ncores <- 3
nthreads <- 8
data0 <-
  tibble(
    x  = rnorm(N, 0, 1), # x = F1
    x1 = rnorm(N, 1 * x, 1),
    x2 = rnorm(N, 0.5 * x, 1),
    x3 = rnorm(N, 1 * x, 1),
    F1 = as.numeric(NA)
  )
bf0 <- bf(x1 ~ 0 + mi(F1)) +
  bf(x2 ~ 0 + mi(F1)) +
  bf(x3 ~ 0 + mi(F1)) +
  bf(F1 | mi() ~ 1) +
  set_rescor(rescor = FALSE)
prior0 <- prior(constant(1), class = "b", resp = "x1") +
  prior(constant(1), class = "sigma", resp = "x1") +
  prior(normal(0, 10), class = "b", resp = "x2") +
  prior(constant(1), class = "sigma", resp = "x2") +
  prior(normal(0, 10), class = "b", resp = "x3") +
  prior(constant(1), class = "sigma", resp = "x3") +
  prior(normal(0, 10), class = "Intercept", resp = "F1") +
  prior(cauchy(0, 1), class = "sigma", resp = "F1")
# ------------------------------------------------------------------------------
# BRMS
# ------------------------------------------------------------------------------
threads <- 2 # within-chain threads used by threading() and the variational fit
# generate the native Stan code from the brms objects and save it as a .stan file
my_stan_code <- make_stancode(
  formula = bf0,
  data = data0,
  prior = prior0,
  autocor = NULL,
  data2 = NULL,
  cov_ranef = NULL,
  sparse = NULL,
  sample_prior = "no",
  stanvars = NULL,
  stan_funs = NULL,
  knots = NULL,
  threads = threading(threads = threads),
  normalize = getOption("brms.normalize", TRUE),
  save_model = '/home/rstudio/user_projects/Neringos/psc_20220117.stan'
)
# make a native stan data structure from brm objects
my_stan_data <- make_standata(
  formula = bf0,
  data = data0,
  prior = prior0,
  autocor = NULL,
  data2 = NULL,
  cov_ranef = NULL,
  sample_prior = "no",
  stanvars = NULL,
  threads = threading(threads = threads),
  knots = NULL
)
# unclass so that cmdstanr accepts the data as a plain list
my_stan_data <- unclass(my_stan_data)
# grainsize is needed by reduce_sum and must be supplied manually here
# (https://mc-stan.org/docs/2_23/stan-users-guide/reduce-sum.html)
my_stan_data$grainsize <- max(c(100, ceiling(nrow(data0) / (1 * threads))))
mod <- cmdstan_model(stan_file = '/home/rstudio/user_projects/Neringos/psc_20220117.stan', compile = FALSE) # same path as save_model above
# compile stan code
mod$compile(
  quiet = FALSE,
  dir = NULL,
  pedantic = TRUE,
  include_paths = NULL,
  cpp_options = list(stan_threads = TRUE), # enabling reduce_sum
  stanc_options = list(),
  force_recompile = TRUE
)
# run variational inference
fit_vb <- mod$variational(
  data = my_stan_data,
  seed = NULL,
  refresh = NULL,
  init = NULL,
  save_latent_dynamics = FALSE,
  output_dir = '/home/rstudio/user_projects/Neringos/',
  output_basename = NULL,
  sig_figs = NULL,
  threads = threads,
  opencl_ids = NULL,
  algorithm = NULL,
  iter = 1000, # increase iter to increase computation time; alternatively use a larger sample
  grad_samples = NULL,
  elbo_samples = NULL,
  eta = NULL,
  adapt_engaged = NULL,
  adapt_iter = NULL,
  tol_rel_obj = 1e-7,
  eval_elbo = NULL,
  output_samples = NULL
)
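# optional sanity check: summaries can be computed straight from the fit
# object via CmdStanR's $summary() method, before any read_stan_csv call
fit_vb$summary()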
# set up an empty brmsfit shell (empty = TRUE skips the actual fitting);
# the stanfit created below is plugged into it afterwards
fit <- brm(formula = bf0,
           prior = prior0,
           data = data0,
           iter = 1000,
           backend = 'cmdstanr',
           algorithm = 'meanfield',
           tol_rel_obj = 1e-8,
           empty = TRUE
)
# reading this CSV takes hours for large N; this is the line to improve
stanfit <- rstan::read_stan_csv(fit_vb$output_files())
fit$fit <- stanfit      # plug the stanfit into the empty brmsfit
fit <- rename_pars(fit) # restore the brms parameter names
fit