I’ve noticed a few issues with using tuple types in cmdstanr; specifically, there are parts of cmdstanr that don’t seem to understand tuples and can trigger cryptic errors.
Passing tuple data to cmdstan
The `cmdstanr:::process_data()` function (which takes a named list of data, does some pre-processing/checks, and then runs `write_stan_json()`) errors when the model contains tuple data. As such, trying to run the `$sample()` method with a list of data will fail, though it can still work if you give it a properly formatted JSON file. I’ve put together a function that can create JSON data files with tuples, but it’s kind of hacky and I can’t guarantee it will work for everyone. It requires a version of R with native pipe support (R 4.1 or later), though it could be rewritten to remove that limitation.
#' Write out the JSON for the model data, with tuple support
#'
#' cmdstanr doesn't currently support tuples, so this is a hack around it.
#' The non-tuple variables are written by `cmdstanr:::process_data()`; the
#' tuple variables are serialised separately with `jsonlite::toJSON()` and
#' spliced, as text, into the JSON object that `process_data()` produced.
#'
#' @param data named list of data. Tuple variables are given as lists
#'   (nested lists for nested tuples); their element names are ignored and
#'   replaced by the positions "1", "2", ... in the output.
#' @param compiled_model a compiled `CmdStanModel` object
#' @param json_file name of output json file to create. Missing parent
#'   directories are created.
#' @returns json_file, invisibly.
process_data_tuple = \(data, compiled_model, json_file) {
  model_variables = compiled_model$variables()
  data_variables = model_variables$data

  # Identify which variables are tuples: they are recognised by a `type`
  # entry that has more than one element.
  is_tuple = lengths(purrr::map(data_variables, "type")) > 1
  tuple_names = names(data_variables)[is_tuple]

  # Fail early with a readable message rather than emitting broken JSON.
  absent = setdiff(tuple_names, names(data))
  if (length(absent) > 0) {
    stop(
      "Missing input data for the following tuple variables: ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Base variables (non-tuples) go through cmdstanr's own machinery.
  base_variables = model_variables
  base_variables$data = data_variables[!is_tuple]
  base_data = data[names(base_variables$data)]
  base_json = cmdstanr:::process_data(base_data, base_variables)

  # NOTE(review): process_data() appears to return NULL when it is given no
  # data (all variables are tuples) -- confirm. Treat that, and an empty
  # file, as an empty JSON object.
  base_lines = if (is.null(base_json)) character(0) else readr::read_lines(base_json)
  if (length(base_lines) == 0) {
    base_lines = c("{", "}")
  }

  if (length(tuple_names) == 0) {
    # Nothing to splice in: the base JSON is already the complete data file.
    out_lines = base_lines
  } else {
    # Rename all internal tuple nodes to follow the 1:n() spec, recursing
    # into sub-tuples (any element that is itself a list).
    rename_all = \(x) {
      numbered = purrr::set_names(x, seq_along(x))
      is_sub_tuple = purrr::map_lgl(numbered, is.list)
      numbered[is_sub_tuple] = purrr::map(numbered[is_sub_tuple], rename_all)
      numbered
    }
    tuple_data = purrr::map(data[tuple_names], rename_all)

    # Serialise each tuple variable as a one-member JSON object, then keep
    # only the `"name": value` member on a single line.
    tuple_txt = vapply(
      seq_along(tuple_data),
      \(i) {
        json = jsonlite::toJSON(
          tuple_data[i],
          auto_unbox = TRUE, factor = "integer", always_decimal = FALSE,
          digits = NA, pretty = TRUE
        )
        # Trim off the wrapping "{\n" and "\n}" emitted by toJSON()
        member = stringr::str_sub(as.character(json), 3L, -3L)
        stringr::str_replace_all(member, stringr::fixed("\n"), " ")
      },
      character(1)
    )
    # Add a comma to the end of every member except the last one
    tuple_txt = paste0(tuple_txt, c(rep(",", length(tuple_txt) - 1), ""))

    # Re-open the base object: remove its closing brace, dropping the line
    # entirely when the brace was the only thing on it.
    head_lines = base_lines
    n = length(head_lines)
    head_lines[n] = sub("\\}\\s*$", "", head_lines[n])
    if (!nzchar(trimws(head_lines[n]))) {
      head_lines = head_lines[-n]
    }
    # Only add a separating comma when the base object already has members;
    # otherwise the result would start with "{," and be invalid JSON.
    has_members = !identical(trimws(paste(head_lines, collapse = "")), "{")
    if (has_members) {
      m = length(head_lines)
      head_lines[m] = paste0(head_lines[m], ",")
    }

    out_lines = c(head_lines, tuple_txt, "}")
  }

  out_dir = dirname(json_file)
  if (!dir.exists(out_dir)) {
    # recursive = TRUE so nested output paths work as well
    dir.create(out_dir, recursive = TRUE)
  }
  readr::write_lines(out_lines, json_file)
  invisible(json_file)
}
`variable_skeleton()` returns NA for tuple parameters
A model that contains the following in the parameters block:
tuple(real<multiplier=hp_zi_sd>, real<multiplier=hp_zi_sd>) intercept_zi_p;
real<multiplier=hp_lm_sd> intercept_lm;
tuple(real<offset=hp_init_mu[1], multiplier=hp_init_sd[1]>,
real<offset=hp_init_mu[2], multiplier=hp_init_sd[2]>) intercept_init_p;
Produces the following variable skeleton:
pthf$variable_skeleton() |> str()
# List of 3
# $ NA : num [1(1d)] 0 #should be intercept_zi_p
# $ intercept_lm: num [1(1d)] 0
# $ NA : num [1(1d)] 0 #should be intercept_init_p
Unfortunately, I don’t have a workaround for this yet.
This is with cmdstanr 0.7.1 using cmdstan 2.34.1 and R 4.3.1.