I’m currently working through Richard McElreath’s Statistical Rethinking and seem to have run into an issue with cmdstanr (which is the backend for the ulam()
function McElreath uses throughout the book).
In one example, McElreath uses 4 cores to sample a model & he notes that it should take roughly ~20min. Sampling in ulam()
or using cmdstanr’s $sample()
method directly doesn’t even get started after about an hour, but sampling with rstan::stan()
works generally as expected.
If I set to run on a single core, $sample()
/ulam()
sample as expected, but I lose the benefit of parallelizing the chains. Is there some cmdstan setup that I’m missing currently? It may be that cmdstan is actually sampling, but if so, it’s taking egregiously long to the point where it’s just more efficient to use rstan. Would appreciate any help here!
library(rethinking)
#> Loading required package: rstan
#> Warning: package 'rstan' was built under R version 4.2.1
#> Loading required package: StanHeaders
#> Warning: package 'StanHeaders' was built under R version 4.2.1
#>
#> rstan version 2.26.13 (Stan version 2.26.1)
#> For execution on a local, multicore CPU with excess RAM we recommend calling
#> options(mc.cores = parallel::detectCores()).
#> To avoid recompilation of unchanged Stan programs, we recommend calling
#> rstan_options(auto_write = TRUE)
#> For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
#> change `threads_per_chain` option:
#> rstan_options(threads_per_chain = 1)
#> Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
#> Loading required package: cmdstanr
#> This is cmdstanr version 0.5.3
#> - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
#> - CmdStan path: C:/Users/E1735399/Documents/.cmdstan/cmdstan-2.30.1
#> - CmdStan version: 2.30.1
#> Loading required package: parallel
#> rethinking (Version 2.23)
#>
#> Attaching package: 'rethinking'
#> The following object is masked from 'package:rstan':
#>
#> stan
#> The following object is masked from 'package:stats':
#>
#> rstudent
data("Trolley")
d <- Trolley
edu_levels <- c(6, 1, 8, 4, 7, 2, 5, 3)
d$edu_new <- edu_levels[d$edu]
# subset of data to run for example
d <- d[1:1000,]
dat <-
list(
R = d$response,
action = d$action,
intention = d$intention,
contact = d$contact,
E = as.integer(d$edu_new),
alpha = rep(2, 7)
)
# set sample to false to pull out model code
m12.6 <-
ulam(
alist(R ~ ordered_logistic(phi, kappa),
phi <- bE*sum(delta_j[1:E]) + bA*action + bI*intention + bC*contact,
kappa ~ normal(0, 1.5),
c(bA, bI, bC, bE) ~ normal(0, 1),
vector[8]: delta_j <<- append_row(0, delta),
simplex[7]: delta ~ dirichlet(alpha)),
data = dat,
sample = FALSE
)
# stan code written by `ulam()`
stancode(m12.6)
#> data{
#> array[1000] int R;
#> array[1000] int contact;
#> array[1000] int intention;
#> array[1000] int action;
#> array[1000] int E;
#> vector[7] alpha;
#> }
#> parameters{
#> ordered[6] kappa;
#> real bE;
#> real bC;
#> real bI;
#> real bA;
#> simplex[7] delta;
#> }
#> model{
#> vector[1000] phi;
#> vector[8] delta_j;
#> delta ~ dirichlet( alpha );
#> delta_j = append_row(0, delta);
#> bA ~ normal( 0 , 1 );
#> bI ~ normal( 0 , 1 );
#> bC ~ normal( 0 , 1 );
#> bE ~ normal( 0 , 1 );
#> kappa ~ normal( 0 , 1.5 );
#> for ( i in 1:1000 ) {
#> phi[i] = bE * sum(delta_j[1:E[i]]) + bA * action[i] + bI * intention[i] + bC * contact[i];
#> }
#> for ( i in 1:1000 ) R[i] ~ ordered_logistic( phi[i] , kappa );
#> }
# samples with rstan
m12.6_rstan <-
rstan::stan(
model_code = stancode(m12.6),
data = dat,
chains = 4,
iter = 1000,
cores = 4
)
# output summary w/ rethinking::precis()
precis(m12.6_rstan, depth = 2)
#> mean sd 5.5% 94.5% n_eff Rhat4
#> kappa[1] -2.10446901 0.25742085 -2.510985755 -1.6862182 1194.686 1.0014895
#> kappa[2] -1.49648321 0.25278235 -1.883059102 -1.0817611 1216.085 1.0015766
#> kappa[3] -1.02347649 0.24962151 -1.413633917 -0.6187427 1265.861 1.0012399
#> kappa[4] -0.02278274 0.24485644 -0.392755927 0.3841375 1355.320 1.0009926
#> kappa[5] 0.53088284 0.24824163 0.143593007 0.9424150 1359.282 1.0013155
#> kappa[6] 1.33549706 0.25419531 0.947755305 1.7669024 1362.173 1.0006375
#> bE 0.48236711 0.30701474 0.006931886 0.9792447 1399.932 1.0018044
#> bC -1.00903360 0.15908698 -1.263193408 -0.7487804 2141.323 0.9986759
#> bI -0.65430022 0.11857518 -0.848499808 -0.4702231 2620.459 0.9993161
#> bA -0.69718130 0.12548953 -0.899591184 -0.4917507 2163.669 1.0006727
#> delta[1] 0.15282084 0.09581575 0.031726485 0.3326662 2380.593 0.9986317
#> delta[2] 0.14123486 0.09025845 0.028600681 0.3085055 2625.586 0.9995884
#> delta[3] 0.19424857 0.11533186 0.040199619 0.3985537 2873.824 0.9996124
#> delta[4] 0.11664518 0.07561453 0.023721683 0.2535431 2229.839 0.9991632
#> delta[5] 0.14743092 0.08633043 0.033231921 0.3076077 2274.953 1.0007345
#> delta[6] 0.09075601 0.06686325 0.015409241 0.2116227 1965.286 1.0041835
#> delta[7] 0.15686361 0.09192488 0.038909863 0.3220604 2328.327 1.0017209
# able to compile the model w/cmdstan
m12.6_cmdstan <-
cmdstan_model(write_stan_file(stancode(m12.6)))
m12.6_cmdstan
#> data{
#> array[1000] int R;
#> array[1000] int contact;
#> array[1000] int intention;
#> array[1000] int action;
#> array[1000] int E;
#> vector[7] alpha;
#> }
#> parameters{
#> ordered[6] kappa;
#> real bE;
#> real bC;
#> real bI;
#> real bA;
#> simplex[7] delta;
#> }
#> model{
#> vector[1000] phi;
#> vector[8] delta_j;
#> delta ~ dirichlet( alpha );
#> delta_j = append_row(0, delta);
#> bA ~ normal( 0 , 1 );
#> bI ~ normal( 0 , 1 );
#> bC ~ normal( 0 , 1 );
#> bE ~ normal( 0 , 1 );
#> kappa ~ normal( 0 , 1.5 );
#> for ( i in 1:1000 ) {
#> phi[i] = bE * sum(delta_j[1:E[i]]) + bA * action[i] + bI * intention[i] + bC * contact[i];
#> }
#> for ( i in 1:1000 ) R[i] ~ ordered_logistic( phi[i] , kappa );
#> }
# this doesn't sample, so commenting out
# m12.6_cmdstan$sample(
# data = dat,
# chains = 4,
# iter_warmup = 500,
# iter_sampling = 500,
# num_cores = 4
# )
sessionInfo()
#> R version 4.2.0 (2022-04-22 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 18363)
#>
#> Matrix products: default
#>
#> locale:
#> [1] LC_COLLATE=English_United States.utf8
#> [2] LC_CTYPE=English_United States.utf8
#> [3] LC_MONETARY=English_United States.utf8
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.utf8
#>
#> attached base packages:
#> [1] parallel stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] rethinking_2.23 cmdstanr_0.5.3 rstan_2.26.13
#> [4] StanHeaders_2.26.13
#>
#> loaded via a namespace (and not attached):
#> [1] Rcpp_1.0.9 mvtnorm_1.1-3 lattice_0.20-45
#> [4] prettyunits_1.1.1 ps_1.7.1 assertthat_0.2.1
#> [7] digest_0.6.29 utf8_1.2.2 V8_4.2.2
#> [10] R6_2.5.1 backports_1.4.1 reprex_2.0.2
#> [13] stats4_4.2.0 coda_0.19-4 evaluate_0.18
#> [16] ggplot2_3.4.0 highr_0.9 pillar_1.8.1
#> [19] rlang_1.0.6 curl_4.3.3 rstudioapi_0.14
#> [22] callr_3.7.3 checkmate_2.1.0 rmarkdown_2.18
#> [25] stringr_1.4.1 loo_2.5.1 munsell_0.5.0
#> [28] compiler_4.2.0 xfun_0.35 pkgconfig_2.0.3
#> [31] pkgbuild_1.4.0 shape_1.4.6 htmltools_0.5.3
#> [34] tidyselect_1.2.0 tibble_3.1.8 tensorA_0.36.2
#> [37] gridExtra_2.3 codetools_0.2-18 matrixStats_0.63.0
#> [40] fansi_1.0.3 crayon_1.5.2 dplyr_1.0.10
#> [43] withr_2.5.0 MASS_7.3-58.1 grid_4.2.0
#> [46] distributional_0.3.1 jsonlite_1.8.3 gtable_0.3.1
#> [49] lifecycle_1.0.3 DBI_1.1.3 magrittr_2.0.3
#> [52] posterior_1.3.1 scales_1.2.1 RcppParallel_5.1.5
#> [55] cli_3.4.1 stringi_1.7.8 farver_2.1.1
#> [58] fs_1.5.2 generics_0.1.3 vctrs_0.5.1
#> [61] tools_4.2.0 glue_1.6.2 processx_3.7.0
#> [64] abind_1.4-5 fastmap_1.1.0 yaml_2.3.6
#> [67] inline_0.3.19 colorspace_2.0-3 knitr_1.41
Created on 2022-12-16 with reprex v2.0.2