Hi all,
I have tried a simple model with two different data files. Calling cmdstan::command twice in sequence succeeds and generates both output files. However, the program exits with a segmentation fault when the two calls are run in parallel using std::thread. Please find the test case below.
The Stan model file, named bernoulli.stan:
// Bernoulli model: N binary observations y that share a single success
// probability theta.
data {
int<lower=0> N;  // number of observations (also the length of y)
int<lower=0,upper=1> y[N];  // observed outcomes, each constrained to 0 or 1
}
parameters {
real<lower=0,upper=1> theta;  // success probability, constrained to [0, 1]
}
model {
// Beta(1, 1) prior on theta, i.e. uniform over [0, 1].
theta ~ beta(1,1);
// Likelihood: every observation is a Bernoulli draw with probability theta.
for (n in 1:N)
y[n] ~ bernoulli(theta);
}
Translate the model into the C++ header file bernoulli.hpp:
$ stanc --o=bernoulli.hpp bernoulli.stan
Two data files.
$ cat data1.R
N <- 10
y <- c(0,1,0,0,0,0,0,0,0,1)
$ cat data2.R
N <- 10
y <- c(1,1,1,0,0,0,0,1,0,1)
The first main.cpp, which calls cmdstan::command sequentially:
#include <stan/services/error_codes.hpp>
#include <cmdstan/command.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/exception_ptr.hpp>
/**
 * Runs the Bernoulli model twice, one CmdStan invocation after the other:
 * first on data1.R (writing output1.txt), then on data2.R (writing
 * output2.txt). Both runs are always attempted unless an exception is thrown.
 *
 * The program's own command-line arguments are ignored; each run uses the
 * fixed argument vector below.
 *
 * @return 0 when both runs report success; otherwise the first non-zero code
 *         returned by cmdstan::command, or
 *         stan::services::error_codes::SOFTWARE if a std::exception is caught.
 */
int main(int argc, const char* argv[]) {
  try {
    const char * arr1 [] = {"PROGRAM", "sample", "data", "file=data1.R", "random", "seed=123", "output", "file=output1.txt"};
    const char * arr2 [] = {"PROGRAM", "sample", "data", "file=data2.R", "random", "seed=123", "output", "file=output2.txt"};
    // Take each argument count from its array so it cannot drift out of sync
    // with the argument list when an option is added or removed.
    const int arc1 = static_cast<int>(sizeof(arr1) / sizeof(arr1[0]));
    const int arc2 = static_cast<int>(sizeof(arr2) / sizeof(arr2[0]));
    // Keep the return codes instead of discarding them, so a failed run is
    // visible in the process exit status.
    const int rc1 = cmdstan::command(arc1, arr1);
    const int rc2 = cmdstan::command(arc2, arr2);
    return rc1 != 0 ? rc1 : rc2;
  } catch (const std::exception& e) {
    std::cout << e.what() << std::endl;
    return stan::services::error_codes::SOFTWARE;
  }
}
Program build command:
$ root=cmdstan_root
$ g++ -std=c++1y -pthread -Wno-sign-compare -O3 -x c++ \
-I "$root/src" \
-I "$root/stan/src" \
-I "$root/stan/lib/stan_math/" \
-I "$root/stan/lib/stan_math/lib/eigen_3.3.3" \
-I "$root/stan/lib/stan_math/lib/boost_1.69.0" \
-I "$root/stan/lib/stan_math/lib/sundials_4.1.0/include" \
-L "$root/stan/lib/stan_math/lib/sundials_4.1.0/lib" \
-DBOOST_RESULT_OF_USE_TR1 \
-DBOOST_NO_DECLTYPE \
-DBOOST_DISABLE_ASSERTS \
-DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION \
-lsundials_nvecserial \
-lsundials_nvecserial \
-lsundials_cvodes \
-lsundials_idas \
bernoulli.hpp main.cpp \
-o bernoulli
Correct output files are produced by the sequential calls:
$ stansummary output1.txt
Inference for Stan model: bernoulli_model
1 chains: each with iter=(1000); warmup=(0); thin=(1); 1000 iterations saved.
Warmup took (0.0091) seconds, 0.0091 seconds total
Sampling took (0.014) seconds, 0.014 seconds total
Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat
lp__ -7.3 3.6e-02 7.5e-01 -8.8 -7.0 -6.8 436 30888 1.0e+00
accept_stat__ 0.86 7.2e-03 2.1e-01 0.37 0.96 1.0 859 60888 1.0e+00
stepsize__ 1.1 4.0e-15 4.0e-15 1.1 1.1 1.1 1.0 71 1.0e+00
treedepth__ 1.4 1.6e-02 5.0e-01 1.0 1.0 2.0 1013 71761 1.0e+00
n_leapfrog__ 2.2 3.3e-02 9.8e-01 1.0 3.0 3.0 871 61698 1.0e+00
divergent__ 0.00 0.0e+00 0.0e+00 0.00 0.00 0.00 500 35433 -nan
energy__ 7.8 5.2e-02 1.1e+00 6.8 7.5 9.9 415 29425 1.0e+00
theta 0.25 5.4e-03 1.2e-01 0.080 0.24 0.46 484 34286 1.0e+00
Samples were drawn using hmc with nuts.
For each parameter, N_Eff is a crude measure of effective sample size,
and R_hat is the potential scale reduction factor on split chains (at
convergence, R_hat=1).
$ stansummary output2.txt
Inference for Stan model: bernoulli_model
1 chains: each with iter=(1000); warmup=(0); thin=(1); 1000 iterations saved.
Warmup took (0.0079) seconds, 0.0079 seconds total
Sampling took (0.013) seconds, 0.013 seconds total
Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat
lp__ -8.8 4.0e-02 6.7e-01 -10 -8.6 -8.3 285 22163 1.0e+00
accept_stat__ 0.92 3.4e-03 1.2e-01 0.67 0.97 1.0 1222 95188 1.0e+00
stepsize__ 1.0 2.9e-15 2.9e-15 1.0 1.0 1.0 1.0 78 1.0e+00
treedepth__ 1.4 1.7e-02 4.8e-01 1.0 1.0 2.0 838 65249 1.0e+00
n_leapfrog__ 2.3 3.6e-02 9.7e-01 1.0 3.0 3.0 737 57421 1.0e+00
divergent__ 0.00 0.0e+00 0.0e+00 0.00 0.00 0.00 500 38941 -nan
energy__ 9.3 5.3e-02 9.3e-01 8.4 9.0 11 305 23733 1.0e+00
theta 0.51 7.2e-03 1.4e-01 0.28 0.52 0.74 363 28296 1.0e+00
Samples were drawn using hmc with nuts.
For each parameter, N_Eff is a crude measure of effective sample size,
and R_hat is the potential scale reduction factor on split chains (at
convergence, R_hat=1).
The second main.cpp, which makes the two calls in parallel using std::thread:
#include <stan/services/error_codes.hpp>
#include <cmdstan/command.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/exception_ptr.hpp>
#include <thread>
/**
 * Thread entry point: runs one CmdStan invocation with the given arguments.
 *
 * An exception that leaves a std::thread's start function ends the whole
 * process through std::terminate, and a try/catch on the launching thread
 * cannot intercept it, so failures are reported here instead.
 *
 * NOTE(review): the reported crash is inside
 * stan::math::precomputed_gradients_vari::chain, called from stan::math::grad.
 * That is consistent with both threads sharing one reverse-mode autodiff
 * stack. Stan Math documents that the stack is only thread-local when the
 * program is built with -DSTAN_THREADS -- confirm against the Stan Math
 * threading documentation for the version in use.
 *
 * @param argc number of entries in argv
 * @param argv CmdStan-style argument vector; it must stay alive until the
 *             thread running this function has been joined
 */
void worker(int argc, const char * argv[])
{
  try {
    cmdstan::command(argc, argv);
  } catch (const std::exception& e) {
    std::cout << e.what() << std::endl;
  }
}
/**
 * Runs the Bernoulli model on data1.R and data2.R concurrently, one
 * std::thread per CmdStan invocation, and waits for both to finish.
 *
 * The program's own command-line arguments are ignored; each thread uses the
 * fixed argument vector below.
 *
 * @return 0 once both threads have been joined, or
 *         stan::services::error_codes::SOFTWARE if a std::exception is caught
 *         on the main thread (for example, a thread could not be started).
 */
int main(int argc, const char* argv[]) {
  try {
    const char * arr1 [] = {"PROGRAM", "sample", "data", "file=data1.R", "random", "seed=123", "output", "file=output1.txt"};
    const char * arr2 [] = {"PROGRAM", "sample", "data", "file=data2.R", "random", "seed=123", "output", "file=output2.txt"};
    // Take each argument count from its array so it cannot drift out of sync
    // with the argument list.
    const int arc1 = static_cast<int>(sizeof(arr1) / sizeof(arr1[0]));
    const int arc2 = static_cast<int>(sizeof(arr2) / sizeof(arr2[0]));
    // A constant expression makes t an ordinary fixed-size array; sized by a
    // plain int it was a variable-length array, which is a compiler extension
    // rather than standard C++.
    constexpr int num_threads = 2;
    std::thread t[num_threads];
    try {
      // The argument arrays live in this frame and outlive both threads
      // because every started thread is joined before main returns.
      t[0] = std::thread(worker, arc1, arr1);
      t[1] = std::thread(worker, arc2, arr2);
    } catch (...) {
      // Destroying a joinable std::thread calls std::terminate, so join
      // whatever was already started before passing the error on.
      for (std::thread& th : t) {
        if (th.joinable()) {
          th.join();
        }
      }
      throw;
    }
    for (std::thread& th : t) {
      th.join();
    }
    return 0;
  } catch (const std::exception& e) {
    std::cout << e.what() << std::endl;
    return stan::services::error_codes::SOFTWARE;
  }
}
Here, two threads are created so that the model runs on both data files in parallel. However, the program crashes with a segmentation fault.
$ ./bernoulli
method = sample (Default)
sample
num_samples = 1000 (Default)
num_warmup = 1000 (Default)
save_warmup = 0 (Default)
thin = 1 (Default)
adapt
engaged = 1 (Default)
method = sample (Default)
sample
num_samples = 1000 (Default)
num_warmup = 1000 (Default)
save_warmup = 0 (Default)
thin = 1 (Default)
adapt
engaged = 1 (Default)
gamma = 0.050000000000000003 (Default)
delta = 0.80000000000000004 (Default)
kappa = 0.75 (Default)
t0 = 10 (Default)
init_buffer = 75 (Default)
term_buffer = 50 (Default)
window = 25 (Default)
algorithm = hmc (Default)
hmc
engine = nuts (Default)
nuts
max_depth = 10 (Default)
metric = diag_e (Default)
gamma = 0.050000000000000003 (Default)
delta = 0.80000000000000004 (Default)
metric_file = (Default)
stepsize = 1 (Default)
stepsize_jitter = 0 (Default)
id = 0 (Default)
data
file = data2.R
init = 2 (Default)
random
seed = 123
output
file = output2.txt
diagnostic_file = (Default)
refresh = 100 (Default)
kappa = 0.75 (Default)
t0 = 10 (Default)
init_buffer = 75 (Default)
term_buffer = 50 (Default)
window = 25 (Default)
algorithm = hmc (Default)
hmc
engine = nuts (Default)
nuts
max_depth = 10 (Default)
metric = diag_e (Default)
metric_file = (Default)
stepsize = 1 (Default)
stepsize_jitter = 0 (Default)
id = 0 (Default)
data
file = data1.R
init = 2 (Default)
random
seed = 123
output
file = output1.txt
diagnostic_file = (Default)
refresh = 100 (Default)
Gradient evaluation took 1e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
Adjust your expectations accordingly!
Iteration: 1 / 2000 [ 0%] (Warmup)
Segmentation fault (core dumped)
The GDB backtrace is shown below.
$ gdb ./bernoulli
GNU gdb (Ubuntu 8.1-0ubuntu3) 8.1.0.20180409-git
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from ./bernoulli...done.
run
(gdb) run
Starting program: /home/lijin/work/mwe/cmdstanmwe/example/c++_thread/bernoulli/thread/bernoulli
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
[New Thread 0x7ffff6e85700 (LWP 23060)]
method = sample (Default)
sample
num_samples = 1000 (Default)
num_warmup = 1000 (Default)
save_warmup = 0 (Default)
thin = 1 (Default)
adapt
engaged = 1 (Default)
gamma = 0.050000000000000003 (Default)
delta = 0.80000000000000004 (Default)
kappa = 0.75 (Default)
t0 = 10 (Default)
init_buffer = 75 (Default)
term_buffer = 50 (Default)
window = 25 (Default)
algorithm = hmc (Default)
hmc
engine = nuts (Default)
nuts
max_depth = 10 (Default)
metric = diag_e (Default)
metric_file = (Default)
stepsize = 1 (Default)
stepsize_jitter = 0 (Default)
id = 0 (Default)
data
file = data1.R
init = 2 (Default)
random
seed = 123
output
file = output1.txt
diagnostic_file = (Default)
refresh = 100 (Default)
[New Thread 0x7ffff6684700 (LWP 23061)]
Gradient evaluation took 1e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
Adjust your expectations accordingly!
Iteration: 1 / 2000 [ 0%] (Warmup)
method = sample (Default)
sample
num_samples = 1000 (Default)
num_warmup = 1000 (Default)
save_warmup = 0 (Default)
thin = 1 (Default)
adapt
engaged = 1 (Default)
gamma = 0.050000000000000003 (Default)
delta = 0.80000000000000004 (Default)
kappa = 0.75 (Default)
t0 = 10 (Default)
init_buffer = 75 (Default)
term_buffer = 50 (Default)
window = 25 (Default)
algorithm = hmc (Default)
hmc
engine = nuts (Default)
nuts
max_depth = 10 (Default)
metric = diag_e (Default)
metric_file = (Default)
stepsize = 1 (Default)
stepsize_jitter = 0 (Default)
id = 0 (Default)
data
file = data2.R
init = 2 (Default)
random
seed = 123
output
file = output2.txt
diagnostic_file = (Default)
refresh = 100 (Default)
Thread 2 "bernoulli" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7ffff6e85700 (LWP 23060)]
0x000055555556d7c2 in stan::math::precomputed_gradients_vari::chain (this=0x5555558703b8) at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/lib/stan_math/stan/math/rev/core/precomputed_gradients.hpp:72
72 varis_[i]->adj_ += adj_ * gradients_[i];
(gdb) where
#0 0x000055555556d7c2 in stan::math::precomputed_gradients_vari::chain (this=0x5555558703b8) at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/lib/stan_math/stan/math/rev/core/precomputed_gradients.hpp:72
#1 0x00005555555ddc3a in stan::math::grad (vi=<optimized out>) at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/lib/stan_math/stan/math/rev/core/grad.hpp:44
#2 stan::math::gradient<stan::model::model_functional<stan::model::model_base> > (f=..., x=..., fx=@0x7ffff6e83110: -4.6809100078512351, grad_fx=...)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/lib/stan_math/stan/math/rev/mat/functor/gradient.hpp:50
#3 0x00005555555de17e in stan::model::gradient<stan::model::model_base> (model=..., x=..., f=@0x7ffff6e83110: -4.6809100078512351, grad_f=..., logger=...)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/model/gradient.hpp:31
#4 0x00005555555de36e in stan::mcmc::base_hamiltonian<stan::model::model_base, stan::mcmc::diag_e_point, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > >::update_potential_gradient (this=0x7ffff6e83140, z=..., logger=...)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/mcmc/hmc/hamiltonians/base_hamiltonian.hpp:69
#5 0x00005555555e3e89 in stan::mcmc::expl_leapfrog<stan::mcmc::diag_e_metric<stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > > >::update_q (logger=..., epsilon=-1.1153728433577428, hamiltonian=..., z=..., this=0x7ffff6e83138)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/mcmc/hmc/integrators/expl_leapfrog.hpp:27
#6 stan::mcmc::base_leapfrog<stan::mcmc::diag_e_metric<stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > > >::evolve (this=0x7ffff6e83138, z=..., hamiltonian=warning: RTTI symbol not found for class 'stan::mcmc::diag_e_metric<stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > >'
..., epsilon=-1.1153728433577428, logger=...)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/mcmc/hmc/integrators/base_leapfrog.hpp:24
#7 0x000055555560b73b in stan::mcmc::base_nuts<stan::model::model_base, stan::mcmc::diag_e_metric, stan::mcmc::expl_leapfrog, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > >::build_tree (this=0x7ffff6e830e0, depth=<optimized out>, z_propose=..., p_sharp_left=...,
p_sharp_right=..., rho=..., H0=7.1728643944582391, sign=-1, n_leapfrog=@0x7ffff6e82a24: 0, log_sum_weight=@0x7ffff6e82a80: -inf, sum_metro_prob=@0x7ffff6e82a28: 0, logger=...)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/mcmc/hmc/nuts/base_nuts.hpp:215
#8 0x000055555560c361 in stan::mcmc::base_nuts<stan::model::model_base, stan::mcmc::diag_e_metric, stan::mcmc::expl_leapfrog, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > >::transition (this=0x7ffff6e830e0, init_sample=..., logger=...)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/mcmc/hmc/nuts/base_nuts.hpp:122
#9 0x000055555560c4b6 in stan::mcmc::adapt_diag_e_nuts<stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > >::transition (this=<optimized out>, init_sample=..., logger=..., this=<optimized out>)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp:27
#10 0x000055555559134a in stan::services::util::generate_transitions<stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > > (sampler=warning: RTTI symbol not found for class 'stan::mcmc::adapt_diag_e_nuts<stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > >'
..., num_iterations=1000, finish=2000, num_thin=1, refresh=100, save=false, mcmc_writer=..., init_s=..., model=...,
base_rng=..., callback=..., logger=..., warmup=true, start=0) at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/services/util/generate_transitions.hpp:71
#11 0x00005555555e7bf5 in stan::services::util::run_adaptive_sampler<stan::mcmc::adapt_diag_e_nuts<stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > >, stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > > (diagnostic_writer=..., sample_writer=..., logger=..., interrupt=..., rng=...,
save_warmup=false, refresh=100, num_thin=1, num_samples=1000, num_warmup=1000, cont_vector=std::vector of length 1, capacity 1 = {...}, model=..., sampler=warning: RTTI symbol not found for class 'stan::mcmc::adapt_diag_e_nuts<stan::model::model_base, boost::random::additive_combine_engine<boost::random::linear_congruential_engine<unsigned int, 40014u, 0u, 2147483563u>, boost::random::linear_congruential_engine<unsigned int, 40692u, 0u, 2147483399u> > >'
..., this=<optimized out>)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/services/util/run_adaptive_sampler.hpp:69
#12 stan::services::sample::hmc_nuts_diag_e_adapt<stan::model::model_base> (model=..., init=..., init_inv_metric=..., random_seed=<optimized out>, chain=<optimized out>, init_radius=<optimized out>,
num_warmup=1000, num_samples=1000, num_thin=1, save_warmup=false, refresh=100, stepsize=<optimized out>, stepsize_jitter=<optimized out>, max_depth=10, delta=<optimized out>, gamma=<optimized out>,
kappa=<optimized out>, t0=<optimized out>, init_buffer=75, term_buffer=50, window=25, interrupt=..., logger=..., init_writer=..., sample_writer=..., diagnostic_writer=...)
at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/services/sample/hmc_nuts_diag_e_adapt.hpp:106
#13 0x0000555555598eca in stan::services::sample::hmc_nuts_diag_e_adapt<stan::model::model_base> (diagnostic_writer=..., sample_writer=..., init_writer=..., logger=..., interrupt=..., window=25, term_buffer=50,
init_buffer=75, t0=10, kappa=0.75, gamma=0.050000000000000003, delta=0.80000000000000004, max_depth=10, stepsize_jitter=0, stepsize=1, refresh=100, save_warmup=false, num_thin=1, num_samples=1000,
num_warmup=1000, init_radius=2, chain=0, random_seed=123, init=..., model=...) at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/stan/src/stan/services/sample/hmc_nuts_diag_e_adapt.hpp:173
#14 cmdstan::command (argc=<optimized out>, argv=<optimized out>) at /home/lijin/work/mwe/cmdstanmwe/install/cmdstan/src/cmdstan/command.hpp:523
#15 0x00007ffff7b0966f in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#16 0x00007ffff727e6db in start_thread (arg=0x7ffff6e85700) at pthread_create.c:463
#17 0x00007ffff6fa788f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
May I ask how to run multiple cmdstan::command calls in parallel? Thank you.
- Operating System: Ubuntu 18.04
- CmdStan Version: 2.20.0
- Compiler/Toolkit: g++ 7.4.0