tl;dr Calling a C program, which uses stan::math::gradient
, on separate threads from Julia segfaults and I don’t understand why, while a similar program (called similarly) without stan::math::gradient
runs fine. Does anyone have any ideas why spawning separate threads each of which calls stan::math::gradient
doesn’t work?
Julia has a nice interface for calling C functions which are compiled as shared libraries. One can successfully write a C shared library which calls a C++ shared library which itself uses Stan-math. Here’s a minimal reproducible example which contains two C functions logistic
, which segfaults when called from Julia on separate threads, and logisticm
, which runs fine when called from Julia on separate threads.
C, C++, and Makefile files
C
#include "models.h"
extern void logistic(double* _fx, double* _grad_fx,
double* _theta, double* _X, double* _y, int _N, int _K) {
_logistic(_fx, _grad_fx, _theta, _X, _y, _N, _K);
}
extern void logisticm(double* _fx, double* _grad_fx,
double* _theta, double* _X, double* _y, int _N, int _K) {
_logisticm(_fx, _grad_fx, _theta, _X, _y, _N, _K);
}
and header file
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
void logistic(double* _fx, double* _grad_fx,
double* _theta, double* _X, double* _y, int _N, int _K);
void _logistic(double* _fx, double* _grad_fx,
double* _theta, double* _X, double* _y, int _N, int _K);
void logisticm(double* _fx, double* _grad_fx,
double* _theta, double* _X, double* _y, int _N, int _K);
void _logisticm(double* _fx, double* _grad_fx,
double* _theta, double* _X, double* _y, int _N, int _K);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
C++
#include "models.h"
#include <exception>
#include <cmath>
#include <stan/math.hpp>
extern "C" {
void _logistic(double* _fx,
double* _grad_fx,
double* _theta,
double* _X,
double* _y,
int _N,
int _K) {
Eigen::VectorXd grad_fx;
int D = _K + 1;
const Eigen::Map<Eigen::MatrixXd> X(_X, _N, _K);
const Eigen::Map<Eigen::VectorXd> y(_y, _N);
const Eigen::Map<Eigen::VectorXd> theta(_theta, D);
stan::math::gradient([&X, &y, &_K](auto q) {
double lp(0.0);
stan::math::accumulator<stan::math::var_value<double> > lp_accum;
try {
lp_accum.add(stan::math::normal_lpdf<true>(q, 0, 1));
auto a = stan::math::add(q(0), stan::math::multiply(X, q.tail(_K).eval()));
lp_accum.add(stan::math::bernoulli_logit_lpmf<true>(y, a));
} catch (const std::exception& e) {}
lp_accum.add(lp);
return -lp_accum.sum();
}, theta, *_fx, grad_fx);
// copy out
// could avoid this copy with new gradient method
for (int d = 0; d < D; d++) {
_grad_fx[d] = grad_fx(d);
}
}
void _logisticm(double* _fx,
double* _grad_fx,
double* _theta,
double* _X,
double* _y,
int _N,
int _K) {
int D = _K + 1;
const Eigen::Map<Eigen::MatrixXd> X(_X, _N, _K);
const Eigen::Map<Eigen::VectorXd> y(_y, _N);
const Eigen::Map<Eigen::VectorXd> theta(_theta, D);
*_fx = -0.5 * std::pow(theta(0), 2);
_grad_fx[0] = -theta(0);
Eigen::VectorXd Xb = X * theta.tail(_K);
for (int n = 0; n < _N; n++) {
auto tmp = theta(0) + Xb(n);
auto s = 2.0 * y(n) - 1.0;
auto nsa = std::exp(-s * tmp);
*_fx += -std::log1p(nsa);
Xb(n) = s * nsa / (1 + nsa);
_grad_fx[0] += Xb(n);
}
for (int k = 0; k < _K; k++) {
*_fx += -0.5 * std::pow(theta(k + 1), 2);
_grad_fx[k + 1] = -theta(k + 1) + X.col(k).dot(Xb);
}
}
}
Makefile
Edit the paths appropriately.
%.so: logistic.cpp
clang++ -shared -fPIC -std=c++1y -Wno-unknown-warning-option -Wno-tautological-compare -Wno-sign-compare -D_REENTRANT -Wno-ignored-attributes -I /Users/ez/math/lib/tbb_2020.3/include -O3 -I /Users/ez/math/ -I /Users/ez/math/lib/eigen_3.3.9 -I /Users/ez/math/lib/boost_1.75.0 -I /Users/ez/math/lib/sundials_6.1.1/include -I /Users/ez/math/lib/sundials_6.1.1/src/sundials -DBOOST_DISABLE_ASSERTS -Wl,-L,"/Users/ez/math/lib/tbb" -Wl,-rpath,"/Users/ez/math/lib/tbb" logistic.cpp -Wl,-L,"/Users/ez/math/lib/tbb" -Wl,-rpath,"/Users/ez/math/lib/tbb" /Users/ez/math/lib/sundials_6.1.1/lib/libsundials_nvecserial.a /Users/ez/math/lib/sundials_6.1.1/lib/libsundials_cvodes.a /Users/ez/math/lib/sundials_6.1.1/lib/libsundials_idas.a /Users/ez/math/lib/sundials_6.1.1/lib/libsundials_kinsol.a /Users/ez/math/lib/tbb/libtbb.dylib /Users/ez/math/lib/tbb/libtbbmalloc.dylib /Users/ez/math/lib/tbb/libtbbmalloc_proxy.dylib -o logistic_lib.so
make: models.c logistic_lib.so
clang -o models.o -c models.c -O3
clang -shared -o stan_models.so models.o logistic_lib.so -lm -fPIC -O3
clean:
rm stan_models.o stan_models.so logistic_lib.so
The function logistic
uses stan::math::gradient
. The function logisticm
calculates the same thing manually (up to the sign). That logisticm
works as I plan makes me think this is a Stan-math issue, not a Julia issue.
Here’s a minimal reproducible example Julia file which works with the above files, so long as everything is in the same directory.
Julia file
stanlib = joinpath(@__DIR__, "stan_models.so")
df = [-1.25 -1.23 0.0; -0.45 2.24 1.0];
q = randn(3);
N = size(df, 1);
K = size(df, 2) - 1;
y = df[:, K+1];
X = df[:, 1:K];
fx = zeros(1);
grad_fx = zeros(K+1);
# change to :logistic; this will still work
ccall((:logisticm, stanlib),
Cvoid,
(Ref{Cdouble},
Ref{Cdouble},
Ref{Cdouble},
Ref{Cdouble},
Ref{Cdouble},
Cint,
Cint),
fx, grad_fx, q, X, y, N, K)
C = 10;
# start Julia with more than 1 thread. $ julia -t 2
nt = Threads.nthreads();
fxs = [zeros(1) for _ in 1:C];
grad_fxs = [zeros(K+1) for _ in 1:C];
@sync for it in 1:nt
Threads.@spawn for c in it:nt:C
ccall((:logisticm, stanlib), # change to :logistic; this borks
Cvoid,
(Ref{Cdouble},
Ref{Cdouble},
Ref{Cdouble},
Ref{Cdouble},
Ref{Cdouble},
Cint,
Cint),
fxs[c], grad_fxs[c], q, X, y, N, K)
end
end
My understanding is that the @spawn-ed threads from Julia are independent, despite sharing a few read-only variables. My conceptual model, which I believe I confirmed with the C function logisticm
, is that calling this shared library from independent threads should work fine. However, this model is not true when I call stan::math::gradient
. So my conceptual model or understanding must be off somewhere.
Any comments/suggestions/tips are much appreciated. Thanks in advance.
System info:
Stan-math v4.3.2
mac OS 12.2
Julia Version 1.7.2
Apple clang version 11.0.3