I would like to use the Stan math library to calculate gradients and Hessians for a problem I am working on. To this end, I tried to replicate the example on page 12 of the article “The Stan Math Library: Reverse-Mode Automatic Differentiation in C++” (call it TSML, found at https://arxiv.org/pdf/1509.07164.pdf), using functions. This worked for the gradient, and I also managed to calculate the Hessian:
C++ code
#include <Rcpp.h>
#include <RcppEigen.h>
#include <vector>
#include <iostream>
#include <stan/math.hpp>
#include <stan/math/prim/scal.hpp>
#include <stan/math/mix/mat.hpp>
#include <boost/math/tools/promotion.hpp>
// [[Rcpp::depends(RcppEigen)]]
using Eigen::Dynamic;
using Eigen::VectorXd;
using Eigen::MatrixXd;
using namespace Rcpp;
// From page 12 of https://arxiv.org/pdf/1509.07164.pdf
struct normal_ll {
const VectorXd y_;
normal_ll(const VectorXd& y) : y_(y) { }
template <typename T>
T operator()(const Eigen::Matrix<T, Dynamic, 1>& theta) const {
T mu = theta[0];
T sigma = theta[1];
T lp = stan::math::normal_lpdf(y_, mu, sigma);
return lp;
}
stan::math::var operator()(const Eigen::Matrix<stan::math::var, Dynamic, 1>& theta) const {
stan::math::var mu = theta[0];
stan::math::var sigma = theta[1];
stan::math::var lp = normal_lpdf(y_, mu, sigma);
return lp;
}
};
// [[Rcpp::export]]
VectorXd normal_ll_grad(const VectorXd& y,
double mu,
double sigma) {
normal_ll f(y);
VectorXd x(2);
x << mu, sigma;
double fx;
VectorXd grad_fx;
stan::math::gradient(f, x, fx, grad_fx);
return grad_fx;
}
// [[Rcpp::export]]
MatrixXd normal_ll_hess(const VectorXd& y,
double mu,
double sigma) {
normal_ll f(y);
VectorXd x(2);
x << mu, sigma;
double fx;
VectorXd grad_fx;
MatrixXd H;
stan::math::hessian(f, x, fx, grad_fx, H);
return H;
}
R code
library(Rcpp)
lib_path <- .libPaths()[1]
includes <- paste0('-I\"', lib_path, "/",
c("Rcpp", "RcppEigen", "StanHeaders", "BH"),
'/include\"')
Sys.setenv("PKG_CXXFLAGS" = paste(includes, collapse = " "))
sourceCpp("Test/try_hessian.cpp")
# Replicate example on page 12 of https://arxiv.org/pdf/1509.07164.pdf
normal_ll_grad(c(1.3, 2.7, -1.9), 1.3, 2.9)
# Now try Hessian
normal_ll_hess(c(1.3, 2.7, -1.9), 1.3, 2.9)
If I now try to do something similar for a function in which I transform the parameter vector before calculating the lpdf, my program crashes. The following C++ code follows that above, in the same file:
Crashing code
template <typename T1, typename T2>
T1 transform_mean(const T1& mu, const T2& coeffs) {
Eigen::Matrix<T1, Dynamic, 1> res;
res = stan::math::elt_multiply(mu, coeffs);
return res;
}
struct posterior_lpdf {
const VectorXd y_;
const VectorXd m_;
posterior_lpdf(const VectorXd& y, const VectorXd& m) : y_(y), m_(m) { }
template <typename T>
T operator()(const Eigen::Matrix<T, Dynamic, 1>& theta) const {
Eigen::Matrix<T, Dynamic, 1> theta_means(theta.size() - 1);
for (int i = 1; i < theta.size(); ++i) {
theta_means << theta(i);
}
// The 3 lines below will allow the program to compile, but crash when run
T sigma = theta[0];
T lp = normal_lpdf(y_, theta_means, sigma);
return lp;
// The 4 lines below show what I would like to do, in this example
// Eigen::Matrix<T, Dynamic, 1> mu = transform_mean(theta_means, m_);
// T sigma = theta[0];
// T lp = normal_lpdf(y_, mu, sigma);
// return lp;
}
};
// [[Rcpp::export]]
VectorXd posterior_grad(const VectorXd& y,
const VectorXd& m,
const VectorXd& mu,
const double sigma) {
posterior_lpdf f(y, m);
VectorXd x;
x << sigma;
for (int i = 0; i < m.size(); ++i) {
x << mu[i];
}
double fx;
VectorXd grad_fx;
stan::math::gradient(f, x, fx, grad_fx);
return grad_fx;
}
R code that makes it crash
posterior_grad(rnorm(5, 1:5, 0.2), 1:5, runif(5, 0.95, 1.05), 2)
What am I doing wrong here? How can I make a transformation (using both parameters and non-parameters, e.g. doubles) of the parameter vector before calculating the lpdf, and then get the gradient (and Hessian) with respect to the non-transformed parameters?