# Gradient after transformation (math library)

I would like to use the Stan math library to calculate gradients and Hessians for a problem I am working on. To this end, I tried to replicate the example on page 12 of the article “The Stan Math Library: Reverse-Mode Automatic Differentiation in C++” (call it TSML, found at https://arxiv.org/pdf/1509.07164.pdf), using functions. This worked for the gradient, and I also managed to calculate the Hessian:

C++ code
``````#include <Rcpp.h>
#include <RcppEigen.h>
#include <vector>
#include <iostream>

#include <stan/math.hpp>
#include <stan/math/prim/scal.hpp>
#include <stan/math/mix/mat.hpp>

#include <boost/math/tools/promotion.hpp>

// [[Rcpp::depends(RcppEigen)]]

using Eigen::Dynamic;
using Eigen::VectorXd;
using Eigen::MatrixXd;

using namespace Rcpp;

// From page 12 of https://arxiv.org/pdf/1509.07164.pdf
struct normal_ll {
const VectorXd y_;
normal_ll(const VectorXd& y) : y_(y) { }

template <typename T>
T operator()(const Eigen::Matrix<T, Dynamic, 1>& theta) const {
T mu = theta[0];
T sigma = theta[1];
T lp = stan::math::normal_lpdf(y_, mu, sigma);
return lp;
}

stan::math::var operator()(const Eigen::Matrix<stan::math::var, Dynamic, 1>& theta) const {
stan::math::var mu = theta[0];
stan::math::var sigma = theta[1];
stan::math::var lp = normal_lpdf(y_, mu, sigma);
return lp;
}
};

// [[Rcpp::export]]
VectorXd normal_ll_grad(const VectorXd& y,
double mu,
double sigma) {
normal_ll f(y);

VectorXd x(2);
x << mu, sigma;

double fx;

}

// [[Rcpp::export]]
MatrixXd normal_ll_hess(const VectorXd& y,
double mu,
double sigma) {

normal_ll f(y);

VectorXd x(2);
x << mu, sigma;

double fx;
MatrixXd H;
stan::math::hessian(f, x, fx, grad_fx, H);

return H;
}
``````
R code
``````library(Rcpp)

lib_path <- .libPaths()[1]
includes <- paste0('-I\"', lib_path, "/",
c("Rcpp", "RcppEigen", "StanHeaders", "BH"),
'/include\"')
Sys.setenv("PKG_CXXFLAGS" = paste(includes, collapse = " "))

sourceCpp("Test/try_hessian.cpp")

# Replicate example on page 12 of https://arxiv.org/pdf/1509.07164.pdf
normal_ll_grad(c(1.3, 2.7, -1.9), 1.3, 2.9)

# Now try Hessian
normal_ll_hess(c(1.3, 2.7, -1.9), 1.3, 2.9)
``````

If I now try to do something similar for a function in which I transform the parameter vector before calculating the lpdf, my program crashes. The following C++ code follows that above, in the same file:

Crashing code
``````template <typename T1, typename T2>
T1 transform_mean(const T1& mu, const T2& coeffs) {
Eigen::Matrix<T1, Dynamic, 1> res;
res = stan::math::elt_multiply(mu, coeffs);
return res;
}

struct posterior_lpdf {
const VectorXd y_;
const VectorXd m_;
posterior_lpdf(const VectorXd& y, const VectorXd& m) : y_(y), m_(m) { }

template <typename T>
T operator()(const Eigen::Matrix<T, Dynamic, 1>& theta) const {

Eigen::Matrix<T, Dynamic, 1> theta_means(theta.size() - 1);
for (int i = 1; i < theta.size(); ++i) {
theta_means << theta(i);
}

// The 3 lines below will allow the program to compile, but crash when run
T sigma = theta[0];
T lp = normal_lpdf(y_, theta_means, sigma);
return lp;

// The 4 lines below show what I would like to do, in this example
// Eigen::Matrix<T, Dynamic, 1> mu = transform_mean(theta_means, m_);
// T sigma = theta[0];
// T lp = normal_lpdf(y_, mu, sigma);
// return lp;
}
};

// [[Rcpp::export]]
VectorXd posterior_grad(const VectorXd& y,
const VectorXd& m,
const VectorXd& mu,
const double sigma) {
posterior_lpdf f(y, m);

VectorXd x;
x << sigma;
for (int i = 0; i < m.size(); ++i) {
x << mu[i];
}

double fx;
}
``````
R code that makes it crash
``````posterior_grad(rnorm(5, 1:5, 0.2), 1:5, runif(5, 0.95, 1.05), 2)
``````

What am I doing wrong here? How can I make a transformation (using both parameters and non-parameters, e.g. doubles) of the parameter vector before calculating the lpdf, and then get the gradient (and Hessian) with respect to the non-transformed parameters?

The program is segfaulting because of this section:

``````  VectorXd x;
x << sigma;
for (int i = 0; i < m.size(); ++i) {
x << mu[i];
}
``````

You need to initialise `x` to a size before you can use the `<<` operator (e.g. `VectorXd x(N);` for vector of size N).

Hope it helps!

1 Like

Ah, silly mistake. Thanks for pointing that out. Now, that did not completely fix the problem, because there was (I believe) some type casting problems with my original code. I think I managed to fix these problems as well, and for completeness, here is some example code that will compile and run:

C++ code
``````#include <Rcpp.h>
#include <RcppEigen.h>
#include <vector>
#include <iostream>

#include <stan/math.hpp>
#include <stan/math/prim/scal.hpp>
#include <stan/math/mix/mat.hpp>

#include <boost/math/tools/promotion.hpp>

// [[Rcpp::depends(RcppEigen)]]

using Eigen::Dynamic;
using Eigen::VectorXd;
using Eigen::MatrixXd;

using namespace Rcpp;

template <typename T1, typename T2, int N>
Eigen::Matrix<typename boost::math::tools::promote_args<T1, T2>::type, N, 1>
transform_mean(const Eigen::Matrix<T1, N, 1>& m1,
const Eigen::Matrix<T2, N, 1>& m2) {
Eigen::Matrix<typename boost::math::tools::promote_args<T1, T2>::type, N, 1> new_mean(m1.size());
new_mean(0) = m1(0) + m2(0);
for (int i = 1; i < m1.size(); ++i) {
new_mean(i) = m1(i) * m1(i - 1) + m2(i);
}
return new_mean;
}

struct posterior_lpdf {
const VectorXd y_;
const VectorXd m_;
posterior_lpdf(const VectorXd& y, const VectorXd& m) : y_(y), m_(m) { }

template <typename T>
T operator()(const Eigen::Matrix<T, Dynamic, 1>& theta) const {
Eigen::Matrix<T, Dynamic, 1> new_mean(theta.size());
new_mean = transform_mean(theta, m_);
T lp = stan::math::normal_lpdf(y_, new_mean, 1.0);
return lp;
}
};

// [[Rcpp::export]]
VectorXd posterior_grad(const VectorXd& y,
const VectorXd& m,
const VectorXd& x) {
posterior_lpdf f(y, m);

double fx;
}

// [[Rcpp::export]]
MatrixXd posterior_hess(const VectorXd& y,
const VectorXd& m,
const VectorXd& x) {
posterior_lpdf f(y, m);

double fx;
MatrixXd H;
stan::math::hessian(f, x, fx, grad_fx, H);

return H;
}
``````
R code
``````library(Rcpp)
lib_path <- .libPaths()[1]
includes <- paste0('-I\"', lib_path, "/",
c("Rcpp", "RcppEigen", "StanHeaders", "BH"),
'/include\"')
Sys.setenv("PKG_CXXFLAGS" = paste(includes, collapse = " "))

sourceCpp("transform_theta.cpp")

make_mean <- function(tt, mm) {
n <- length(tt)
out <- numeric(n)
out[1] <- tt[1] + mm[1]
out[-1] <- tail(tt, n - 1) * head(tt, n - 1) + mm[-1]
return(out)
}

set.seed(1)
N <- 3
m <- seq_len(N)
theta <- runif(N, 0.95, 1.05)
true_mean <- make_mean(theta, m)
y <- rnorm(N, true_mean, 1)

theta_eval <- theta + rnorm(N, 0, 0.01)
eval_mean <- make_mean(theta_eval, m)