I am trying to build a model in which my parameter vector is transformed in various ways before calculating the likelihood. Eventually, I will want to calculate the gradient and Hessian of the likelihood. One or more of the transformations of (parts of) the input vector will involve matrix-matrix or matrix-vector multiplication, where the matrices and vectors involved will either contain doubles, or (transformations of) parameter elements.
As a simple example, here is some code which calculates a linear combination of parameters for the mean parameter vector of a normal distribution. In the function prop_to_mean, I show 3 alternatives (two of which must be commented out before compilation), of which only the third (which only uses element-by-element multiplication) works. The first alternatives generate a host of compiler errors, which I also list below (but that I do not understand).
C++ code (placed in functions.cpp)
#include <Rcpp.h>
#include <RcppEigen.h>
#include <vector>
#include <stan/math.hpp>
#include <stan/math/prim/scal.hpp>
#include <stan/math/mix/mat.hpp>
#include <boost/math/tools/promotion.hpp>
using namespace Rcpp;
using namespace RcppEigen;
// [[Rcpp::depends(RcppEigen)]]
// Transform proportions together with endmember mean hyperpars to obs. means
template <typename T_P, int P_rows, int P_cols>
Eigen::Matrix<typename boost::math::tools::promote_args<T_P>::type, P_rows, 1>
prop_to_mean(const Eigen::Matrix<T_P, P_rows, P_cols>& P,
const Eigen::Matrix<double, P_rows, P_cols>& M) {
Eigen::Matrix<typename boost::math::tools::promote_args<T_P>::type,
P_rows, 1> mu(P.rows());
// Alternative 1 (does not work)
for (int i = 0; i < P.rows(); ++i) {
mu(i) = P.row(i) * M.row(i).transpose();
}
// Alternative 2 (does not work)
Eigen::Matrix<T_P, P_rows, P_cols> Mcasted(M.rows(), M.cols());
Mcasted = M.template cast<T_P>();
for (int i = 0; i < P.rows(); ++i) {
mu(i) = P.row(i) * M.row(i).transpose();
}
// Alternative 3 (works)
for (int i = 0; i < P.rows(); ++i) {
mu(i) = 0.0;
for (int j = 0; j < P.cols(); ++j) {
mu(i) += P(i, j) * M(i, j);
}
}
return mu;
}
struct test_lpdf {
const Eigen::VectorXd y_;
const Eigen::MatrixXd M_;
test_lpdf(const Eigen::VectorXd& y,
const Eigen::MatrixXd& M) : y_(y), M_(M) { }
template <typename T>
T operator()(const Eigen::Matrix<T, Eigen::Dynamic, 1>& theta) const {
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> X(y_.size(), 2);
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> P(y_.size(), 3);
for (int i = 0; i < y_.size(); ++i) {
P(i, 0) = theta(i);
P(i, 1) = theta(i + y_.size());
P(i, 2) = theta(i + 2 * y_.size());
}
Eigen::Matrix<T, Eigen::Dynamic, 1> mu(y_.size());
mu = prop_to_mean(P, M_);
T lp = stan::math::normal_lpdf(y_, mu, 1.0);
return lp;
}
};
Rcpp::List test_calc(const Eigen::VectorXd& y,
const Eigen::MatrixXd& M,
const Eigen::VectorXd& x) {
test_lpdf f(y, M);
double fx;
Eigen::VectorXd grad_fx;
Eigen::MatrixXd H;
stan::math::hessian(f, x, fx, grad_fx, H);
return Rcpp::List::create(Rcpp::Named("lp") = fx,
Rcpp::Named("gradient") = grad_fx,
Rcpp::Named("hessian") = H);
}
// [[Rcpp::export]]
Rcpp::List R_test_calc(const Eigen::Map<Eigen::VectorXd>& y,
const Eigen::Map<Eigen::MatrixXd>& M,
const Eigen::Map<Eigen::VectorXd>& x) {
return test_calc(y, M, x);
}
R code (compiles functions.cpp)
library(Rcpp)
lib_path <- .libPaths()[1]
includes <- paste0('-I\"', lib_path, "/",
c("Rcpp", "RcppEigen", "StanHeaders", "BH"),
'/include\"')
Sys.setenv("PKG_CXXFLAGS" = paste(includes, collapse = " "))
y <- rnorm(5, 8:12)
M <- matrix(1:15 + 0, 5, 3)
V <- compositions::ilrBase(D = 3)
X <- matrix(runif(10), 5, 2)
P <- matrix(c(0.1354226, 0.2546265, 0.6099509,
0.1870608, 0.3919542, 0.4209849,
0.1797275, 0.3958411, 0.4244314,
0.2167451, 0.4061172, 0.3771377,
0.2102265, 0.3311246, 0.4586489), ncol = 3, byrow = TRUE)
mu <- rowSums(P * M)
lp <- sum(dnorm(y, mu, 1, log = TRUE))
sourceCpp("functions.cpp")
R_test_calc(y, M, as.vector(P)+ runif(15, -0.01, 0.01))
Compiler errors for Alternative 1 (pastebin link)
Compiler errors for Alternative 2 (pastebin link)
What do I need to do in order to make matrix operations possible in this scenario (and generally)?
As a side question, I noted that for the functioning Alternative 3 above, the ()-operator of the lpdf function seems to be used as many times as the number of parameters (the vector theta that the ()-operator takes as input). Is this to be expected?