Feature request: Jacobians of unconstraining transform

For RStan, there was a Discourse thread about the now-released vignette on using external C++ code. It is not implemented for PyStan yet, but PyStan usually follows RStan. For CmdStan, the mechanism is a bit different but is documented in the CmdStan guide.

In Andrew’s case, the Stan program was

functions {
  real log_kernel(real lambda, real b_linear, vector b_logit,
                  vector lambda_logit, vector lambda_kernel, vector b_kernel,
                  vector y, real mu) {
    return lambda * b_linear + dot_product(lambda_logit, b_logit) + 
                               dot_product(lambda_kernel, b_kernel) +
           lambda * cauchy_lpdf(y | mu, 1) + (1 - lambda) * normal_lpdf(mu | 0, 10);     
  }
  
  real[] log_kernel_gradient(real lambda, real b_linear, vector b_logit,
                             vector lambda_logit, vector lambda_kernel, 
                             vector b_kernel, vector y, real mu); // undefined now
  
}
data {
  int N;
  vector[N] y;
  real a_lower;
  real a_upper;
  real b_linear;
  int K_logit;
  vector[K_logit] mu_logit;
  vector[K_logit] sigma_logit;
  vector[K_logit] b_logit;
  int K_kernel;
  vector[K_kernel] mu_kernel;
  vector[K_kernel] sigma_kernel;
  vector[K_kernel] b_kernel;
}
parameters {
  real mu;
  real<lower=0,upper=2> a;
}
transformed parameters {
  real a_reflected;
  real a_scaled;
  real lambda;
  vector[K_logit] lambda_logit;
  vector[K_kernel] lambda_kernel;
  a_reflected = 1 - fabs(1 - a);
  a_scaled = (a_reflected - a_lower)/(a_upper - a_lower);
  if (a_scaled <= 0)
    lambda = 0;
  else if (a_scaled < 1)
    lambda = 0.5 + 0.25*(3*(2*a_scaled - 1) - (2*a_scaled - 1)^3);
  else
    lambda = 1;
  lambda_logit = 1 ./ (1 + exp(-(lambda - mu_logit) ./ sigma_logit));  
  lambda_kernel = exp(-.5*(lambda - mu_kernel) .* (lambda - mu_kernel) ./ (sigma_kernel .* sigma_kernel));
}
model {
  target += log_kernel(lambda, b_linear, b_logit,
                       lambda_logit, lambda_kernel, b_kernel,
                       y, mu);
}
generated quantities {
  real grad_lambda[1] =
    log_kernel_gradient(lambda, b_linear, b_logit,
                        lambda_logit, lambda_kernel, b_kernel,
                        y, mu);
}

and the log_kernel_gradient.hpp file that calculated the gradient was

template <typename T0__, typename T1__, typename T2__, typename T3__, typename T4__, typename T5__, typename T6__, typename T7__>
std::vector<typename boost::math::tools::promote_args<T0__, T1__, T2__, T3__, typename boost::math::tools::promote_args<T4__, T5__, T6__, T7__>::type>::type>
log_kernel_gradient(const T0__& lambda,
                    const T1__& b_linear,
                    const Eigen::Matrix<T2__, Eigen::Dynamic,1>& b_logit,
                    const Eigen::Matrix<T3__, Eigen::Dynamic,1>& lambda_logit,
                    const Eigen::Matrix<T4__, Eigen::Dynamic,1>& lambda_kernel,
                    const Eigen::Matrix<T5__, Eigen::Dynamic,1>& b_kernel,
                    const Eigen::Matrix<T6__, Eigen::Dynamic,1>& y,
                    const T7__& mu, std::ostream* pstream__) {

  auto lambda_v = to_var(lambda); // auto foo_v = to_var(foo) for anything unknown
  var lp = log_kernel(lambda_v, b_linear, b_logit, // using lambda_v, not lambda
                      lambda_logit, lambda_kernel, b_kernel,
                      y, mu, 0); // trailing 0 is the generated std::ostream* argument
  std::vector<var> theta;
  theta.push_back(lambda_v); // call theta.push_back(foo_v) for anything unknown
  std::vector<double> g;
  lp.grad(theta, g); // reverse-mode sweep: fills g with the partials of lp with respect to theta
  return g;
}
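
To make the "for anything unknown" comments concrete: if mu were also treated as unknown, the same body would promote both quantities and return one partial per entry. A sketch only (the generated quantities declaration would then need to be a length-2 array instead of grad_lambda[1]):

  // same template header and signature as log_kernel_gradient above
  auto lambda_v = to_var(lambda);
  auto mu_v = to_var(mu);                // promote every unknown to var
  var lp = log_kernel(lambda_v, b_linear, b_logit,
                      lambda_logit, lambda_kernel, b_kernel,
                      y, mu_v, 0);
  std::vector<var> theta;
  theta.push_back(lambda_v);
  theta.push_back(mu_v);                 // one entry per unknown, in the order wanted back
  std::vector<double> g;
  lp.grad(theta, g);                     // g[0] = dlp/dlambda, g[1] = dlp/dmu
  return g;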

Calculating Jacobians works about the same way and is discussed in section 2.4 of the Stan Math paper.
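
For reference, here is a minimal sketch of computing such a Jacobian with the jacobian functional in Stan Math, applied to the unconstraining transform for this program's parameters, mu (unbounded) and a with lower=0, upper=2. The functor name, hard-coded bounds, and test values are just for illustration:

#include <stan/math.hpp>
#include <Eigen/Dense>
#include <iostream>

// Illustrative only: maps the constrained parameters (mu, a) to the
// unconstrained scale the sampler works on: mu -> mu, a in (0, 2) -> log(a / (2 - a)).
struct unconstrain_functor {
  template <typename T>
  Eigen::Matrix<T, Eigen::Dynamic, 1>
  operator()(const Eigen::Matrix<T, Eigen::Dynamic, 1>& x) const {
    Eigen::Matrix<T, Eigen::Dynamic, 1> y(2);
    y(0) = x(0);                     // mu is unbounded: identity
    y(1) = log(x(1) / (2.0 - x(1))); // lower=0, upper=2 bound: scaled logit
    return y;
  }
};

int main() {
  Eigen::VectorXd x(2);
  x << 1.5, 0.7;                     // constrained values of (mu, a)
  Eigen::VectorXd fx;                // unconstrained values
  Eigen::MatrixXd J;                 // entries are d unconstrained / d constrained
  stan::math::jacobian(unconstrain_functor(), x, fx, J);
  std::cout << "unconstrained: " << fx.transpose() << "\n"
            << "Jacobian:\n" << J << std::endl;
  return 0;
}

Each row of J comes from one reverse-mode sweep, which is the same machinery the log_kernel_gradient.hpp example above uses for a single output.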