I’m working on adding gradients to the ordered logistic distribution, but I’m getting some odd results which look like a floating-point error:
test/unit/math/mix/mat/prob/ordered_logistic_test.cpp:141: Failure
Value of: out.d_.val()
Actual: -2.220446e-16
Expected: 0.0
Which is: 0
The sum of all partials for the ordered logistic lpmf should be zero, as the partial derivative for the latent location score is equal to the negative of the partial derivatives for the ordered cutpoints.
The strange part is that this only happens when the input length of integers and latent locations is odd (i.e. when y
and lambda
are vectors of length 3,5,etc.). When the input length is even the tests pass without issue.
The code with gradients is below:
template <bool propto, typename T_loc, typename T_cut>
typename return_type<T_loc, T_cut>::type ordered_logistic_lpmf(
const std::vector<int>& y,
const Eigen::Matrix<T_loc, Eigen::Dynamic, 1>& lambda,
const Eigen::Matrix<T_cut, Eigen::Dynamic, 1>& c) {
typedef typename
stan::partials_return_type<Eigen::Matrix<T_loc, Eigen::Dynamic, 1>,
Eigen::Matrix<T_cut, Eigen::Dynamic, 1>>::type T_partials_return;
typedef typename Eigen::Matrix<T_partials_return, -1, 1> T_partials_vec;
int N = lambda.size();
int K = c.size() + 1;
vector_seq_view<Eigen::Matrix<T_loc, Eigen::Dynamic, 1>> lam_vec(lambda);
T_partials_vec lam_dbl(N);
lam_dbl = value_of(lam_vec[0]);
vector_seq_view<Eigen::Matrix<T_cut, Eigen::Dynamic, 1>> c_vec(c);
T_partials_vec c_dbl((K-1));
c_dbl = value_of(c_vec[0]);
T_partials_return logp(0.0);
T_partials_vec lam_deriv(N);
T_partials_vec c_deriv(K-1);
lam_deriv.setZero(N);
c_deriv.setZero(K-1);
operands_and_partials<Eigen::Matrix<T_loc, Eigen::Dynamic, 1>,
Eigen::Matrix<T_cut, Eigen::Dynamic, 1>> ops_partials(lambda, c);
for (int n = 0; n < N; ++n) {
if (y[n] == 1) {
logp -= log1p_exp(lam_dbl[n]-c_dbl[0]);
lam_deriv[n] -= inv_logit(lam_dbl[n]-c_dbl[0]);
c_deriv[0] -= lam_deriv[n];
} else if (y[n] == K) {
logp -= log1p_exp(c_dbl[K-2]-lam_dbl[n]);
lam_deriv[n] -= inv_logit(c_dbl[K-2]-lam_dbl[n]);
c_deriv[K-2] -= lam_deriv[n];
} else {
logp += log_inv_logit_diff(c_dbl[y[n]-2]-lam_dbl[n],
c_dbl[y[n]-1]-lam_dbl[n]);
lam_deriv[n] = inv_logit(c_dbl[y[n]-2]-lam_dbl[n])
- inv_logit(lam_dbl[n]-c_dbl[y[n]-1]);
c_deriv[y[n]-2] += inv(1-exp(c_dbl[y[n]-1]-c_dbl[y[n]-2]))
- inv_logit(c_dbl[y[n]-2]-lam_dbl[n]);
c_deriv[y[n]-1] += inv(1-exp(c_dbl[y[n]-2]-c_dbl[y[n]-1]))
- inv_logit(c_dbl[y[n]-1]-lam_dbl[n]);
}
}
if (!is_constant_struct<Eigen::Matrix<T_loc, Eigen::Dynamic, 1>>::value)
ops_partials.edge1_.partials_ = lam_deriv;
if (!is_constant_struct<Eigen::Matrix<T_cut, Eigen::Dynamic, 1>>::value)
ops_partials.edge2_.partials_ = c_deriv;
return ops_partials.build(logp);
}
The tests are:
TEST(ProbDistributions, fvar_var_vec2) {
using stan::math::ordered_logistic_lpmf;
using stan::math::vector_fv;
using stan::math::fvar;
using stan::math::var;
vector_fv lambda(2);
std::vector<int> y(2);
for (int i = 0; i < 2; i++) {
lambda[i].val_ = -0.25;
lambda[i].d_ = 1.0;
y[i] = 2;
}
vector_fv cuts(3);
cuts << -0.95, -0.10, 0.95;
for (int i = 0; i < 3; i++)
cuts(i).d_ = 1.0;
fvar<var> out = ordered_logistic_lpmf(y, lambda, cuts);
out.d_.grad();
EXPECT_FLOAT_EQ(0.0, out.d_.val());
}
TEST(ProbDistributions, fvar_var_vec3) {
using stan::math::ordered_logistic_lpmf;
using stan::math::vector_fv;
using stan::math::fvar;
using stan::math::var;
vector_fv lambda(3);
std::vector<int> y(3);
for (int i = 0; i < 3; i++) {
lambda[i].val_ = -0.25;
lambda[i].d_ = 1.0;
y[i] = 2;
}
vector_fv cuts(3);
cuts << -0.95, -0.10, 0.95;
for (int i = 0; i < 3; i++)
cuts(i).d_ = 1.0;
fvar<var> out = ordered_logistic_lpmf(y, lambda, cuts);
out.d_.grad();
EXPECT_FLOAT_EQ(0.0, out.d_.val());
}
TEST(ProbDistributions, fvar_var_vec4) {
using stan::math::ordered_logistic_lpmf;
using stan::math::vector_fv;
using stan::math::fvar;
using stan::math::var;
vector_fv lambda(4);
std::vector<int> y(4);
for (int i = 0; i < 4; i++) {
lambda[i].val_ = -0.25;
lambda[i].d_ = 1.0;
y[i] = 2;
}
vector_fv cuts(3);
cuts << -0.95, -0.10, 0.95;
for (int i = 0; i < 3; i++)
cuts(i).d_ = 1.0;
fvar<var> out = ordered_logistic_lpmf(y, lambda, cuts);
out.d_.grad();
EXPECT_FLOAT_EQ(0.0, out.d_.val());
}
TEST(ProbDistributions, fvar_var_vec5) {
using stan::math::ordered_logistic_lpmf;
using stan::math::vector_fv;
using stan::math::fvar;
using stan::math::var;
vector_fv lambda(5);
std::vector<int> y(5);
for (int i = 0; i < 5; i++) {
lambda[i].val_ = -0.25;
lambda[i].d_ = 1.0;
y[i] = 2;
}
vector_fv cuts(3);
cuts << -0.95, -0.10, 0.95;
for (int i = 0; i < 3; i++)
cuts(i).d_ = 1.0;
fvar<var> out = ordered_logistic_lpmf(y, lambda, cuts);
out.d_.grad();
EXPECT_FLOAT_EQ(0.0, out.d_.val());
}
With result:
test/unit/math/mix/mat/prob/ordered_logistic_test --gtest_output="xml:test/unit/math/mix/mat/prob/ordered_logistic_test.xml"
Running main() from gtest_main.cc
[==========] Running 5 tests from 1 test case.
[----------] Global test environment set-up.
[----------] 5 tests from ProbDistributions
[ RUN ] ProbDistributions.fvar_var
[ OK ] ProbDistributions.fvar_var (0 ms)
[ RUN ] ProbDistributions.fvar_var_vec2
[ OK ] ProbDistributions.fvar_var_vec2 (0 ms)
[ RUN ] ProbDistributions.fvar_var_vec3
test/unit/math/mix/mat/prob/ordered_logistic_test.cpp:89: Failure
Value of: out.d_.val()
Actual: 2.220446e-16
Expected: 0.0
Which is: 0
[ FAILED ] ProbDistributions.fvar_var_vec3 (0 ms)
[ RUN ] ProbDistributions.fvar_var_vec4
[ OK ] ProbDistributions.fvar_var_vec4 (0 ms)
[ RUN ] ProbDistributions.fvar_var_vec5
test/unit/math/mix/mat/prob/ordered_logistic_test.cpp:141: Failure
Value of: out.d_.val()
Actual: -2.220446e-16
Expected: 0.0
Which is: 0
[ FAILED ] ProbDistributions.fvar_var_vec5 (0 ms)
[----------] 5 tests from ProbDistributions (0 ms total)
[----------] Global test environment tear-down
[==========] 5 tests from 1 test case ran. (0 ms total)
[ PASSED ] 3 tests.
[ FAILED ] 2 tests, listed below:
[ FAILED ] ProbDistributions.fvar_var_vec3
[ FAILED ] ProbDistributions.fvar_var_vec5
2 FAILED TESTS
Slight code dump, but I couldn’t see any obvious reason for this kind of behaviour