Operands_and_partials - sum of partial derivatives not equal to total derivative (by 2.220446e-16)

andrjohns · April 30, 2018, 3:37pm

I’m working on adding gradients to the ordered logistic distribution, but I’m getting some odd results which look like a floating-point error:

test/unit/math/mix/mat/prob/ordered_logistic_test.cpp:141: Failure
Value of: out.d_.val()
  Actual: -2.220446e-16
Expected: 0.0
Which is: 0

The sum of all partials for the ordered logistic lpmf should be zero, as the partial derivative for the latent location score is equal to the negative of the partial derivatives for the ordered cutpoints.

The strange part is that this only happens when the input length of integers and latent locations is odd (i.e. when y and lambda are vectors of length 3,5,etc.). When the input length is even the tests pass without issue.

The code with gradients is below:

template <bool propto, typename T_loc, typename T_cut>
typename return_type<T_loc, T_cut>::type ordered_logistic_lpmf(
    const std::vector<int>& y,
    const Eigen::Matrix<T_loc, Eigen::Dynamic, 1>& lambda,
    const Eigen::Matrix<T_cut, Eigen::Dynamic, 1>& c) {

  typedef typename 
    stan::partials_return_type<Eigen::Matrix<T_loc, Eigen::Dynamic, 1>,
                               Eigen::Matrix<T_cut, Eigen::Dynamic, 1>>::type T_partials_return;

  typedef typename Eigen::Matrix<T_partials_return, -1, 1> T_partials_vec;

  int N = lambda.size();
  int K = c.size() + 1;
 
  vector_seq_view<Eigen::Matrix<T_loc, Eigen::Dynamic, 1>> lam_vec(lambda);
  T_partials_vec lam_dbl(N);
    lam_dbl = value_of(lam_vec[0]);

  vector_seq_view<Eigen::Matrix<T_cut, Eigen::Dynamic, 1>> c_vec(c);
  T_partials_vec c_dbl((K-1));
    c_dbl = value_of(c_vec[0]);

  T_partials_return logp(0.0);
  T_partials_vec lam_deriv(N);
  T_partials_vec c_deriv(K-1);
  lam_deriv.setZero(N);
  c_deriv.setZero(K-1);

  operands_and_partials<Eigen::Matrix<T_loc, Eigen::Dynamic, 1>,
                        Eigen::Matrix<T_cut, Eigen::Dynamic, 1>> ops_partials(lambda, c);

  for (int n = 0; n < N; ++n) {
    if (y[n] == 1) {
      logp -= log1p_exp(lam_dbl[n]-c_dbl[0]);
      lam_deriv[n] -= inv_logit(lam_dbl[n]-c_dbl[0]);
      c_deriv[0] -= lam_deriv[n];
    } else if (y[n] == K) {
      logp -= log1p_exp(c_dbl[K-2]-lam_dbl[n]);
      lam_deriv[n] -= inv_logit(c_dbl[K-2]-lam_dbl[n]);
      c_deriv[K-2] -= lam_deriv[n];
    } else {
      logp += log_inv_logit_diff(c_dbl[y[n]-2]-lam_dbl[n],
                                 c_dbl[y[n]-1]-lam_dbl[n]);

      lam_deriv[n] = inv_logit(c_dbl[y[n]-2]-lam_dbl[n])
                      - inv_logit(lam_dbl[n]-c_dbl[y[n]-1]);

      c_deriv[y[n]-2] += inv(1-exp(c_dbl[y[n]-1]-c_dbl[y[n]-2]))
                          - inv_logit(c_dbl[y[n]-2]-lam_dbl[n]);

      c_deriv[y[n]-1] += inv(1-exp(c_dbl[y[n]-2]-c_dbl[y[n]-1]))
                          - inv_logit(c_dbl[y[n]-1]-lam_dbl[n]);
    }
  }
  if (!is_constant_struct<Eigen::Matrix<T_loc, Eigen::Dynamic, 1>>::value)
    ops_partials.edge1_.partials_ = lam_deriv;

  if (!is_constant_struct<Eigen::Matrix<T_cut, Eigen::Dynamic, 1>>::value)
    ops_partials.edge2_.partials_ = c_deriv;

  return ops_partials.build(logp);
}

The tests are:

TEST(ProbDistributions, fvar_var_vec2) {
  using stan::math::ordered_logistic_lpmf;
  using stan::math::vector_fv;
  using stan::math::fvar;
  using stan::math::var;

  vector_fv lambda(2);
  std::vector<int> y(2);
  for (int i = 0; i < 2; i++) {
    lambda[i].val_ = -0.25;
    lambda[i].d_ = 1.0;
    y[i] = 2;
  }

  vector_fv cuts(3);
    cuts << -0.95, -0.10, 0.95;
  for (int i = 0; i < 3; i++)
    cuts(i).d_ = 1.0;

  fvar<var> out = ordered_logistic_lpmf(y, lambda, cuts);

  out.d_.grad();

  EXPECT_FLOAT_EQ(0.0, out.d_.val());
}

TEST(ProbDistributions, fvar_var_vec3) {
  using stan::math::ordered_logistic_lpmf;
  using stan::math::vector_fv;
  using stan::math::fvar;
  using stan::math::var;

  vector_fv lambda(3);
  std::vector<int> y(3);
  for (int i = 0; i < 3; i++) {
    lambda[i].val_ = -0.25;
    lambda[i].d_ = 1.0;
    y[i] = 2;
  }

  vector_fv cuts(3);
    cuts << -0.95, -0.10, 0.95;
  for (int i = 0; i < 3; i++)
    cuts(i).d_ = 1.0;

  fvar<var> out = ordered_logistic_lpmf(y, lambda, cuts);

  out.d_.grad();

  EXPECT_FLOAT_EQ(0.0, out.d_.val());
}

TEST(ProbDistributions, fvar_var_vec4) {
  using stan::math::ordered_logistic_lpmf;
  using stan::math::vector_fv;
  using stan::math::fvar;
  using stan::math::var;

  vector_fv lambda(4);
  std::vector<int> y(4);
  for (int i = 0; i < 4; i++) {
    lambda[i].val_ = -0.25;
    lambda[i].d_ = 1.0;
    y[i] = 2;
  }

  vector_fv cuts(3);
    cuts << -0.95, -0.10, 0.95;
  for (int i = 0; i < 3; i++)
    cuts(i).d_ = 1.0;

  fvar<var> out = ordered_logistic_lpmf(y, lambda, cuts);

  out.d_.grad();

  EXPECT_FLOAT_EQ(0.0, out.d_.val());
}

TEST(ProbDistributions, fvar_var_vec5) {
  using stan::math::ordered_logistic_lpmf;
  using stan::math::vector_fv;
  using stan::math::fvar;
  using stan::math::var;

  vector_fv lambda(5);
  std::vector<int> y(5);
  for (int i = 0; i < 5; i++) {
    lambda[i].val_ = -0.25;
    lambda[i].d_ = 1.0;
    y[i] = 2;
  }

  vector_fv cuts(3);
    cuts << -0.95, -0.10, 0.95;
  for (int i = 0; i < 3; i++)
    cuts(i).d_ = 1.0;

  fvar<var> out = ordered_logistic_lpmf(y, lambda, cuts);

  out.d_.grad();

  EXPECT_FLOAT_EQ(0.0, out.d_.val());
}

With result:

test/unit/math/mix/mat/prob/ordered_logistic_test --gtest_output="xml:test/unit/math/mix/mat/prob/ordered_logistic_test.xml"
Running main() from gtest_main.cc
[==========] Running 5 tests from 1 test case.
[----------] Global test environment set-up.
[----------] 5 tests from ProbDistributions
[ RUN      ] ProbDistributions.fvar_var
[       OK ] ProbDistributions.fvar_var (0 ms)
[ RUN      ] ProbDistributions.fvar_var_vec2
[       OK ] ProbDistributions.fvar_var_vec2 (0 ms)
[ RUN      ] ProbDistributions.fvar_var_vec3
test/unit/math/mix/mat/prob/ordered_logistic_test.cpp:89: Failure
Value of: out.d_.val()
  Actual: 2.220446e-16
Expected: 0.0
Which is: 0
[  FAILED  ] ProbDistributions.fvar_var_vec3 (0 ms)
[ RUN      ] ProbDistributions.fvar_var_vec4
[       OK ] ProbDistributions.fvar_var_vec4 (0 ms)
[ RUN      ] ProbDistributions.fvar_var_vec5
test/unit/math/mix/mat/prob/ordered_logistic_test.cpp:141: Failure
Value of: out.d_.val()
  Actual: -2.220446e-16
Expected: 0.0
Which is: 0
[  FAILED  ] ProbDistributions.fvar_var_vec5 (0 ms)
[----------] 5 tests from ProbDistributions (0 ms total)

[----------] Global test environment tear-down
[==========] 5 tests from 1 test case ran. (0 ms total)
[  PASSED  ] 3 tests.
[  FAILED  ] 2 tests, listed below:
[  FAILED  ] ProbDistributions.fvar_var_vec3
[  FAILED  ] ProbDistributions.fvar_var_vec5

 2 FAILED TESTS

Slight code dump, but I couldn’t see any obvious reason for this kind of behaviour

bgoodri · April 30, 2018, 3:54pm

I don’t think this is a good test pattern. If it does what I think it does, it tests for relative accuracy against exactly zero. If so, you could switch the order of the arguments or add 1.0 to both.

andrjohns · April 30, 2018, 4:09pm

Yep, adding 1.0 to both sorted it straight away. Thanks for the quick solve!

Topic		Replies	Views
Operands and Partials: partials_ vs partials_vec_ Developers	10	1040	February 3, 2018
Ill-typed argument in reduce sum fuction in ordered logistic model Modeling rstan	6	439	May 9, 2023
Operands and partials with more than five edges Developers math	4	548	August 8, 2018
Vari class, operands_and_partials, or adj_jac_apply Developers math	2	474	January 23, 2020
Potential slowness in operands_and_partials Developers	31	1649	September 14, 2018

Operands_and_partials - sum of partial derivatives not equal to total derivative (by 2.220446e-16)

Related topics