I played around and got the integrate_1d to work for this distribution. No doubt that this is way slower than @mathDR approximation.
This is my first time using the integrate functionality so that was cool! I’m sure there’s a more numerically stable way to code it (@andrjohns or @bbbales2 may know a good way to use the xc
parameter here).
functions {
real wallenius_integral(real t, // Function argument
real xc, // Complement of function argument
// on the domain (defined later)
real[] theta, // parameters
real[] x_r, // data (real)
int[] x_i) { // data (integer)
real omega = theta[1];
real Dinv = 1 / theta[2];
int k = x_i[1];
int n = x_i[2];
return pow(1 - t^(omega * Dinv), k) * pow(1 - t^Dinv, n - k);
}
real wallenius_lpmf(int k, int n, int[] m) {
return -log1p(m[1]) - lbeta(m[1] - k + 1, k + 1) -
log1p(m[2]) - lbeta(m[2] - n + k + 1, n - k + 1);
}
}
data {
int<lower=0> N;
int y[N];
int n;
int m[2];
real tol;
}
transformed data {
real x_r[0];
int x_i[N, 2];
for (i in 1:N)
x_i[i] = {y[i], n};
}
parameters {
real<lower=0> odds;
}
model {
odds ~ exponential(1);
for (i in 1:N) {
real D = odds * (m[1] - y[i]) + (m[2] - n + y[i]);
y[i] ~ wallenius(n, m);
target += log(integrate_1d(wallenius_integral, 0, 1,
{odds, D},
x_r,
x_i[i],
tol));
}
}
generated quantities {
real out = odds / (1 + odds);
}
Here’s R code to generate data.
library(cmdstanr)
library(BiasedUrn)
fp <- file.path("../wallenius.stan")
mod <- cmdstan_model(fp, force_recompile = T)
# dWNCHypergeo(nrand, m1, m2, n, odds, precision=1E-7)
# m1: initial number of red balls in urn
# m2: initial number of white balls in urn
# n : total number of balls sampled
# odds: probability ratio of red over white
# number of realizations
N <- 10
m1 <- 1000
m2 <- 1000
n <- 200
prob <- 0.8
odds <- prob / (1 - prob)
y <- rWNCHypergeo(N, m1, m2, n, odds)
mod_out <- mod$sample(
data = list(N = N,
y = y,
n = n,
m = c(m1, m2),
tol = 0.1),
chains = 2,
init = 1,
adapt_delta = 0.8,
parallel_chains = 2,
iter_warmup = 200,
iter_sampling = 200
)
@andrjohns is there way to call the integrate_1d
in the lpmf function? I was getting complaints about the inputs being only data.