Let’s see if this will work for you. I don’t understand how your output above works because x[11] is clearly greater than x[10]. Or maybe filter is removing those where that’s the case? Idk, I don’t use tidyverse. It doesn’t really matter though because you can reverse what I’ve done.
Let’s say the constraints are
`x[1]` > `x[2]`,
`x[1]` > `x[8]`,
`x[1]` > `x[10]`,
`x[2]` > `x[3]`,
`x[3]` > `x[4]`,
`x[3]` > `x[7]`,
`x[4]` > `x[5]`,
`x[4]` > `x[6]`,
`x[8]` > `x[9]`,
`x[10]` > `x[11]`
And you’ve told us that each one has a marginal gamma distribution with given scale and rate. There’s a neat trick here using the F distribution (see F-distribution - Wikipedia).
Ripping straight from wikipedia, if X_k \sim \Gamma(\alpha_k,\beta_k)\, Gamma distribution are independent, then \frac{\alpha_2\beta_1 X_1}{\alpha_1\beta_2 X_2} \sim \mathrm{F}(2\alpha_1, 2\alpha_2)
We can rewrite the above constraints so that the F distribution is truncated at 1 for the given X_1, X_2.
Then pass in all the indexes for your pairwise constraints and things seem to work well.
functions {
real F_lpdf(real x, real d1, real d2) {
real d1_half = 0.5 * d1;
real d2_half = 0.5 * d2;
return -lbeta(d1_half, d2_half) + d1_half * log(d1 / d2) +
(d1_half - 1) * log(x) - (d1_half + d2_half) * log1p(x * d1/d2);
}
real F_lcdf(real x, real d1, real d2) {
return log(inc_beta(0.5 * d1, 0.5 * d2, (d1 * x) / (d1 * x + d2)));
}
}
data {
int<lower=0> N;
vector[N] shape;
vector[N] rate;
int<lower=1> P; // number of pairs
array[P, 2] int P_index;
int<lower=1, upper=N> max_index;
}
transformed data {
vector[P] pairs_coef;
for (i in 1:P) {
pairs_coef[i] = (shape[P_index[i, 2]] * rate[P_index[i, 1]]) / (shape[P_index[i, 1]] * rate[P_index[i, 2]]);
}
}
parameters {
real<lower=0> max_age;
vector<lower=0, upper=pairs_coef>[P] ratio_age;
}
transformed parameters {
vector[P] ratio_age_raw = ratio_age ./ pairs_coef;
}
model {
max_age ~ gamma(shape[max_index], rate[max_index]);
for (i in 1:P) {
target += F_lpdf(ratio_age[i] | 2 * shape[P_index[i, 1]], 2 * shape[P_index[i, 2]]) ;
target += F_lcdf(pairs_coef[i] | 2 * shape[P_index[i, 1]], 2 * shape[P_index[i, 2]]);
}
}
generated quantities {
vector[N] ages;
ages[max_index] = max_age;
int p = 2;
for (i in 1:P) {
ages[p] = ages[P_index[i, 2]] * ratio_age_raw[i];
p += 1;
}
}
The corresponding R code
library(tidyverse)
library(posterior)
tribble(~parentname, ~name, ~shape, ~rate,
NA_character_, "I-Y15154", 55.4, 0.0420,
"I-Y15154", "I-BY70273", 40.9, 0.0419,
"I-BY70273", "I-FGC18186", 35.0, 0.0432,
"I-FGC18186", "I-Y59437", 27.7, 0.0365,
"I-Y59437", "I-BY61123", 8.83, 0.0493,
"I-Y59437", "I-FTC12859", 10.3, 0.0329,
"I-FGC18186", "I-FTB41323", 11.8, 0.0227,
"I-Y15154", "I-Y17669", 17.8, 0.0139,
"I-Y17669", "I-Y17667", 6.11, 0.0109,
"I-Y15154", "I-BY115311", 16.8, 0.0242,
"I-BY115311", "I-Y88749", 10.3, 0.0319) %>%
mutate(index = row_number(),
parent_index = match(parentname, name)) %>%
replace_na(list(parent_index = 0)) ->
tree_data
mod_order <- cmdstan_model("order_constraint.stan")
P_index <- array(dim = c(10, 2))
j <- 2
for (i in 1:10){
P_index[i, ] <- as.matrix(tree_data[j, 5:6])
j <- j + 1
}
d <- list(
N = nrow(dat),
shape = tree_data$shape,
rate = tree_data$rate,
P = 10,
P_index = P_index,
max_index = 1
)
mod_out <- mod_order$sample(
data = d,
iter_warmup = 1000,
iter_sampling = 1000,
parallel_chains = 4
)
mod_out$summary("ages")
And the output
# A tibble: 11 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <num> <num> <num> <num> <num> <num> <num> <num> <num>
1 ages[1] 1319. 1314. 178. 178. 1041. 1620. 1.00 4753. 2785.
2 ages[2] 956. 941. 216. 221. 628. 1331. 1.00 4343. 2895.
3 ages[3] 743. 724. 208. 210. 434. 1108. 1.00 3511. 3017.
4 ages[4] 598. 573. 193. 189. 320. 947. 1.00 4349. 2961.
5 ages[5] 146. 132. 77.4 65.0 54.7 297. 1.00 3644. 2907.
6 ages[6] 255. 229. 129. 109. 95.9 503. 1.00 4382. 2693.
7 ages[7] 458. 436. 186. 186. 203. 813. 1.00 4142. 2864.
8 ages[8] 1051. 1044. 228. 225. 683. 1434. 1.00 5203. 3113.
9 ages[9] 474. 432. 230. 215. 174. 911. 1.00 4127. 2504.
10 ages[10] 705. 683. 219. 210. 389. 1107. 1.00 3389. 1602.
11 ages[11] 339. 307. 167. 150. 131. 665. 1.00 3663. 2251.
And let’s check the constraints
ages <- mod_out$draws("ages", format = "matrix")
for (i in 1:10) {
print(table(ages[, P_index[i, 1]] < ages[, P_index[i, 2]]))
}
TRUE
4000
TRUE
4000
TRUE
4000
TRUE
4000
TRUE
4000
TRUE
4000
TRUE
4000
TRUE
4000
TRUE
4000
TRUE
4000