I am working with the following model and trying to use generated quantities block to make prediction of new data points.
The issue I have is in the generated quantities block, where according to the formula in 3 above,
mu = alpha + beta + rho * (log(C_w-1, d) - mu_w-1,d)
I am unsure how to code the part with the rho in generated quantities. If I only use mu = alpha + beta, I can get values back from pred_paid2.
I think the issue here is that mu and pred_paid2 depends on each other, I have to generate mu from C_w-1 and mu_w-1, then the generated mu is used to generate a sample from a lognormal distribution as prediction. And the prediction is then used to generate the next mu.
Thank you for your time reading my code, it’s not the prettiest and I am not sure if this is the best way to translate the model above into stan.
data {
int<lower=1> num_ay;
int<lower=1> num_dev;
int<lower=1> num_obs;
vector[num_obs] log_prem;
vector[num_obs] log_loss;
int origin[num_obs];
int cy[num_obs];
int development_lag[num_obs];
//predictions
int<lower=1> new_obs;
vector[new_obs] log_prem_new;
int origin_new[new_obs];
int dev_new[new_obs];
int cy_new[new_obs];
}
transformed data {
array[num_dev-1] real prior_diag_log_loss;
for (i in 1:num_obs) {
if (cy[i] == 10 && development_lag[i] != 1) {
prior_diag_log_loss[development_lag[i] - 1] = log_loss[i];
}
}
}
parameters {
vector[num_ay] alpha;
vector<upper=0> [num_dev - 1] dev; // 9 development period
vector<lower=0.001>[num_ay] elr;
vector<lower=0>[num_dev] a; // half normal
real<lower=-1, upper=1> rho;
}
transformed parameters {
vector[num_obs] mu;
vector[num_dev] beta;
vector[num_dev] sig2;
vector[num_ay] loglr;
array[num_dev-1] real prior_diag_mu;
vector[new_obs] mu_new;
for (i in 1:(num_dev - 1)) {
beta[i] = dev[i];
}
beta[num_dev] = 0; // No more development at 10th period
for (i in 1:num_dev) {
sig2[i] = sum(a[i:num_dev]);
}
loglr[1] = -0.2877;
for (i in 1:num_ay) {
loglr[i] = log(elr[i]);
}
for (i in 1:num_obs) {
if (origin[i] == 1) {
mu[i] = log_prem[origin[i]] + loglr[origin[i]] +
alpha[origin[i]] + beta[development_lag[i]];
} else {
mu[i] = log_prem[i] + loglr[origin[i]] + alpha[origin[i]] +
beta[development_lag[i]] +
rho*(log_loss[origin[i-1]] - mu[origin[i-1]]);
}
}
for (i in 1:num_obs) {
if (cy[i] == 10 && development_lag[i] != 1) {
prior_diag_mu[development_lag[i] - 1] = mu[i];
}
}
for (i in 1:new_obs) {
if (cy_new[i] == 11) {
mu_new[i] = log_prem_new[i] + loglr[origin_new[i]] +
alpha[origin_new[i]] + beta[dev_new[i]] +
rho*(prior_diag_log_loss[dev_new[i]-1] -
prior_diag_mu[dev_new[i]-1]);
} else {
mu_new[i] = log_prem_new[i] + loglr[origin_new[i]] +
alpha[origin_new[i]] +
beta[dev_new[i]];
}
}
}
model {
elr ~ lognormal(-0.25, 0.1);
dev ~ normal(0, 2.5);
a ~ normal(0, 2.5);
alpha ~ normal(0, sqrt(10));
for (i in 1:num_obs) {
log_loss[i] ~ normal(mu[i], sqrt(sig2[development_lag[i]]));
}
}
generated quantities {
vector[num_obs] pred_paid;
vector[new_obs] pred_paid2;
vector[num_obs] mu_new2;
for (i in 1:num_obs) {
pred_paid[i] = lognormal_rng(mu[i], sqrt(sig2[development_lag[i]]));
}
// this works
// for (i in 1:new_obs) {
// pred_paid2[i] = lognormal_rng(mu_new[i], sqrt(sig2[dev_new[i]]));
// }
for (i in 1:new_obs) {
if (cy_new[i] != 11) {
mu_new2[i] = mu_new[i] +
rho*(log(pred_paid2[origin_new[i-1]]) - mu_new2[origin_new[i-1]]);
pred_paid2[i] = lognormal_rng(mu_new2[i], sqrt(sig2[dev_new[i]]));
} else {
mu_new2[i] = mu_new[i];
pred_paid2[i] = lognormal_rng(mu_new[i], sqrt(sig2[dev_new[i]]));
}
}
}
df <- structure(list(num_ay = c(10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), num_dev = c(10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L), num_obs = c(55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L,
55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L,
55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L,
55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L,
55L, 55L, 55L, 55L, 55L, 55L, 55L, 55L), premium = c(5812, 4908,
5454, 5165, 5214, 5230, 4992, 5466, 5226, 4962, 5812, 4908, 5454,
5165, 5214, 5230, 4992, 5466, 5226, 5812, 4908, 5454, 5165, 5214,
5230, 4992, 5466, 5812, 4908, 5454, 5165, 5214, 5230, 4992, 5812,
4908, 5454, 5165, 5214, 5230, 5812, 4908, 5454, 5165, 5214, 5812,
4908, 5454, 5165, 5812, 4908, 5454, 5812, 4908, 5812), cum_loss = c(952,
849, 983, 1657, 932, 1162, 1478, 1240, 1326, 1413, 1529, 1564,
2211, 2685, 1940, 2402, 2980, 2080, 2412, 2813, 2202, 2830, 3169,
2626, 2799, 3945, 2607, 3647, 2432, 3832, 3600, 3332, 2996, 4714,
3724, 2468, 4039, 3900, 3368, 3034, 3832, 2487, 4065, 4320, 3491,
3899, 2513, 4102, 4332, 3907, 2526, 4155, 3911, 2531, 3912),
origin = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7,
1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 1,
2, 1), cy = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2, 3, 4, 5,
6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9,
10, 5, 6, 7, 8, 9, 10, 6, 7, 8, 9, 10, 7, 8, 9, 10, 8, 9,
10, 9, 10, 10), development_lag = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 9L, 9L, 10L)), row.names = c(NA, -55L), class = c("data.table",
"data.frame"))
df_new <- structure(list(num_ay = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L), num_dev = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L), num_obs = c(45L, 45L, 45L, 45L, 45L, 45L,
45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L,
45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L,
45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L
), premium = c(4962, 5226, 4962, 5466, 5226, 4962, 4992, 5466,
5226, 4962, 5230, 4992, 5466, 5226, 4962, 5214, 5230, 4992, 5466,
5226, 4962, 5165, 5214, 5230, 4992, 5466, 5226, 4962, 5454, 5165,
5214, 5230, 4992, 5466, 5226, 4962, 4908, 5454, 5165, 5214, 5230,
4992, 5466, 5226, 4962), cum_loss = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), origin = c(10, 9, 10, 8, 9, 10, 7, 8, 9, 10, 6, 7, 8, 9, 10,
5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 3, 4, 5, 6, 7, 8, 9,
10, 2, 3, 4, 5, 6, 7, 8, 9, 10), cy = c(11, 11, 12, 11, 12, 13,
11, 12, 13, 14, 11, 12, 13, 14, 15, 11, 12, 13, 14, 15, 16, 11,
12, 13, 14, 15, 16, 17, 11, 12, 13, 14, 15, 16, 17, 18, 11, 12,
13, 14, 15, 16, 17, 18, 19), development_lag = c(2L, 3L, 3L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L)), row.names = c(NA,
-45L), class = c("data.table", "data.frame"))
crc_model <- cmdstan_model(
stan_file = "./CRC.stan"
)
crc_fit <- crc_model$sample(
data = list(
num_ay = unique(df$num_ay),
num_dev = unique(df$num_dev),
num_obs = unique(df$num_obs),
log_prem = log(df$premium),
log_loss = log(df$cum_loss),
origin = df$origin,
cy = df$cy,
development_lag = df$development_lag,
new_obs = unique(df_new$num_obs),
log_prem_new = log(df_new$premium),
origin_new = df_new$origin,
dev_new = df_new$development_lag,
cy_new = df_new$cy
),
step_size = 0.01,
iter_warmup = 21,
iter_sampling = 1,
chains = 1,
adapt_delta = 0.999,
max_treedepth = 10
)