I am trying to replicate/approximate the feature in brms
to estimate epred
from new data, but struggling to get things specified in the generated quantities block.
I am fitting a piecewise/broken stick model with truncated responses.
functions {
real normal_lub_rng(real mu, real sigma, real lb, real ub) {
real p1 = normal_cdf(lb | mu, sigma); // cdf with lower bound
real p2 = normal_cdf(ub | mu, sigma); // cdf with upper bound
real u = uniform_rng(p1, p2);
return (sigma * inv_Phi(u)) + mu; // inverse cdf
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
array[N] real lb; // upper truncation bounds
array[N] real ub; // upper truncation bounds
int<lower=1> K_pslope; // number of slope1
matrix[N, K_pslope] X_pslope; // slope1 design matrix
int<lower=1> K_cslope; // number of slope2 predictors
matrix[N, K_cslope] X_cslope; // slope2 design matrix
int<lower=1> K_peak; // number of peak predictors
matrix[N, K_peak] X_peak; // peak design matrix
int<lower=1> K_peaktime; // number of peak time predictors
matrix[N, K_peaktime] X_peaktime; // peak time design matrix
array[N] int t; // time
int<lower=1> nSubj; // number of subjects
array[N] int<lower=1> Subj; // subject indicator
// prediction data
int<lower=1> N_pred;
int<lower=1> K_pslope_pred;
matrix[N_pred, K_pslope_pred] X_pslope_pred;
int<lower=1> K_cslope_pred;
matrix[N_pred, K_cslope_pred] X_cslope_pred;
int<lower=1> K_peak_pred;
matrix[N_pred, K_peak_pred] X_peak_pred;
int<lower=1> K_peaktime_pred;
matrix[N_pred, K_peaktime_pred] X_peaktime_pred;
array[N_pred] int t_pred;
array[N_pred] int<lower=1> Subj_pred;
}
parameters {
vector[K_pslope] b_pslope; // slope1
vector[K_cslope] b_cslope; // slope2
vector[K_peak] b_peak; // peak
vector[K_peaktime] b_peaktime; // time of peak
real<lower=0> sigma; // variance
vector<lower=0>[1] peak_sd; // subject-level peak varfiance
array[1] vector[nSubj] peak_z; // individual peak effects
vector<lower=0>[1] peaktime_sd; // subject-level peak time variance
array[1] vector[nSubj] peaktime_z; // individual peak time effects
}
transformed parameters {
vector[N] pslope = rep_vector(0.0, N);
vector[N] cslope = rep_vector(0.0, N);
vector[N] peak = rep_vector(0.0, N);
vector[N] peaktime = rep_vector(0.0, N);
vector[N] mu;
vector[nSubj] subj_peak; // subject-specific peak offsets
vector[nSubj] subj_peaktime; // subject-specific peak time offsets
subj_peak = peak_sd[1] * peak_z[1];
subj_peaktime = peaktime_sd[1] * peaktime_z[1];
pslope += X_pslope * b_pslope;
cslope += X_cslope * b_cslope;
peak += X_peak * b_peak;
peaktime += X_peaktime * b_peaktime;
// Add subject-specific offsets to linear predictor
for (n in 1 : N) {
peak[n] += subj_peak[Subj[n]];
}
for (n in 1 : N) {
peaktime[n] += subj_peaktime[Subj[n]];
}
for (n in 1 : N) {
// compute
mu[n] = peak[n]
+ pslope[n] * t[n] * step(peaktime[n] - t[n])
+ (pslope[n] * peaktime[n]
+ cslope[n] * (t[n] - peaktime[n]))
* step(t[n] - peaktime[n]);
}
}
model {
real lprior = 0;
// likelihood
for (n in 1 : N) {
target += normal_lupdf(Y[n] | mu[n], sigma)
- normal_lcdf(ub[n] | mu[n], sigma);
}
// priors
lprior += normal_lupdf(b_pslope | 0, 1);
lprior += normal_lupdf(b_cslope | 0, 1);
lprior += normal_lupdf(b_peak | 18, 10);
lprior += normal_lupdf(b_peaktime | 0, 0.25);
lprior += normal_lupdf(sigma | 0, 4);
lprior += normal_lupdf(peak_sd | 0, 10);
lprior += normal_lupdf(peaktime_sd | 0, 3);
target += lprior;
target += std_normal_lupdf(peak_z[1]);
target += std_normal_lupdf(peaktime_z[1]);
}
I think brms
would use the mean (across iterations?) mu
for each observation/row, but I can’t figure out how to calculate that quantity in generated quantities.
I tried this:
generated quantities {
vector[N] log_lik;
vector[N] pred;
vector[N_pred] pslope_pred = rep_vector(0.0, N_pred);
vector[N_pred] cslope_pred = rep_vector(0.0, N_pred);
vector[N_pred] peak_pred = rep_vector(0.0, N_pred);
vector[N_pred] peaktime_pred = rep_vector(0.0, N_pred);
vector[N_pred] mu_pred;
vector[N_pred] epred;
vector[N_pred] median_mu;
for (i in 1 : N) {
log_lik[i] = normal_lpdf(Y[i] | mu[i], sigma)
- normal_lcdf(ub[i] | mu[i], sigma);
pred[i] = normal_lub_rng(mu[i], sigma, 0.0, 37.0);
}
// Predictions higher time resolution
pslope_pred += X_pslope_pred * b_pslope;
cslope_pred += X_cslope_pred * b_cslope;
peak_pred += X_peak_pred * b_peak;
peaktime_pred += X_peaktime_pred * b_peaktime;
for (n in 1 : N_pred) {
peak_pred[n] += subj_peak[Subj_pred[n]];
}
for (n in 1 : N_pred) {
peaktime_pred[n] += subj_peaktime[Subj_pred[n]];
}
for (n in 1 : N_pred) {
mu_pred[n] = peak_pred[n]
+ pslope_pred[n] * t_pred[n] * step(peaktime_pred[n] - t_pred[n])
+ (pslope_pred[n] * peaktime_pred[n]
+ cslope_pred[n] * (t_pred[n] - peaktime_pred[n]))
* step(t_pred[n] - peaktime_pred[n]);
}
for (n in 1 : N_pred) {
median_mu[n] = quantile(mu_pred, 0.5);
epred[n] = normal_lub_rng(median_mu[n], sigma, 0.0, 37.0);
}
}
But the model hangs once it gets to sampling, presumably due to my mis-specification.
Is there a way I can calculate a mean or median mu
(across iterations) for the epred
values? Or am I misunderstanding what brms
does with posterior_epred
?