I am trying to fit a Gaussian process quantile regression with measurement error but I am struggling with divergent transitions and was hoping if anyone has some suggestions in terms of reparametrization. To find out where my problem was I started with the simplest model (a quantile regression) and added more features. These are: model1.stan
(just a quantile regression), model2.stan
(a quantile regression with measurement error), model3.stan
(gaussian process quantile regression), and model4.stan
(gaussian process quantile regression with measurement error). Models 1 and 2 worked ok (though model 2 had some high R-hats), but models 3 and 4 ran very slowly (which is expected I guess), had large number of divergent transitions, high R-hats, etc… I have attached a simulated dataset, the stan codes, and an R script for running those, and pasted below model 4. Adjusting adapt_delta
and max_treedepth
did not make any difference, so I suspect this really needs to be reparametrised but not sure where to start… Any tips/advice will be greatly appreciated!
functions{
int which_first(real z, vector x){
int index = 1;
for (i in 1:rows(x))
{
if (z > x[i]) {break;}
else {index += 1;}
}
return(index);
}
vector linear_interpolation_v(vector z, vector x, vector y) {
int J = rows(z);
vector[J] ans;
for (j in 1:J)
{
int index=which_first(z[j],x);
ans[j] = y[index] + (((y[index - 1] - y[index]) * (z[j] - x[index]))/(x[index - 1] - x[index]));
}
return ans;
}
real rho_quantile(real y, real quantile) {
if (y < 0) {
return y * (quantile - 1);
} else {
return y * quantile;
}
}
real asym_laplace_lpdf(real y, real mu, real sigma, real quantile) {
return log(quantile * (1 - quantile)) -
log(sigma) -
rho_quantile((y - mu) / sigma, quantile);
}
real asym_laplace_lcdf(real y, real mu, real sigma, real quantile) {
if (y < mu) {
return log(quantile) + (1 - quantile) * (y - mu) / sigma;
} else {
return log1m((1 - quantile) * exp(-quantile * (y - mu) / sigma));
}
}
real asym_laplace_lccdf(real y, real mu, real sigma, real quantile) {
if (y < mu) {
return log1m(quantile * exp((1 - quantile) * (y - mu) / sigma));
} else {
return log1m(quantile) - quantile * (y - mu) / sigma;
}
}
matrix cov_GPL2(matrix x, real sq_alpha, real sq_rho, real delta) {
int N = dims(x)[1];
matrix[N, N] K;
for (i in 1:(N-1)) {
K[i, i] = sq_alpha + delta;
for (j in (i + 1):N) {
K[i, j] = sq_alpha * exp(-sq_rho * square(x[i,j]) );
K[j, i] = K[i, j];
}
}
K[N, N] = sq_alpha + delta;
return K;
}
}
data{
int<lower=1> N;
vector[N] X; //D
matrix[N,N] Dmat;
real<lower=0,upper=1> quantile;
vector[N] cra; //Observed CRA
vector[N] cra_error; // Observed Calibration Error
int<lower=0> tau; // Number of raw in the calibration curve
vector[tau] calBP; //Calibration curve
vector[tau] c14BP; //Calibration curve
vector[tau] c14_err; //Calibration curve
}
transformed data {
real means_X = mean(X);
vector[N] Xc = X - means_X;
vector[N] cra_error_sq = square(cra_error);
}
parameters{
vector<lower=0,upper=55000>[N] theta;
vector[N] s;
real alpha;
real beta;
real<lower=0> sigma;
real<lower=0> etasq;
real<lower=0> rhosq;
}
model{
matrix[N,N] SIGMA;
matrix[N,N] L_SIGMA;
vector[N] mu;
target += exponential_lpdf(sigma | 0.05);
target += exponential_lpdf(rhosq | 50);
target += exponential_lpdf(etasq | 10);
target += normal_lpdf(beta | 0, 0.5);
target += normal_lpdf(alpha | 3000, 200);
SIGMA = cov_GPL2(Dmat, etasq, rhosq, 0.001);
L_SIGMA = cholesky_decompose(SIGMA);
target += multi_normal_cholesky_lpdf(s|rep_vector(0,N),L_SIGMA);
target += normal_lpdf(cra|linear_interpolation_v(theta,calBP,c14BP),sqrt( cra_error_sq + square(linear_interpolation_v(theta,calBP,c14_err))));
for (n in 1:N) {
mu[n] = alpha + Xc[n] * (beta+s[n]);
target += asym_laplace_lpdf(theta[n] | mu[n], sigma, quantile);
}
}
generated quantities {
// actual population-level intercept
real b_Intercept = alpha - (means_X * beta); //addresses the centering
}
runscript.R (4.4 KB)
x.csv (4.3 KB)
intcal20.csv (211.9 KB)
model4.stan (3.3 KB)
model3.stan (2.3 KB)
model1.stan (2.9 KB)
model2.stan (3.9 KB)