Hi, I’ve fit the generalized Pareto PDF with a multi-level structure. I’ve included the Stan code and data. Note the data has a ragged structure because there are varying observations per group. There are some divergences but everything converges nicely. Data and r script included.
functions {
real gpareto_lpdf(vector y, real ymin, real k, real sigma, real s) {
// generalised Pareto log pdf
real N = s;
real inv_k = inv(k);
if (k<0 && max(y-ymin)/sigma > -inv_k)
reject("k<0 and max(y-ymin)/sigma > -1/k; found k, sigma =", k, sigma)
if (sigma<=0)
reject("sigma<=0; found sigma =", sigma)
if (fabs(k) > 1e-15)
return -(1+inv_k)*sum(log1p((y-ymin) * (k/sigma))) -N*log(sigma);
else
return -sum(y-ymin)/sigma -N*log(sigma); // limit k->0
}
}
data {
//threshold
real<lower=0> ymin;
// number of observations
int<lower=1> N;
// number of groups
int<lower=-1> K;
//observations
vector<lower=ymin>[N] y;
// group sizes
int<lower=0> s[K];
// ymax
vector<lower=ymin>[K] ymax;
}
parameters {
//overall mean
real k_mu;
//overall mean's sd
real<lower=0> k_sigma;
//sigma mu
real sigma_mu;
//overall sd
real<lower=0>sigma_sigma;
//y-specific
vector<lower=0>[K] sigma;
vector<lower=-sigma[K]/(ymax[K]-ymin)>[K] k;
}
model {
int pos;
pos = 1;
//priors
k_mu ~ normal(0, 3);
k_sigma ~ normal(0, 3);
sigma_mu ~ normal(0,3);
sigma_sigma ~ normal(0, 3);
k ~ normal(k_mu, k_sigma);
sigma ~ normal(sigma_mu, sigma_sigma);
for(i in 1:K){
segment(y, pos, s[i]) ~ gpareto(ymin, k[i], sigma[i], s[i]);
pos = pos + s[i];
}
}
rstan script.R (1.3 KB)
gpareto.stan (1.4 KB)
data (ccl.csv) (740.9 KB)