Hi,
I’m trying to fit an inhomogeneous intensity function of a Poisson process using a GP.
There are four different intensity functions I’ve created and five countries are each assigned an intensity function. Observations from a country are then generated from the intensity function assigned.
The following plots are intensity functions for different countries.
When I fit GPs separately for each country(no hierarchy), the resulting posterior predictive check looks good.
But after reading
Multilevel Gaussian Processes and Hidden Markov Models with Stan | Brendan Hasz,
I’ve tried adding a hierarchical structure to it, where the GP parameters and the constant term, a, are drawn from some common population distributions. But the model fails to capture the non-linearity, e.g.
And the posterior values for the GP parameters, \alpha and \rho are extremely small numbers(values like 1e-100 and 0.00005 respectively). When I looked at the posterior distributions of the non-hierarchical model, mean \rho values range from 7 to 30, and mean \alpha values range from 0.5 to 1.2.
I initially thought this was because of the priors I used, so changed the inv_gamma(2,0.5) prior of \rho_m to normal(30,5), but still they converge to values near 0s.
Is anything wrong with my code?
functions {
vector gp_f(int N,
real[] x,
real alpha,
real rho,
vector eta) {
vector[N] f;
{
matrix[N, N] K;
matrix[N, N] L_K;
K = cov_exp_quad(x, alpha, rho)
+ diag_matrix(rep_vector(1e-6, N));
L_K = cholesky_decompose(K);
f = L_K * eta;
}
return f;
}
}
data {
int<lower=0> J; // Number of countries
int<lower=1> N; // Total number of obs
int<lower=1,upper=J> country[N];
real x[N]; // Input
int<lower=0> y[N]; // Poisson observations
int<lower=1> N1; // Number of country 1 observations
int<lower=1> N2;
int<lower=1> N3;
int<lower=1> N4;
int<lower=1> N5;
int<lower = 0> N_tilde;
real x_tilde1[N_tilde]; // np.linspace(0, max(x1)+10, N_tilde) concatenated to x1 vector and used as an input to a GP.
real x_tilde2[N_tilde];
real x_tilde3[N_tilde];
real x_tilde4[N_tilde];
real x_tilde5[N_tilde];
}
transformed data {
int<lower=1> N1_m = N1 + N_tilde;
int<lower=1> N2_m = N2 + N_tilde;
int<lower=1> N3_m = N3 + N_tilde;
int<lower=1> N4_m = N4 + N_tilde;
int<lower=1> N5_m = N5 + N_tilde;
real x1[N1] = x[1:N1]; // country 1 data
int y1[N1] = y[1:N1];
real x2[N2] = x[N1+1:N1+N2];
int y2[N2] = y[N1+1:N1+N2];
real x3[N3] = x[N1+N2+1:N1+N2+N3];
int y3[N3] = y[N1+N2+1:N1+N2+N3];
real x4[N4] = x[N1+N2+N3+1:N1+N2+N3+N4];
int y4[N4] = y[N1+N2+N3+1:N1+N2+N3+N4];
real x5[N5] = x[N1+N2+N3+N4+1:N1+N2+N3+N4+N5];
int y5[N5] = y[N1+N2+N3+N4+1:N1+N2+N3+N4+N5];
real x1_m[N1_m];
real x2_m[N2_m];
real x3_m[N3_m];
real x4_m[N4_m];
real x5_m[N5_m];
for (n1 in 1:N_tilde) {
x1_m[n1] = x_tilde1[n1];
x2_m[n1] = x_tilde2[n1];
x3_m[n1] = x_tilde3[n1];
x4_m[n1] = x_tilde4[n1];
x5_m[n1] = x_tilde5[n1];
}
for (n2 in 1:N1) x1_m[N_tilde + n2] = x1[n2];
for (n2 in 1:N2) x2_m[N_tilde + n2] = x2[n2];
for (n2 in 1:N3) x3_m[N_tilde + n2] = x3[n2];
for (n2 in 1:N4) x4_m[N_tilde + n2] = x4[n2];
for (n2 in 1:N5) x5_m[N_tilde + n2] = x5[n2];
}
parameters {
// Per-subject parameters (non-centered parameterization)
real rho_tilde[J]; //non-centered std of length scale
real alpha_tilde[J]; //non-centered std of signal std dev
real a_tilde[J];
vector[N1_m] eta1;
vector[N2_m] eta2;
vector[N3_m] eta3;
vector[N4_m] eta4;
vector[N5_m] eta5;
real a_m;
real<lower=0> a_s;
real<lower=0> alpha_m;
real<lower=0> rho_m;
real<lower=0> alpha_s;
real<lower=0> rho_s;
}
transformed parameters {
// Per-subject parameters
real<lower=0> rho[J]; //length scale
real<lower=0> alpha[J]; //signal standard deviation
real a[J]; // GP constant
// Non-centered parameterization of per-subject parameters
for (s in 1:J) {
rho[s] = exp(log(rho_m) + rho_s * rho_tilde[s]);
alpha[s] = exp(log(alpha_m) + alpha_s * alpha_tilde[s]);
a[s] = a_m + a_s * a_tilde[s];
}
vector[N1_m] log_f1 = gp_f(N1_m, x1_m ,alpha[1], rho[1], eta1);
vector[N2_m] log_f2 = gp_f(N2_m, x2_m ,alpha[2], rho[2], eta2);
vector[N3_m] log_f3 = gp_f(N3_m, x3_m ,alpha[3], rho[3], eta3);
vector[N4_m] log_f4 = gp_f(N4_m, x4_m ,alpha[4], rho[4], eta4);
vector[N5_m] log_f5 = gp_f(N5_m, x5_m ,alpha[5], rho[5], eta5);
}
model {
// Priors (on population-level params)
target += inv_gamma_lpdf(rho_m | 2, 0.5); // tried normal_lpdf(rho_m | 30, 5) but also fails.
target += normal_lpdf(alpha_m | 0, 4) + log(2);
target += normal_lpdf(a_m | 0, 2) + log(2);
target += normal_lpdf(rho_s | 0, 4) + log(2);
target += normal_lpdf(alpha_s | 0, 4) + log(2);
target += normal_lpdf(a_s | 0, 2) + log(2);
// Subject-level parameters drawn from pop-level distributions
// (non-centered parameterizations)
target += normal_lpdf(rho_tilde | 0, 1); //log(rho) ~ normal(exp(rho_m), rho_s)
target += normal_lpdf(alpha_tilde | 0, 1); //log(alpha) ~ normal(exp(alpha_m), alpha_s)
target += normal_lpdf(a_tilde | 0, 1); //log(a) ~ normal(a_m, a_s)
// Jacobian adjustments for GLM parts of model
target += -sum(log(rho));
target += -sum(log(alpha));
target += normal_lpdf(eta1 | 0, 1);
target += normal_lpdf(eta2 | 0, 1);
target += normal_lpdf(eta3 | 0, 1);
target += normal_lpdf(eta4 | 0, 1);
target += normal_lpdf(eta5 | 0, 1);
// Accumulate evidence over trials
target += poisson_lpmf(y1 | exp(a[1] + log_f1[N_tilde+1:]));
target += poisson_lpmf(y2 | exp(a[2] + log_f2[N_tilde+1:]));
target += poisson_lpmf(y3 | exp(a[3] + log_f3[N_tilde+1:]));
target += poisson_lpmf(y4 | exp(a[4] + log_f4[N_tilde+1:]));
target += poisson_lpmf(y5 | exp(a[5] + log_f5[N_tilde+1:]));
}
Any help would be appreciated!