I am modeling a set of quantiles from a Gaussian mixture distribution, and it involves calculating the quantile function of a Gaussian mixture as well as populating a variance-covariance matrix with values that are a function of the density and quantile function. I’m using an algebraic solver to calculate the quantile function. I frequently, but not always, get the error that the VCOV matrix is not positive definite. This is surprising because all the entries should be positive. I try adding .000001 in case the numerical solutions give answers very close to 0, but it hasn’t fixed anything. Are there any suggestions on how to fix the issue? The model code is included below as well as how data might be generated.
functions{
vector GMInv_CDF(vector Q, data real p, vector pi, vector mu,
vector sigma, int N) {
vector[1] z;
real cdf;
// this encodes the cdf from which we want to sample
cdf = pi[1]*exp(normal_lcdf(Q | mu[1], sigma[1])) +
pi[2]*exp(normal_lcdf(Q | mu[2], sigma[2])) +
pi[3]*exp(normal_lcdf(Q | mu[3], sigma[3])) //+
//pi[4]*exp(normal_lcdf(Q | mu[4], sigma[4]))
;
z[1] = cdf - p;
return z;
}
real GM_PDF(vector mu, vector sigma, vector pi, real y) {
real density;
density = pi[1]*(exp(normal_lpdf(y | mu[1], sigma[1]))) +
pi[2]*(exp(normal_lpdf(y | mu[2], sigma[2]))) +
pi[3]*(exp(normal_lpdf(y | mu[3], sigma[3]))) +
//pi[4]*exp(normal_lpdf(y | mu[4], sigma[4])) +
.00001;
// density = log_sum_exp({log(pi[1]) + normal_lpdf(y | mu[1], sigma[1]),
// log(pi[2]) + normal_lpdf(y | mu[2], sigma[2]),
// log(pi[3]) + normal_lpdf(y | mu[3], sigma[3]),
// log(pi[4]) + normal_lpdf(y | mu[4], sigma[4]),
// log(pi[5]) + normal_lpdf(y | mu[5], sigma[5])});
return density;
}
}
data {
int<lower=0> N;
vector[N] Q;
vector<lower=0, upper=1>[N] p;
int<lower=1> n_components;
real m;
real<lower=0> c;
real<lower=0> sv; // sd prior sd
real<lower=0> nv; // n prior sd
}
transformed data{
real scaling_step = 1e-5;
real rel_tol = 1e-6;
real f_tol = 1;
int max_steps = 3000;
vector[1] y_guess = [0.5]';
}
parameters {
ordered[n_components] mus;
vector<lower=0>[n_components] sigmas;
real<lower=0> n;
vector<lower=0,upper=1>[n_components] pi;
}
transformed parameters {
vector[N] Qi;
cov_matrix[N] q_var;
vector[n_components] pit;
pit = softmax(pi);
for (i in 1:N) {
// // vector[1] Q_guess = [Q[i]]';
Qi[i] = solve_powell_tol(GMInv_CDF, y_guess,
rel_tol, f_tol, max_steps,
p[i], pit, mus, sigmas, N)[1];
// Qi[i] = solve_newton_tol(GMInv_CDF, y_guess,
// scaling_step, f_tol, max_steps,
// p[i], pit, mus, sigmas, N)[1];
}
for (i in 1:N) {
for (j in 1:i) {
q_var[i, j] = ((fmin(p[i],p[j])*(1 - fmax(p[i],p[j]))) /
(n*GM_PDF(mus, sigmas, pit, Qi[i])*
GM_PDF(mus, sigmas, pit, Qi[j]) + .000001)) +
.000001;
q_var[j, i] = q_var[i, j];
}
}
// q_var_cholesky = cholesky_decompose(q_var);
}
model {
pi ~ normal(4,5);
mus ~ normal(4, 5);
sigmas ~ normal(0,7);
n ~ normal(0, nv);
Q ~ multi_normal(Qi, q_var);
// Q ~ multi_normal_cholesky(Qi, q_var_cholesky);
}
I am treating the data I’m working with as though it came from a Gaussian mixture, but if the data was generated from a simple normal in R it could be as here
probs <- seq(0.05, .95, by = 0.05)
samp <- rnorm(500)
quantiles <- quantile(samp, probs = probs)
dat <- list(
N = length(quantiles),
n = size,
Q = quantiles,
p = probs,
n_components = 3,
m = 2,
c = 3,
sv = 3,
nv = 3000,
pv = 3
)