I’m fitting a univariate Gaussian mixture model to a dataset that has relatively high means (100+).

```
set.seed(1)
n <- 10000
mu <- c(120, 140, 160)     # component means
sigma <- c(4, 3, 5)        # component standard deviations
alpha <- c(0.2, 0.5, 0.3)  # mixture weights
K <- length(alpha)
z <- sample(K, n, replace = TRUE, prob = alpha)  # component assignments
y <- rnorm(n, mu[z], sigma[z])
```

My understanding, from the Stan documentation and from experimentation, is that standardizing the data (transforming it to have mean 0 and standard deviation 1) improves the efficiency of the sampler. However, I’m stuck on transforming the priors. I have fairly strong priors on the component standard deviations, parameterized in terms of mean and standard deviation and converted to gamma shape and rate. The current model looks something like this:
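For reference, the (mean, sd) → (shape, rate) conversion above is the usual gamma moment match: a Gamma(shape, rate) has mean shape/rate and variance shape/rate², so shape = mean²/sd² and rate = mean/sd². A quick R check (the particular values here are made up):

```
# Moment-match a gamma prior: given a desired prior mean m and prior sd s
# for a component sigma, shape = m^2 / s^2 and rate = m / s^2.
m <- 4    # desired prior mean (made-up value)
s <- 0.5  # desired prior sd (made-up value)
shape <- m^2 / s^2
rate  <- m / s^2
c(mean = shape / rate, sd = sqrt(shape / rate^2))  # recovers c(4, 0.5)
```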

```
data {
  // data
  int<lower=1> N;                  // number of observations
  int<lower=1> K;                  // number of mixture components
  vector[N] y;                     // count of fragments of size n
  // priors
  vector[K] mu0;                   // prior means for mu
  real<lower=0> sigma0;            // prior sd for mu
  vector<lower=0>[K] sigma_mu;     // prior mean of each sigma
  vector<lower=0>[K] sigma_gamma;  // prior sd of each sigma
  vector<lower=0>[K] alpha;        // Dirichlet concentration
}
transformed data {
  // moment-match (mean, sd) to gamma (shape, rate)
  vector<lower=0>[K] alpha1;
  vector<lower=0>[K] beta1;
  for (k in 1:K) {
    alpha1[k] = square(sigma_mu[k]) / square(sigma_gamma[k]);
    beta1[k] = sigma_mu[k] / square(sigma_gamma[k]);
  }
}
parameters {
  vector[K] mu;
  vector<lower=0>[K] sigma;
  simplex[K] theta;
}
model {
  vector[K] contributions;
  // priors
  mu ~ normal(mu0, sigma0);
  sigma ~ gamma(alpha1, beta1);
  theta ~ dirichlet(alpha);
  // mixture likelihood, marginalizing over component membership
  for (n in 1:N) {
    for (k in 1:K) {
      contributions[k] = log(theta[k]) + normal_lpdf(y[n] | mu[k], sigma[k]);
    }
    target += log_sum_exp(contributions);
  }
}
```
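For completeness, I call this from R roughly as below. The prior values are placeholders, and `gmm.stan` is just whatever the model file is named:

```
library(rstan)

stan_data <- list(
  N = n,
  K = K,
  y = y,
  mu0 = c(120, 140, 160),    # prior component means (placeholders)
  sigma0 = 10,               # prior sd on mu (placeholder)
  sigma_mu = c(4, 3, 5),     # prior means for the sigmas (placeholders)
  sigma_gamma = c(1, 1, 1),  # prior sds for the sigmas (placeholders)
  alpha = rep(1, K)          # flat Dirichlet prior on the weights
)

fit <- stan(file = "gmm.stan", data = stan_data, chains = 4)
```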

However, if I create `y_std = (y - mean(y)) / sd(y)`, how would I modify `sigma_mu` and `sigma_gamma`?
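My guess is that both simply get divided by `sd(y)`, since under the linear transform each component N(mu_k, sigma_k) on the original scale becomes N((mu_k - mean(y)) / sd(y), sigma_k / sd(y)) on the standardized scale. A sanity check on the simulated data above seems consistent with that:

```
# Within-component sds on the standardized scale should be the original
# sigmas divided by sd(y); scaling back should roughly recover c(4, 3, 5).
y_std <- (y - mean(y)) / sd(y)
tapply(y_std, z, sd) * sd(y)
```

But I’m not sure whether that reasoning carries over cleanly to the gamma hyperparameters, which is why I’m asking.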