Hello,

I am trying to fit a custom Hidden Markov Model with an unknown number of hidden states. Each state has 1 or more normal emissions, as illustrated in the plot below (the data are faceted by the hidden states). HMC is very slow, so I am using optimizing with multiple restarts; however, on my real data set I converge to a different local optimum each time I fit the model, even when I take the best of 100 restarts. My questions are: is there something I could change about my model to make fitting easier, and is there a better approach than random restarts that I could try?

My simulated data is attached:

simulated_data.csv (21.3 KB)

R code:

```
# Fit a K-state HMM by MAP optimization with random restarts, keeping
# the fit with the highest log posterior.
# Assumes rstan is already attached (provides stan_model / optimizing).
df <- read.csv("simulated_data.csv")

# Sort by date BEFORE extracting any columns. The original extracted
# y first and sorted df afterwards, which misaligns y (and cpd_index)
# with the run-length encoding of date_index below whenever the CSV is
# not already sorted by date.
df <- df[order(df$date_index), ]
y <- df$norm_value

model <- stan_model(file = "hmm.stan")

K <- 5
date_rle <- rle(df$date_index)
# 0-based offset of the first observation of each date; the Stan code
# indexes observations as y[date_index[w] + t] with t starting at 1.
date_index <- cumsum(date_rle$lengths) - date_rle$lengths

stan.data <- list(
  K = K,
  y = y,
  N = length(y),
  W = length(unique(df$date_index)),
  V = length(unique(df$compound_index)),
  date_index = date_index,
  date_length = date_rle$lengths,
  cpd_index = df$compound_index,
  l = min(y),
  u = max(y)
)

# Random restarts: optimizing() starts from a random point each call,
# so we keep the candidate with the largest log posterior ($value).
n_restarts <- 10
fit <- optimizing(model, data = stan.data, as_vector = FALSE)
for (i in seq_len(n_restarts)) {
  candidate_fit <- optimizing(model, data = stan.data, as_vector = FALSE)
  if (candidate_fit$value > fit$value) {
    fit <- candidate_fit
  }
}
```

Thanks,

Ben

```
// K-state HMM in which each time step (a unique date, index w) emits a
// block of normal observations, one per compound present on that date.
// Emission means/sds are per (compound, state). Label switching is
// broken by ordering states on compound 1's means.
data {
int<lower=2> K; // number of categories
int<lower=1> W; // number of unique dates
int<lower=1> V; // number of compounds
int<lower=1> N; // number of observations
real y[N]; // observations
int date_index[W]; // 0-based offset of the first observation of each date
int<lower=1> date_length[W]; // number of observations on each date
int<lower=1> cpd_index[N]; // compound (1..V) of each observation
real l; // lower bound on emission means (min of y in the R driver)
real u; // upper bound on emission means (max of y in the R driver)
}
parameters {
simplex[K] theta[K]; // theta[i] = transition distribution out of state i
matrix<lower=0>[V, K] sigma_unordered; // per-(compound, state) emission sds
matrix<lower=l, upper=u>[V, K] mu_unordered; // per-(compound, state) emission means
}
transformed parameters {
matrix[V, K] mu;
matrix[V, K] sigma;
{
// Identify the states by sorting them so compound 1's means are
// ascending; the SAME permutation is applied to every compound, so
// the (mu, sigma) pairing within each state is preserved.
int idx[K];
idx = sort_indices_asc(mu_unordered[1]);
for (v in 1:V){
mu[v,] = mu_unordered[v, idx];
sigma[v,] = sigma_unordered[v, idx];
}
}
}
model {
// "Sticky" transition prior: Dirichlet with weight 100 on the
// self-transition and 1 elsewhere, favoring staying in a state.
for (k in 1:K){
vector[K] alpha;
alpha = rep_vector(1, K);
alpha[k] = 100;
theta[k] ~ dirichlet(alpha);
// NOTE(review): prior is placed on the UNORDERED sds, and a Cauchy
// centered at 100 is unusually far from 0 — confirm intended scale.
sigma_unordered[, k] ~ cauchy(100, 10);
}
{ // Forward algorithm
real accumulator[K];
// gamma[w, k] = log p(y up to date w, state at date w = k),
// up to the constant of the implicit uniform initial distribution.
real gamma[W, K];
for (k in 1:K) {
gamma[1, k] = 0;
for (t in 1:date_length[1]) {
int cpd;
cpd = cpd_index[t]; // date_index[1] == 0, so t indexes directly
gamma[1, k] += normal_lpdf(y[t] | mu[cpd, k], sigma[cpd, k]);
}
}
for (w in 2:W) {
for (j in 1:K) { // j=current date (w)
for (i in 1:K) { // i=previous date (w-1)
int date;
date = date_index[w];
accumulator[i] = gamma[w-1, i] + log(theta[i, j]);
// The emission term below depends only on j, so the same amount
// is added to every accumulator[i]. Correct, but it could be
// hoisted outside the i loop and added after log_sum_exp.
for (t in 1:date_length[w]) {
int cpd;
cpd = cpd_index[date + t];
accumulator[i] += normal_lpdf(y[date + t] | mu[cpd, j], sigma[cpd, j]);
}
}
gamma[w, j] = log_sum_exp(accumulator);
}
}
// Marginalize over the final state to get the observed-data likelihood.
target += log_sum_exp(gamma[W]);
}
}
generated quantities {
int<lower=1, upper=K> zstar[W]; // most likely hidden-state sequence
real logp_zstar; // log prob of the best path
{ // Viterbi algorithm
int back_pointer[W, K]; // backpointer to most likely previous state
real delta[W, K]; // max prob for the sequence up to t
// Initialization mirrors the forward algorithm's first-date emission sums.
for (k in 1:K) {
delta[1, k] = 0;
for (t in 1:date_length[1]) {
int cpd;
cpd = cpd_index[t];
delta[1, k] += normal_lpdf(y[t] | mu[cpd, k], sigma[cpd, k]);
}
}
for (w in 2:W) {
for (j in 1:K) { // j=current (w)
delta[w, j] = negative_infinity();
for (i in 1:K) { // i=previous (w-1)
real logp;
int date;
logp = delta[w-1, i] + log(theta[i, j]);
date = date_index[w];
for (t in 1:date_length[w]) {
int cpd;
cpd = cpd_index[date + t];
logp += normal_lpdf(y[date + t] | mu[cpd, j], sigma[cpd, j]);
}
// Keep the best predecessor (max over i), recording it for traceback.
if (logp > delta[w, j]) {
back_pointer[w, j] = i;
delta[w, j] = logp;
}
}
}
}
// Best final state, then follow back pointers from W down to 1.
logp_zstar = max(delta[W]);
for (k in 1:K) {
if (delta[W, k] == logp_zstar) {
zstar[W] = k;
}
}
for (w in 1:(W-1)) {
zstar[W - w] = back_pointer[W - w + 1, zstar[W - w + 1]];
}
}
}
```