Hi all, I am learning to use HMM and starting with estimating simulated data. However, even the simplest model, there is divergence. I have checked my code, and approached it in several different ways, either by using forward algorithm myself or using the hmm_marginal() function, neither worked. Could anyone kindly help me take a look?
Here is the code for the simulated data. I simply assume there are 3 hidden states, and individuals can only transit between adjacent state.
np.random.seed(42)
# Parameters
N = 500 # number of customers
T = 20 # time periods
K = 3 # number of hidden states
# Transition matrix
Gamma = np.array([
[0.75, 0.25, 0.0],
[0.25, 0.50, 0.25],
[0.0, 0.75, 0.25]
])
# Emission probabilities for each state
emission_probs = np.array([0.1, 0.5, 0.9])
# Simulate data
states = np.zeros((N, T), dtype=int)
observations = np.zeros((N, T), dtype=int)
# Initial state: uniform random across states
states[:, 0] = np.random.choice(K, size=N)
# Generate first observation
for i in range(N):
s0 = states[i, 0]
observations[i, 0] = np.random.binomial(1, emission_probs[s0])
# Simulate transitions and emissions over time
for t in range(1, T):
for i in range(N):
prev_s = states[i, t-1]
curr_s = np.random.choice(K, p=Gamma[prev_s])
states[i, t] = curr_s
observations[i, t] = np.random.binomial(1, emission_probs[curr_s])
# Reshape into long format
stan_data = {
'N': N,
'T': T,
'K': K,
'y': observations.tolist()
}
Here is the stan code, approach 1:
data {
int N; // Number of observations
int T; // Number of obs per individuals
array[N, T] int<lower=0, upper=1> y; //observations
}
parameters {
// Parameters of measurement model
ordered[3] mu;
// Initial state
simplex[3] rho;
// Rows of the transition matrix
simplex[2] t1;
simplex[3] t2;
simplex[2] t3;
}
transformed parameters {
matrix[3, 3] Gamma = rep_matrix(0, 3, 3);
array[N] matrix[3,T] log_omega;
vector<lower=0, upper=1>[3] theta; // emission probs: P(y_t=1 | z_t=k)
for (k in 1:3) theta[k] = inv_logit(mu[k]); // preserves ordering
// Build the transition matrix
Gamma[1, 1:2] = t1';
Gamma[2, : ] = t2';
Gamma[3, 2:3] = t3';
for (n in 1:N){
for (t in 1:T){
log_omega[n][1,t] = bernoulli_lpmf(y[n,t]|theta[1]);
log_omega[n][2,t] = bernoulli_lpmf(y[n,t]|theta[2]);
log_omega[n][3,t] = bernoulli_lpmf(y[n,t]|theta[3]);
}
}
}
model {
mu ~ normal(0, 4);
rho ~ dirichlet([1, 1, 1]);
t1 ~ dirichlet([1, 1]);
t2 ~ dirichlet([1, 1, 1]);
t3 ~ dirichlet([1, 1]);
for (n in 1:N){
target += hmm_marginal(log_omega[n],Gamma,rho);
}
}
here is stan code, approach 2:
//simple example
data {
int N; // Number of observations
int T; // Number of obs per individuals
array[N, T] int<lower=0, upper=1> y; //observations
}
transformed data {
row_vector[3] rho = [1.0/3.0,1.0/3.0,1.0/3.0];
}
parameters {
// Parameters of measurement model
ordered[3] mu;
// Initial state
//simplex[3] rho;
// Rows of the transition matrix
simplex[2] t1;
simplex[3] t2;
simplex[2] t3;
}
transformed parameters {
matrix[3, 3] Gamma = rep_matrix(0, 3, 3);
vector<lower=0, upper=1>[3] theta; // emission probs: P(y_t=1 | z_t=k)
for (k in 1:3) theta[k] = inv_logit(mu[k]); // preserves ordering
// Build the transition matrix
Gamma[1, 1:2] = t1';
Gamma[2, : ] = t2';
Gamma[3, 2:3] = t3';
}
model {
mu ~ normal(0, 4);
//rho ~ dirichlet([1, 1, 1]);
t1 ~ dirichlet([1, 1]);
t2 ~ dirichlet([1, 1, 1]);
t3 ~ dirichlet([1, 1]);
for (n in 1:N){
vector[3] log_alpha;
for (k in 1:3) //gamma[1,k] = log(theta[k]);
log_alpha[k] = log(rho[k]) + bernoulli_lpmf(y[n,1]|theta[k]);
//when t=1, p(z,y) = p(z)*p(y|z)
for (t in 2:T) {
vector[3] log_alpha_new;
for (k in 1:3) {
vector[3] temp;
for (j in 1:3)
temp[j] = log_alpha[j] + log(Gamma[j,k]);// + bernoulli_lpmf(y[n,t]|theta[k]);//log(theta[k])
log_alpha_new[k] = bernoulli_lpmf(y[n,t]|theta[k]) + log_sum_exp(temp);
}
log_alpha = log_alpha_new;
}
target += log_sum_exp(log_alpha);
}
}
the trace_plot is like the following:



