Hello everyone,
leveraging the package library(BayesHMM) I want to propose a combination of HMM and a mixture of skew-normal distribution in a regression setting.
But I got different errors, see eg:
Error in stanc(file = file, model_code = model_code, model_name = model_name, :
0
Semantic error in 'string', line 36, column 6 to column 31:
Ill-typed arguments supplied to infix operator .*. Available signatures:
(int, int) => int
(real, real) => real
(vector, vector) => vector
(row_vector, row_vector) => row_vector
(matrix, matrix) => matrix
Instead supplied arguments of incompatible type: real[], matrix.
This is the theoretical model.
HMM involve two interconnected models. The state model consists of a discrete-time, discrete-state { } firstorder Markov chain z_t \in\{1, \ldots, K\} that transitions according to p\left(z_t \mid z_{t-1}\right). In turns, the observation model is governed by p\left(\mathbf{y}_t \mid z_t\right), where \mathbf{y}_t are the observations, emissions or output. The corresponding joint distribution is
Each single time slice corresponds to a mixture distribution with component densities given by p\left(\mathbf{y}_t \mid z_t\right),
where in my case
\begin{equation}
p\left(\mathbf{y}_t \mid z_t=k, \theta\right) = (m_1) \times \operatorname{SN}_a\left(y_a ; \xi_a, \omega_a^2, \lambda_a\right)
+(m_2) \times \operatorname{SN}_b\left(y_b ; \xi_b, \omega_b^2, \lambda_b\right)
\end{equation}
where \xi_a=\beta_a X; \xi_b=\beta_b X; and m \in[0,1] is the mixture weight such that m_1+m_2=1 in order o guarantee that the resulting expression remains a proper PDF
Here is the simulation in R.
library(brms)
library(rstan)
library(BayesHMM)
# Function to generate random values from a simplex distribution
runif_simplex <- function(T) {
x <- -log(runif(T))
x / sum(x)
}
hmm_generate <- function(K, T, lambda) {
# 1. Parameters
pi1 <- runif_simplex(K)
A <- t(replicate(K, runif_simplex(K)))
mu1 <- sort(rnorm(K, 10 * 1:K, 1))
mu2 <- sort(rnorm(K, 10 * 1:K, 1))
sigma1 <- abs(rnorm(K))
sigma2 <- abs(rnorm(K))
alpha1 <- runif(K, -1, 1)
alpha2 <- runif(K, -1, 1)
beta1 <- runif(K, -1, 1) # Regression coefficient for mixture 1
beta2 <- runif(K, -1, 1) # Regression coefficient for mixture 2
# 2. Hidden path
z <- vector("numeric", T)
z[1] <- sample(1:K, size = 1, prob = pi1)
for (t in 2:T)
z[t] <- sample(1:K, size = 1, prob = A[z[t - 1], ])
# 3. Covariate
x <- rnorm(T)
# 4. Observations
y <- vector("numeric", T)
for (t in 1:T) {
if (runif(1) < lambda) {
y[t] <- rskew_normal(1, xi = mu1[z[t]] + beta1[z[t]] * x[t], omega = sigma1[z[t]], alpha = alpha1[z[t]])
} else {
y[t] <- rskew_normal(1, xi = mu2[z[t]] + beta2[z[t]] * x[t], omega = sigma2[z[t]], alpha = alpha2[z[t]])
}
}
list(y = y, x = x, z = z, theta = list(pi1 = pi1, A = A, mu1 = mu1, mu2 = mu2, sigma1 = sigma1, sigma2 = sigma2, alpha1 = alpha1, alpha2 = alpha2, lambda = lambda, beta1 = beta1, beta2 = beta2))
}
# Set the seed for reproducibility
set.seed(900)
K <- 3
T_length <- 1500
lambda <- 0.7 # Mixture weight for the first component
# Generate HMM data
dataset <- hmm_generate(K, T_length, lambda)
# Create a data frame with covariate and response
data <- data.frame(x = dataset$x, y = dataset$y)
plot(data$y)
plot(data$x,data$y)
hmm_init <- function(K, y) {
clasif <- kmeans(y, K)
# Initialize parameters for mixture of skewed normal distributions
init.mu1 <- by(y, clasif$cluster, mean)
init.mu2 <- by(y, clasif$cluster, mean)
init.sigma1 <- by(y, clasif$cluster, sd)
init.sigma2 <- by(y, clasif$cluster, sd)
init.alpha1 <- runif(K, -1, 1)
init.alpha2 <- runif(K, -1, 1)
init.lambda <- runif(K)
# Sort parameters based on means
init.order <- order(init.mu1)
list(mu1 = init.mu1[init.order],
mu2 = init.mu2[init.order],
sigma1 = init.sigma1[init.order],
sigma2 = init.sigma2[init.order],
alpha1 = init.alpha1[init.order],
alpha2 = init.alpha2[init.order],
lambda = init.lambda[init.order],
beta = rep(1, K))
}
The stan code
functions {
vector normalize(vector x) {
return x / sum(x);
}
}
data {
int<lower=1> T; // number of observations (length)
int<lower=1> K; // number of hidden states
real y[T]; // observations
real x[T];
}
parameters {
// Discrete state model
simplex[K] pi1; // initial state probabilities
simplex[K] A[K]; // transition probabilities
// A[i][j] = p(z_t = j | z_{t-1} = i)
// Continuous observation model
vector[K] beta1; // observation means for component 1
vector[K] beta2; // observation means for component 2
real<lower=0> sigma1[K]; // observation standard deviations for component 1
real<lower=0> sigma2[K]; // observation standard deviations for component 2
real<lower=-10, upper=10> skewness1[K]; // skewness parameters for component 1
real<lower=-10, upper=10> skewness2[K]; // skewness parameters for component 2
// Mixture parameters
simplex[K] theta; // mixing proportions
}
transformed parameters {
vector[K] logalpha[T];
vector[K] mu1;
vector[K] mu2;
// Compute predicted means based on linear regression
mu1 = to_vector(x) .* beta1;
mu2 = to_vector(x) .* beta2;
{ // Forward algorithm log p(z_t = j | x_{1:t})
real accumulator[K];
logalpha[1] = log(pi1) + log_sum_exp(log(theta[1]) + skew_normal_lpdf(y[1] | mu1, sigma1, skewness1),
log(theta[2]) + skew_normal_lpdf(y[1] | mu2, sigma2, skewness2));
for (t in 2:T) {
for (j in 1:K) { // j = current (t)
for (i in 1:K) { // i = previous (t-1)
// Murphy (2012) Eq. 17.48
// belief state + transition prob + local evidence at t
accumulator[i] = logalpha[t-1, i] + log(A[i, j]) +
log_sum_exp(log(theta[1]) + skew_normal_lpdf(y[t] | mu1[j], sigma1[j], skewness1[j]),
log(theta[2]) + skew_normal_lpdf(y[t] | mu2[j], sigma2[j], skewness2[j]));
}
logalpha[t, j] = log_sum_exp(accumulator);
}
}
} // Forward
}
model {
target += log_sum_exp(logalpha[T]); // Note: update based only on last logalpha
}
generated quantities {
vector[K] logbeta[T];
vector[K] loggamma[T];
vector[K] alpha[T];
vector[K] beta[T];
vector[K] gamma[T];
int<lower=1, upper=K> zstar[T];
real logp_zstar;
{ // Forward algorithm
for (t in 1:T)
alpha[t] = softmax(logalpha[t]);
} // Forward
{ // Backward algorithm log p(x_{t+1:T} | z_t = j)
real accumulator[K];
for (j in 1:K)
logbeta[T, j] = 1;
for (tforward in 0:(T-2)) {
int t;
t = T - tforward;
for (j in 1:K) { // j = previous (t-1)
for (i in 1:K) { // i = next (t)
// Murphy (2012) Eq. 17.58
// backwards t + transition prob + local evidence at t
accumulator[i] = logbeta[t, i] + log(A[j, i]) +
log_sum_exp(log(theta[1]) + skew_normal_lpdf(y[t] | mu1[i], sigma1[i], skewness1[i]),
log(theta[2]) + skew_normal_lpdf(y[t] | mu2[i], sigma2[i], skewness2[i]));
}
logbeta[t-1, j] = log_sum_exp(accumulator);
}
}
for (t in 1:T)
beta[t] = softmax(logbeta[t]);
} // Backward
{ // Forwards-backwards algorithm log p(z_t = j | x_{1:T})
for(t in 1:T) {
loggamma[t] = alpha[t] .* beta[t];
}
for(t in 1:T)
gamma[t] = normalize(loggamma[t]);
} // Forwards-backwards
{ // Viterbi algorithm
int bpointer[T, K]; // backpointer to the most likely previous state on the most probable path
real delta[T, K]; // max prob for the seq up to t
// with final output from state k for time t
for (j in 1:K)
delta[1, K] = skew_normal_lpdf(y[1] | mu1[j], sigma1[j], skewness1[j]) +
skew_normal_lpdf(y[1] | mu2[j], sigma2[j], skewness2[j]);
for (t in 2:T) {
for (j in 1:K) { // j = current (t)
delta[t, j] = negative_infinity();
for (i in 1:K) { // i = previous (t-1)
real logp;
logp = delta[t-1, i] + log(A[i, j]) +
log_sum_exp(log(theta[1]) + skew_normal_lpdf(y[t] | mu1[j], sigma1[j], skewness1[j]),
log(theta[2]) + skew_normal_lpdf(y[t] | mu2[j], sigma2[j], skewness2[j]));
if (logp > delta[t, j]) {
bpointer[t, j] = i;
delta[t, j] = logp;
}
}
}
}
logp_zstar = max(delta[T]);
for (j in 1:K)
if (delta[T, j] == logp_zstar)
zstar[T] = j;
for (t in 1:(T - 1)) {
zstar[T - t] = bpointer[T - t + 1, zstar[T - t + 1]];
}
}
}
hmm_fit <- function(K, y, x) {
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
stan.model <- 'code.stan'
stan.data <- list(T = length(y), K = K, y = y, x = x)
stan(file = stan.model, data = stan.data, verbose = TRUE,
iter = 600, warmup = 150, thin = 1, chains = 1,
cores = 3,
seed = 900, init = function() { hmm_init(K, y) })
}
fit <- hmm_fit(K, dataset$y, dataset$x)