Forward algorithm for ZIP - Hidden Markov model

Rafael_Diaz · February 16, 2019, 9:06pm

I’m trying to adjust a Zero Inflated Poisson Hidden Markov Model with Stan. For the Poisson-HMM in a past forum this setting was shown. see link.

While to adjust the ZIP with the classical theory is well documented the code and model.

Zero Inflation

It uses a parameter theta here there is a probability \theta of drawing a zero, and a probability 1−\theta of drawing from Poisson(\lambda)

p(y_n | \theta, \lambda) = \begin{cases} \theta + (1-\theta)*e^{-\lambda} & \text{ if } y_n=0 \\ (1-\theta)*e^{-\lambda} \frac{\lambda^{y_n}}{y_n!} & \text{ if } y_n>0 \end{cases}

Zero Inflation - Hidden Markov Model

Let Y_t be the observed series and C_t a homogeneous, unobserved Markov chain, withstate-space E={e_1,· · ·, e_m}. We suppose that, conditionally to C_t=e_i, Y_t is distributed according to a ZIP of parameters (\theta_i, \lambda_i).

p(y_t | C_t ; \theta, \lambda) = \begin{cases} \theta_i + (1-\theta_i)*e^{-\lambda_i} & \text{ if } y_t=0 \\ (1-\theta_i)*e^{-\lambda_i} \frac{\lambda_i^{y_t}}{y_t!} & \text{ if } y_t>0 \end{cases}

Classical Theory

library(ziphsmm)
set.seed(123)
prior_init <- c(0.5,0.5)
emit_init <- c(20,6)
zero_init <- c(0.5,0)
tpm <- matrix(c(0.9, 0.1, 0.2, 0.8),2,2,byrow=TRUE)
result <- hmmsim(n=100,M=2,prior=prior_init, tpm_parm=tpm,emit_parm=emit_init,zeroprop=zero_init)
y <- result$series
serie <- data.frame(y = result$series, m = result$state)

fit1 <-  fasthmmfit(y,x=NULL,ntimes=NULL,M=2,prior_init,tpm,
                    emit_init,0.5, hessian=FALSE,method="BFGS", 
                    control=list(trace=1))
fit1
$prior
            [,1]
[1,] 0.997497445
[2,] 0.002502555

$tpm
          [,1]       [,2]
[1,] 0.9264945 0.07350553
[2,] 0.3303533 0.66964673

$zeroprop
[1] 0.6342182

$emit
          [,1]
[1,] 20.384688
[2,]  7.365498

$working_parm
[1] -5.9879373 -2.5340475  0.7065877  0.5503559  3.0147840  1.9968067

$negloglik
[1] 208.823

Bayesian theory (Stan)

library(rstan)

ZIPHMM <- 'data {
    int<lower=0> N;
    int<lower=0> y[N];
    int<lower=1> m;
  }

parameters {
    real<lower=0, upper=1> theta; //
    positive_ordered[m] lambda; //
    simplex[m] Gamma[m]; // tpm
  }

model {
  vector[m] log_Gamma_tr[m];
  vector[m] lp;
  vector[m] lp_p1;
  
  // priors
  lambda ~ gamma(0.1,0.01);
  theta ~ beta(0.05, 0.05);

  // transposing tpm and taking the log of each entry
  for(i in 1:m)
    for(j in 1:m)
      log_Gamma_tr[j, i] = log(Gamma[i, j]);
  
  lp = rep_vector(-log(m), m); // 
  
    for(n in 1:N) {
      for(j in 1:m){
        if (y[n] == 0)
          lp_p1[j] = log_sum_exp(log_Gamma_tr[j] + lp) +
                     log_sum_exp(bernoulli_lpmf(1 | theta),
                     bernoulli_lpmf(0 | theta) + poisson_lpmf(y[n] | lambda[j]));
        else
          lp_p1[j] = log_sum_exp(log_Gamma_tr[j] + lp) + 
                     bernoulli_lpmf(0 | theta) + 
                     poisson_lpmf(y[n] | lambda[j]);
                   }
      lp = lp_p1;
                  }
    target += log_sum_exp(lp);
}'
mod_ZIP <- stan(model_code = ZIPHMM, data=list(N=length(y), y=y, m=2), iter=1000, chains=1)
print(mod_ZIP,digits_summary = 3)
               mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
theta         0.518   0.002 0.052    0.417    0.484    0.518    0.554    0.621   568 0.998
lambda[1]     7.620   0.039 0.787    6.190    7.038    7.619    8.194    9.132   404 1.005
lambda[2]    20.544   0.039 0.957   18.861   19.891   20.500   21.189   22.611   614 1.005
Gamma[1,1]    0.664   0.004 0.094    0.473    0.604    0.669    0.730    0.841   541 0.998
Gamma[1,2]    0.336   0.004 0.094    0.159    0.270    0.331    0.396    0.527   541 0.998
Gamma[2,1]    0.163   0.003 0.066    0.057    0.114    0.159    0.201    0.312   522 0.999
Gamma[2,2]    0.837   0.003 0.066    0.688    0.799    0.841    0.886    0.943   522 0.999
lp__       -222.870   0.133 1.683 -227.154 -223.760 -222.469 -221.691 -220.689   161 0.999

True values

real = list(tpm = tpm, 
     zeroprop = nrow(serie[serie$m == 1 & serie$y == 0, ]) / nrow(serie[serie$m == 1,]),
     emit = t(t(tapply(serie$y[serie$y != 0],serie$m[serie$y != 0], mean))))
real
$tpm
     [,1] [,2]
[1,]  0.9  0.1
[2,]  0.2  0.8

$zeroprop
[1] 0.6341463

$emit
       [,1]
1 20.433333
2  7.277778

Estimates give quite oddly to someone could help me to know that I am doing wrong.

Rafael_Diaz · February 23, 2019, 4:56am

I give credits to merv for giving the idea of how to correctly adjust the ZIP-HMM.

library(rstan)

code.ZIPHMM <- '
data {
  int<lower=0> N;    // length of chain
  int<lower=0> y[N]; // emissions
  int<lower=1> m;    // num states
}

parameters {
  simplex[m] start_pos;         // initial pos probs
  real<lower=0, upper=1> theta; // zero-inflation parameter
  positive_ordered[m] lambda_or;   // emission poisson params
  simplex[m] Gamma[m];          // transition prob matrix
}

transformed parameters{
  vector[m] lambda;
  lambda[1] = lambda_or[2];  
  lambda[2] = lambda_or[1];
}

model {
  vector[m] log_Gamma_tr[m];
  vector[m] lp;
  vector[m] lp_p1;

  // transposing tpm and taking the log of each entry
  for (i in 1:m) {
    for (j in 1:m) { 
      log_Gamma_tr[j, i] = log(Gamma[i, j]);
    }
}
  
// initial position log-lik
lp = log(start_pos);

  for (n in 1:N) {
    for (j in 1:m) {
      // log-lik for state
      lp_p1[j] = log_sum_exp(log_Gamma_tr[j] + lp);

    // log-lik for emission
      if (j == 1) { // assuming only state 1 has zero-inflation
        if (y[n] == 0) {
          lp_p1[j] += log_mix(theta, 0, poisson_lpmf(0 | lambda[j]));
        } else {
          lp_p1[j] += log1m(theta) + poisson_lpmf(y[n] | lambda[j]);
        }
      } else {
        lp_p1[j] += poisson_lpmf(y[n] | lambda[j]);
      }
    }
    lp = lp_p1; // log-lik for next position
  }
  target += log_sum_exp(lp);
}'

samples.ZIPHMM <- stan(model_code = code.ZIPHMM, 
                       data=list(N=length(y), y=y, m=2), 
                       iter=1000, chains=1, 
                       pars = c("theta","lambda","Gamma","lp__"))
print(samples.ZIPHMM, digits_summary = 3)
               mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
theta         0.637   0.003 0.058    0.524    0.596    0.640    0.674    0.746   505 1.001
lambda[1]    20.422   0.033 0.869   18.824   19.794   20.411   20.985   22.102   696 0.999
lambda[2]     7.562   0.028 0.654    6.279    7.140    7.554    7.997    8.958   557 1.001
Gamma[1,1]    0.915   0.001 0.033    0.840    0.898    0.919    0.937    0.967   589 0.998
Gamma[1,2]    0.085   0.001 0.033    0.033    0.063    0.081    0.102    0.160   589 0.998
Gamma[2,1]    0.348   0.005 0.116    0.149    0.266    0.333    0.430    0.583   551 1.003
Gamma[2,2]    0.652   0.005 0.116    0.417    0.570    0.667    0.734    0.851   551 1.003
lp__       -214.914   0.104 1.866 -219.042 -216.092 -214.591 -213.451 -212.236   319 1.002

Now the estimates of the values with Stan match those of the ziphsmm package.

Topic		Replies	Views
Learning how to write a Zero Inflated Poisson model with stan? Modeling	10	3870	November 30, 2018
Zero-Inflated models in Stan General	23	5384	August 15, 2022
Double inflated models in Stan Modeling	16	981	October 13, 2018
Zero one inflated beta regression in STAN Modeling	18	6599	January 14, 2020
Checking Stan code for zero-inflated LDA Modeling rstan , specification	0	440	May 13, 2023