Forward algorithm for ZIP - Hidden Markov model

I’m trying to adjust a Zero Inflated Poisson Hidden Markov Model with Stan. For the Poisson-HMM in a past forum this setting was shown. see link.

While to adjust the ZIP with the classical theory is well documented the code and model.

Zero Inflation

It uses a parameter theta here there is a probability \theta of drawing a zero, and a probability 1−\theta of drawing from Poisson(\lambda)

p(y_n | \theta, \lambda) = \begin{cases} \theta + (1-\theta)*e^{-\lambda} & \text{ if } y_n=0 \\ (1-\theta)*e^{-\lambda} \frac{\lambda^{y_n}}{y_n!} & \text{ if } y_n>0 \end{cases}

Zero Inflation - Hidden Markov Model

Let Y_t be the observed series and C_t a homogeneous, unobserved Markov chain, withstate-space E={e_1,· · ·, e_m}. We suppose that, conditionally to C_t=e_i, Y_t is distributed according to a ZIP of parameters (\theta_i, \lambda_i).

p(y_t | C_t ; \theta, \lambda) = \begin{cases} \theta_i + (1-\theta_i)*e^{-\lambda_i} & \text{ if } y_t=0 \\ (1-\theta_i)*e^{-\lambda_i} \frac{\lambda_i^{y_t}}{y_t!} & \text{ if } y_t>0 \end{cases}


Classical Theory

library(ziphsmm)
set.seed(123)
prior_init <- c(0.5,0.5)
emit_init <- c(20,6)
zero_init <- c(0.5,0)
tpm <- matrix(c(0.9, 0.1, 0.2, 0.8),2,2,byrow=TRUE)
result <- hmmsim(n=100,M=2,prior=prior_init, tpm_parm=tpm,emit_parm=emit_init,zeroprop=zero_init)
y <- result$series
serie <- data.frame(y = result$series, m = result$state)

fit1 <-  fasthmmfit(y,x=NULL,ntimes=NULL,M=2,prior_init,tpm,
                    emit_init,0.5, hessian=FALSE,method="BFGS", 
                    control=list(trace=1))
fit1
$prior
            [,1]
[1,] 0.997497445
[2,] 0.002502555

$tpm
          [,1]       [,2]
[1,] 0.9264945 0.07350553
[2,] 0.3303533 0.66964673

$zeroprop
[1] 0.6342182

$emit
          [,1]
[1,] 20.384688
[2,]  7.365498

$working_parm
[1] -5.9879373 -2.5340475  0.7065877  0.5503559  3.0147840  1.9968067

$negloglik
[1] 208.823

Bayesian theory (Stan)

library(rstan)

ZIPHMM <- 'data {
    int<lower=0> N;
    int<lower=0> y[N];
    int<lower=1> m;
  }

parameters {
    real<lower=0, upper=1> theta; //
    positive_ordered[m] lambda; //
    simplex[m] Gamma[m]; // tpm
  }

model {
  vector[m] log_Gamma_tr[m];
  vector[m] lp;
  vector[m] lp_p1;
  
  // priors
  lambda ~ gamma(0.1,0.01);
  theta ~ beta(0.05, 0.05);

  // transposing tpm and taking the log of each entry
  for(i in 1:m)
    for(j in 1:m)
      log_Gamma_tr[j, i] = log(Gamma[i, j]);
  
  lp = rep_vector(-log(m), m); // 
  
    for(n in 1:N) {
      for(j in 1:m){
        if (y[n] == 0)
          lp_p1[j] = log_sum_exp(log_Gamma_tr[j] + lp) +
                     log_sum_exp(bernoulli_lpmf(1 | theta),
                     bernoulli_lpmf(0 | theta) + poisson_lpmf(y[n] | lambda[j]));
        else
          lp_p1[j] = log_sum_exp(log_Gamma_tr[j] + lp) + 
                     bernoulli_lpmf(0 | theta) + 
                     poisson_lpmf(y[n] | lambda[j]);
                   }
      lp = lp_p1;
                  }
    target += log_sum_exp(lp);
}'
mod_ZIP <- stan(model_code = ZIPHMM, data=list(N=length(y), y=y, m=2), iter=1000, chains=1)
print(mod_ZIP,digits_summary = 3)
               mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
theta         0.518   0.002 0.052    0.417    0.484    0.518    0.554    0.621   568 0.998
lambda[1]     7.620   0.039 0.787    6.190    7.038    7.619    8.194    9.132   404 1.005
lambda[2]    20.544   0.039 0.957   18.861   19.891   20.500   21.189   22.611   614 1.005
Gamma[1,1]    0.664   0.004 0.094    0.473    0.604    0.669    0.730    0.841   541 0.998
Gamma[1,2]    0.336   0.004 0.094    0.159    0.270    0.331    0.396    0.527   541 0.998
Gamma[2,1]    0.163   0.003 0.066    0.057    0.114    0.159    0.201    0.312   522 0.999
Gamma[2,2]    0.837   0.003 0.066    0.688    0.799    0.841    0.886    0.943   522 0.999
lp__       -222.870   0.133 1.683 -227.154 -223.760 -222.469 -221.691 -220.689   161 0.999

True values

real = list(tpm = tpm, 
     zeroprop = nrow(serie[serie$m == 1 & serie$y == 0, ]) / nrow(serie[serie$m == 1,]),
     emit = t(t(tapply(serie$y[serie$y != 0],serie$m[serie$y != 0], mean))))
real
$tpm
     [,1] [,2]
[1,]  0.9  0.1
[2,]  0.2  0.8

$zeroprop
[1] 0.6341463

$emit
       [,1]
1 20.433333
2  7.277778

Estimates give quite oddly to someone could help me to know that I am doing wrong.

I give credits to merv for giving the idea of how to correctly adjust the ZIP-HMM.

library(rstan)

code.ZIPHMM <- '
data {
  int<lower=0> N;    // length of chain
  int<lower=0> y[N]; // emissions
  int<lower=1> m;    // num states
}

parameters {
  simplex[m] start_pos;         // initial pos probs
  real<lower=0, upper=1> theta; // zero-inflation parameter
  positive_ordered[m] lambda_or;   // emission poisson params
  simplex[m] Gamma[m];          // transition prob matrix
}

transformed parameters{
  vector[m] lambda;
  lambda[1] = lambda_or[2];  
  lambda[2] = lambda_or[1];
}

model {
  vector[m] log_Gamma_tr[m];
  vector[m] lp;
  vector[m] lp_p1;

  // transposing tpm and taking the log of each entry
  for (i in 1:m) {
    for (j in 1:m) { 
      log_Gamma_tr[j, i] = log(Gamma[i, j]);
    }
}
  
// initial position log-lik
lp = log(start_pos);

  for (n in 1:N) {
    for (j in 1:m) {
      // log-lik for state
      lp_p1[j] = log_sum_exp(log_Gamma_tr[j] + lp);

    // log-lik for emission
      if (j == 1) { // assuming only state 1 has zero-inflation
        if (y[n] == 0) {
          lp_p1[j] += log_mix(theta, 0, poisson_lpmf(0 | lambda[j]));
        } else {
          lp_p1[j] += log1m(theta) + poisson_lpmf(y[n] | lambda[j]);
        }
      } else {
        lp_p1[j] += poisson_lpmf(y[n] | lambda[j]);
      }
    }
    lp = lp_p1; // log-lik for next position
  }
  target += log_sum_exp(lp);
}'

samples.ZIPHMM <- stan(model_code = code.ZIPHMM, 
                       data=list(N=length(y), y=y, m=2), 
                       iter=1000, chains=1, 
                       pars = c("theta","lambda","Gamma","lp__"))
print(samples.ZIPHMM, digits_summary = 3)
               mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
theta         0.637   0.003 0.058    0.524    0.596    0.640    0.674    0.746   505 1.001
lambda[1]    20.422   0.033 0.869   18.824   19.794   20.411   20.985   22.102   696 0.999
lambda[2]     7.562   0.028 0.654    6.279    7.140    7.554    7.997    8.958   557 1.001
Gamma[1,1]    0.915   0.001 0.033    0.840    0.898    0.919    0.937    0.967   589 0.998
Gamma[1,2]    0.085   0.001 0.033    0.033    0.063    0.081    0.102    0.160   589 0.998
Gamma[2,1]    0.348   0.005 0.116    0.149    0.266    0.333    0.430    0.583   551 1.003
Gamma[2,2]    0.652   0.005 0.116    0.417    0.570    0.667    0.734    0.851   551 1.003
lp__       -214.914   0.104 1.866 -219.042 -216.092 -214.591 -213.451 -212.236   319 1.002

Now the estimates of the values with Stan match those of the ziphsmm package.