# Multilevel hidden Markov model: initial sampling failure

Hi all, I’m trying to fit a multilevel hidden Markov model to longitudinal data, but I keep running into this error:

```
Chain 1: Log probability evaluates to log(0), i.e. negative infinity.
Chain 1: Stan can’t start sampling from this initial value.
Chain 1:
Chain 1: Initialization between (-2, 2) failed after 100 attempts.
```

Model configuration: Gaussian emission distributions are assumed, with 2 hidden states. A time-varying transition probability matrix is fitted at the subject level: time-varying covariates enter through a multinomial logit link, and a second, time-invariant layer of subject-level covariates is added to the same logit link for the t.p.m.
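For what it’s worth, the row-wise multinomial logit described above can also be written with `softmax` instead of exponentiating and normalizing by hand. A minimal sketch for one row of the t.p.m., reusing the variable names from the code below, with the diagonal taken as the reference category:

```stan
// Sketch only: build row i of the t.p.m. at time t via softmax.
// eta[j] is the linear predictor for a transition from state i to state j;
// the diagonal (j == i) is pinned at 0 as the reference category.
vector[K] eta;
for (j in 1:K) {
  if (j == i) {
    eta[j] = 0;  // reference category
  } else {
    eta[j] = beta[n, betarow] * to_vector(u[t]) + beta1[j] * to_vector(u1[n]);
    betarow += 1;
  }
}
A[t][i] = softmax(eta)';  // row sums to 1
```

Working on the softmax scale avoids raw `exp()` calls overflowing to infinity, which can itself contribute to initialization failures like the one above.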

Here’s the Stan code:

```stan
data {
int<lower=1> N;             // num of subjects
int<lower=1> NTotal;   // total num of observations across all subjects, NTotal = sum(T)
int T[N+1];   // num of observations for each subject
int<lower=1> K;         // num of states
int<lower=0> M;        // num of circadian components, a column vector of 1s should be added to the covariates matrix
int<lower=0> M1;       // num of subject-level covariates

real <lower=0> y[NTotal];  // observations
matrix[NTotal, M] u; // time-varying components/covariates
matrix[N, M1] u1; // subject-level covariates
// prior for mu
real <lower=0> mu0;
real <lower=0> k0;
// prior for sigma
real <lower=0> v0;
real <lower=0> sigma02;
}

parameters {
positive_ordered[K] mu;      // observation means
real<lower=0> sigma[K];      // observation standard deviations

matrix[K*(K-1), M] beta[N];  // coefficients for time-varying components
matrix[K, M1] beta1;              // coefficients for covariates
}

transformed parameters{
matrix[K, K] unA[NTotal];
matrix[K, K] A[NTotal];

for (n in 1:N){
for (t in (T[n]+2):(T[n]+T[n+1])){
int betarow=1;
for (i in 1:K){
for (j in 1:K){
if (i==j){
//unA[t,i,j]=1;
unA[t,i,j]=exp(beta1[j]*to_vector(u1[n]));
} else {
unA[t,i,j]=exp(beta[n, betarow]*to_vector(u[t])+beta1[j]*to_vector(u1[n]));
betarow=betarow+1;
}
}
}
for (i in 1:K)
A[t][i]=unA[t][i]/sum(unA[t][i]);
}
}

}

model {
// Normal-Inverse-Chi-squared priors
target += normal_lpdf(mu | mu0, sqrt(to_vector(sigma) / k0));
target += scaled_inv_chi_square_lpdf(sigma | v0, sigma02);

//priors for coefficients
for (n in 1:N){
for (i in 1:K*(K-1)){
for (j in 1:M)
beta[n,i,j] ~ normal(0, 1);
}
}
for (i in 1:K){
for (j in 1:M1)
beta1[i, j] ~ normal(0, 1);
}

{
real acc[K];
real gamma[NTotal, K];

// forward algorithm
for (i in 1:N){   // loop through subject
for (k in 1:K)  // initialize forward variable at subject i's first observation
gamma[T[i]+1, k] = normal_lpdf(y[T[i]+1] | mu[k], sigma[k]);

for (t in (T[i]+2):(T[i]+T[i+1])) { // loop through time points from subject i
for (k in 1:K) {  // current state
for (j in 1:K){ // previous state
// accumulator:
// belief state + transition prob + local evidence at t
acc[j] = gamma[t-1, j] + log(A[t][j, k]) + normal_lpdf(y[t] | mu[k], sigma[k]);
}
gamma[t, k] = log_sum_exp(acc);
}
}
}
for (i in 1:N)
target += log_sum_exp(gamma[T[i]+T[i+1]]); // add each subject's marginal log-likelihood
}
}
``````

Have you seen that we have a built-in HMM density now?

This should make things much easier to code/debug and will also be much faster.
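For reference, a minimal sketch of what the built-in density looks like for a single subject, assuming Stan ≥ 2.24 and the documented `hmm_marginal` signature; `Ti` (that subject’s number of observations), `Gamma`, and `rho` are hypothetical names, not from the code above:

```stan
// Sketch only (assumed built-in API): marginal log-likelihood of one
// subject's Ti observations under a K-state Gaussian-emission HMM.
matrix[K, Ti] log_omegas;  // log emission density, per state and time point
for (t in 1:Ti)
  for (k in 1:K)
    log_omegas[k, t] = normal_lpdf(y[t] | mu[k], sigma[k]);

// Gamma: K x K transition matrix (rows sum to 1); rho: initial state simplex.
target += hmm_marginal(log_omegas, Gamma, rho);
```

The forward recursion is handled internally, so the hand-rolled accumulator loop above goes away entirely.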

Maybe @charlesm93 can rewrite the user’s guide sections to match the new function he implemented.

Thank you Bob, I’ll try it out. One quick question: does it account for non-homogeneous transition probabilities?