pystan.StanModel RuntimeError: Initialization failed when trying to fit

LTL · May 4, 2020, 11:11am

Hi, I have recently read a paper which developed the following model:

//Markov modulated Marked Point Process (M3PP)
//Author: Tobias Hatt
//Date: 14.04.2020

functions{
//Transition probability
real transition_prob(int s_from, int s, int m, int d, matrix beta_trans, matrix alpha_trans, real[] delta_times){
    real duration;

    if(s_from == s){
     return negative_infinity();
    }
    else{
    duration = sum(delta_times[(m-d+1):m]);


    return duration * beta_trans[s_from, s] + alpha_trans[s_from, s];
   }
}
//loglikelihood of arrival times (point process)
real compute_log_lik_HP(vector Time, int n, real time_window, real lengthscaleT, real mu, real a){
    //Time: vector observation times - these must be sorted from smallest to largest!
    //n: number of observation times in interval
    //time_window: time when interval is left
    //lengthscaleT: beta coefficient in exponential function
    //mu: base intensity
    //a: \frac{\alpha}{\beta}

    matrix[n,n] timeD;
    vector[n] timeD2;
    vector[n] ll = rep_vector(mu, n);

    for(i in 1:n) {
      for(j in 1:n) {
        timeD[i,j] = -(Time[i] - Time[j]);
      }
      timeD2[i] = -(time_window - Time[i]);
    }

    for(i in 2:n) {
      ll[i] += sum(a * lengthscaleT * exp(timeD[i, 1:(i-1)] * lengthscaleT));
    }

    return (sum(log(ll)) - time_window * mu + a * (sum(exp(timeD2*lengthscaleT)) - n));
}
//loglikelihood of observations in an interval (lieklihood of the marks)
real compute_log_lik_obs(int[] obs, real[] delta_time, row_vector phi_0, real[,] phi_1){
    int n = num_elements(obs);
    int P = num_elements(phi_0);

    vector[n] ll;

    ll[1] = phi_0[obs[1]] - log_sum_exp(phi_0);

    for(i in 2:n){
     ll[i] = phi_1[obs[i-1], obs[i]] - log_sum_exp(phi_1[obs[i-1]]);
    }
    return sum(ll);
}
//total loglikelihood
real compute_log_lik(vector Time , int M , int D, int S , int L, vector l_pi , real[,,] log_density_duration,
                       real[,,] time_log_lik, real[,,] obs_log_lik, real[,,,] l_tpm){

    //m: period
    //d: number of observations with \tau_n <= t_m < \tau_{n+1}
    //M: censoring time/period
    //L: Purchase or exit sequence

    //Initialisation
    real l_alpha[M,S,D];
    real l_acc_1[D];
    real l_alpha_star[S-1];
    int pointer;
    real log_lik[S];
    int d_prime;

    //Initialise l_alpha
    l_alpha = rep_array(negative_infinity(), M, S, D);

    //m=1
    for(s in 1:S){
     l_alpha[1, s, 1] = l_pi[s] + log_density_duration[s, 1, 1] + time_log_lik[s, 1, 1] + obs_log_lik[s, 1, 1];
    }

    // 2<=m<=D
    for(m in 2:min(D,M)){
     for(s in 1:S){
      for(d in 1:(m-1)){
       pointer = 1;
       d_prime = min(D, m-d);
       for(i in 1:S){
        if(i != s){
         l_alpha_star[pointer] = log_sum_exp(to_vector(l_alpha[m-d, i, 1:d_prime]) + to_vector(l_tpm[i, s, m-d, 1:d_prime]));
         pointer += 1;
        }
       }
       l_alpha[m, s, d] = log_sum_exp(l_alpha_star) + log_density_duration[s, m-d+1, d] + time_log_lik[s, m-d+1, m] + obs_log_lik[s, m-d+1, m];
      }
      l_alpha[m, s, m] = l_pi[s] + log_density_duration[s, 1, m] + time_log_lik[s, 1, m] + obs_log_lik[s, 1, m];
     }
    }
    // D+1<=m<=M-1
    for(m in (D+1):M){
     for(s in 1:S){
      for(d in 1:D){
       pointer = 1;
       d_prime = min(D, m-d);
       for(i in 1:S){
        if(i != s){
         l_alpha_star[pointer] = log_sum_exp(to_vector(l_alpha[m-d, i, 1:d_prime]) + to_vector(l_tpm[i, s, m-d, 1:d_prime]));
         pointer += 1;
        }
       }
       l_alpha[m, s, d] = log_sum_exp(l_alpha_star) + log_density_duration[s, m-d+1, d] + time_log_lik[s, m-d+1, m] + obs_log_lik[s, m-d+1, m];
      }
     }
    }

    //Integral over all durations
    for(s in 1:S){
     log_lik[s] = log_sum_exp(l_alpha[M, s]);
    }

    return log_sum_exp(log_lik); //log P(y_{1:T}|\lambda) - loglikelihood
}
}
data {
  int<lower=1> S; // number of transient states
  int<lower=1> P; // number of possible pages
  int<lower=1> D; // max state duration allowed
  int<lower=1> N_train; // number of users in training set
  int<lower=1> max_length;
  int<lower=1> M_train[N_train]; // pages per sequence
  int<lower=1, upper=2> L_train[N_train]; // outcome - 1 is exit & 2 is purchase
  real Y_train[N_train, max_length];// time on page
  int obs_train[N_train, max_length+1];
}
parameters {
  simplex[S] pi ; //initial state distribution for each user
  matrix[S, S] alpha_trans; //transition intercept
  vector[S] alpha; //gamma shape parameter
  vector[S] HP_ratio;// Hawkes process alpha
  vector[S] HP_beta;// Hawkes process beta
  vector[S] HP_base_intensity;// Hawkes process base intensity
  matrix[S, P] phi_0;//MAR intercept for each page in each state
  real phi_1[S, P, P];//MAR intercept for each page in each state
}
transformed parameters{
    vector[N_train] log_lik;
    vector[S] l_pi=log(pi);

    for(n in 1:N_train){
        int CP = M_train[n];
        matrix[CP-1, CP-1] duration_matrix;
        real log_density_duration[S, CP-1, CP-1] = rep_array(negative_infinity(), S, CP-1, CP-1);//state duration density for all possible sojourn time and states
        vector[CP] Time;
        real time_log_lik[S, CP-1, CP-1] = rep_array(negative_infinity(), S, CP-1, CP-1);
        real obs_log_lik[S, CP, CP] = rep_array(negative_infinity(), S, CP, CP);
        real l_tpm[S, S, CP-1, D];
        real acc[S-1];
        int pointer;
        //Observation times
        Time[1] = 0;
        Time[2:(CP)] = to_vector(cumulative_sum(Y_train[n, 1:(CP-1)]));

        //likelihood of possible combinations of observation times
        for(s in 1:(S)){
            for(i in 1:(CP-1)){
                for(j in i:(CP-1)){
                    time_log_lik[s, i, j] = compute_log_lik_HP(Time[i:j] - Time[i], j-i+1, Time[j+1] - Time[i], HP_beta[s], HP_base_intensity[s], HP_ratio[s]);
                }
            }
        }

        //p(y|t)
        for(s in 1:(S)){
            for(i in 1:CP){
                for(j in i:CP){
                    obs_log_lik[s, i, j] = compute_log_lik_obs(obs_train[n, i:j], Y_train[n, i:(j-1)], phi_0[s], phi_1[s]);
                }
            }
        }

        //Duration matrix
        for(i in 1:(CP-1)){
            duration_matrix[i,] = rep_row_vector(0.0, (CP-1));
            duration_matrix[i, 1:((CP-1)-i+1)] = to_row_vector(cumulative_sum(Y_train[n, i:(CP-1)]));
        }

        //density_duration
        for(s in 1:S){
            for(i in 1:(CP-1)){
                for(j in 1:(CP-1)){
                    if(j <= ((CP-1)-i+1)){
                        log_density_duration[s, i, j] = log(alpha[s]) - alpha[s] * duration_matrix[i, j];//alpha[s] * log(beta[s]) - log(tgamma(alpha[s]))+(alpha[s]-1)*log(duration_matrix[i, j])-beta[s]*duration_matrix[i, j];//gamma_lpdf(duration_matrix[i, j] | alpha[s], beta[s]);
                    }
                }
            }
        }

        //Transition matrix
        for(s_from in 1:S){
            for(m in 1:(CP-1)){
                for(d in 1:D){
                    pointer = 1;
                    acc = rep_array(0.0, S-1);
                    for(s in 1:(S)){
                        if(d <= min(D, m)){
                            l_tpm[s_from, s, m, d] = transition_prob(s_from, s, m, d, rep_matrix(0.0, S, S), alpha_trans, Y_train[n]);
                            if(s != s_from){
                                acc[pointer] = l_tpm[s_from, s, m, d];
                                pointer += 1;
                            }
                        }
                        else{
                            l_tpm[s_from, s, m, d] = negative_infinity();
                        }
                    }
                    //Scale so [0,1]
                    for(s in 1:(S)){
                        if(s != s_from){
                            l_tpm[s_from, s, m, d] -= log_sum_exp(acc);
                        }
                    }
                }
            }
        }
        log_lik[n] = compute_log_lik(Time , CP-1, D , S , L_train[n] , l_pi , log_density_duration, time_log_lik, obs_log_lik, l_tpm);
    }
}
model {
    pi ~ dirichlet(rep_vector(1,S));
    alpha ~ gamma(2, 0.1);
    HP_ratio ~ gamma(2, 0.1);
    HP_beta ~ gamma(2, 0.1);
    HP_base_intensity ~ gamma(2, 0.1);
    for(s in 1:S){
      alpha_trans[s] ~ normal(0, 5);
    }
    target += sum(log_lik);
}

Here is the link to the paper: https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/404486/M3PP.pdf?sequence=1&isAllowed=y

I wanted to try this model with the following data:

toy_dat = {'S':  2 ,
'P':  3 ,
'D':  10000 ,
'N_train':  10 ,
'max_length':  8 ,
'M_train':  [5, 3, 5, 3, 6, 4, 7, 8, 3, 7] ,
'L_train':  [2, 2, 2, 2, 2, 1, 2, 2, 2, 2] ,
'Y_train':  [[286.0, 119.0, 291.0, 115.0, 120.0, 0.0, 0.0, 0.0],
             [160.0, 243.0, 23.0, 0.0, 0.0, 0.0, 0.0, 0.0],
             [154.0, 111.0, 210.0, 180.0, 99.0, 0.0, 0.0, 0.0], 
             [39.0, 135.0, 14.0, 0.0, 0.0, 0.0, 0.0, 0.0], 
             [225.0, 73.0, 33.0, 75.0, 38.0, 28.0, 0.0, 0.0], 
             [82.0, 278.0, 162.0, 44.0, 0.0, 0.0, 0.0, 0.0],
             [125.0, 122.0, 86.0, 113.0, 75.0, 34.0, 224.0, 0.0], 
             [37.0, 27.0, 27.0, 58.0, 77.0, 36.0, 55.0, 27.0], 
             [171.0, 70.0, 42.0, 0.0, 0.0, 0.0, 0.0, 0.0], 
             [10.0, 31.0, 39.0, 84.0, 105.0, 42.0, 43.0, 0.0]] ,
'obs_train':  [[3, 3, 1, 1, 1, 0, 0, 0, 0],
               [3, 1, 1, 0, 0, 0, 0, 0, 0],
               [1, 3, 3, 3, 1, 0, 0, 0, 0],
               [1, 2, 1, 0, 0, 0, 0, 0, 0],
               [3, 3, 1, 3, 1, 1, 0, 0, 0], 
               [1, 3, 1, 1, 0, 0, 0, 0, 0],
               [1, 3, 1, 3, 1, 1, 1, 0, 0],
               [3, 3, 3, 3, 3, 1, 3, 1, 0], 
               [3, 1, 1, 0, 0, 0, 0, 0, 0], 
               [1, 2, 3, 2, 3, 3, 1, 0, 0]]
}
fit = sm.sampling(data=toy_dat, iter=10, chains=4)

However I get the following error:

RemoteTraceback                           Traceback (most recent call last)
RemoteTraceback: 
"""
Traceback (most recent call last):
  File "C:\Users\Student\anaconda3\lib\multiprocessing\pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "C:\Users\Student\anaconda3\lib\multiprocessing\pool.py", line 44, in mapstar
    return list(map(*args))
  File "stanfit4anon_model_6552c7c244e295f4f1abcbcd0ea84620_8300969585210234451.pyx", line 371, in stanfit4anon_model_6552c7c244e295f4f1abcbcd0ea84620_8300969585210234451._call_sampler_star
  File "stanfit4anon_model_6552c7c244e295f4f1abcbcd0ea84620_8300969585210234451.pyx", line 404, in stanfit4anon_model_6552c7c244e295f4f1abcbcd0ea84620_8300969585210234451._call_sampler
RuntimeError: Initialization failed.
"""

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
<ipython-input-11-d7f2a8cf24c8> in <module>
----> 1 fit = sm.sampling(data=toy_dat, iter=10, chains=4)

~\anaconda3\lib\site-packages\pystan\model.py in sampling(self, data, pars, chains, iter, warmup, thin, seed, init, sample_file, diagnostic_file, verbose, algorithm, control, n_jobs, **kwargs)
    776         call_sampler_args = izip(itertools.repeat(data), args_list, itertools.repeat(pars))
    777         call_sampler_star = self.module._call_sampler_star
--> 778         ret_and_samples = _map_parallel(call_sampler_star, call_sampler_args, n_jobs)
    779         samples = [smpl for _, smpl in ret_and_samples]
    780 

~\anaconda3\lib\site-packages\pystan\model.py in _map_parallel(function, args, n_jobs)
     83         try:
     84             pool = multiprocessing.Pool(processes=n_jobs)
---> 85             map_result = pool.map(function, args)
     86         finally:
     87             pool.close()

~\anaconda3\lib\multiprocessing\pool.py in map(self, func, iterable, chunksize)
    266         in a list that is returned.
    267         '''
--> 268         return self._map_async(func, iterable, mapstar, chunksize).get()
    269 
    270     def starmap(self, func, iterable, chunksize=None):

~\anaconda3\lib\multiprocessing\pool.py in get(self, timeout)
    655             return self._value
    656         else:
--> 657             raise self._value
    658 
    659     def _set(self, i, obj):

RuntimeError: Initialization failed.

I have checked other threads regarding this issue and most of them concluded in a problem in the model. I have read it through several times but I am afraid I am stuck because the model is pretty straight forward (just using a lot of different functions which makes it look heavy).

If it will help, I am using pystan with jupyter notebook with python 3.7

LTL · May 4, 2020, 11:48am

Solved! What worked is changing the priors of the Hawks process to:

HP_ratio ~ normal(0, 5);
HP_beta ~ normal(0, 5);
HP_base_intensity ~ normal(0, 5);

nhuurre · May 4, 2020, 5:41pm

Gamma priors should work too, you just need to declare the parameters with a positivity constraint.

parameters {
  vector<lower=0>[S] alpha;
  vector<lower=0>[S] HP_ratio;
  vector<lower=0>[S] HP_beta;
  vector<lower=0>[S] HP_base_intensity;
  ...
}

The initialization does not consider the priors. It just chooses a random point consistent with the constraints and evaluates the probability of that guess. If the probability is zero then it tries again 100 times.
Any parameter declared without <lower=0> has 50% chance of initializing to a negative value which contradicts the gamma prior. When you have S=2 alphas there’s one-in-four chance both are positive and the initialization succeeds. Hundred attempts is plenty. But with all 16 parameters the probability of successful initialization is only 2^{-16}=\frac{1}{256} so even 100 tries won’t be enough.

EDIT: all 8 parameters. Funny how right-hand-side of 2^{-16}=\frac{1}{256} was correct.

Topic		Replies	Views
RuntimeError: Initialization failed Modeling	10	802	July 28, 2020
Need help. Keep getting initialization error Modeling	2	353	August 3, 2021
Model fails initialization Modeling pystan , fitting-issues , specification	0	492	April 10, 2022
PyStan RuntimeError: Initialization Failed and slow compilation of STAN Modeling performance	4	803	March 1, 2020
Pystan runtime error: initialization failed Modeling pystan , initialization	2	1398	May 26, 2022

pystan.StanModel RuntimeError: Initialization failed when trying to fit

Related topics