Runtime error occurs at the half of the model running

My model is running well in the beginning but suddenly got stuck and returned the following runtime error at 45% of the running process. I am really confused because such error generally occurs at the beginning but this time it occurs at the middle of process. Does anyone have any idea about what reason may cause such problem? Thanks ahead!

Traceback (most recent call last):
File “/n/home10/eva/rdm_linear_gaussian_delta_var_new_0514.py”, line 1038, in
save_diagnostics=True, threads_per_chain=12)
File “/n/home10/eva/lib/python3.7/site-packages/cmdstanpy/model.py”, line 789, in sample
raise RuntimeError(msg)
RuntimeError: Error during sampling.
RunSet: chains=4
cmd:
[’/n/home10/eva/fasrc/comp_phenotyping/data/rdm_linear_gaussian’, ‘id=1’, ‘random’, ‘seed=30261’, ‘data’, ‘file=/tmp/tmpp43_ittw/r_fb2ogv.json’, ‘init=0.1’, ‘output’, ‘file=/n/home10/eva/fasrc/comp_phenotyping/output/rdm_linear_gaussian-202105170753-1.csv’, ‘diagnostic_file=/n/home10/wenqichen/fasrc/comp_phenotyping/output/rdm_linear_gaussian-202105170753-diagnostic-1.csv’, ‘method=sample’, ‘num_samples=1000’, ‘num_warmup=1000’, ‘save_warmup=1’, ‘thin=1’, ‘algorithm=hmc’, ‘engine=nuts’, ‘max_depth=10’, ‘adapt’, ‘engaged=1’, ‘delta=0.9’]
retcodes=[-7, -7, -7, -7]



functions {

    real partial_sum(real[,,] all_parameters_sliced, int start, int end, vector Al, matrix Bl, matrix Cl, real[,,] Ul, 
                            vector mu_dl, vector sigma_r, matrix X1l, int Wl, int Xdiml,

                            int[,] idx_rdm_obsl, real[,,] RTu_rdml, real[,,] RTl_rdml, real[,,] Cohu_rdml, real[,,] Cohl_rdml,
                            int[,] Nu_rdml, int[,] Nl_rdml, int Nu_max_rdm, int Nl_max_rdm
                            ) 
    
    {

    real lt = 0;

    for (n in 1:(end-start+1)) {

        // unpack parameters


        real alpha_rdm_prl[Wl]      =   all_parameters_sliced[n,,1];
        real alpha_rdml[Wl]     =   all_parameters_sliced[n,,2];
        real beta_rdm_prl[Wl]     =   all_parameters_sliced[n,,3];
        real beta_rdml[Wl]        =   all_parameters_sliced[n,,4];
        real delta_mu_rdm_prl[Wl]     =   all_parameters_sliced[n,,5];
        real delta_mu_rdml[Wl] = all_parameters_sliced[n,,6];
        real delta_sigma_rdm_prl[Wl] = all_parameters_sliced[n,,7];
        real delta_sigma_rdm[Wl] = all_parameters_sliced[n,,8];
        real tau_rdm_prl[Wl]    =   all_parameters_sliced[n,,9];
        real tau_rdml[Wl]       =   all_parameters_sliced[n,,10];
        //real delta_pr_RTL_rdml[Wl, Nl_max_rdm]  =    all_parameters_sliced[n, , 11:(10+Nl_max_rdm)];
        //real delta_pr_RTU_rdml[Wl, Nu_max_rdm]  =  all_parameters_sliced[n, , (11+Nl_max_rdm):(10+Nl_max_rdm+Nu_max_rdm)];
                
        real Xl[Wl,Xdiml];
        

        int s = start + (n - 1); 
	        for (w in 1:Wl) {
	            
	            if (w == 1) {
                    Xl[w,] = to_array_1d(inv_logit(X1l[s,])); 
                } else {
                    Xl[w,] = to_array_1d(inv_logit(diag_matrix(Al) * to_vector(Xl[w-1,]) + Bl * to_vector(Ul[s,w-1,])));
                } 
                    
                    
                real delta_pr_RTU_rdml[Nu_rdml[s,w]] = all_parameters_sliced[n,w , 11:(10+Nu_rdml[s,w])];
                real delta_pr_RTL_rdml[Nl_rdml[s,w]]  = all_parameters_sliced[n,w , (11+Nu_rdml[s,w]):(10+Nu_rdml[s,w] + Nl_rdml[s,w])];
          

                lt += normal_lpdf(alpha_rdm_prl[w]    | mu_dl[1] + Cl[1,] * to_vector(Xl[w,]),sigma_r[1]);
                lt += normal_lpdf(beta_rdm_prl[w]   | mu_dl[2] + Cl[2,] * to_vector(Xl[w,]),sigma_r[2]);  
                lt += normal_lpdf(delta_mu_rdm_prl[w]   | mu_dl[3] + Cl[3,] * to_vector(Xl[w,]),sigma_r[3]);  
                lt += normal_lpdf(tau_rdm_prl[w]   | mu_dl[4] + Cl[4,] * to_vector(Xl[w,]),sigma_r[4]); 

                lt += cauchy_lpdf(delta_sigma_rdm_prl[w]|0, 5);
                lt += normal_lpdf(delta_pr_RTL_rdml | 0, 1);
                lt += normal_lpdf(delta_pr_RTU_rdml | 0, 1);

                //TODO: do we need to give trial-level delta primes???????????????


            	  if (idx_rdm_obsl[s,w] != 0) { // if week exists

                  vector[Nl_rdml[s, w]] delta_RTL_rdm = exp(delta_mu_rdml[w] + delta_sigma_rdm[w] * to_vector(delta_pr_RTL_rdml));
                  vector[Nu_rdml[s, w]] delta_RTU_rdm = exp(delta_mu_rdml[w] + delta_sigma_rdm[w] * to_vector(delta_pr_RTU_rdml));

                  //delta_pr_RTL_rdml[ w, (Nl_rdml[s, w]+1):] = rep_array(0, Nl_max_rdm - Nl_rdml[s, w]) ;
                  //delta_pr_RTU_rdml[w, (Nu_rdml[s, w]+1):] = rep_array(0, Nu_max_rdm - Nu_rdml[s, w]) ;

	                vector[Nu_rdml[s,w]] delta_cohu = delta_RTU_rdm .* to_vector(Cohu_rdml[s,w,:Nu_rdml[s,w]]);
	                vector[Nl_rdml[s,w]] delta_cohl = delta_RTL_rdm .* to_vector(Cohl_rdml[s,w,:Nl_rdml[s,w]]);


	                lt += wiener_lpdf(RTu_rdml[s,w,:Nu_rdml[s,w]] | alpha_rdml[w], tau_rdml[w], beta_rdml[w], delta_cohu);
                  lt += wiener_lpdf(RTl_rdml[s,w,:Nl_rdml[s,w]] | alpha_rdml[w], tau_rdml[w], 1-beta_rdml[w], -delta_cohl); 
	            }                    
 	    }
 }

    return lt;

    }

}


data {

    // Gaussian model
    int W;                                                          // Number of weeks (typically 12)
    int N;                                                          // Number of subjects
    int Xdim;                                                       // Dimension of X - latent low dimensional structure of the phenotype

    // Exogenous variables
    int exo_q_num;                                                  // number of exogenous survey questions
    real U[N,W,exo_q_num];                                       // exogenous survey questions - missing weeks were linearly interpolated outside of Stan 
   
       // Random dot motion
    int<lower=0> idx_rdm_obs[N,W];                            // Indices of weeks WITH data
    int<lower=1> P_rdm;                                             // number of parameters
    int<lower=0> Nu_max_rdm;                            // Max (across subjects) number of upper boundary responses
    int<lower=0> Nl_max_rdm;                        // Max (across subjects) number of lower boundary responses
    int<lower=0> Nu_rdm[N,W];                     // Number of upper boundary responses for each subj
    int<lower=0> Nl_rdm[N,W];                         // Number of lower boundary responses for each subj
    real RTu_rdm[N, W, Nu_max_rdm];                // upper boundary response times
    real RTl_rdm[N, W, Nl_max_rdm];                    // lower boundary response times
    real Cohu_rdm[N, W, Nu_max_rdm];                           // coherence for correct trials
    real Cohl_rdm[N, W, Nl_max_rdm];                   // coherence for incorrect trials
    matrix[N,W] minRT_rdm;                          // minimum RT for each subject of the observed data
    real RTbound_rdm;  

}
transformed data {

    int num_par = P_rdm;
}
parameters {
    vector<lower=0, upper=1>[Xdim] A;
    matrix[Xdim, exo_q_num] B;
    matrix[num_par,Xdim] C;
    matrix[N, Xdim] X1;
    vector[num_par] mu_d;                     // constant offset
    vector<lower=0>[num_par] sigma_r; 


 //Group level parameters in vecotized form, one for each parameter.
    //vector[4] mu_mu_rdm; //group level mean means
    //vector<lower = 0>[4] mu_sigma_rdm; //group level mean variances

    //vector[4] sigma_mu_rdm; //group level variance means
    //vector<lower=0>[4] sigma_sigma_rdm; //group level variance variances

    //// Subject-level raw parameters (for Matt trick)
    real alpha_mu_pr_rdm[N, W];
    real tau_mu_pr_rdm[N, W];
    real beta_mu_pr_rdm[N,W];
    real delta_mu_pr_rdm[N,W];

    real delta_sigma_pr_rdm[N,W];

    // Trial-level raw parameters (for Matt trick)???????????
    //real delta_pr_RTL_rdm[N, W, Nl_max_rdm];
    //real delta_pr_RTU_rdm[N, W, Nu_max_rdm];
    real delta_pr_RTL_rdm[sum(to_array_1d(Nl_rdm))];
    real delta_pr_RTU_rdm[sum(to_array_1d(Nu_rdm))];

}

transformed parameters {

  // Subject-level declarations: MEANS
    real<lower=0> alpha_mu_rdm[N,W];                 
    real<lower=0, upper=1> beta_mu_rdm[N,W]; 
    real<lower=0> delta_mu_rdm[N,W];                 
    real<lower=RTbound_rdm, upper=max(minRT_rdm[,])> tau_mu_rdm[N,W];
    
  // Subject-level declarations: VARIANCES
    real<lower=0> delta_sigma_rdm[N,W]; // drift rate variance subject level

  // Trial-level declarations??????????
    //real delta_RTL_rdm[N, W, Nl_max_rdm];
    //real delta_RTU_rdm[N, W, Nu_max_rdm];


  //Subject-level MEANS transformations

    for (n in 1:N) {
        for (w in 1:W) {
            beta_mu_rdm[n,w] = Phi(beta_mu_pr_rdm[n,w]);                                                       // gng
            tau_mu_rdm[n,w]  = Phi(tau_mu_pr_rdm[n,w]) * (minRT_rdm[n,w] - RTbound_rdm) + RTbound_rdm;     // rdm
        }
    }

    alpha_mu_rdm = exp(alpha_mu_pr_rdm); //reparametrization as in  Gelman manual second ed pg 313 and Kruschkes manual pg. 281
    delta_mu_rdm = exp(delta_mu_pr_rdm);  

    //Subject-level VARIANCES transformations
    delta_sigma_rdm = exp(delta_sigma_pr_rdm);//already vectorized for subject level in declaration 



}
model {
       
    A ~ normal(0.5, 0.05);
    to_vector(B) ~ normal(0, 0.05);
    to_vector(C) ~ normal(0, 0.05);
    to_vector(X1) ~ normal(0, 0.1);
    mu_d ~ normal(0, 0.1);
    sigma_r ~ normal(0, 0.1);

    // pack parameters for reduce sum
    real all_parameters[N, W, 10 + Nl_max_rdm+Nu_max_rdm]; 



     for (n in 1:N) {
        for (w in 1:W) {
                
             int NuL; int NuU; int NlL; int NlU;
            if(n > 1){
                NuL = sum(to_array_1d(Nu_rdm[1:(n-1), 1:12])) + sum(to_array_1d(Nu_rdm[n, 1:w])) -Nu_rdm[n,w] +1;
                NuU = sum(to_array_1d(Nu_rdm[1:(n-1), 1:12])) + sum(to_array_1d(Nu_rdm[n, 1:w]));
                NlL = sum(to_array_1d(Nl_rdm[1:(n-1), 1:12])) + sum(to_array_1d(Nl_rdm[n, 1:w])) -Nl_rdm[n, w] +1;
                NlU = sum(to_array_1d(Nl_rdm[1:(n-1), 1:12])) + sum(to_array_1d(Nl_rdm[n, 1:w]));
            }
            else{
                NuL =  sum(to_array_1d(Nu_rdm[n, 1:w])) -Nu_rdm[n,w] +1;
                NuU =  sum(to_array_1d(Nu_rdm[n, 1:w]));
                NlL =  sum(to_array_1d(Nl_rdm[n, 1:w])) -Nl_rdm[n, w] +1;
                NlU =  sum(to_array_1d(Nl_rdm[n, 1:w]));
            }
            
            all_parameters[n, w, 1] = alpha_mu_pr_rdm[n,w];
            all_parameters[n, w, 2] = alpha_mu_rdm[n,w];
            all_parameters[n, w, 3] = beta_mu_pr_rdm[n,w];
            all_parameters[n, w, 4] = beta_mu_rdm[n,w];
            all_parameters[n, w, 5] = delta_mu_pr_rdm[n,w];
            all_parameters[n, w, 6] = delta_mu_rdm[n,w];
            all_parameters[n, w, 7] = delta_sigma_pr_rdm[n,w];
            all_parameters[n, w, 8] = delta_sigma_rdm[n,w];
            all_parameters[n, w, 9] = tau_mu_pr_rdm[n,w];
            all_parameters[n, w, 10] = tau_mu_rdm[n,w];
            
            //all_parameters[n, w, 11:(10+Nl_max_rdm)] = delta_pr_RTL_rdm[n,w];
            //all_parameters[n, w, (11+Nl_max_rdm):(10+Nl_max_rdm+Nu_max_rdm)] = delta_pr_RTU_rdm[n,w];
            
            all_parameters[n, w, 11:(10+Nu_rdm[n,w])] = delta_pr_RTU_rdm[NuL:NuU];
            all_parameters[n, w, (11+Nu_rdm[n,w]):(10+Nu_rdm[n,w] + Nl_rdm[n,w])] = delta_pr_RTL_rdm[NlL:NlU];


        }
    }                                      

        target += reduce_sum(partial_sum, all_parameters, 1, A, B, C, U, mu_d, sigma_r, X1, W, Xdim,
                            idx_rdm_obs, RTu_rdm, RTl_rdm, Cohu_rdm, Cohl_rdm, Nu_rdm, Nl_rdm, Nu_max_rdm, Nl_max_rdm); 
                                       
} 
"""

instead of

model{
vector[N] mu = alpha+beta*x;
y~normal(mu,sigma);
}


1 Like

Hi, sorry for not getting to you earlier. Did you manage to resolve the problem in the meantime?

Generally, it is extremely hard to debug such large models, so I would recommend you try to simplify the model and try to find the simplest model that still exhibits the issue.

Also, could you share the full error message / output? I am not using the Python interfaces myself, so not sure where to find it, but I would expect that there is more detailed message somewhere…

Best of luck with your model!