Session aborted when switching to a smoothing precision matrix in a multivariate normal

I’m having troubles fitting a model. The Rstudio’s session gets aborted without any backtrace or message so it has been difficult to do any debugging. The initial gradient evaluation seems to be fine, but as soon as that is done Rstudio crashes, not even completing the first iteration.
I’m pasting some code+data and two models.
Model A works fine, notice that it uses avg_varcov which is just a diagonal matrix inside multi_normal.
Model B crashes the session. The only difference is that it uses multi_normal_prec because avg_varcov now is a precision matrix called avg_precision with smoothing proprieties
P = \begin{pmatrix} 1 & -2 & 1 & 0 & 0 & 0 & \cdots\\ -2 & 5 & -4 & 1 & 0 & 0 & \cdots\\ 1 & -4 & 6 & -4 & 1 & 0 & \cdots\\ \ddots & \ddots & \ddots & \ddots & \ddots & \ddots & \ddots \\ \cdots & 0 & 1 & -4 & 6 & -4 & 1 \\ \cdots & 0 & 0 & 1 & -4 & 5 & -2 \\ \cdots & 0 & 0 & 0 & 1 & -2 & 2 \end{pmatrix}
plus a diagonal component to make it defined positive.
I also tried without success using target += … instead of multi_normal_prec.

Any suggestions about how to debug this problem would be very much appreciated.

If needed:
Windows 10
R 3.6.1
Rstan 2.19.2

library(rstan)
Sys.setenv(LOCAL_CPPFLAGS = '-march=native')
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
options(shinystan.rstudio = TRUE)


set.seed(1234)


###CREATING SOME DATA


N_fuelcells <- 16
vec_positions <- 1:16
N_tests <- 400
vec_N_obs_by_test <- sample(8:16, size = N_tests, replace = T)
ii_tests <- rep(1:N_tests,times=vec_N_obs_by_test)
N_obs <- sum(vec_N_obs_by_test)
mat_alphabeta <- rep(runif(n = N_tests, min = 0.9, max = 1.1),each=N_fuelcells) *
                  matrix(c(85.5, 4.8, 4.4, 4.1, 3.7, 3.6, 3.5, 3.6, 3.2, 2.8, 2.3, 1.7, 1.2, 0.7, 0.2, 0.1),
                          nrow = N_tests, ncol = N_fuelcells, byrow = T)
vec_spillage <- NULL
for(i in 1:N_tests) vec_spillage <- c(vec_spillage, sort(runif(vec_N_obs_by_test[i],1,N_fuelcells)))
mat_x <- matrix(0 , nrow = N_obs, ncol = N_fuelcells)
for(i in 1:N_obs){
  xx <- vec_spillage[i] - 1
  mat_x[i, 1:(2+floor(xx))] <- c(1, rep(1,floor(xx)), xx - floor(xx))}
vec_y <- rowSums(mat_x * mat_alphabeta[ii_tests,]) + rnorm(N_obs,0,5)


###STARTING VALUES

starting_values <- list(list(
        tau = 5,
        eta = 2.5,
        len_par = 1,
        vec_mu = c(85.5, 4.8, 4.4, 4.1, 3.7, 3.6, 3.5, 3.6, 3.2, 2.8, 2.3, 1.7, 1.2, 0.7, 0.2, 0.1),
        vec_sigma = sqrt(c(5,rep(2,N_fuelcells-1))),
        mat_alphabeta = matrix(c(85.5, 4.8, 4.4, 4.1, 3.7, 3.6, 3.5, 3.6, 3.2, 2.8, 2.3, 1.7, 1.2, 0.7, 0.2, 0.1),
                               nrow = N_tests, ncol = N_fuelcells, byrow = T)
      ))


###MODELS

model_A <- "

data{
  int<lower = 1> N_obs;
  int<lower = 1> N_tests;
  int<lower = 1> N_fuelcells;
  real vec_positions[N_fuelcells];
  int<lower = 1> ii_tests[N_obs];
  matrix<lower = 0, upper = 1>[N_obs, N_fuelcells] mat_x;
  real vec_y[N_obs];

}

transformed data{
  vector[N_fuelcells-1] avg_mu;
  matrix[N_fuelcells-1, N_fuelcells-1] avg_varcov;
  
  avg_mu = rep_vector(0,N_fuelcells-1);
  avg_varcov = diag_matrix(rep_vector(1, N_fuelcells-1));
}

parameters{
  real<lower = 0> tau;
  real<lower = 0> eta;
  real<lower = 0> len_par;
  vector<lower = 0>[N_fuelcells] vec_mu;
  vector<lower = 0>[N_fuelcells] vec_sigma;
  matrix<lower = 0>[N_tests, N_fuelcells] mat_alphabeta;
}

transformed parameters{
  corr_matrix[N_fuelcells] mat_corr;
  cov_matrix[N_fuelcells] mat_varcov;
  
  mat_corr = cov_exp_quad(vec_positions, 1, len_par);
  mat_varcov = quad_form_diag(mat_corr, vec_sigma);
}

model{
  
  tau ~ cauchy(0, 5);
  eta ~ cauchy(0, 5);
  len_par ~ cauchy(0, 5);
  vec_sigma ~ cauchy(0, 5);

  vec_mu[1] ~ normal(87.5, sqrt(5));
  vec_mu[2:N_fuelcells] ~ multi_normal(avg_mu, eta * avg_varcov);

  for(test in 1:N_tests){
    mat_alphabeta[test] ~ multi_normal(vec_mu, mat_varcov);
  }

  vec_y ~ normal(rows_dot_product(mat_alphabeta[ii_tests], mat_x), tau);

}


";



model_B <- "

data{
  int<lower = 1> N_obs;
  int<lower = 1> N_tests;
  int<lower = 1> N_fuelcells;
  real vec_positions[N_fuelcells];
  int<lower = 1> ii_tests[N_obs];
  matrix<lower = 0, upper = 1>[N_obs, N_fuelcells] mat_x;
  real vec_y[N_obs];

}

transformed data{
  vector[N_fuelcells-1] avg_mu;
  matrix[N_fuelcells-1, N_fuelcells-1] avg_precision;
  
  avg_mu = rep_vector(0,N_fuelcells-1);

  avg_precision = diag_matrix(rep_vector(6, N_fuelcells-1));
  avg_precision[1, 1] = 1;
  avg_precision[2, 2] = 5;
  avg_precision[1, 2] = -2;
  avg_precision[2, 1] = -2;
  avg_precision[N_fuelcells-2, N_fuelcells-2] = 5;
  avg_precision[N_fuelcells-2, N_fuelcells-1] = -2;
  avg_precision[N_fuelcells-1, N_fuelcells-2] = -2;
  avg_precision[N_fuelcells-1, N_fuelcells-1] = 2;
  for(i in 2:(N_fuelcells-3)){
    avg_precision[i+1, i] = -4;
    avg_precision[i, i+1] = -4;
  }
  for(i in 1:(N_fuelcells-3)){
    avg_precision[i+2, i] = 1;
    avg_precision[i, i+2] = 1;
  }
  avg_precision = avg_precision + diag_matrix(rep_vector(0.001,N_fuelcells-1));
  
}

parameters{
  real<lower = 0> tau;
  real<lower = 0> eta;
  real<lower = 0> len_par;
  vector<lower = 0>[N_fuelcells] vec_mu;
  vector<lower = 0>[N_fuelcells] vec_sigma;
  matrix<lower = 0>[N_tests, N_fuelcells] mat_alphabeta;
}

transformed parameters{
  corr_matrix[N_fuelcells] mat_corr;
  cov_matrix[N_fuelcells] mat_varcov;
  
  mat_corr = cov_exp_quad(vec_positions, 1, len_par);
  mat_varcov = quad_form_diag(mat_corr, vec_sigma);
}

model{
  
  tau ~ cauchy(0, 5);
  eta ~ cauchy(0, 5);
  len_par ~ cauchy(0, 5);
  vec_sigma ~ cauchy(0, 5);

  vec_mu[1] ~ normal(87.5, sqrt(5));
  vec_mu[2:N_fuelcells] ~ multi_normal_prec(avg_mu, avg_precision / eta);

  for(test in 1:N_tests){
    mat_alphabeta[test] ~ multi_normal(vec_mu, mat_varcov);
  }

  vec_y ~ normal(rows_dot_product(mat_alphabeta[ii_tests], mat_x), tau);

}


";



###FIT

model_fit<-stan(model_code = model_A, iter = 10,  warmup = 5, init = starting_values, chains = 1,
            data = list(
              N_obs = N_obs,
              N_tests = N_tests,
              N_fuelcells = N_fuelcells,
              vec_positions = vec_positions,
              ii_tests = ii_tests,
              mat_x = mat_x,
              vec_y = vec_y))


model_fit<-stan(model_code = model_B, iter = 10,  warmup = 5, init = starting_values, chains = 1,
                data = list(
                  N_obs = N_obs,
                  N_tests = N_tests,
                  N_fuelcells = N_fuelcells,
                  vec_positions = vec_positions,
                  ii_tests = ii_tests,
                  mat_x = mat_x,
                  vec_y = vec_y))

Just a suggestion, you may check how it goes in cmdstan without Rstudio.

2 Likes

Could you try with chains = 1, verbose = TRUE? EDIT: The claim about memory was based on misreading of the code and was wrong.

That’s a good suggestion. Now we also have the CmdStanR to make that easier.

Hope that helps.

2 Likes

Thanks a lot for the suggestion, both of you. After a bit of fiddling I was able to install and run the models (both) with cmdstan.
I think the problem could be a division by zero at the very beginning of the exploration phase, and also the var/cov matrix evaluated as invalid. Not sure why they crash rstudio instead of just throwing an exception.

First time I ran model B I got a bunch of times this warning within the very first few iterations:

Informational Message: The current Metropolis proposal is about to be rejected because of the following issue: Exception: gp_exp_quad_cov: length scale is 0, but must be > 0! (in ‘examples/model_B.stan’ at line 52)

which I solved by setting a lower limit of 0.0001 for len_par.
Maybe I should use a different prior for it because afterwards I would still warnings in the first few iterations:

Informational Message: The current Metropolis proposal is about to be rejected because of the following warning: Exception: validate transformed params: mat_varcov is not symmetric. mat_varcov[1,2] = nan, but mat_varcov[2,1] = nan (in ‘examples/model_B.stan’ at line 50)

or instead of NaN there could be Inf or big numbers (decreasing values as iteration number increases?). Strangely even if I set it to lower=1 and set vec_sigma upper=100 I would still get a warning:

Informational Message: The current Metropolis proposal is about to be rejected because of the following issue: Exception: validate transformed params: y is not positive definite.

but at this point I don’t have a clue about what is happening. Anyway the sampler now works, simply it throws a bunch of warnings at the very beginning. Thanks again for the suggestion.

1 Like

Mike Betancourt discusses priors for length scale in his GP case study: Robust Gaussian Processes in Stan - the whole three parts are worth reading (but it as bit long). TLDR: good priors for length scale are trickier than they appear.

This could easily be problems downstream of extreme values of len_par.

This tends to happen. What people do is add some small number (like 1e-12) to the diagonal of the covariance matrix. Just a hack I picked up along the way (not sure if it is documented somewhere)

The fact that it crashes R is very likely a bug though, so if you have small reproducible example, it would be probably sensible to file it as an issue in the RStan GitHub repo.

Hope that helps!

2 Likes