#### Summary:
I constantly run into segfaults when calling rstan programs from my shell scripts.
#### Description:
I'm running Stan programs on a computing cluster and need to do this by calling the R programs using shell scripts. I constantly run into segfaults when refitting a compiled model using different data. The example below shows a reproducible case (at least on my laptop).
#### Reproducible Steps:
Here's a toy Stan program called `simple.stan`:
```
data {
int<lower=0> n;
vector[n] y;
}
parameters {
real<lower=0> sigma;
real mu;
}
model {
y ~ normal(mu, sigma);
}
```
The model above is being called repeatedly from `simpletest.R`:
```
library(rstan)
set.seed(1)
n <- 10
y <- rnorm(n)
for (j in 1:20) {
print(sprintf('Fit number %d:', j))
fit <- stan('simple.stan', data=list(n=n,y=y), cores=4)
}
```
The R-code is being called by the following shell script `simplerun.sh`:
```
Rscript simpletest.R
```
I run the shell script from the command line simply as `source simplerun.sh`. On my laptop, the program segfaults before refit number 8:
```
"Fit number 8:"
*** caught segfault ***
address 0x7f663eac3780, cause 'memory not mapped'
Traceback:
1: .Call(Module__get_class, pointer, name)
2: .get_Module_Class(module, demangled_name, xp)
3: Module(module, mustStart = TRUE)
4: .getModulePointer(x)
5: <S4 object of class "Module">$stan_fit4model351e44f524a8_simple
6: eval(call("$", mod, paste("stan_fit4", model_cppname, sep = "")))
7: eval(call("$", mod, paste("stan_fit4", model_cppname, sep = "")))
8: object@mk_cppmodule(object)
9: .local(object, ...)
10: (function (object, ...) { standardGeneric("sampling")})(algorithm = "NUTS", chains = 1L, check_data = TRUE, control = NULL, cores = 1L, data = list(n = 10L, y = c(-0.626453810742332, 0.183643324222082, -0.835628612410047, 1.59528080213779, 0.329507771815361, -0.820468384118015, 0.487429052428485, 0.738324705129217, 0.575781351653492, -0.305388387156356)), diagnostic_file = NA, include = TRUE, init = "random", iter = 2000, object = <S4 object of class "stanmodel">, open_progress = FALSE, pars = NA, sample_file = NA, seed = 821171885L, show_messages = TRUE, thin = 1, verbose = FALSE, warmup = 1000, check_unknown_args = FALSE, chain_id = 1L)
11: (function (object, ...) { standardGeneric("sampling")})(algorithm = "NUTS", chains = 1L, check_data = TRUE, control = NULL, cores = 1L, data = list(n = 10L, y = c(-0.626453810742332, 0.183643324222082, -0.835628612410047, 1.59528080213779, 0.329507771815361, -0.820468384118015, 0.487429052428485, 0.738324705129217, 0.575781351653492, -0.305388387156356)), diagnostic_file = NA, include = TRUE, init = "random", iter = 2000, object = <S4 object of class "stanmodel">, open_progress = FALSE, pars = NA, sample_file = NA, seed = 821171885L, show_messages = TRUE, thin = 1, verbose = FALSE, warmup = 1000, check_unknown_args = FALSE, chain_id = 1L)
12: do.call(rstan::sampling, args = .dotlist)
13: FUN(X[[i]], ...)
14: eval(expr, env)
15: doTryCatch(return(expr), name, parentenv, handler)
16: tryCatchOne(expr, names, parentenv, handlers[[1L]])
17: tryCatchList(expr, classes, parentenv, handlers)
18: tryCatch(expr, error = function(e) { call <- conditionCall(e) if (!is.null(call)) { if (identical(call[[1L]], quote(doTryCatch))) call <- sys.call(-4L) dcall <- deparse(call)[1L] prefix <- paste("Error in", dcall, ": ") LONG <- 75L msg <- conditionMessage(e) sm <- strsplit(msg, "\n")[[1L]] w <- 14L + nchar(dcall, type = "w") + nchar(sm[1L], type = "w") if (is.na(w)) w <- 14L + nchar(dcall, type = "b") + nchar(sm[1L], type = "b") if (w > LONG) prefix <- paste0(prefix, "\n ") } else prefix <- "Error : " msg <- paste0(prefix, conditionMessage(e), "\n") .Internal(seterrmessage(msg[1L])) if (!silent && identical(getOption("show.error.messages"), TRUE)) { cat(msg, file = outFile) .Internal(printDeferredWarnings()) } invisible(structure(msg, class = "try-error", condition = e))})
19: try(eval(expr, env), silent = TRUE)
20: sendMaster(try(eval(expr, env), silent = TRUE))
21: mcparallel(FUN(X[[i]], ...), name = names(X)[i], mc.set.seed = mc.set.seed, silent = mc.silent)
22: FUN(X[[i]], ...)
23: lapply(seq_along(X), function(i) mcparallel(FUN(X[[i]], ...), name = names(X)[i], mc.set.seed = mc.set.seed, silent = mc.silent))
24: parallel::mclapply(1:chains, FUN = callFun, mc.preschedule = FALSE, mc.cores = min(chains, cores))
25: .local(object, ...)
26: sampling(sm, data, pars, chains, iter, warmup, thin, seed, init, check_data = TRUE, sample_file = sample_file, diagnostic_file = diagnostic_file, verbose = verbose, algorithm = match.arg(algorithm), control = control, check_unknown_args = FALSE, cores = cores, open_progress = open_progress, include = include, ...)
27: sampling(sm, data, pars, chains, iter, warmup, thin, seed, init, check_data = TRUE, sample_file = sample_file, diagnostic_file = diagnostic_file, verbose = verbose, algorithm = match.arg(algorithm), control = control, check_unknown_args = FALSE, cores = cores, open_progress = open_progress, include = include, ...)
28: stan("simple.stan", data = list(n = n, y = y), cores = 4)
An irrecoverable exception occurred. R is aborting now ...
```
The above gives me a segfault only when calling `simpletest.R` from the shell script; I have not encountered it when running the R code in RStudio. It also seems to be related to the use of multiple cores, because it does not segfault when I remove the argument `cores=4`.
Any help is greatly appreciated.
#### RStan Version:
2.15.1 (tested also on 2.16.2 but it segfaults too)
#### R Version:
3.4.1 (2017-06-30)
#### Operating System:
Ubuntu 16.04.2 LTS (Xenial Xerus) 64-bit