Large multivariate model fails to compile

Hi all,

I’m fitting a large multivariate Gaussian model with 1485 DVs (the unique off-diagonal entries of a 55x55 brain connectivity matrix, i.e. 55 × 54 / 2 = 1485), one binary predictor and three ordered (monotonic) predictors. The model starts running, but eventually it fails with the following error:

cannot remove file '/var/folders/r8/f7z6l6014bd6zcd96wx3pf7x11rzwt/T//RtmpX3b80u/file19d657b99b33.stan', 
reason 'No such file or directory'

make cmd is  make -f '/Library/Frameworks/R.framework/Resources/etc/Makeconf' 
 -f '/Library/Frameworks/R.framework/Resources/share/make/shlib.mk' 
-f '/Users/46078924/.R/Makevars' CXX='$(CXX14) $(CXX14STD)' 
CXXFLAGS='$(CXX14FLAGS)' CXXPICFLAGS='$(CXX14PICFLAGS)' SHLIB_LDFLAGS='$(SHLIB_CXX14LDFLAGS)' 
SHLIB_LD='$(SHLIB_CXX14LD)' SHLIB='file19d61b10af1e.so' 
OBJECTS='file19d61b10af1e.o'

make would use
clang++ -mmacosx-version-min=10.13 -std=gnu++14 -
I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -
I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/"  -
I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/"  -
I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported"  -
I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -
I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/"  -
I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/"  -
I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/"  -
I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -
DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -
include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  
-D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -
fPIC  -Wall -g -O2  -O3 -mtune=native -arch x86_64 -ftemplate-depth-256 -c file19d61b10af1e.cpp -o file19d61b10af1e.o
if test  "zfile19d61b10af1e.o" != "z"; then \
echo clang++ -mmacosx-version-min=10.13 -std=gnu++14 -dynamiclib -Wl,-
headerpad_max_install_names -undefined dynamic_lookup -single_module -multiply_defined suppress 
-L"/Library/Frameworks/R.framework/Resources/lib" -L/usr/local/lib -o file19d61b10af1e.so file19d61b10af1e.o  
/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/lib//libStanServices.a -
L'/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/lib/' -lStanHeaders -
L'/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/lib/' -ltbb -ltbbmalloc -
ltbbmalloc_proxy  -F/Library/Frameworks/R.framework/.. -framework R -Wl,-framework -
Wl,CoreFoundation; \  clang++ -mmacosx-version-min=10.13 -std=gnu++14 -dynamiclib -Wl,-
headerpad_max_install_names -undefined dynamic_lookup -single_module -multiply_defined 
suppress -L"/Library/Frameworks/R.framework/Resources/lib" -L/usr/local/lib -o file19d61b10af1e.so 
file19d61b10af1e.o  
/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/lib//libStanServices.a -
L'/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/lib/' -lStanHeaders -
L'/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/lib/' -ltbb -ltbbmalloc -
ltbbmalloc_proxy  -F/Library/Frameworks/R.framework/.. -framework R -Wl,-framework -
Wl,CoreFoundation; \ fi

Error in compileCode(f, code, language = language, verbose = verbose) :
  clang: note: diagnostic msg: /Users/46078924/Library/Logs/DiagnosticReports/clang_<YYYY-MM-DD-HHMMSS>_<hostname>.crash
  clang: note: diagnostic msg: (choose the .crash file that corresponds to your crash)
  clang: note: diagnostic msg: ********************
  make: *** [file19d61b10af1e.o] Error 254

Error in sink(type = "output") : invalid connection

Here is my code:

library(tidyverse)
library(brms)
library(future)

set.seed(7890)

# Fake data: long format, one row per (participant, DV) pair

n_subj <- 5000  # number of participants
n_dv <- 1485    # number of dependent variables

dat <- tibble(pID = rep(seq_len(n_subj), times = n_dv),
              DV = rep(seq_len(n_dv), each = n_subj),
              # a pool of n_dv uniform draws, recycled along the rows
              value = rep(runif(n_dv, min = -1, max = 1), times = n_subj),
              # predictors: one draw per participant, repeated for every DV
              lon = rep(sample(1:0, n_subj, replace = TRUE), times = n_dv),
              fam = rep(sample(1:6, n_subj, replace = TRUE), times = n_dv),
              fr = rep(sample(1:6, n_subj, replace = TRUE), times = n_dv),
              clos = rep(sample(5:0, n_subj, replace = TRUE), times = n_dv))

# Reshape to wide format: one row per participant and one column per DV.
# `names_prefix` names the DV columns "DV1", "DV2", ... directly, so they
# are not just a number and no positional renaming pass is needed.

dat <- dat %>%
  pivot_wider(names_from = DV, values_from = value, names_prefix = "DV")

# DV column names (every column whose name starts with "DV"); used in the
# model formula and when specifying priors. Selecting by name avoids
# hard-coding the column positions.

dv_names <- grep("^DV", names(dat), value = TRUE)
stopifnot(length(dv_names) > 0)

# Model formula: all DVs are bound into one multivariate response and
# set_rescor(FALSE) is added to the formula.
# Only the formula text is converted with as.formula(); the brms calls are
# ordinary code instead of a string passed through eval(parse()).
# The object keeps the name `formula` because it is passed to brm() under
# that name further down.

formula <- bf(
  as.formula(paste0(
    "mvbind(", paste(dv_names, collapse = ", "), ") ",
    "~ 1 + lon + mo(fam) + mo(fr) + mo(clos)"
  ))
) +
  set_rescor(FALSE)

# Priors
# Every set_prior() call receives the full vector of DV names as `resp`.
# The three monotonic predictors (fam, fr, clos) share the same slope prior
# and the same simplex prior, so those two strings are defined once.

mo_slope_prior <- "normal(0, 0.167)"
mo_simplex_prior <- "dirichlet(2, 2, 2, 2, 2)"

priors <- c(
  # intercept and residual SD
  set_prior(prior = "normal(0, 0.5)", class = "Intercept", resp = dv_names),
  set_prior(prior = "normal(0, 0.1)", class = "sigma", resp = dv_names),
  # monotonic effect of fam
  set_prior(prior = mo_slope_prior, class = "b", coef = "mofam",
            resp = dv_names),
  set_prior(prior = mo_simplex_prior, class = "simo", coef = "mofam1",
            resp = dv_names),
  # monotonic effect of fr
  set_prior(prior = mo_slope_prior, class = "b", coef = "mofr",
            resp = dv_names),
  set_prior(prior = mo_simplex_prior, class = "simo", coef = "mofr1",
            resp = dv_names),
  # monotonic effect of clos
  set_prior(prior = mo_slope_prior, class = "b", coef = "moclos",
            resp = dv_names),
  set_prior(prior = mo_simplex_prior, class = "simo", coef = "moclos1",
            resp = dv_names),
  # class-level prior for "b" (no coef given)
  set_prior(prior = "normal(0, 0.2)", class = "b", resp = dv_names)
)

# Run the model

# NOTE(review): brm() is not called with `future = TRUE` below, so this plan
# is presumably unused unless options(future = TRUE) is set elsewhere;
# confirm whether it is needed.
plan(multicore)

# NOTE(review): `formula` is built with set_rescor(FALSE). If the responses
# share no parameters, fitting each DV as its own univariate model (compile
# once, then refit on new data) may avoid compiling one very large Stan
# program. TODO confirm before restructuring.
b1 <- brm(formula = formula,
          data = dat, family = gaussian(),
          prior = priors,
          iter = 2000, warmup = 1000,
          # one core per chain: brms recommends cores up to the number of
          # chains, so the previous cores = 20 exceeded what 4 chains need
          chains = 4, cores = 4,
          save_pars = save_pars(all = TRUE),
          seed = 7890)
summary(b1)

Please note that this takes a long time to run (~10 days). The intercept-only model (no predictors) runs without errors, and the same model with all predictors also runs without errors on a different dataset with only 220 DVs. Any idea why I’m getting this error here?

  • Operating System: macOS Big Sur 11.2.3
  • brms Version: 2.15.0
  • Makevars: CXX14FLAGS += -O3 -mtune=native -arch x86_64 -ftemplate-depth-256

Any help would be greatly appreciated!