I ran into the same problem just now and found this thread. I already learned something from @avehtari’s comments but just wanted to share my experience in case it really is a bug.
I am running brms 2.13.5, rstan 2.21.2, R version 4.0.2 on a Ubuntu 18.4 workstation.
I am trying to compare multiple model specifications using only a subset of my data, which might explain the high pareto_k values that I am experiencing. My full dataset has about 20k observations but needs some days to run which is why I am using a 1k sample for trying out model specifications
I adapted a simulated MRP example of Lauren Kennedy to produce this example that produces the error on my machine. I’m sorry that it’s a long code, but I hope it reproduces the error.
```
Error in .update_pars(x, upars = upars, ...) :
length(new_samples) == nrow(pars) is not TRUE
In addition: Warning message:
Some Pareto k diagnostic values are slightly high. See help('pareto-k-diagnostic') for details.
Error: Moment matching failed. Perhaps you did not set 'save_all_pars' to TRUE when fitting your model?
```
Okay, I just tried to reproduce the error on my Laptop running Windows 10, R 4.0.2, and brms 2.13.0. Would not reproduce. I updated to brms 2.13.5 and now I got the error again on my Laptop as well.
Edit: I might’ve updated other packages as well, not quite sure. I’m just reinstalling Brms 2.13.0 and will see if I can reproduce it again with that version.
[library(truncnorm)
#Define function that simulates mrp data ####
simulate_mrp_data <- function(n) {
J <- c(2, 3, 5, 3, 400) # male or not, jobstatus, age (<18, 18-29, 30-49, 50-64, >64),migration background,
poststrat <- as.data.frame(array(NA, c(prod(J), length(J)+1))) # Columns of post-strat matrix, plus one for size
colnames(poststrat) <- c("male", "job", "age","migback", "state",'N')
count <- 0
for (i1 in 1:J[1]){ # for i1 in 1:2 (i.e. for both genders)
for (i2 in 1:J[2]){
for (i3 in 1:J[3]){
for (i4 in 1:J[4]){
for (i5 in 1:J[5]){ # 1:J[5] is 1:50 because J[5] is 50
count <- count + 1
# Fill them in so we know what category we are referring to
poststrat[count, 1:5] <- c(i1-1, i2, i3,i4,i5)
}
}
}
}
}
# Proportion in each sample in the population (Values based on Zensus 2011 DE)
p_male <- c(0.512, 0.488)
p_job <- c(0.515, 0.025, 0.459)
p_age <- c(0.164,0.142,0.285,0.204,0.206)
p_migback <- c(.808,.116,.76)
p_state_tmp <- runif(n = 400, min = 10,max = 20)
p_state <- p_state_tmp/sum(p_state_tmp)
poststrat$N <- 0
for (j in 1:prod(J)){ # 82e6 is a random massive population size here, about Germany
poststrat$N[j] <- round(82e5 * p_male[poststrat[j,1]+1] * # get the value for each j in poststrat and multiply by the proportions in the sample
p_job[poststrat[j,2]] * p_age[poststrat[j,3]] *
p_migback[poststrat[j,4]] * p_state[poststrat[j,5]]) #Adjust the N to be the number observed in each category in each group
}
# Now let's adjust for the probability of response depending on characteristics
p_response_baseline <- 0.01
p_response_male <- c(2, 0.8) / 2.8
p_response_job <- c(1, 1.2, 2.5) / 4.7
p_response_age <- c(0.5, 0.4, 1, 1.5, 3) / 6.4
p_response_migback <- c(1, 0.8, 0.6) / 2.4
p_response_state <- rbeta(n = 400, shape1 = 1, shape2 = 1)
p_response_state <- p_response_state / sum(p_response_state)
p_response <- rep(NA, prod(J))
for (j in 1:prod(J)) {
p_response[j] <-
p_response_baseline * p_response_male[poststrat[j, 1] + 1] *
p_response_job[poststrat[j, 2]] * p_response_age[poststrat[j, 3]] *
p_response_migback[poststrat[j, 4]] * p_response_state[poststrat[j, 5]]
}
people <- sample( prod(J), size = n, replace = TRUE, prob = poststrat$N * p_response) #sample random people depending on the proportions and response rate of that cell
## For respondent i, people[i] is that person's poststrat cell, (Which cell does that person in our sample stem from)
## some number between 1 and 32
n_cell <- rep(NA, prod(J))
for (j in 1:prod(J)) {
n_cell[j] <- sum(people == j)
}
coef_male <- c(0, -1)
coef_job <- c(1, -1, 0.5)
coef_age <- c(2, 0 , -1, -0.5, 1)
coef_migback <- c(0, -0.5, -1)
coef_state <- c(0, round(rnorm(399, 0, 1.5), 1))
true_popn <- data.frame(poststrat[, 1:5], lsat = rep(NA, prod(J))) # get the first columns from the cell matrix, define a new one with variable of interest NAs
for (j in 1:prod(J)) {
true_popn$lsat_influence[j] <- sum( # Influence on life satisfaction depending on subgroup
coef_male[poststrat[j, 1] + 1] +
coef_job[poststrat[j, 2]] + coef_age[poststrat[j, 3]] +
coef_migback[poststrat[j, 4]] + coef_state[poststrat[j, 5]]
)
}
true_popn$lsat <- round(rtruncnorm(n = 1, a = 0, b = 10, sd = 2, mean = 6 + true_popn$lsat_influence),0) #generate a life satisfaction value for each observation between 0 and 10
#male or not, jobstatus, age, migback, state
y <- round(true_popn$lsat[people] , 0) #for our sample "people" we get the life satisfaction from cell that the person belongs to
male <- poststrat[people, 1]
job <- poststrat[people, 2]
age <- poststrat[people, 3]
migback <- poststrat[people, 4]
state <- poststrat[people, 5]
sample <- data.frame(lsat = y,
male, age, job, migback, state,
id = 1:length(people)) #create a dataframe from our people sample
#Make all numeric:
for (i in 1:ncol(poststrat)) {
poststrat[, i] <- as.numeric(poststrat[, i])
}
for (i in 1:ncol(true_popn)) {
true_popn[, i] <- as.numeric(true_popn[, i])
}
for (i in 1:ncol(sample)) {
sample[, i] <- as.numeric(sample[, i])
}
list(
sample = sample,
poststrat = poststrat,
true_popn = true_popn
)
}
# Generate sample data using the function ####
set.seed(123)
mrp_sim <- simulate_mrp_data(n=1000)
sample <- mrp_sim[["sample"]]
sample$state <- factor(sample$state, levels=1:400)
prior4 <- c(prior(normal(6,1), class = Intercept),
prior(normal(0,0.75), class = b),
prior(normal(0,0.75), class = sd),
prior(exponential(0.5), class = sigma),
prior(lkj(1), class = cor))
fit <- brm(
formula = lsat ~ male + age + job + migback + (1 + male + age + job + migback | state),
data = sample, family = gaussian,
control=list(adapt_delta=.8, max_treedepth=15),
chains = 8, cores = 8, iter = 4000, warmup = 1000,
prior = prior4, save_all_pars = TRUE)
fit <- add_criterion(fit, "waic")
fit <- add_criterion(fit, "loo", moment_match = TRUE)