Doing a reproducible example was a great suggestion. I’m sorry the code is long, I’ve put the full code in the end in case this description is not clear.
Basically, I understand now how brms’ fitted() works and this is exactly what I wanted. However, I’m still not clear why in my real data my two approaches give different results - as they do not differ in simulations. I think the careful approach here then is to find the result untrustworthy and not report them.
So, what I do is: I simulate 3 groups of participants. Each group differs in their mean and standard deviation (b2 and b5 below). In each group I have 30 participants. Each participant differs in their mean and standard deviation (beyond effects of group) (b0 and b4 below). There is also another covariate of no interest - Age (b3). Each person contributes 50 measurements (‘days’), the measurements can vary linearly over days (b1 below):
p ~ normal(b0 + b1Xday + b2Xgroup1 + b3Xage, sd ~ b4+b5XGroup2)
I analyse the data with two types of models.
[1] Hierarchical model:
my.fit = brm(bf( y ~ 1+day+mo(group) + Age + (1+day|ID),
sigma~ mo(group) + (1|ID)), data = all.data.scaled)
I then extract results with fitted(). For this I first generate ‘new.data’ which has the effect of ‘day’ averaged out:
new.data = all.data.scaled %>%
dplyr::group_by(ID)%>%
dplyr::summarize(across(c('Age','group'),unique),
across(contains('day'),mean))
And then feed this into fitted() to get mean and standard deviation values for each person (not corrected for group or age):
output$par.mean= as.data.frame(fitted(my.fit,newdata=new.data))$Estimate
output$par.sd = as.data.frame(fitted(my.fit,newdata=new.data,dpar='sigma'))$Estimate
[2] Nonhierarchical model:
df = all.data.scaled %>%
dplyr::filter(ID==1) # Example for participant with ID=1
fit.singleSubject= brm(y ~ 1+ day,data=df)
And extract parameters like this:
t2 =lapply(as.data.frame(fit.singleSubject),mean)
pars.temp$nonhier.mean = t2$b_Intercept
pars.temp$nonhier.sd = t2$sigma
And finally, I make scatter plots comparing the true value (intercept.mean and intercept.sd) with the fitted values (par.mean & par.sd for hierarchical model; and nonhier.mean & nonhier.sd for nonhierarchical model). I define the true values like this:
temp.sub$intercept.mean = temp.sub$b0 + temp.sub$b1*mean(my.days) + temp.sub$b2 + temp.sub$b3*this.sub.age
temp.sub$intercept.sd= temp.sub$b4 + temp.sub$b5
The result of this is that recovery is perfect and that it’s the same for ther hierachical or non-hierarchical model.
![BehavModelbased_checking_BRMS](https://canada1.discourse-cdn.com/flex030/uploads/mc_stan/original/2X/f/fc42dd91bfdc7dda2fe07a67fcd0714f586fe1c7.jpeg)
Here is the full code that I’ve used (uploaded is the result of running the code); sorry this is long, I’m still quite far away from full proficiency with R (as well as Stan/BRMS obviously…):
# Simulate some data -----
# We simulate 3 groups with 30 participants per group and each participant contributes 50 data points
nsub = 30 # subjects per group
my.ranges=list()
my.ranges$b0 =c(-1,1) # intercept
my.ranges$b1 = c(-1,1) # day
my.ranges$b2 = c(-5,0,5) # group 1
my.ranges$b3 = 0.05 # age - fixed effect for each subject
my.ranges$b4 = c(0.6,1) # intercept SD
my.ranges$b5 = c(-0.5,0,0.5) # group SD
age.range = c(20,50)
my.days=seq(1,50)
all.people=data.frame()
all.data =data.frame()
for (igroup in 1:3){
for (isub in 1:nsub){
temp.sub = list()
temp.sub$b0 = runif(min=my.ranges$b0[1], max=my.ranges$b0[2],n=1) # intercept
temp.sub$b1 = runif(min=my.ranges$b1[1], max=my.ranges$b1[2],n=1) # day
temp.sub$b2 = my.ranges$b2[[igroup]] # group
temp.sub$b3 = my.ranges$b3 #AGE
temp.sub$b4 = runif(min=my.ranges$b4[1], max=my.ranges$b4[2],n=1) # SD
temp.sub$b5 = my.ranges$b5[[igroup]] # group SD
this.sub.age= runif(min=age.range[1],max=age.range[2],n=1)
temp.sub$ID = (igroup-1)*nsub + isub
temp.sub$intercept.mean = temp.sub$b0 + temp.sub$b1*mean(my.days) + temp.sub$b2 + temp.sub$b3*this.sub.age
temp.sub$intercept.sd= temp.sub$b4 + temp.sub$b5
# append to subject list
all.people = rbind(all.people,as.data.frame(temp.sub))
for (iday in my.days){
temp.data=list()
temp.data$Age = this.sub.age
temp.data$ID = (igroup-1)*nsub + isub
temp.data$day= iday
temp.data$group=igroup
temp.data$y = rnorm(mean = temp.sub$b0 + temp.sub$b1*iday + temp.sub$b2 + temp.sub$b3*temp.data$Age,
sd = temp.sub$b4 + temp.sub$b5,
n=1)
all.data = rbind(all.data, as.data.frame(temp.data))
}
}
}
all.data$group = factor(all.data$group, ordered=TRUE, levels=c(1,2,3))
all.data$ID = factor(all.data$ID,ordered=TRUE, levels=unique(all.data$ID))
all.data.scaled = all.data %>%
dplyr::mutate_if(is.numeric,scale)
# Run BRMS and extract results----
# Hierarchical BRMS model, allowing individual differences in mean and standard deviation
my.fit = brm(bf( y ~ 1+day+mo(group) + Age + (1+day|ID),
sigma~ mo(group) + (1|ID)),
data = all.data.scaled, silent=TRUE,refresh=0)
# We extract results using brms' fitted(). For this, we generate new data in which for each person, we set the regressor 'day' to the average, to get their average parameter independent of the linear effect of day
# Generate 'new data' to extract results
new.data = all.data.scaled %>%
dplyr::group_by(ID)%>%
dplyr::summarize(across(c('Age','group'),unique),
across(contains('day'),mean))
names(brmsterms(my.fit$formula)$dpar) # checking what we want to extract ('mu' is the same as not specifying anything)
# We extract each person's mean and standard deviation
output=list()
output$ID = unique(my.fit$data$ID)
output$group = my.fit$data %>%
group_by(ID) %>% dplyr::summarize(group=unique(group)) %>% dplyr::pull(group)
output$par.mean= as.data.frame(fitted(my.fit,newdata=new.data))$Estimate
output$par.mean.asMU= as.data.frame(fitted(my.fit,newdata=new.data,dpar='mu'))$Estimate
output$par.sd = as.data.frame(fitted(my.fit,newdata=new.data,dpar='sigma'))$Estimate
output=as.data.frame(output)
# Single-subject models -----
# For comparison, we also have models that are fitted separately for each person. From these we also extract participants' mean and standard deviation
df = all.data.scaled %>%
dplyr::filter(ID==1)
fit.singleSubject= brm(y ~ 1+ day,data=df,silent=TRUE,refresh=0)
output.nonHier = data.frame()
for (isub in unique(all.data$ID)){
df.new = all.data.scaled %>%
dplyr::filter(ID==isub)
out = update(fit.singleSubject,newdata=df.new,silent=TRUE,refresh=0)
t2 =lapply(as.data.frame(out),mean)
pars.temp= list()
pars.temp$ID = isub
pars.temp$nonhier.mean = t2$b_Intercept
pars.temp$nonhier.sd = t2$sigma
pars.temp$nonhier.day = t2$b_day
output.nonHier = rbind(output.nonHier,as.data.frame(pars.temp))
}
# Store list of data frames if we want to rerun these checks -----
my.checks = list()
my.checks$all.data.scaled=all.data.scaled
all.people$ID = factor(all.people$ID,ordered=TRUE, levels=unique(all.people$ID))
my.checks$all.people=all.people
my.checks$output = output
my.checks$output.nonHier=output.nonHier
save(my.checks,file=file.path(model.output.path,'brms_checks.RData'))
# Compare the results to the simulation ----
to.plot = left_join(my.checks$output,my.checks$output.nonHier,by='ID') %>%
left_join(.,my.checks$all.people,by='ID')
# add columns for y
ps=list()
ps$p1=ggscatter(to.plot,x='nonhier.mean',y='par.mean',color='group') # nonhierarchically vs. hierarchically fitted mean
ps$p2=ggscatter(to.plot,x='nonhier.sd',y='par.sd',color='group') # nonhierarchically vs. hierarchically fitted sd
ps$p3=ggscatter(to.plot,x='intercept.mean',y='par.mean',color='group') # true vs. hierarchically fitted mean
ps$p4=ggscatter(to.plot,x='intercept.sd',y='par.sd',color='group') # true vs. hierarchically fitted sd
ps$p5=ggscatter(to.plot,x='intercept.mean',y='nonhier.mean',color='group') # true vs. nonhierarchically fitted mean
ps$p6=ggscatter(to.plot,x='intercept.sd',y='nonhier.sd',color='group') # true vs. nonhierarchically fitted sd
check.plot=ggarrange(plotlist=ps,nrow=3,ncol=2)
ggexport(check.plot,filename=file.path(figures.path,'BehavModelbased_checking_BRMS.jpg'))
plot(check.plot)
brms_checks.RData (43.7 KB)