I am trying to incorporate mi() into a non-linear formula, but end up with:
Error: The parameter ‘value’ is not a valid distributional or non-linear parameter. Did you forget to set ‘nl = TRUE’?
#estimating latent variable with missing values on 0 to 1 scaled response data
require(tidyverse)
require(brms)
require(cmdstanr)
require(ggokabeito)
require(scales)
require(tidybayes)
options(mc.cores = parallel::detectCores()-1)
options(brms.save_pars = save_pars(all = TRUE))
options(brms.backend="cmdstanr")
#options("cmdstanr_verbose" = TRUE)
options(cmdstanr_write_stan_file_dir = getwd())
set.seed(1234)
#create artifical data: a and b are likert 1-5, c and d are percentages (out of 336 available totals) based on real world data
N=353
dta1x=
tibble(
data.frame(a=sample(c(0:5),size=N, replace=T, prob = c(0.53,0.01,0.06,0.21,0.15,0.04)))%>%
mutate(ID=sample(1:N,size=N,replace=F))%>%arrange(ID)%>%select(-ID),
data.frame(b=sample(c(0:5),size=N, replace=T,prob = c(0.53,0.03,0.1,0.21,0.1,0.03)))%>%
mutate(ID=sample(1:N,size=N,replace=F))%>%arrange(ID)%>%select(-ID),
data.frame(c=c(
sample(c(0,1,2,3,5,6,8,9,12,15),
size=N-153, replace=T,prob = c(0.08,0.2,0.12,0.1,0.02,0.03,0,0,0.01,0.0001)),
rep(NA,153)))%>%
mutate(ID=sample(1:N,size=N,replace=F))%>%arrange(ID)%>%select(-ID), #randomise NAs
data.frame(d=c(
sample(c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,21),
size=N-178, replace=T,prob = c(0.01,0.01,0.01,0,0.02,0.01,0.01,0.03,0.04,0.01,0.03,0.08,0.01,0.03,0.19,0.01,0.0001))
, rep(NA,178)))%>%
mutate(ID=sample(1:N,size=N,replace=F))%>%arrange(ID)%>%select(-ID) #randomise NAs
)%>% mutate(across(c(a, b), ~ if_else(. == 0, NA_real_, .)))
# create a long dataframe with groupings to indicate causal connections
# a<-->b-->y<--d<-->c, with a,b - group SWB, c,d group OBW
dta1=dta1x%>%
#rescale to same level
mutate(across(everything(), function(x) rescale(x, to = c(0.00000001, 1))))%>% # ensure positive values for log transform
mutate(ID=1:nrow(.))%>%
pivot_longer(cols=c(a,b,c,d), names_to="Group", values_to = "value")%>% #long format
mutate(SOWB=if_else(Group %in% c("a","b"),"SWB","OBW"))%>%
group_by(SOWB)%>%
mutate(y=mean(value,na.rm=T))%>% # assume y is somewhere around the mean of each group
mutate(ysd=0.1)%>% # assume a measurement error to make y overlaping
ungroup
#scaled
dta1%>%
ggplot()+
#stat_halfeye(aes(x=value, y=Group))+
stat_histinterval(aes(x=value, y=Group))+
xlab(">0 - 1 scaled value")+
theme_classic()
# assuming a mean y for SOWB groups
dta1%>%
ggplot()+
stat_histinterval(aes(x=value, y=factor(y), colour=Group, fill = Group))+
scale_fill_okabe_ito()+
scale_color_okabe_ito()+
xlab(">0 - 1 scaled value")+
theme_classic()
# simple without missing values
f1=bf(y|mi(ysd)~yI+x, # assume that the response is somewhere between normal(y,ysd) with intercept yI and mean of x
yI~0, # estimate yI
nlf(x~0+Gvalue), # assume that x is a function of Gvalue so we can include group terms with |
Gvalue~value|Group, # value is a function of group
nl=T)
fitnlf0<-brm(
f1,
data=dta1,
family=gaussian(link=c("identity")),
warmup=500,
iter=1500,
output_dir=".",
chains=3,
threads = 2,
backend="cmdstanr"
)
fitnlf0
################## missing values on the fly ############################################
f1=bf(y|mi(ysd)~yI+x, #assume that the response is somewhere between normal(y,ysd) with intercept yI and mean of x
yI~0, # yI intercept
nlf(x~0+Gvalue), # assume that x is a function of Gvalue
Gvalue~mi(value), # Gvalue is a function of value group
value|mi()~Group, # estimate missing values for value
nl=T)
get_prior(f1, data=dta1)
fitnlf1<-brm(
data=dta1,
family=gaussian(link=c("identity")),
warmup=500,
iter=1500,
output_dir=".",
chains=3,
threads = 2,
backend="cmdstanr"
)
Any suggestions welcome
Thanks
Herry
version
_
platform x86_64-pc-linux-gnu
arch x86_64
os linux-gnu
system x86_64, linux-gnu
status
major 4
minor 4.2
year 2024
month 10
day 31
svn rev 87279
language R
version.string R version 4.4.2 (2024-10-31)
nickname Pile of Leaves
packageVersion(“brms”)
[1] ‘2.22.0’