Incorporating missing values with nlf model formula

I am trying to incorporate mi() into a non-linear formula, but end up with:
Error: The parameter ‘value’ is not a valid distributional or non-linear parameter. Did you forget to set ‘nl = TRUE’?

#estimating latent variable with missing values on 0 to 1 scaled response data
require(tidyverse)
require(brms)
require(cmdstanr)
require(ggokabeito)
require(scales)
require(tidybayes)
options(mc.cores = parallel::detectCores()-1)
options(brms.save_pars = save_pars(all = TRUE))
options(brms.backend="cmdstanr")
#options("cmdstanr_verbose" = TRUE)
options(cmdstanr_write_stan_file_dir = getwd())
set.seed(1234)

#create artifical data: a and b are likert 1-5, c and d are percentages (out of 336 available totals) based on real world data
N=353
dta1x=
  tibble(
    data.frame(a=sample(c(0:5),size=N, replace=T, prob = c(0.53,0.01,0.06,0.21,0.15,0.04)))%>%
      mutate(ID=sample(1:N,size=N,replace=F))%>%arrange(ID)%>%select(-ID),
    data.frame(b=sample(c(0:5),size=N, replace=T,prob = c(0.53,0.03,0.1,0.21,0.1,0.03)))%>%
      mutate(ID=sample(1:N,size=N,replace=F))%>%arrange(ID)%>%select(-ID),
    data.frame(c=c(
      sample(c(0,1,2,3,5,6,8,9,12,15),
             size=N-153, replace=T,prob = c(0.08,0.2,0.12,0.1,0.02,0.03,0,0,0.01,0.0001)),
      rep(NA,153)))%>%
      mutate(ID=sample(1:N,size=N,replace=F))%>%arrange(ID)%>%select(-ID),  #randomise NAs
    data.frame(d=c(
      sample(c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,21),
             size=N-178, replace=T,prob = c(0.01,0.01,0.01,0,0.02,0.01,0.01,0.03,0.04,0.01,0.03,0.08,0.01,0.03,0.19,0.01,0.0001))
      , rep(NA,178)))%>%
      mutate(ID=sample(1:N,size=N,replace=F))%>%arrange(ID)%>%select(-ID)        #randomise NAs
  )%>% mutate(across(c(a, b), ~ if_else(. == 0, NA_real_, .)))

# create a long dataframe with groupings to indicate causal connections
# a<-->b-->y<--d<-->c, with a,b - group SWB, c,d group OBW
dta1=dta1x%>%
  #rescale to same level
  mutate(across(everything(), function(x) rescale(x, to = c(0.00000001, 1))))%>% # ensure positive values for log transform
  mutate(ID=1:nrow(.))%>%
  pivot_longer(cols=c(a,b,c,d), names_to="Group", values_to = "value")%>% #long format
  mutate(SOWB=if_else(Group %in% c("a","b"),"SWB","OBW"))%>%
  group_by(SOWB)%>%
  mutate(y=mean(value,na.rm=T))%>% # assume y is somewhere around the mean of each group
  mutate(ysd=0.1)%>%                 # assume a measurement error to make y overlaping
  ungroup

#scaled
dta1%>%
  ggplot()+
  #stat_halfeye(aes(x=value, y=Group))+
  stat_histinterval(aes(x=value, y=Group))+
  xlab(">0 - 1 scaled value")+
  theme_classic()

# assuming a mean y for SOWB groups
dta1%>%
  ggplot()+
  stat_histinterval(aes(x=value, y=factor(y), colour=Group, fill = Group))+
  scale_fill_okabe_ito()+
  scale_color_okabe_ito()+
  xlab(">0 - 1 scaled value")+
  theme_classic()


# simple without missing values
f1=bf(y|mi(ysd)~yI+x,       # assume that the response is somewhere between normal(y,ysd) with intercept yI and mean of x
      yI~0,                 # estimate yI 
      nlf(x~0+Gvalue),      # assume that x is a function of Gvalue so we can include group terms with | 
      Gvalue~value|Group,   # value is a function of group
      nl=T)

fitnlf0<-brm(
  f1,
  data=dta1,
  family=gaussian(link=c("identity")),
  warmup=500,
  iter=1500,
  output_dir=".",
  chains=3,
  threads = 2,
  backend="cmdstanr"
)

fitnlf0

################## missing values on the fly ############################################
f1=bf(y|mi(ysd)~yI+x,           #assume that the response is somewhere between normal(y,ysd) with intercept yI and mean of x
      yI~0,                     # yI intercept
      nlf(x~0+Gvalue),          # assume that x is a function of Gvalue 
      Gvalue~mi(value),         # Gvalue is a function of value group
      value|mi()~Group,         # estimate missing values for value
      nl=T)


get_prior(f1, data=dta1)

fitnlf1<-brm(
  data=dta1,
  family=gaussian(link=c("identity")),
  warmup=500,
  iter=1500,
  output_dir=".",
  chains=3,
  threads = 2,
  backend="cmdstanr"
)

Any suggestions welcome

Thanks
Herry

version
_
platform x86_64-pc-linux-gnu
arch x86_64
os linux-gnu
system x86_64, linux-gnu
status
major 4
minor 4.2
year 2024
month 10
day 31
svn rev 87279
language R
version.string R version 4.4.2 (2024-10-31)
nickname Pile of Leaves

packageVersion(“brms”)
[1] ‘2.22.0’

Hmmm, using:

f1=bf(y|mi(ysd)~yI+x,
yI~0, # yI intercept
nlf(x~0+Gvalue), # assume that x is a function of Gvalue
Gvalue~mi(value), # Gvalue is a function of value group
nl=T)+
bf(value|mi()~Group)

does work. Note to self…

hey @herryATcsiro , yeah it seems you identified the issue, but in case this helps clarify at all:
In your second instance of f1, the line with value|mi()~Group, makes brms think that value must be a nonlinear or distributional parameter (rather than an actual variable in your data). But adding a separate bf for value like you did seems to be the way Paul’s vignette suggests, i.e. the model is now multivariate.
Good luck with the model!