Errors "data NAME not used", "variable does not exist" in hierarchical multinomial processing tree (MPT) model in stan

Hello,

I’m relatively new to hand-coding models in stan, and I’m running into an error when trying to fit a hierarchically-enriched multinomial processing tree model to unordered, categorical response data (on one branch {meu, coded as 1, mu - 2}, the other branch {peu - 3, pu - 4, seu - 5, su - 6}).

I’ve added a regression (“roundness”) to both parameters “round” and “spirantize”, which together control which of four possible responses are on one branch of the tree; just the rounding parameter controls the two possible outcomes on the other branch. That runs fine when done separately.

However, when I try to add a random intercept for subject and/or item to the regressions, I get an error of the form “data with name subject is not numeric and not used”, as well as a n exception “variable does not exist; processing stage=data initialization; variable name=subject; base type=int (in ‘model24405c096146_HeirarchicalPhonologyAndMore’ at line 7). failed to create the sampler; sampling not done”. I’m not sure what I’m doing wrong. Below is the stan code I’m using in the model.

data { 
  int<lower = 1> N_trials;
  int<lower = 1,upper = 6> w_ans[N_trials];
  real roundness[N_trials]; 
  int<lower = 1> N_subject;
  int<lower = 1, upper = N_subject> subject[N_subject];
}
parameters {
  
  real<lower = 0, upper = 1> be_meu;
  vector[N_subject] spirantize_baseline_subject_adjustment;  
  real round_general_baseline;
  real round_peu_offset;
  real round_meu_offset;
  real spirantize_general_baseline;
  real round_general_beta;
  real spirantize_general_beta;

} 
transformed parameters {
  simplex[6] theta[N_trials];
  for (n in 1:N_trials){
    for (s in 1:N_subject){
    real round_general = round_general_baseline +  roundness[n]*round_general_beta;
    real round_peu = inv_logit(round_general +  round_peu_offset);
    real round_meu = inv_logit(round_general +  round_meu_offset);
    real spirantize_general = inv_logit((spirantize_general_baseline + spirantize_baseline_subject_adjustment[s])  + roundness[n] * spirantize_general_beta);// +  spirantize_general_beta_subject_adjustment[s]));
    
    theta[n, 1] = be_meu * (1-round_meu) ; //meu
    theta[n, 2] = be_meu * round_meu; //mu 
    theta[n, 3] = (1 - be_meu) * (1 - round_peu) * (1 - spirantize_general) ;  //peu 
    theta[n, 4] = (1 - be_meu) * round_peu * (1 - spirantize_general) ; //pu 
    theta[n, 5] = (1 - be_meu) * (1 - round_peu) * spirantize_general ; //seu 
	theta[n, 6] = (1 - be_meu) * round_peu * spirantize_general; //su 
}
  }
}
model {
  target += beta_lpdf(be_meu | 2, 2);  
 

  target += normal_lpdf(round_general_baseline | 0, 1);  
  target += normal_lpdf(spirantize_general_baseline | 0, 1);  
  target += normal_lpdf(round_general_beta | 0, 1);  
  target += normal_lpdf(spirantize_general_beta | 0, 1);  

  target += normal_lpdf(round_peu_offset | 0, 1);
  target += normal_lpdf(round_meu_offset | 0, 1);  

  target += normal_lpdf(spirantize_baseline_subject_adjustment[subject]| 0,1);

  for(n in 1:N_trials)
    target +=  categorical_lpmf(w_ans[n] | theta[n]);
}
//generated quantities{
 // int<lower = 1, upper = 6> pred_w_ans[N_trials];
 // for(n in 1:N_trials) 
 //   pred_w_ans[n] = categorical_rng(theta[n]);
//}

I’ve seen previous errors like this get resolved by passing the data to the model as a list; however, I tried doing that and it doesn’t seem to help. Here is the R code I’m using:

library(rstan)
library(rstanarm)
library(tidyverse)
library(bayesplot)

# read in data
dat <- read_csv("MPTDataForStanForums.csv")


roundness_dat <- dat %>% 
  filter(LabialInitial == TRUE) %>% 
  #mutate(TotalLabialsInStem = scale(TotalLabialsInStem)) %>% 
  select(Subject,MPT_answer,TotalLabialsInStem) %>% 
  filter(!(is.na(MPT_answer)))

n_subjects <- roundness_dat %>% 
  select(Subject) %>% 
  distinct() %>% 
  summarise(
    count = n()
  )
  
data_list_subject <-  list(N_trials = nrow(roundness_dat),
         w_ans = roundness_dat$MPT_answer,
         N_subject = n_subjects$count,
         subject = as.factor(roundness_dat$Subject),
         roundness = roundness_dat$TotalLabialsInStem
         )

fit_heir_2 <- stan(file = 'HeirarchicalPhonologyAndMore.stan', data = data_list_subject, chains = 4, warmup = 1000, iter = 2000, verbose = T, cores = 4, control = list(adapt_delta = .9)) 

I’ve uploaded the (anonymized) data I’m trying to model here MPTDataForStanForums.csv (64.8 KB) ; any help on this would be very much appreciated!

The code below compiled for me. The two important changes are that I declared subject to be an array of length N_trials, not N_subject, and I passed subject to stan() as a numeric not a factor. Note that I used read.csv() to import the data from the csv, so subject was immediately made a factor. I think read_csv will bring it in as a character. Other changes were just for my convenience or because the data provide did not exactly match the code, e.g. there is no column LabialIntial.
The 200 iterations I ran did not go well, with large Rhat and small n_eff.

library(rstan)
options(mc.cores = parallel::detectCores())
Model <- "
data { 
  int<lower = 1> N_trials;
int<lower = 1,upper = 6> w_ans[N_trials];
real roundness[N_trials]; 
int<lower = 1> N_subject;
int<lower = 1, upper = N_subject> subject[N_trials];
}
parameters {

real<lower = 0, upper = 1> be_meu;
vector[N_subject] spirantize_baseline_subject_adjustment;  
real round_general_baseline;
real round_peu_offset;
real round_meu_offset;
real spirantize_general_baseline;
real round_general_beta;
real spirantize_general_beta;

} 
transformed parameters {
simplex[6] theta[N_trials];
for (n in 1:N_trials){
for (s in 1:N_subject){
real round_general = round_general_baseline +  roundness[n]*round_general_beta;
real round_peu = inv_logit(round_general +  round_peu_offset);
real round_meu = inv_logit(round_general +  round_meu_offset);
real spirantize_general = inv_logit((spirantize_general_baseline + spirantize_baseline_subject_adjustment[s])  + roundness[n] * spirantize_general_beta);// +  spirantize_general_beta_subject_adjustment[s]));

theta[n, 1] = be_meu * (1-round_meu) ; //meu
theta[n, 2] = be_meu * round_meu; //mu 
theta[n, 3] = (1 - be_meu) * (1 - round_peu) * (1 - spirantize_general) ;  //peu 
theta[n, 4] = (1 - be_meu) * round_peu * (1 - spirantize_general) ; //pu 
theta[n, 5] = (1 - be_meu) * (1 - round_peu) * spirantize_general ; //seu 
theta[n, 6] = (1 - be_meu) * round_peu * spirantize_general; //su 
}
}
}
model {
target += beta_lpdf(be_meu | 2, 2);  


target += normal_lpdf(round_general_baseline | 0, 1);  
target += normal_lpdf(spirantize_general_baseline | 0, 1);  
target += normal_lpdf(round_general_beta | 0, 1);  
target += normal_lpdf(spirantize_general_beta | 0, 1);  

target += normal_lpdf(round_peu_offset | 0, 1);
target += normal_lpdf(round_meu_offset | 0, 1);  

target += normal_lpdf(spirantize_baseline_subject_adjustment[subject]| 0,1);

for(n in 1:N_trials)
target +=  categorical_lpmf(w_ans[n] | theta[n]);
}
"

dat <- read.csv("MPTDataForStanForums.csv")
library(dplyr)

dat <- dat %>% 
  #filter(LabialInitial == TRUE) %>% 
  #mutate(TotalLabialsInStem = scale(TotalLabialsInStem)) %>% 
  select(Subject,MPT_answer,TotalLabialsInStem) %>% 
  filter(!(is.na(MPT_answer)))

n_subjects <- dat %>% #roundness_dat
  select(Subject) %>% 
  distinct() %>% 
  summarise(
    count = n()
  )

data_list_subject <-  list(N_trials = nrow(dat),
                           w_ans = dat$MPT_answer,
                           N_subject = n_subjects$count,
                           subject = as.numeric(dat$Subject),
                           roundness = dat$TotalLabialsInStem
)

fit_heir_2 <- stan(model_code = Model, data = data_list_subject, chains = 4, 
                   warmup = 100, iter = 200, verbose = FALSE, cores = 4, control = list(adapt_delta = .9)) 
1 Like

Thanks very much for checking and your help, this fixed the issue for me! I apologize re: the data formatting issues. Re: poor rhat, n_eff - I’m not surprised; now that it’s running I can address these issues. Thanks again!