Hello,
I’m relatively new to hand-coding models in stan, and I’m running into an error when trying to fit a hierarchically-enriched multinomial processing tree model to unordered, categorical response data (on one branch {meu, coded as 1, mu - 2}, the other branch {peu - 3, pu - 4, seu - 5, su - 6}).
I’ve added a regression (“roundness”) to both parameters “round” and “spirantize”, which together control which of four possible responses are on one branch of the tree; just the rounding parameter controls the two possible outcomes on the other branch. That runs fine when done separately.
However, when I try to add a random intercept for subject and/or item to the regressions, I get an error of the form “data with name subject is not numeric and not used”, as well as a n exception “variable does not exist; processing stage=data initialization; variable name=subject; base type=int (in ‘model24405c096146_HeirarchicalPhonologyAndMore’ at line 7). failed to create the sampler; sampling not done”. I’m not sure what I’m doing wrong. Below is the stan code I’m using in the model.
data {
int<lower = 1> N_trials;
int<lower = 1,upper = 6> w_ans[N_trials];
real roundness[N_trials];
int<lower = 1> N_subject;
int<lower = 1, upper = N_subject> subject[N_subject];
}
parameters {
real<lower = 0, upper = 1> be_meu;
vector[N_subject] spirantize_baseline_subject_adjustment;
real round_general_baseline;
real round_peu_offset;
real round_meu_offset;
real spirantize_general_baseline;
real round_general_beta;
real spirantize_general_beta;
}
transformed parameters {
simplex[6] theta[N_trials];
for (n in 1:N_trials){
for (s in 1:N_subject){
real round_general = round_general_baseline + roundness[n]*round_general_beta;
real round_peu = inv_logit(round_general + round_peu_offset);
real round_meu = inv_logit(round_general + round_meu_offset);
real spirantize_general = inv_logit((spirantize_general_baseline + spirantize_baseline_subject_adjustment[s]) + roundness[n] * spirantize_general_beta);// + spirantize_general_beta_subject_adjustment[s]));
theta[n, 1] = be_meu * (1-round_meu) ; //meu
theta[n, 2] = be_meu * round_meu; //mu
theta[n, 3] = (1 - be_meu) * (1 - round_peu) * (1 - spirantize_general) ; //peu
theta[n, 4] = (1 - be_meu) * round_peu * (1 - spirantize_general) ; //pu
theta[n, 5] = (1 - be_meu) * (1 - round_peu) * spirantize_general ; //seu
theta[n, 6] = (1 - be_meu) * round_peu * spirantize_general; //su
}
}
}
model {
target += beta_lpdf(be_meu | 2, 2);
target += normal_lpdf(round_general_baseline | 0, 1);
target += normal_lpdf(spirantize_general_baseline | 0, 1);
target += normal_lpdf(round_general_beta | 0, 1);
target += normal_lpdf(spirantize_general_beta | 0, 1);
target += normal_lpdf(round_peu_offset | 0, 1);
target += normal_lpdf(round_meu_offset | 0, 1);
target += normal_lpdf(spirantize_baseline_subject_adjustment[subject]| 0,1);
for(n in 1:N_trials)
target += categorical_lpmf(w_ans[n] | theta[n]);
}
//generated quantities{
// int<lower = 1, upper = 6> pred_w_ans[N_trials];
// for(n in 1:N_trials)
// pred_w_ans[n] = categorical_rng(theta[n]);
//}
I’ve seen previous errors like this get resolved by passing the data to the model as a list; however, I tried doing that and it doesn’t seem to help. Here is the R code I’m using:
library(rstan)
library(rstanarm)
library(tidyverse)
library(bayesplot)
# read in data
dat <- read_csv("MPTDataForStanForums.csv")
roundness_dat <- dat %>%
filter(LabialInitial == TRUE) %>%
#mutate(TotalLabialsInStem = scale(TotalLabialsInStem)) %>%
select(Subject,MPT_answer,TotalLabialsInStem) %>%
filter(!(is.na(MPT_answer)))
n_subjects <- roundness_dat %>%
select(Subject) %>%
distinct() %>%
summarise(
count = n()
)
data_list_subject <- list(N_trials = nrow(roundness_dat),
w_ans = roundness_dat$MPT_answer,
N_subject = n_subjects$count,
subject = as.factor(roundness_dat$Subject),
roundness = roundness_dat$TotalLabialsInStem
)
fit_heir_2 <- stan(file = 'HeirarchicalPhonologyAndMore.stan', data = data_list_subject, chains = 4, warmup = 1000, iter = 2000, verbose = T, cores = 4, control = list(adapt_delta = .9))
I’ve uploaded the (anonymized) data I’m trying to model here MPTDataForStanForums.csv (64.8 KB) ; any help on this would be very much appreciated!