Hi,
I’m trying to include subject dependent effects for a categorical predictor (5 levels) in an ordinal probit model using brms. The model results in convergence issues and bad pareto k diagnostic values.
The model includes 4 categorical predictors in total where two of them (A and G) contains gender coding (one for the participant and one for the stimuli). C contains country (5) and T has 3 levels.
The predictors has a small to medium effect which I believe the prior reflects.
Below are the model that works (fit2) and the model that results in issues (fit3).
fit2priors <- c(prior(normal(-1.07, 1), class = Intercept, coef = 1),
prior(normal(-0.57, 1), class = Intercept, coef = 2),
prior(normal(-0.18, 1), class = Intercept, coef = 3),
prior(normal( 0.18, 1), class = Intercept, coef = 4),
prior(normal( 0.57, 1), class = Intercept, coef = 5),
prior(normal( 1.07, 1), class = Intercept, coef = 6),
prior(normal( 0, 0.25 ), class = b),
prior(exponential(1) , class = sd),
prior(normal(0, log(2) / 2), class = b, dpar = disc))
fit2 <- brm(
formula = bf(Y ~ 1 + C*T + G*A + (1 | id) + (1 | Item)) +
lf(disc ~ 0 + C + T, cmc = FALSE),
data = result,
family = cumulative("probit"),
prior = fit2priors,
warmup = 3000, iter = 16000,
chains = 4, cores = nCores,
seed = 5476,
control = list(adapt_delta = .99),
init_r = 0.2
)
fit3priors = c(prior(normal(-1.07, 1), class = Intercept, coef = 1),
prior(normal(-0.57, 1), class = Intercept, coef = 2),
prior(normal(-0.18, 1), class = Intercept, coef = 3),
prior(normal( 0.18, 1), class = Intercept, coef = 4),
prior(normal( 0.57, 1), class = Intercept, coef = 5),
prior(normal( 1.07, 1), class = Intercept, coef = 6),
prior(normal( 0, 0.25 ), class = b),
prior(exponential(1) , class = sd, group = id),
prior(exponential(1) , class = sd, group = Item),
prior(lkj_corr_cholesky (2), class = cor),
prior(normal(0, log(2) / 2), class = b, dpar = disc))
fit3 <- brm(
formula = bf(Y ~ 1 + C*T + G*A + (1 + C | id) + (1 | Item)) +
lf(disc ~ 0 + C + T, cmc = FALSE),
data = result,
family = cumulative("probit"),
prior = fit3priors,
warmup = 3000, iter = 16000,
chains = 4, cores = nCores,
seed = 5476,
control = list(adapt_delta = .99),
init_r = 0.2
Any recommendations in regard to priors and or model specification?
Code for simulating the data below:
#Generate likert scale data
library(LikertMakeR) #https://github.com/WinzarH/LikertMakeR
library(reshape2)
library(tibble)
library(tidyr)
library(ggplot2)
######################## Start simulation of data ##############################
#ensure repicable results
set.seed(5476)
TA <- lfast(
n = 30,
mean = 6,
sd = 1.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
TB <- lfast(
n = 30,
mean = 6,
sd = 1,
lowerbound = 1,
upperbound = 7,
items = 3
)
TC <- lfast(
n = 30,
mean = 6,
sd = 1.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
Sw <- data.frame(TA, TB, TC)
rm(TA, TB, TC)
TA <- lfast(
n = 30,
mean = 5.5,
sd = 1.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
TB <- lfast(
n = 30,
mean = 5.5,
sd = 1,
lowerbound = 1,
upperbound = 7,
items = 3
)
TC <- lfast(
n = 30,
mean = 5.5,
sd = 1.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
Uk <- data.frame(TA, TB, TC)
rm(TA, TB, TC)
TA <- lfast(
n = 30,
mean = 5.0,
sd = 1.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
TB <- lfast(
n = 30,
mean = 5.0,
sd = 1,
lowerbound = 1,
upperbound = 7,
items = 3
)
TC <- lfast(
n = 30,
mean = 5.0,
sd = 0.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
Ge <- data.frame(TA, TB, TC)
rm(TA, TB, TC)
TA <- lfast(
n = 30,
mean = 4.0,
sd = 1.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
TB <- lfast(
n = 30,
mean = 4.0,
sd = 1,
lowerbound = 1,
upperbound = 7,
items = 3
)
TC <- lfast(
n = 30,
mean = 4.0,
sd = 0.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
Br <- data.frame(TA, TB, TC)
rm(TA, TB, TC)
TA <- lfast(
n = 30,
mean = 3.0,
sd = 1.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
TB <- lfast(
n = 30,
mean = 3.0,
sd = 1,
lowerbound = 1,
upperbound = 7,
items = 3
)
TC <- lfast(
n = 30,
mean = 3.0,
sd = 0.5,
lowerbound = 1,
upperbound = 7,
items = 3
)
Th <- data.frame(TA, TB, TC)
rm(TA, TB, TC)
######################### End simulation of data ##############################
######################### Create the data frame #############################
#Convert each frame to long format
CM <- gather(Sw, key = "T", value = "Y", c("TA", "TB", "TC" ))
CL <- gather(Uk, key = "T", value = "Y", c("TA", "TB", "TC" ))
CK <- gather(Ge, key = "T", value = "Y", c("TA", "TB", "TC" ))
CJ <- gather(Br, key = "T", value = "Y", c("TA", "TB", "TC" ))
CI <- gather(Th, key = "T", value = "Y", c("TA", "TB", "TC" ))
#Remove the short frames from the environment
rm(Sw, Uk, Ge, Br, Th)
#Create a variable for factor for country in each data frame
C <- rep(c("CI"), len = 90)
CI <- cbind(C, CI)
rm(C)
C <- rep(c("CJ"), len = 90)
CJ <- cbind(C, CJ)
rm(C)
C <- rep(c("CK"), len = 90)
CK <- cbind(C, CK)
rm(C)
C <- rep(c("CL"), len = 90)
CL <- cbind(C, CL)
rm(C)
C <- rep(c("CM"), len = 90)
CM <- cbind(C, CM)
rm(C)
#Create subj id
id1 <- rep(1:30)
id2 <- rep(31:60)
id3 <- rep(61:90)
id <- data.frame(id1, id2, id3)
idL <- gather(id, key = "id", value = "id", (c(id1, id2, id3)))
idL <- subset(idL, select = -c(id))
#Insert subject id
CI <- cbind(idL, CI)
CJ <- cbind(idL, CJ)
CK <- cbind(idL, CK)
CL <- cbind(idL, CL)
CM <- cbind(idL, CM)
rm(id1, id2, id3, id, idL)
#Creating item
Item <- rep(1:6, each = 30, len = 90)
CI <- cbind(Item, CI)
rm(Item)
Item <- rep(7:12, each = 30, len = 90)
CJ <- cbind(Item, CJ)
rm(Item)
Item <- rep(13:18, each = 30, len = 90)
CK <- cbind(Item, CK)
rm(Item)
Item <- rep(19:24, each = 30, len = 90)
CL <- cbind(Item, CL)
rm(Item)
Item <- rep(25:30, each = 30, len = 90)
CM <- cbind(Item, CM)
rm(Item)
#Create one dataframe
result <- rbind(CI, CJ, CK, CL, CM)
rm(CI, CJ, CK, CL, CM)
#Creating coding for gender on assessed material
G <- rep( c( rep(c ( "M") , each = 15 ), rep( c( "F") , each = 15)) ,
times = 5 )
result <- cbind(G, result)
rm(G)
#Creating coding for gender of assessor
A <- rep( c( rep(c ( "M") , each = 8 ), rep( c( "F") , each = 7)) ,
times = 10 )
result <- cbind(A, result)
rm(A)
#Set factor for country
result$C = factor(result$C,
levels = c("CI", "CJ", "CK", "CL", "CM"))
#Set factor for type of relationship
result$T = factor(result$T,
levels = c("TA", "TB", "TC"))
#Set factor for gender on assessed material
result$G <- factor(result$G,
levels = c("M", "F"))
#Set factor for gender of assessor
result$A <- factor(result$A,
levels = c("M", "F"))
contrasts(result$C)
contrasts(result$T)
contrasts(result$G)
contrasts(result$A)
#Set outcome as integer
result$Y <-as.integer(result$Y)
#Get a glimpse
glimpse(result)
table(result$Y, result$C, result$T)
Any guidance on this topic is highly appreciated.
Best,
Dan