This is very very helpful, @erognli! Mission accomplished!!
sim_data <- function(n_families = 100,
range_family_size = c(2,6),
dist_mu_family = c(0,1),
dist_sigma_family = c(1,.2),
seed = NA) {
require(simstudy); require(magrittr)
ddef <- defData(id = 'family',
varname = 'n_members',
dist = 'uniformInt',
formula = paste(range_family_size[1],
';',
range_family_size[2])) %>%
defData(varname = 'mu_family',
dist = 'normal',
formula = dist_mu_family[1],
variance = dist_mu_family[2]) %>%
defData(varname = 'sigma_family',
dist = 'gamma',
formula = dist_sigma_family[1],
variance = dist_sigma_family[2],
link = 'identity')
d <- genData(n = n_families,
dtDefs = ddef)
# ADDED
d <- trtAssign(d, n = 2)
d <- genCluster(dtClust = d,
cLevelVar = 'family',
numIndsVar = 'n_members',
level1ID = 'id')
ddef <- defDataAdd(varname = 'latent_score',
dist = 'normal',
formula = 'mu_family + trtGrp*0.5', # ADDED
variance = 'sigma_family')
d <- addColumns(dtDefs = ddef,
dtOld = d)
# https://cran.r-project.org/web/packages/simstudy/vignettes/ordinal.html
d <- genOrdCat(dtName = d,
adjVar = 'latent_score',
baseprobs =
matrix(c(rep(c(0.05, 0.31, 0.23, 0.03, 0.03, 0.21, 0.03, 0.03, 0.03, 0.05), 10),
rep(c(0.38, 0.22, 0.14, 0.11, 0.02, 0.02, 0.02, 0.02, 0.05, 0.02), 10),
rep(c(0.08, 0.41, 0.16, 0.05, 0.05, 0.08, 0.08, 0.02, 0.02, 0.05), 10)
),
nrow = 30,
byrow = T),
asFactor = F,
idname = 'id',
prefix = 'item_',
rho = 0.425,
corstr = 'ind')
return(d)
}
This gives me the ability to simulate a dataset that meets the original goals!
- 100 families with between 2 and 6 members each
- Families assigned 1:1 to treatment or control
- Responses to 30 ordinal items, 1-10 (formerly 0-3)
a) Person-level: items correlated to measure one latent construct
b) Family-level: scale scores correlated within families
c) Group-level: scale scores higher in treatment arm
df <- sim_data(seed = 8675309) %>%
dplyr::select(family, n_members, trtGrp, id, starts_with("item")) %>%
mutate(scale_avg = rowMeans(across(starts_with("item")))) %>%
as_tibble()
glimpse(df)
Rows: 372
Columns: 35
$ family <int> 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8…
$ n_members <int> 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 5, 5, 5, 5, 5, 4, 4, 4, 4, 3…
$ trtGrp <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0…
$ id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, …
$ item_01 <int> 2, 8, 2, 2, 6, 1, 6, 2, 6, 2, 10, 2, 3, 2, 6, 6, 10, 2, 9, 6, 7, 7,…
$ item_02 <int> 6, 9, 3, 3, 2, 2, 4, 1, 8, 9, 8, 9, 2, 1, 8, 1, 7, 3, 2, 7, 7, 2, 7…
$ item_03 <int> 6, 10, 7, 2, 6, 4, 9, 2, 3, 6, 3, 6, 6, 2, 3, 2, 6, 2, 6, 2, 7, 6, …
$ item_04 <int> 7, 9, 2, 1, 3, 2, 4, 5, 2, 3, 10, 2, 10, 2, 3, 2, 3, 2, 6, 2, 10, 2…
$ item_05 <int> 2, 6, 1, 2, 10, 2, 10, 3, 3, 6, 6, 9, 3, 2, 3, 2, 2, 1, 7, 3, 6, 10…
$ item_06 <int> 6, 6, 3, 2, 10, 2, 6, 2, 9, 6, 3, 10, 4, 2, 1, 2, 3, 2, 10, 10, 6, …
$ item_07 <int> 3, 6, 6, 1, 2, 2, 6, 2, 9, 10, 10, 10, 2, 2, 9, 2, 7, 3, 8, 7, 6, 3…
$ item_08 <int> 3, 6, 3, 3, 2, 2, 1, 2, 6, 6, 7, 6, 6, 5, 2, 6, 10, 2, 10, 6, 9, 3,…
$ item_09 <int> 6, 3, 6, 2, 7, 2, 3, 3, 3, 6, 6, 3, 3, 2, 2, 6, 3, 2, 7, 9, 9, 2, 2…
$ item_10 <int> 2, 6, 3, 2, 6, 4, 6, 4, 3, 6, 8, 10, 3, 2, 2, 5, 6, 3, 2, 2, 6, 6, …
$ item_11 <int> 2, 10, 4, 2, 4, 6, 3, 2, 3, 4, 4, 9, 8, 1, 2, 1, 9, 3, 9, 1, 8, 3, …
$ item_12 <int> 2, 10, 1, 1, 3, 1, 4, 1, 1, 1, 1, 6, 2, 1, 4, 1, 1, 1, 4, 9, 8, 6, …
$ item_13 <int> 1, 1, 3, 2, 9, 2, 4, 1, 2, 2, 6, 7, 9, 1, 2, 2, 4, 1, 4, 3, 4, 1, 1…
$ item_14 <int> 2, 1, 1, 1, 10, 3, 3, 1, 4, 2, 7, 5, 1, 2, 1, 2, 4, 9, 1, 6, 6, 1, …
$ item_15 <int> 3, 9, 1, 1, 9, 1, 1, 1, 1, 9, 4, 5, 1, 1, 1, 2, 3, 1, 1, 10, 2, 3, …
$ item_16 <int> 9, 2, 1, 1, 1, 1, 3, 2, 2, 4, 1, 8, 8, 1, 1, 4, 10, 3, 4, 4, 10, 1,…
$ item_17 <int> 2, 2, 1, 1, 4, 1, 4, 1, 1, 1, 5, 3, 1, 1, 3, 1, 1, 2, 8, 2, 5, 1, 4…
$ item_18 <int> 9, 2, 1, 1, 1, 1, 3, 1, 3, 9, 9, 3, 4, 1, 1, 1, 1, 2, 1, 3, 1, 3, 4…
$ item_19 <int> 2, 10, 1, 1, 7, 2, 6, 6, 2, 9, 3, 10, 1, 2, 1, 1, 2, 1, 4, 8, 4, 1,…
$ item_20 <int> 9, 9, 1, 1, 4, 3, 9, 2, 9, 3, 4, 3, 1, 1, 1, 3, 4, 1, 10, 3, 3, 1, …
$ item_21 <int> 2, 7, 2, 2, 2, 2, 2, 2, 3, 2, 2, 7, 1, 2, 3, 3, 10, 2, 10, 7, 2, 2,…
$ item_22 <int> 3, 5, 2, 2, 10, 2, 6, 2, 5, 10, 7, 4, 3, 1, 2, 2, 1, 4, 6, 2, 2, 2,…
$ item_23 <int> 10, 7, 2, 1, 4, 2, 10, 6, 3, 10, 4, 2, 5, 6, 2, 6, 5, 2, 2, 3, 3, 1…
$ item_24 <int> 2, 9, 1, 2, 3, 4, 8, 4, 2, 7, 10, 2, 1, 1, 2, 3, 2, 2, 10, 6, 4, 2,…
$ item_25 <int> 3, 7, 1, 2, 3, 2, 2, 6, 2, 6, 7, 7, 1, 2, 2, 2, 2, 2, 2, 10, 5, 5, …
$ item_26 <int> 2, 2, 2, 1, 10, 2, 7, 6, 3, 5, 7, 2, 10, 2, 2, 10, 4, 7, 6, 7, 3, 3…
$ item_27 <int> 7, 5, 2, 2, 2, 1, 9, 3, 1, 10, 5, 8, 1, 2, 2, 2, 7, 2, 7, 3, 4, 1, …
$ item_28 <int> 2, 7, 2, 1, 10, 2, 2, 6, 10, 6, 7, 10, 2, 1, 1, 6, 7, 5, 7, 2, 1, 2…
$ item_29 <int> 8, 8, 2, 1, 7, 1, 10, 3, 2, 2, 4, 4, 5, 8, 2, 1, 3, 2, 5, 2, 5, 1, …
$ item_30 <int> 10, 7, 3, 1, 3, 2, 3, 1, 2, 7, 5, 2, 4, 10, 4, 7, 7, 3, 4, 4, 6, 2,…
$ scale_avg <dbl> 4.433333, 6.300000, 2.333333, 1.566667, 5.333333, 2.133333, 5.13333…
Receipts…
# 100 families...
# ----------------------------------------------------------------------
df %>%
distinct(family) %>%
nrow()
# 100
# ...with between 2 and 6 members each
# ----------------------------------------------------------------------
df %>%
group_by(family) %>%
count() %>%
ungroup() %>%
summarize(min = min(n), max=max(n))
## A tibble: 1 × 2
# min max
# <int> <int>
#1 2 6
# Families assigned 1:1 to treatment or control
# ----------------------------------------------------------------------
df %>%
distinct(family, .keep_all = TRUE) %>%
group_by(trtGrp) %>%
count()
## A tibble: 2 × 2
## Groups: trtGrp [2]
# trtGrp n
# <int> <int>
#1 0 50
#2 1 50
# Person-level: items correlated to measure one latent construct
performance::item_intercor(df %>% select(starts_with("item")), method = "pearson")
# 0.3426369
# Family-level: scale scores correlated within families
# ----------------------------------------------------------------------
fit <- lme4::lmer(scale_avg ~ trtGrp + (1 | family), data = df)
performance::icc(fit)
# Adjusted ICC: 0.420
# Group-level: scale scores higher in treatment arm
# ----------------------------------------------------------------------
parameters::model_parameters(fit, effects = "fixed", ci_method = "satterthwaite")
Parameter | Coefficient | SE | 95% CI | t | df | p
------------------------------------------------------------------------
(Intercept) | 3.63 | 0.19 | [3.26, 4.00] | 19.51 | 93.09 | < .001
trtGrp | 0.71 | 0.27 | [0.18, 1.24] | 2.67 | 95.72 | 0.009