I am trying to understand the RCode of a publicly available study: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/0EIQXL
I have difficulties to make sens of how data for the ordinal scaling model is set up.
Is there is anyone who could run me through the code? Would offer payment as well.
####################
### Set up data for ordinal scaling model
item_start_polarity <- matrix(c(
0,0,
1,0,
-1,0,
0,0,
0,0,
0,0,
1,0,
0,0,
-1,0,
-1,0,
0,0,
1,0,
0,0,
0,0,
0,0,
0,0,
0,0,
0,-1,
0,-1,
1,0,
0,0,
0,1,
0,0,
0,0,
0,0,
0,0,
0,0,
0,0,
0,0,
0,0,
0,0,
0,0,
0,1,
0,-1
),N_items,2)
item_prior_zeros <- matrix(0,N_items,2)
item_prior_zeros[33,1] <- 1 # EU does not load on dim 1
item_prior_zeros[7,2] <- 1 # NHS privatisation does not load on dim 2
item_prior_ones <- matrix(0,N_items,2)
item_prior_ones[33,2] <- 1 # EU loading on dim 2 is 1
item_prior_ones[7,1] <- 1 # NHS privatisation loading on dim 1 is 1
#item_prior_ones[19,2] <- -1 # Second language support loading on dim 2 is 1
#item_prior_ones[9,1] <- -1 # Zero hours contracts loading on dim 1 is 1
# convert to flat format to remove missing respondent-items
ordinal_responses_numeric <- sapply(ordinal_responses,as.numeric)
ordinal_responses_flat <- melt(ordinal_responses_numeric,na.rm=TRUE)
colnames(ordinal_responses_flat) <- c("Respondent","Question","Response")
ordinal_scaling_data <- list(
Y = ordinal_responses_flat$Response,
respondentID = ordinal_responses_flat$Respondent,
itemID = as.numeric(ordinal_responses_flat$Question),
N_items = ncol(ordinal_responses),
N_respondents = nrow(ordinal_responses),
N_responses = nrow(ordinal_responses_flat),
item_prior_ones = item_prior_ones,
item_prior_zeros = item_prior_zeros
)
ordinal_code <- '
data {
int<lower = 1> N_responses; // Number of total responses in Y
int<lower = 1> N_respondents; // Number of respondents
int<lower = 1> N_items; // Number of issues
int<lower = 1,upper=5> Y[N_responses]; // Vector of ordinal response data
int<lower = 1,upper=N_items> itemID[N_responses];
int<lower = 1,upper=N_respondents> respondentID[N_responses];
// identifying priors
int<lower = 0,upper=1> item_prior_zeros[N_items,2];
int<lower = -1,upper=1> item_prior_ones[N_items,2];
}
parameters {
ordered[4] alpha_unconstrained[N_items]; // cutpoints for ordered responses for each item
vector[2] beta_unconstrained[N_items]; // item loadings
vector[2] theta_individual_standardized_residual[N_respondents]; // respondent position residuals
real<lower=0> sigma_beta[2];
real<lower=0> sigma_alpha;
vector[2] mu_theta;
cov_matrix[2] thetaSigma; // error covariance of models for dim 1 and dim 2 individual thetas
}
transformed parameters {
ordered[4] alpha[N_items]; // cutpoints for ordered responses for each item
vector[2] beta[N_items]; // item loadings
real mu[N_responses]; // fitted values per response
vector[2] theta[N_respondents]; // respondent ideal point
// impose rotation and scale identifying constraints via item parameters
for (j in 1:N_items) {
alpha[j] = alpha_unconstrained[j]*sigma_alpha;
beta[j,1] = beta_unconstrained[j,1]*sigma_beta[1];
beta[j,2] = beta_unconstrained[j,2]*sigma_beta[2];
if (item_prior_zeros[j,1] == 1) beta[j,1] = 0.0;
if (item_prior_zeros[j,2] == 1) beta[j,2] = 0.0;
if (item_prior_ones[j,1] != 0) beta[j,1] = item_prior_ones[j,1];
if (item_prior_ones[j,2] != 0) beta[j,2] = item_prior_ones[j,2];
}
// model for individual ideal points
for (i in 1:N_respondents) theta[i] = mu_theta + thetaSigma * theta_individual_standardized_residual[i];
// fitted value for each observed response
for (n in 1:N_responses) mu[n] = dot_product(beta[itemID[n]],theta[respondentID[n]]);
}
model {
// model for response
for (n in 1:N_responses) Y[n] ~ ordered_logistic(mu[n], alpha[itemID[n]]);
// item parameter priors
for (j in 1:N_items) {
alpha_unconstrained[j] ~ normal(0.0,1.0);
beta_unconstrained[j,1] ~ normal(0.0,1.0);
beta_unconstrained[j,2] ~ normal(0.0,1.0);
}
// individual-level ideal points
for (i in 1:N_respondents) theta_individual_standardized_residual[i] ~ normal(0.0,1.0);
}
'
geninits <- function() {
theta_init <- cbind(c(1,-1,0,0,-1,-1,-1,0,0)[as.numeric(svyclean$vote17)],
c(1,-1,-1,1,-1,-1,-1,0,0)[as.numeric(svyclean$vote17)])
return(list(
beta_unconstrained = item_start_polarity,
theta = theta_init))
}
geninits_replication <- function() {
# this function gets the initial values used in the simulations
# presented in the paper from the saved posterior objects and
# uses them as the initial values for purposes of exact replication
load(file="ordinal_posterior.Rdata")
inits <- posterior@stan_args[[1]]$init
rm(posterior)
return(inits)
}
seed_replication <- function() {
# this function gets the initial seed used in the simulations
# presented in the paper from the saved posterior objects and
# uses them as the seed for purposes of exact replication
load(file="ordinal_posterior.Rdata")
seed <- posterior@stan_args[[1]]$seed
rm(posterior)
return(seed)
}
posterior <- stan(
model_code = ordinal_code,
data = ordinal_scaling_data,
pars=c(
"theta","alpha","beta","thetaSigma"
),
init = geninits_replication,
seed = seed_replication(),
iter = 1000,
warmup = 500,
chains = 1,
refresh = 1,
control = list(adapt_delta = 0.8,max_treedepth = 10)
)
save(posterior,ordinal_scaling_data,file="ordinal_posterior.Rdata")