library(tidyverse) library(patchwork) library(cmdstanr) model_sum0 <- cmdstan_model('model_sum0.stan') model_normal <- cmdstan_model('model_normal.stan') # Setup data ---- #set.seed(573) # Standard seed is 573 n <- 11 # Number of candidates p <- 4 # Number of recruiting panel members n_compare <- 2 # Which candidate (by order) is the last candidate (by letter); i.e. is constrained to be -sum(mu_hat) candidate_details <- tibble(name = letters[1:n], # Generate letter names for each candidate taken from start of alphabet sc_ability = runif(n, min = 0, max = 10), # Generate scientific ability for each candidate by random sampling sc_rank = rank(sc_ability), # Find the rank of each candidate, use negative as we want highest ability to be first sc_order = order(sc_ability)) %>% # Find the order in which the candidates have to be selected to put the highest ability first, ie in the first row is the index of the candidate with highest ability arrange(sc_rank == n_compare, name) %>% mutate(name = letters[1:n()], sc_order = order(sc_ability)) panel_sc_sd <- 0.0 # Standard deviation of recruiters estimate of sc_ability panel_names <- letters[(27-p):26] # Generate letter names for each member of the recruiting panel taken from end of alphabet # Define function to produce panel estimates of scores find_member_scores <- function(sc_ability){ sc_ability + rnorm(length(sc_ability), mean = 0, sd = panel_sc_sd) } # Add panel member latent scores for(panel_member in seq_along(panel_names)){ # * TO DO * It would be great to replace this with Purrr calls panelist <- paste0("p_sc_", panel_names[panel_member]) candidate_details <- candidate_details %>% mutate({{panelist}} := find_member_scores(sc_ability)) } # Work out how each panel member ranks the candidates candidate_details <- candidate_details %>% mutate(across(starts_with("p_sc"), ~ dense_rank(.), .names = "rank_{.col}")) # Prepare data for Stan # Select the ranking rows from candidate_details data frame, make into a matrix and transpose panel_ranking_data <- candidate_details %>% select(starts_with("rank")) %>% as.matrix() %>% t() mu_sd_prior <- 0.5 data_list <- list( K = n, J = p, y = panel_ranking_data, mu_sd_prior = mu_sd_prior ) # Estimate ---- fit_sum0 <- model_sum0$sample( data = data_list, # named list of data chains = 1, # number of Markov chains iter_warmup = 1000, # number of warmup iterations per chain iter_sampling = 2000 # total number of iterations per chain ) fit_normal <- model_normal$sample( data = data_list, # named list of data chains = 1, # number of Markov chains iter_warmup = 1000, # number of warmup iterations per chain iter_sampling = 2000 # total number of iterations per chain ) # Examine ---- # Get a list of mu estimates and their ranks so can put it into the candidates details frame for comparisons est_ranks_sum0 <- fit_sum0$summary(variable = 'mu') %>% # * TO DO * This feels very clunky surely there is an easier way to get a dataframe of parameters select(mean) %>% mutate(name = letters[1:n()]) %>% arrange(mean) %>% mutate(est_sc_rank = 1:n(), model = 'Sum-to-zero') est_ranks_normal <- fit_normal$summary(variable = 'mu') %>% # * TO DO * This feels very clunky surely there is an easier way to get a dataframe of parameters select(mean) %>% mutate(name = letters[1:n()]) %>% arrange(mean) %>% mutate(est_sc_rank = 1:n(), model = 'Normal') candidate_details2 <- candidate_details %>% left_join(bind_rows(est_ranks_sum0, est_ranks_normal), by = ("name" = "name")) ind10 <- (1:n)[candidate_details$sc_order==n] est_rank_plot <- ggplot(candidate_details2, aes(x = sc_rank, y = est_sc_rank, color = model, group = model)) + geom_abline(linetype = 2) + geom_hline(yintercept = ind10, linetype = 2) + geom_point(position = position_dodge(width = 0.2)) + scale_color_brewer('Model', palette = 'Set1') + scale_x_continuous('True rank', breaks = 1:n) + scale_y_continuous('Estimated rank', breaks = 1:n) est_mean_plot <- ggplot(candidate_details2, aes(x = sc_rank, y = mean, color = model, group = model)) + geom_hline(yintercept = 0) + geom_hline(yintercept = -sum(fit1$summary(variable = 'mu')$mean[1:(n-1)]), linetype = 2) + geom_point(position = position_dodge(width = 0.2)) + scale_color_brewer('Model', palette = 'Set1') + scale_x_continuous('True rank', breaks = 1:n) + scale_y_continuous('Estimated mean') (est_rank_plot + est_mean_plot) + plot_layout(guides = "collect") & theme_bw() & theme(legend.position = 'bottom', panel.grid.minor = element_blank())