Bob Carpenter's code for ranking in Stan case study on repeated binary trials


#1

Has anyone else had issues with this part of Bob’s code?.

generated quantities {
  ...
  int<lower=1, upper=N> rnk[N];   // rnk[n] is rank of player n
  ...
  {
    int dsc[N];
    dsc <- sort_indices_desc(theta);
    for (n in 1:N)
      rnk[dsc[n]] <- n;
  }
  ...
}

Everything else works ok (except for the code for the graph for the rankings which I think should be scale_y_continuous,not scale_y_discrete.)

My issue is I can’t get the rankings to be in order,but I’ve probably made a mistake somewhere else.

My whole code:

model_string_4<-
  "data {
int<lower=0> N;           // items
int<lower=0> K[N];        // initial operations
int<lower=0> y[N];        // initial return to OT

int<lower=0> K_new[N];    // new operations
int<lower=0> y_new[N];    // new return to OT
  }

parameters {
  real mu;                       // population mean of return to OT log-odds
  real<lower=0> sigma;           // population sd of return to OT log-odds
  vector[N] alpha_std;           // return to OT log-odds
}

model {
  mu ~ normal(-2, 0.5);                             // hyperprior
  sigma ~ normal(0, 1);                           // hyperprior
  alpha_std ~ normal(0, 1);                       // prior
  y ~ binomial_logit(K, mu + sigma * alpha_std);  // likelihood
}

generated quantities {
  vector[N] theta;  // chance of success to compare to alpha
int<lower=0> z[N];  // posterior prediction remaining trials
int<lower=0, upper=1> some_ability_gt_150;  // Pr[some theta > 0.15]
  int<lower=0, upper=1> avg_gt_200[N];        // Pr[season avg of n] >= 0.20
int<lower=0, upper=1> ability_gt_200[N];    // Pr[chance-of-RTOT of n] >= 0.20
int<lower=1, upper=N> rnk[N];   // rnk[n] is rank of hospital n
 int<lower=0, upper=1> is_worst[N];  // Pr[hospital n highest chance of RTOT] 



for (n in 1:N)
theta[n] = inv_logit(mu + sigma * alpha_std[n]);
for (n in 1:N)
z[n] = binomial_rng(K_new[n], theta[n]);
some_ability_gt_150 = (max(theta) > 0.15);
  for (n in 1:N)
    avg_gt_200[n] = (((y[n] + z[n]) / (0.0 + K[n] + K_new[n])) > 0.200);
  for (n in 1:N)
    ability_gt_200[n] = (theta[n] > 0.200);

 {
    int dsc[N];
    dsc = sort_indices_desc(theta);
    for (n in 1:N)
      rnk[dsc[n]] = n;
 }
for (n in 1:N) 
    is_worst[n] = (rnk[n] == 1); 

}
"

fit_hier_logit <- stan(model_code=model_string_4, data=c("N", "K", "y", "K_new", "y_new"),
                       iter=(M / 2), chains=4,
                       control=list(stepsize=0.01, adapt_delta=0.99))
#ranking
print(fit_hier_logit, "rnk", probs=c(0.1, 0.5, 0.9))

library(ggplot2);
df_rank <- data.frame(list(name = rep(as.character(df[[1,2]]), M),
                           rank = ss_hier_logit$rnk[, 1]));
for (n in 2:N) {
  df_rank <- rbind(df_rank,
                   data.frame(list(name = rep(as.character(df[[n,2]]), M),
                                   rank = ss_hier_logit$rnk[, n])));
}
rank_plot <-
  ggplot(df_rank, aes(rank)) +
  stat_count(width=0.8) +
  facet_wrap(~ name) +
  scale_x_discrete(limits=c(1, 5, 10, 15)) +
  scale_y_continuous(name="posterior probability", breaks=c(0, 0.1*M, 0.2*M,0.3*M),
                   labels=c("0.0", "0.1", "0.2", "0.3")) + 
  ggtitle("Rankings for Log Partial Pooling Model")
rank_plot

#worst hospital
df_is_worst_for <- function(name, ss) {
  is_worst <- rep(NA, N);
  for (n in 1:N) {
    is_worst[n] <- mean(ss$is_worst[,n]);
  }
  return(data.frame(list(item=1:N, is_worst=is_worst, model=name)));
}
df_is_worst <-  rbind(df_is_worst_for("hier (log odds)", ss_hier_logit))
is_worst_plot <-
  ggplot(df_is_worst, aes(x=item, y=is_worst)) +
  geom_bar(stat = "identity") +
  facet_wrap(~ model) +
  scale_y_continuous(name = "Pr[hospital is worst]") +
  scale_x_discrete(name="hospital", breaks=c(1, 5, 10, 15)) +
  ggtitle("Which is the worst hospital?")
is_worst_plot

I’ve used a different(made up) datasets looking at unplanned return to operating theatre after a procedure at fictitious hospitals(attached)
Regards
Chris
Fake_RTOT.csv (765 Bytes)


#2

This code will help run the above code if you want to give it a go.

df<-Fake_RTOT
N <- dim(df)[1]
K <- df$`First 50 cases`
y <- df$`First fifty unplanned`
K_new <- df$`Follow up Cases`
y_new <- df$`Follow up unplanned return to OT`

Regards
Chris


#3

Ok now that I look at this more carefully,the code is fine; I was just misinterpreting the results as my dataset is in random order and Bob’s baseball dataset is in descending order.
So the ranks gives the mean “rank”,not some other number. This is very useful-thanks Bob!
On the baseball figures,it’s funny how infrequently batters hit the ball, but I guess the bat is much skinnier than a cricket bat and it’s not that baseball players can’t see.
Regards
Chris