Hi all,
I am attempting to make a model similar to what Milad Kharratzadeh did for the English Premier League. I can fit the model without issue but would like to simulate game outcomes using only the information available at the beginning of the season.
The code below simulates game outcomes based on team ability. x_prior provides some information about team ability going into the first game and ability is changing over time. I would like to return predictions for each game knowing only the value of x_prior for the two teams involved but haven’t been able to figure out how to do this in the generated quantities section.
library(purrr)
library(dplyr)
library(rstan)
n_teams <- 30
n_weeks <- 16
alpha <- 3
sigma <- 14
sigma_start = 3
sigma_state = 1
x_prior <- rnorm(n_teams, 0, 1)
x_beta <- .5
mu_start <- rnorm(n_teams, x_prior * x_beta, sigma_start)
gen_mu_ts <- function(mu_start, sigma_state) {
mu_team <- 0
for (i in 1:16){
if (i == 1){
mu_team[i] <- mu_start
} else {
mu_team[i] <- mu_team[i-1] + rnorm(1, 0, sigma_state)
}
}
return(mu_team)
}
create_matchups <- function(n_teams, n_weeks){
weeklist <- list()
for (i in 1:n_weeks){
teams <- 1:n_teams
matchlist <- list()
for (k in 1:(n_teams/2)){
match <- sample(teams, 2)
teams <- teams[!teams %in% match]
matchlist[[k]] <- match
}
df <- data.frame(team_1 = sapply(matchlist, '[', 1),
team_2 = sapply(matchlist, '[', 2),
week = i
)
weeklist[[i]] <- df
}
return_df <- dplyr::bind_rows(weeklist)
return(return_df)
}
games <- create_matchups(n_teams, n_weeks)
##Generate team abilities over time
team_abilities <- unlist(map(mu_start, gen_mu_ts, sigma_state))
team_id <- unlist(lapply(1:n_teams, rep, n_weeks))
week <- rep(1:n_weeks, 30)
abilities <- data.frame(ability = team_abilities,
team_id = team_id,
week = week)
##Join team abilities
games <- games %>%
left_join(abilities, by = c('team_1' = 'team_id',
'week' = 'week')) %>%
rename(team_1_mu = ability) %>%
left_join(abilities, by = c('team_2' = 'team_id',
'week' = 'week')) %>%
rename(team_2_mu = ability)
## Generate Score
games$score_diff <- rnorm(nrow(games), alpha + games$team_1_mu - games$team_2_mu, sigma)
standat <- list(T = n_weeks,
N = nrow(games),
K = n_teams,
week = games$week,
team_1 = games$team_1,
team_2 = games$team_2,
y = games$score_diff,
x = mu_start)
Below is the stan code with simulated game outcomes generated using the updated information about team ability.
data {
int<lower = 1> T; // Number of time periods
int<lower=0> N; // number of observations
int<lower=1> K; //number of teams
int<lower=1> week[N];
int<lower=1> team_1[N];
int<lower=1> team_2[N];
row_vector[K] x; // Independent variable for prior
real y[N]; // final score difference
}
parameters {
real alpha; // standard home field advantage
real beta_x; // coefficient on prior
real<lower = 0> sigma_state; // week to week variation
real<lower = 0> sigma_ability; // team ability variation
real<lower = 0> sigma; // residual standard error
matrix[T,K] eta_mu; //Matrix of team abilities
}
transformed parameters {
matrix[T,K] mu; // Team abilities
mu[1] = x * beta_x + sigma_ability * eta_mu[1];
for (t in 2:T){
mu[t] = mu[t-1] + sigma_state * eta_mu[t];
}
}
model {
vector[N] mu_diff;
alpha ~ normal(3,1);
sigma_state ~ normal(1,1);
sigma_ability ~ normal(3,1);
sigma ~ normal(14,3);
beta_x ~ normal(0,1);
to_vector(eta_mu) ~ normal(0,1);
for (n in 1:N){
mu_diff[n] = mu[week[n],team_1[n]] - mu[week[n], team_2[n]];
}
//likelihood
y ~ normal(alpha + mu_diff, sigma);
}
generated quantities{
vector[N] yrep;
for (n in 1:N){
yrep[n] = normal_rng(alpha + mu[week[n],team_1[n]] - mu[week[n], team_2[n]], sigma);
}
}
}