I’m trying to write a slightly more complicated model than the one I used in my previous question. This is the stan code without multi-threading:
data {
int N;
int n_redcards[N];
int n_games[N];
real rating[N];
int<lower=0> N_leagues;
int<lower=1,upper=N_leagues> idx_league[N];
int<lower=0> N_positions;
int<lower=1,upper=N_positions> idx_position[N];
}
parameters {
vector[2] beta;
vector[N_leagues] league_std;
real<lower=0> sigma_league;
vector[N_positions] position_std;
real<lower=0> sigma_position;
}
transformed parameters{
vector[N_leagues] league = sigma_league* league_std;
vector[N_positions] position = sigma_position* position_std;
}
model {
beta ~ std_normal();
league_std ~ std_normal();
sigma_league ~ std_normal();
position_std ~ std_normal();
sigma_position ~ std_normal();
n_redcards ~ binomial_logit( n_games , beta[1] + beta[2] * to_vector(rating) +
league[idx_league] + position[idx_position] );
}
First I want to create shards by just splitting the data without blocking everyone from a league together. I also want to avoid hardcoding the number of shards. To do this, I wrote the following code:
functions {
vector lp_reduce( vector beta_common , vector theta , real[] xr , int[] xi ) {
int n = size(xr);
int y[n] = xi[1:n];
int m[n] = xi[(n+1):(2*n)];
int idx_league[n] = xi[(2*n+1):(3*n)];
int idx_position[n] = xi[(3*n+1):(4*n)];
real beta_1 = beta_common[1];
real beta_2 = beta_common[2];
vector[4] league_std = beta_common[3:6];
real sigma_league = beta_common[7];
vector[13] position_std = beta_common[8:20];
real sigma_position = beta_common[21];
vector[4] league = sigma_league* league_std;
vector[13] position = sigma_position* position_std;
real lp = binomial_logit_lpmf( y | m , beta_1 + to_vector(xr) * beta_2 +
league[idx_league] + position[idx_position]);
return [lp]';
}
}
data {
int N;
int n_redcards[N];
int n_games[N];
real rating[N];
int<lower=1,upper=4> idx_league[N];
int<lower=1,upper=13> idx_position[N];
int n_shards;
}
transformed data {
int modulo = N % n_shards;
int n_per_shard = N/n_shards;
int n_padded = n_per_shard + modulo;
int s_pad = n_per_shard + 1;
int s_games = n_padded + 1;
int e_games = s_games + n_per_shard - 1;
int s_pad_games = e_games+1;
int e_pad_games = 2*n_padded;
int s_idx_league = e_pad_games + 1;
int e_idx_league = s_idx_league + n_per_shard - 1;
int s_pad_idx_league = e_idx_league+1;
int e_pad_idx_league = 3*n_padded;
int s_idx_position = e_pad_idx_league + 1;
int e_idx_position = s_idx_position + n_per_shard - 1;
int s_pad_idx_position = e_idx_position+1;
int e_pad_idx_position = 4*n_padded;
int xi[n_shards, 4*n_padded]; // 4 because 4 variables, and they get stacked in array
real xr[n_shards, n_padded];
// an empty set of per-shard parameters
vector[0] theta[n_shards];
// split into shards
int pos = 1;
//Shards 1 to n_shards - 1 (these ones are padded with zeros)
for ( i in 1:(n_shards-1) ) {
int end = pos + n_per_shard - 1;
xr[i,1:n_per_shard] = rating[pos:end];
xr[i,s_pad:n_padded] = rep_array(0.0, modulo);
xi[i,1:n_per_shard] = n_redcards[pos:end];
xi[i,s_pad:n_padded] = rep_array(0, modulo);
xi[i,s_games:e_games] = n_games[pos:end];
xi[i,s_pad_games:e_pad_games] = rep_array(0, modulo);
xi[i,s_idx_league:e_idx_league] = idx_league[pos:end];
xi[i,s_pad_idx_league:e_pad_idx_league] = rep_array(0, modulo);
xi[i,s_idx_position:e_idx_position] = idx_position[pos:end];
xi[i,s_pad_idx_position:e_pad_idx_position] = rep_array(0, modulo);
pos = end + 1;
}
// last shard (this one has no padding)
xr[n_shards,1:n_padded] = rating[pos:N];
xi[n_shards,1:n_padded] = n_redcards[pos:N];
xi[n_shards,s_games:e_pad_games] = n_games[pos:N];
xi[n_shards,s_idx_league:e_pad_idx_league] = idx_league[pos:N];
xi[n_shards,s_idx_position:e_pad_idx_position] = idx_position[pos:N];
}
parameters {
vector[2] beta;
vector[4] league_std;
real<lower=0> sigma_league;
vector[13] position_std;
real<lower=0> sigma_position;
}
model {
beta ~ std_normal();
league_std ~ std_normal();
sigma_league ~ std_normal();
position_std ~ std_normal();
sigma_position ~ std_normal();
target += sum( map_rect( lp_reduce ,
append_row(append_row(append_row(append_row(beta, league_std), sigma_league), position_std), sigma_position) , theta , xr , xi ) );
}
Alas, this code only works if I don’t have to pad my shards with zeros. If the data is padded with zeros, then I end up with things like position[idx_position=0]
which is not defined and therefore the code does not work. Any suggestion for how to avoid this problem?
I believe the following would work, but it is also very very slow:
functions {
vector lp_reduce( vector beta_common , vector theta , real[] xr , int[] xi ) {
int n = size(xr);
int y[n] = xi[1:n];
int m[n] = xi[(n+1):(2*n)];
int idx_league[n] = xi[(2*n+1):(3*n)];
int idx_position[n] = xi[(3*n+1):(4*n)];
real beta_1 = beta_common[1];
real beta_2 = beta_common[2];
vector[4] league_std = beta_common[3:6];
real sigma_league = beta_common[7];
vector[13] position_std = beta_common[8:20];
real sigma_position = beta_common[21];
vector[4] league = sigma_league* league_std;
vector[13] position = sigma_position* position_std;
vector[n] lp;
for (i in 1:n) {
if(idx_league[i]==0){
lp[i]=0;
}else{
lp[i] = binomial_logit_lpmf( y[i] | m[i] , beta_1 + xr[i] * beta_2 +
league[idx_league[i]] + position[idx_position[i]]);
}
}
return lp;
}
}
data {
int N;
int n_redcards[N];
int n_games[N];
real rating[N];
int<lower=1,upper=4> idx_league[N];
int<lower=1,upper=13> idx_position[N];
int n_shards;
}
transformed data {
int modulo = N % n_shards;
int n_per_shard = N/n_shards;
int n_padded = n_per_shard + modulo;
int s_pad = n_per_shard + 1;
int s_games = n_padded + 1;
int e_games = s_games + n_per_shard - 1;
int s_pad_games = e_games+1;
int e_pad_games = 2*n_padded;
int s_idx_league = e_pad_games + 1;
int e_idx_league = s_idx_league + n_per_shard - 1;
int s_pad_idx_league = e_idx_league+1;
int e_pad_idx_league = 3*n_padded;
int s_idx_position = e_pad_idx_league + 1;
int e_idx_position = s_idx_position + n_per_shard - 1;
int s_pad_idx_position = e_idx_position+1;
int e_pad_idx_position = 4*n_padded;
int xi[n_shards, 4*n_padded]; // 4 because 4 variables, and they get stacked in array
real xr[n_shards, n_padded];
// an empty set of per-shard parameters
vector[0] theta[n_shards];
// split into shards
int pos = 1;
//Shards 1 to n_shards - 1 (these ones are padded with zeros)
for ( i in 1:(n_shards-1) ) {
int end = pos + n_per_shard - 1;
xr[i,1:n_per_shard] = rating[pos:end];
xr[i,s_pad:n_padded] = rep_array(0.0, modulo);
xi[i,1:n_per_shard] = n_redcards[pos:end];
xi[i,s_pad:n_padded] = rep_array(0, modulo);
xi[i,s_games:e_games] = n_games[pos:end];
xi[i,s_pad_games:e_pad_games] = rep_array(0, modulo);
xi[i,s_idx_league:e_idx_league] = idx_league[pos:end];
xi[i,s_pad_idx_league:e_pad_idx_league] = rep_array(0, modulo);
xi[i,s_idx_position:e_idx_position] = idx_position[pos:end];
xi[i,s_pad_idx_position:e_pad_idx_position] = rep_array(0, modulo);
pos = end + 1;
}
// last shard (this one has no padding)
xr[n_shards,1:n_padded] = rating[pos:N];
xi[n_shards,1:n_padded] = n_redcards[pos:N];
xi[n_shards,s_games:e_pad_games] = n_games[pos:N];
xi[n_shards,s_idx_league:e_pad_idx_league] = idx_league[pos:N];
xi[n_shards,s_idx_position:e_pad_idx_position] = idx_position[pos:N];
}
parameters {
vector[2] beta;
vector[4] league_std;
real<lower=0> sigma_league;
vector[13] position_std;
real<lower=0> sigma_position;
}
model {
beta ~ std_normal();
league_std ~ std_normal();
sigma_league ~ std_normal();
position_std ~ std_normal();
sigma_position ~ std_normal();
target += sum( map_rect( lp_reduce ,
append_row(append_row(append_row(append_row(beta, league_std), sigma_league), position_std), sigma_position) , theta , xr , xi ) );
}