I am trying to run `loo_subsample` for a model that I have fitted with cmdstanr using variational inference.
// Negative-binomial model for post scores. The log-mean and the dispersion
// are each the sum of a year, a weekday and an hour effect.
data {
  int<lower=0> N; // number of observations
  // Group counts are declared first so they can bound the index arrays below.
  int<lower=0> num_years; // size of year_mu / year_phi
  int<lower=0> num_days; // size of days_mu / days_phi
  int<lower=0> num_hours; // size of hours_mu / hours_phi
  // The index arrays subscript the parameter vectors, so every entry must be
  // in 1..num_*. Declaring the bounds rejects bad input when the data is read
  // instead of failing later with an index-out-of-range error.
  int<lower=1, upper=num_years> year[N]; // year hierarchy level
  int<lower=1, upper=num_days> day[N]; // day hierarchy level
  int<lower=1, upper=num_hours> hour[N]; // hour hierarchy level
  int<lower=0> score[N]; // post scores
}
parameters {
  // All effects are constrained to be non-negative; together with the
  // normal(0, 10) statements below this gives half-normal priors.
  vector<lower=0>[num_years] year_mu;
  vector<lower=0>[num_years] year_phi;
  vector<lower=0>[num_days] days_mu;
  vector<lower=0>[num_days] days_phi;
  vector<lower=0>[num_hours] hours_mu;
  vector<lower=0>[num_hours] hours_phi;
}
model {
  year_mu ~ normal(0, 10);
  year_phi ~ normal(0, 10);
  days_mu ~ normal(0, 10);
  days_phi ~ normal(0, 10);
  hours_mu ~ normal(0, 10);
  hours_phi ~ normal(0, 10);
  // neg_binomial_2_log takes the LOG of the mean as its first argument, so
  // the expected score is exp(year_mu + days_mu + hours_mu). Any likelihood
  // re-implemented outside Stan has to apply the same exp().
  score ~ neg_binomial_2_log(
    year_mu[year] + days_mu[day] + hours_mu[hour],
    year_phi[year] + days_phi[day] + hours_phi[hour]
  );
}
The model fits fine, and a test run using `loo_i` works too. But when I try to run `loo_subsample`, I get the wrong dimensions in the `draws` argument: instead of passing the whole draws matrix (which happens as expected when I use `loo_i`), `loo_subsample` passes a single row of that matrix to my likelihood function, which I have checked by printing its dimensions. Here is the code I use for running loo:
# Data list for the Stan program; element names match its `data` block.
# Columns are looked up inside `hn_posts`.
data <- with(hn_posts, list(
  score = score,
  N = length(score),
  year = as.integer(as.factor(year)),
  day = as.integer(weekday),
  hour = hour,
  num_years = n_distinct(year),
  num_days = n_distinct(weekday),
  num_hours = n_distinct(hour)
))

# Full-rank variational fit, requesting 5000 output draws.
hierarchical_fit <- hierarchical_model$variational(
  data = data,
  algorithm = "fullrank",
  output_samples = 5000
)
hierarchical_draws <- hierarchical_fit$draws()
# Pointwise (log-)likelihood of one observation under the hierarchical
# negative-binomial model, evaluated for every row of `draws`.
#
# Arguments:
#   data_i  One-row data frame (or list) with `score`, `year`, `day`, `hour`.
#   draws   Matrix with one row per draw and columns named like the Stan
#           parameters, e.g. "year_mu[1]", "hours_phi[13]". It may have a
#           single row: loo_subsample() has been observed to call this
#           function that way.
#   log     If TRUE (default) return log densities, otherwise densities.
#
# Returns a numeric vector with one value per row of `draws`.
llfun_hierarchical <- function(data_i, draws, log = TRUE) {
  # Parameter columns that apply to this observation's year / day / hour.
  groups <- c(data_i$year, data_i$day, data_i$hour)
  mu_cols <- paste0(c("year_mu", "days_mu", "hours_mu"), "[", groups, "]")
  phi_cols <- paste0(c("year_phi", "days_phi", "hours_phi"), "[", groups, "]")

  # drop = FALSE keeps the selection a matrix even when `draws` has only one
  # row; without it the subset collapses to a vector and rowSums() errors.
  eta <- rowSums(draws[, mu_cols, drop = FALSE])
  phi_sum <- rowSums(draws[, phi_cols, drop = FALSE])

  # The Stan model uses neg_binomial_2_log, whose location argument is the
  # log of the mean, so the mean passed to dnbinom() must be exp(eta).
  dnbinom(data_i$score, mu = exp(eta), size = phi_sum, log = log)
}
# Draws as a (draws x variables) matrix with Stan-style column names, which is
# the layout the likelihood function indexes by name.
hierarchical_params <- as_draws_matrix(hierarchical_draws)

# Single-observation check of the likelihood function:
# loo_i(1, llfun_hierarchical, data = as.data.frame(data),
#       draws = hierarchical_params)

# Note: loo_subsample() can call the likelihood function with a one-row
# `draws` matrix, so that function must keep matrix dimensions when
# subsetting (`drop = FALSE`).
#
# The draws come from a variational approximation rather than MCMC, so the
# log density of the target (`log_p`) and of the approximation (`log_g`) are
# supplied to let loo correct for the approximation.
loo_subsample(
  llfun_hierarchical,
  data = as.data.frame(data),
  draws = hierarchical_params,
  observations = 10,
  cores = 1,
  log_p = hierarchical_fit$lp(),
  log_g = hierarchical_fit$lp_approx()
)
I use the latest master builds of cmdstan and cmdstanr, and the stable release of loo.