# K-fold cross validation in cmdstanr - extracting loglikelihood from generated quantities

I am setting up a cross-validation loop in cmdstanr, inspired by the examples in @bnicenboim 's book: 16.6 Cross-validation in Stan | An Introduction to Bayesian Data Analysis for Cognitive Science
and I am having issues extracting the log likelihood for the validation set, in the example below:

``````log_pd_random_kfold[, d\$fold == k] <- extract_log_lik(gq_random_test)
``````

which gives: “Error: Not a stanfit object.” Nor I can figure out where in gq_random_test the log_lik could be.

Reproducible code:

``````d <- read_csv("d_random.csv")

stan_file <- write_stan_file("
// This STAN model infers a random bias from a sequences of 1s and 0s (heads and tails). Now multilevel
//

functions{
real normal_lb_rng(real mu, real sigma, real lb) {
real p = normal_cdf(lb | mu, sigma);  // cdf for bounds
real u = uniform_rng(p, 1);
return (sigma * inv_Phi(u)) + mu;  // inverse cdf for value
}
}

// The input (data) for the model. n of trials and h of heads
data {
int<lower = 1> trials;
int<lower = 1> agents;
array[trials, agents] int h;
}

// The parameters accepted by the model.
parameters {
real thetaM;
real<lower = 0> thetaSD;
array[agents] real theta;
}

// The model to be estimated.
model {
target += normal_lpdf(thetaM | 0, 1);
target += normal_lpdf(thetaSD | 0, .3)  -
normal_lccdf(0 | 0, .3);

// The prior for theta is a uniform distribution between 0 and 1
target += normal_lpdf(theta | thetaM, thetaSD);

for (i in 1:agents)
target += bernoulli_logit_lpmf(h[,i] | theta[i]);

}

generated quantities{
real thetaM_prior;
real<lower=0> thetaSD_prior;
real<lower=0, upper=1> theta_prior;
real<lower=0, upper=1> theta_posterior;

array[trials,agents] int<lower=0, upper = trials> prior_preds;
array[trials,agents] int<lower=0, upper = trials> posterior_preds;

array[trials, agents] real log_lik;

thetaM_prior = normal_rng(0,1);
thetaSD_prior = normal_lb_rng(0,0.3,0);
theta_prior = inv_logit(normal_rng(thetaM_prior, thetaSD_prior));
theta_posterior = inv_logit(normal_rng(thetaM, thetaSD));

for (i in 1:agents){

prior_preds[,i] = binomial_rng(trials, inv_logit(theta_prior));
posterior_preds[,i] = binomial_rng(trials, inv_logit(theta[i]));

for (t in 1:trials){

log_lik[t,i] = bernoulli_logit_lpmf(h[t,i] | theta[i]);

}
}
}
")

mod_random <- cmdstan_model(stan_file, cpp_options = list(stan_threads = TRUE), pedantic = TRUE

d\$fold <- kfold_split_grouped(K = 10, x = d\$agent)

for(k in unique(d\$fold)){
# Training set for k
d_train <- d %>% filter(fold != k)

## Create the data
d1_train <- d_train %>%
subset(select=c(agent, choice)) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = agent, values_from = choice)

data_train <- list(
trials = trials-1,
agents = agents,
h = as.matrix(d1_train[2:120,2:(length(unique(d_train\$agent)))]))

# Held out set for k

d_test <- d %>%
filter(fold == k)
d1_test <- d_test %>%
subset(select=c(agent, choice)) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = agent, values_from = choice)

test_n <- length(unique(d_test\$agent))

ls_test <- list(
trials = trials-1,
agents = test_n,
h = as.matrix(d1_test[2:120,2:(test_n+1)]))

# Train the models
fit_random_train <- mod_random\$sample(
data = data,
seed = 123,
chains = 2,
parallel_chains = 2,
threads_per_chain = 2,
iter_warmup = 1000,
iter_sampling = 1000,
refresh = 1000,
max_treedepth = 20,
adapt_delta = 0.99
)

# Generated quantities based on the posterior from the training set
# and the data from the held out set
gq_random_test <- mod_random\$generate_quantities(
fit_random_train,
data=ls_test,
threads_per_chain = 2)

# Extract log likelihood which represents
# the pointwise predictive density
log_pd_random_kfold[, d\$fold == k] <- extract_log_lik(gq_random_test)
}

elpd_pupil_kfold <- elpd(log_pd_random_kfold)
``````
1 Like

Anything computed in the generated quantities block will be available using the `draws` method:

By default that will keep the chains separate, but if you need to combine all the chains then you can also specify `format = "matrix"`:

``````gq_random_test\$draws("log_lik", format = "matrix")
``````
4 Likes

thanks! this solved it!

2 Likes