Hi all, (as promised on the Stan Slack!)
I am having some trouble setting up reduce_sum
to work on a three-level occupancy model I am building. The aim of the model is to model the number of DNA sequences encountered of species i (out of a total number of DNA molecules sequenced) conditional on its occupancy at site j, in replicate k. The model looks something like the following, using the a = p \times \phi, b = (1 - p) \times \phi) parameterisation of the beta binomial:
This marginalises to:
Where Q_{ijk} is 2 if y_{ijk} > 0, 1 if y_{ijk} = 0 but there are detections at site j, and 0 if y_{ijk} = 0 for all replicates at a site.
The model runs fine when I set the likelihood to run single-threaded in the model block, but I am running into trouble getting this to run with reduce sum, which is desireable as even the test dataset I am working with has 38k data points and takes a day to fit (the final dataset is close to 120k). The sampler fails at the initialisation step, e.g.:
Chain 4 Rejecting initial value:
Chain 4 Log probability evaluates to log(0), i.e. negative infinity.
Chain 4 Stan can't start sampling from this initial value.
Chain 4 Rejecting initial value:
Chain 4 Error evaluating the log probability at the initial value.
', line 7, column 4 to line 10, column 8) (in '/var/folders/78/j8s8yd9n3gl0z0gn5y1bzzhm0000gn/T/RtmpQ21eQy/model-1598a6f52e08b.stan', line 36, column 8 to line 37, column 113) (in '/var/folders/78/j8s8yd9n3gl0z0gn5y1bzzhm0000gn/T/RtmpQ21eQy/model-1598a6f52e08b.stan', line 133, column 2 to column 75)gn5y1bzzhm0000gn/T/RtmpQ21eQy/model-1598a6f52e08b.stan
Chain 4 Exception: Exception: Exception: Exception: beta_binomial_lpmf: Second prior sample size parameter is 0, but must be positive finite! (in '/var/folders/78/j8s8yd9n3gl0z0gn5y1bzzhm0000gn/T/RtmpQ21eQy/model-1598a6f52e08b.stan', line 3, column 4 to column 95) (in '/var/folders/78/j8s8yd9n3gl0z0gn5y1bzzhm0000gn/T/RtmpQ21eQy/model-1598a6f52e08b.stan', line 7, column 4 to line 10, column 8) (in '/var/folders/78/j8s8yd9n3gl0z0gn5y1bzzhm0000gn/T/RtmpQ21eQy/model-1598a6f52e08b.stan', line 36, column 8 to line 37, column 113) (in '/var/folders/78/j8s8yd9n3gl0z0gn5y1bzzhm0000gn/T/RtmpQ21eQy/model-1598a6f52e08b.stan', line 133, column 2 to column 75)
I am not familiar enough with the reduce_sum interface to figure out what’s going wrong - anyone have any insight?
Model code is below (with thanks to occStan by @Richard_Erickson from which I borrowed some of the code for the likelihood):
functions {
real sample_detection_lpmf(int y, int N, real p, real theta, real phi){
return bernoulli_logit_lpmf(1 | theta) + beta_binomial_lpmf(y | N, p * phi, (1 - p) * phi);
}
real sample_nondetection_lpmf(int y, int N, real p, real theta, real phi){
return log_sum_exp(
sample_detection_lpmf(y | N, p, theta, phi),
bernoulli_logit_lpmf(0 | theta)
);
}
real site_nondetection_lpmf(int y, int N, real p, real theta, real psi, real phi){
return log_sum_exp(
bernoulli_logit_lpmf(1 | psi) + sample_nondetection_lpmf(y | N, p, theta, phi),
bernoulli_logit_lpmf(0 | psi)
);
}
real partial_sum_lpmf(array[,] int Y,
int start,
int end,
matrix p,
matrix theta,
matrix psi,
vector phi){
int N = size(Y);
real temp_target;
for(i in 1:N){
if(Y[i,2] == 2) {
temp_target += bernoulli_logit_lpmf(1 | psi[Y[i,5], Y[i,4]]) +
sample_detection_lpmf(Y[i,1] | Y[i,3], p[Y[i,6], Y[i,4]], theta[Y[i,6], Y[i,4]], phi[Y[i,4]]);
} if (Y[i,2] == 1) {
temp_target += bernoulli_logit_lpmf(1 | psi[Y[i,5], Y[i,4]]) +
sample_nondetection_lpmf(Y[i,1] | Y[i,3], p[Y[i,6], Y[i,4]], theta[Y[i,6], Y[i,4]], phi[Y[i,4]]);
} if (Y[i,2] == 0) {
temp_target += site_nondetection_lpmf(Y[i,1] | Y[i,3], p[Y[i,6], Y[i,4]], theta[Y[i,6], Y[i,4]], psi[Y[i,5], Y[i,4]], phi[Y[i,4]]);
}
}
return temp_target;
}
}
data {
int N; // Number of samples
int N_species; // Number of species
int N_sites; // Number of sites
int N_replicates; // Number of replicates
array[N, 6] int Y; // Raw data [Y, Q, Total, Species, Site, Replicate]
// Site occupancy predictors
int N_pred_occ; // Number of predictors
matrix[N_sites, N_pred_occ] X_occ; // Predictor matrix for occupancy
// array[N_sites] vector[N_pred_occ] X_occ; // Predictor matrix for occupancy
// Replicate occupancy predictors
int N_pred_rep;
matrix[N_replicates, N_pred_rep] X_rep;
// Read abundance predictors
int N_pred_reads;
matrix[N_replicates, N_pred_reads] X_reads;
// Parallelisation options
int<lower=1> grainsize;
}
parameters {
// Site occupancy parameters
vector[N_pred_occ] b_occ;
cholesky_factor_corr[N_pred_occ] L_Rho_occ;
vector<lower=0>[N_pred_occ] sigmas_occ;
matrix[N_pred_occ, N_species] z_occ;
// Sample occupancy parameters
vector[N_pred_rep] b_rep;
cholesky_factor_corr[N_pred_rep] L_Rho_rep;
vector<lower=0>[N_pred_rep] sigmas_rep;
matrix[N_pred_rep, N_species] z_rep;
// Read abundance parameters
real<lower=0> sigma_phi;
vector[N_species] z_phi;
real a_phi;
vector[N_pred_reads] b_reads;
cholesky_factor_corr[N_pred_reads] L_Rho_reads;
vector<lower=0>[N_pred_reads] sigmas_reads;
matrix[N_pred_reads, N_species] z_reads;
}
transformed parameters {
// Site occupancy parameters
matrix[N_pred_occ, N_species] b_occ_species;
b_occ_species = (diag_pre_multiply(sigmas_occ, L_Rho_occ) * z_occ);
// Sample occupancy parameters
matrix[N_pred_rep, N_species] b_rep_species;
b_rep_species = (diag_pre_multiply(sigmas_rep, L_Rho_rep) * z_rep);
// Read abundance parametters
matrix[N_pred_reads, N_species] b_reads_species;
b_reads_species = (diag_pre_multiply(sigmas_reads, L_Rho_reads) * z_reads);
}
model {
// Priors
// Site occupancy
target += normal_lpdf(b_occ | 0, 3);
target += lkj_corr_cholesky_lpdf(L_Rho_occ | 1);
target += exponential_lpdf(sigmas_occ | 1);
target += std_normal_lpdf(to_vector(z_occ));
// Sample occupancy
target += normal_lpdf(b_rep | 0, 3);
target += lkj_corr_cholesky_lpdf(L_Rho_rep | 1);
target += exponential_lpdf(sigmas_rep | 1);
target += std_normal_lpdf(to_vector(z_rep));
// Read abundance
target += exponential_lpdf(sigma_phi | 1);
target += std_normal_lpdf(z_phi);
target += normal_lpdf(a_phi | 0, 3);
target += normal_lpdf(b_reads | 0, 3);
target += lkj_corr_cholesky_lpdf(L_Rho_reads | 1);
target += exponential_lpdf(sigmas_reads | 1);
target += std_normal_lpdf(to_vector(z_reads));
matrix[N_replicates, N_species] p = inv_logit(rep_matrix(X_reads * b_reads, N_species) + X_reads * b_reads_species);
matrix[N_replicates, N_species] theta = rep_matrix(X_rep * b_rep, N_species) + X_rep * b_rep_species;
matrix[N_sites, N_species] psi = rep_matrix(X_occ * b_occ, N_species) + X_occ * b_occ_species;
vector[N_species] phi = inv_logit((z_phi + a_phi) * sigma_phi);
target += reduce_sum(partial_sum_lpmf, Y, grainsize, p, theta, psi, phi);
// single threaded likelihood
// for(i in 1:N){
// if(Y[i,2] == 2) {
// target += bernoulli_logit_lpmf(1 | psi[Y[i,5], Y[i,4]]) +
// sample_detection_lpmf(Y[i,1] | Y[i,3], p[Y[i,6], Y[i,4]], theta[Y[i,6], Y[i,4]], phi[Y[i,4]]);
// } if (Y[i,2] == 1) {
// target += bernoulli_logit_lpmf(1 | psi[Y[i,5], Y[i,4]]) +
// sample_nondetection_lpmf(Y[i,1] | Y[i,3], p[Y[i,6], Y[i,4]], theta[Y[i,6], Y[i,4]], phi[Y[i,4]]);
// } if (Y[i,2] == 0) {
// target += site_nondetection_lpmf(Y[i,1] | Y[i,3], p[Y[i,6], Y[i,4]], theta[Y[i,6], Y[i,4]], psi[Y[i,5], Y[i,4]], phi[Y[i,4]]);
// }
// }
}