I’m currently working on modeling the number of survey completes I get in any particular hour using a Poisson model. I have the following simple Poisson model, where there are a certain number of completes each hour of the day (hour
) and this amount shifts by sex
, age
, how long a survey has been collecting responses (survey_age
), and the price being paid for a survey response (price
).
data {
int N;
array[N] int<lower=0> y;
// age levels
int<lower=1> K;
array[N] int<lower=1, upper=K> age_idx;
// sex levels
int<lower=1> J;
array[N] int<lower=1, upper=J> sex_idx;
// Time of day
int<lower=1> M; // hours in the day
array[N] int<lower=1,upper=24> hour_idx;
vector[N] survey_age;
vector[N] price;
}
parameters {
vector[J] sex_coef;
vector[M] hour_coef;
vector[K] age_coef;
real sa_coef;
real price_coef;
}
transformed parameters {
vector<lower=0> [N] lambda = sex_coef[sex_idx] + age_coef[age_idx] + hour_coef[hour_idx] + sa_coef*survey_age + price_coef*price;
}
model {
sex_coef ~ normal(1, 2);
age_coef ~ normal(1, 2);
hour_coef ~ normal(1, 2);
sa_coef ~ normal(-0.5, 2);
price_coef ~ normal(1, 1);
y ~ poisson(lambda);
}
However, I’m getting issues with each chain like the following:
Chain 4 Exception: forum_example2_model_namespace::log_prob: lambda[sym1__] is -1.67585, but must be greater than or equal to 0
Finally, the sampling fails with the output:
Chain 4 Initialization between (-2, 2) failed after 100 attempts.
I have about 1.3 million observations in my real data so I’m not overly concerned about the priors, but I think the issue is that I keep getting prior draws that make the \lambda parameter less than 0. How should I ensure that the draws are such that \lambda is positive? I have the following fake data that recreates this problem:
library(cmdstanr)
sex <- c("Male", "Female", "Male", "Female", "Female", "Female", "Male",
"Male", "Male", "Female", "Male", "Female", "Male", "Male", "Female",
"Male", "Male", "Female", "Female", "Male", "Female", "Male",
"Male", "Male", "Male", "Female", "Male", "Female", "Female",
"Female")
age <- c("4564", "1834", "1834", "4564", "65", "3544", "3544", "3544",
"3544", "65", "1834", "4564", "1834", "3544", "65", "3544", "4564",
"4564", "1834", "1834", "1834", "3544", "65", "65", "3544", "65",
"4564", "1834", "3544", "3544")
hour <- c(9L, 16L, 14L, 10L, 12L, 6L, 0L, 13L, 1L, 3L, 15L, 19L, 18L,
4L, 0L, 22L, 1L, 7L, 21L, 23L, 11L, 17L, 8L, 6L, 20L, 5L, 7L,
1L, 2L, 13L)
survey_age <- c(51L, 33L, 7L, 39L, 41L, 58L, 81L, 113L, 13L, 4L, 18L, 25L,
12L, 31L, 40L, 35L, 8L, 3L, 2L, 3L, 48L, 21L, 61L, 10L, 51L,
57L, 19L, 4L, 4L, 12L)
price <- c(1.24, 1.61, 1.8, 1.07, 0.9, 0.67, 1.98, 0.97, 1.22, 0.89, 1.26,
1.01, 1.13, 1.27, 0.73, 1.17, 1.13, 0.95, 1.08, 0.94, 0.73, 1.04,
1.3, 1.75, 2.27, 0.75, 1.43, 1.45, 0.91, 0.87)
y <- c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 2L, 4L, 0L, 0L, 9L, 0L, 0L)
z.price <- scale(price)[, 1]
z.survey_age <- scale(survey_age)[, 1]
stan_dat <- list(sex_idx = as.integer(factor(sex)),
J = length(unique(sex)),
age_idx = as.integer(factor(age)),
K = length(unique(age)),
hour_idx = as.integer(factor(hour)),
M = length(unique(hour)),
price = z.price,
survey_age = z.survey_age,
N = length(y),
y = y)
poi_mod1 <- cmdstan_model('code/forum_example2.stan')
fit_poi1 <- poi_mod1$sample(data = stan_dat,
parallel_chains = 4,
chains = 4,
adapt_delta = 0.85)