I am trying to fit a Hierarchical Media Mix Model, according to the specifications provided by Google White Paper. The Hierarchy is at the geographic level, with DMAs. I have tried to define the model as below with both media and controls transformed between 0-1.
Stan…
functions {
// the Hill function
real Hill(real t, real ec, real slope) {
return 1 / (1 + (t / ec)^(-slope));
}
// the adstock transformation with a vector of weights
real Adstock(row_vector t, row_vector weights) {
return dot_product(t, weights) / sum(weights);
}
}
data {
// the total number of observations
int<lower=1> N;
//the number of DMAs
int<lower=1> J;
//vector of group indeces
int<lower=1,upper=J> dma[N];
// the vector of sales
real<lower=0> Y[N];
// the maximum duration of lag effect, in weeks
int<lower=1> max_lag;
// the number of media channels
int<lower=1> num_media;
// a vector of 0 to max_lag - 1
row_vector[max_lag] lag_vec;
// 3D array of media variables
row_vector[max_lag] X_media[N, num_media];
// the number of other control variables
int<lower=1> num_ctrl;
// a matrix of control variables
row_vector[num_ctrl] X_ctrl[N];
}
parameters {
// residual variance
real<lower=0> noise_var;
// the population level intercept
real tau;
// the population coefficients for media variables
vector<lower=0>[num_media] beta_medias;
// popultaion level coffecients for other control variables
vector[num_ctrl] gamma_ctrl;
// group level coffecients for intercept
vector[J] theta;
// the group coefficients for media variables
vector[num_media] beta[J];
// the group coffecients for control
vector[num_ctrl] gamma[J];
// the standard deviation of intercept
real<lower=0>sigma_tau;
// the standard deviation of betas
real<lower=0>sigma_beta;
// the retention rate and delay parameter for the adstock transformation of
// each media
vector<lower=0,upper=1>[num_media] retain_rate;
vector<lower=0,upper=max_lag-1>[num_media] delay;
// ec50 and slope for Hill function of each media
vector<lower=0,upper=1>[num_media] ec;
vector<lower=0>[num_media] slope;
}
transformed parameters {
// a vector of the mean response
real mu[N];
// the cumulative media effect after adstock
real cum_effect;
// the cumulative media effect after adstock, and then Hill transformation
row_vector[num_media] cum_effects_hill[N];
row_vector[max_lag] lag_weights;
for (nn in 1:N) {
for (media in 1 : num_media) {
for (lag in 1 : max_lag) {
lag_weights[lag] = pow(retain_rate[media], (lag - 1 - delay[media]) ^ 2);
}
cum_effect = Adstock(X_media[nn, media], lag_weights);
cum_effects_hill[nn, media] = Hill(cum_effect, ec[media], slope[media]);
}
mu[nn] = theta[dma[nn]] +
dot_product(cum_effects_hill[nn], beta[dma[nn]]) +
dot_product(X_ctrl[nn], gamma[dma[nn]]);
}
}
model {
retain_rate ~ uniform(0,1);
delay ~ uniform(0, 3);
slope ~ gamma(1.5,0.5);
ec ~ uniform(0,1);
tau ~ normal(0,1);
sigma_tau ~ cauchy(0,2.5);
sigma_beta ~ gamma(2,0.1);
theta ~ normal(0,sigma_tau);
beta_medias ~ normal(0,1);
for (j in 1:J){
beta[j] ~ normal(beta_medias,sigma_beta);
}
gamma_ctrl ~ normal(0,1);
for (j in 1:J) {
gamma[j] ~ normal(gamma_ctrl,sigma_beta);
}
noise_var ~ inv_gamma(0.05, 0.05 * 0.01);
Y ~ normal(mu, sqrt(noise_var));
}
generated quantities {
vector[N] Y_new;
for (n in 1:N) {Y_new[n] = normal_rng(mu[n], sqrt(noise_var));
}
}
I am fairly new to specifying mutillevel stan model, so can’t seem to debug the program when I get the following error; Chain 1: Log probability evaluates to log(0), i.e. negative infinity.
Chain 1: Stan can’t start sampling from this initial value.
Chain 1:
Chain 1: Initialization between (-2, 2) failed after 100 attempts.