Hello,
I am trying to use a bayesian approach to understand responses/contributions of 5 marketing expenses (Radio, OOH, TV, Online, Incentives) with regard to a business metric (Y).
Each marketing expense generates its own response characterized by 3 variables: beta (response strength), retain_rate (response duration), and ec (response saturation).
I have been following the model from the following paper (https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46001.pdf)
My Stan model is as follows:
functions {
// Hill saturation transform: maps t >= 0 into [0, 1).
// ec is the half-saturation point (returns 0.5 when t == ec);
// slope controls the steepness of the curve. Called below with
// slope = 1, which reduces to t / (ec + t).
real Hill(real t, real ec, real slope) {
return t^(slope) / (ec^(slope) + t^(slope));
}
// Normalized adstock: weighted average of the max_lag spend values
// in row t, using the caller-supplied carry-over weights.
real Adstock(row_vector t, row_vector weights) {
return dot_product(t, weights) / sum(weights);
}
}
data {
int<lower=1> N; // number of observations (time points)
real<lower=0> Y[N]; // observed business metric (normalized to [0, 1])
int<lower=1> max_lag; // adstock window length (number of lags kept)
// Per-channel spend: row nn holds the max_lag spend values feeding
// observation nn. Assumes index 1 is the current period and higher
// indices are older periods — TODO confirm against the Python code
// that builds these rows.
row_vector[max_lag] x_tv[N];
row_vector[max_lag] x_radio[N];
row_vector[max_lag] x_ooh[N];
row_vector[max_lag] x_online[N];
row_vector[max_lag] x_incentives[N];
real<lower=0> x_nb_users[N]; // control regressor (number of users)
}
parameters {
// Per-channel media parameters (one triple per channel):
//   beta_*        response strength; the <lower=0> constraint turns the
//                 normal(0,1) prior in the model block into a half-normal
//   ec_*          Hill half-saturation point, in (0,1) since all inputs
//                 are normalized to [0,1]
//   retain_rate_* adstock carry-over rate (response duration)
real<lower=0> beta_tv;
real<lower=0,upper=1> ec_tv;
real<lower=0,upper=1> retain_rate_tv;
real<lower=0> beta_radio;
real<lower=0,upper=1> ec_radio;
real<lower=0,upper=1> retain_rate_radio;
real<lower=0> beta_ooh;
real<lower=0,upper=1> ec_ooh;
real<lower=0,upper=1> retain_rate_ooh;
real<lower=0> beta_online;
real<lower=0,upper=1> ec_online;
real<lower=0,upper=1> retain_rate_online;
real<lower=0> beta_incentives;
real<lower=0,upper=1> ec_incentives;
real<lower=0,upper=1> retain_rate_incentives;
// Control coefficient. NOTE(review): upper=0.5 truncates the normal(0,1)
// prior and caps the control's contribution — confirm this bound is
// intentional.
real<lower=0,upper=0.5> gamma_ctrl_nb_users;
real<lower=0> tau; // intercept / baseline level of Y
real<lower=0> noise_var; // observation noise VARIANCE (sd = sqrt(noise_var))
}
transformed parameters {
real mu[N]; // expected response for each observation
real cum_effect; // scratch: adstocked spend for one channel/observation
// Adstock weight vectors, one row per channel (1=tv, 2=radio, 3=ooh,
// 4=online, 5=incentives). They depend only on retain_rate_*, not on
// the observation index, so they are computed ONCE here instead of
// being rebuilt inside the observation loop (the original recomputed
// each of the five weight vectors N times per leapfrog step).
row_vector[max_lag] lag_weights[5];
row_vector[5] cum_effects_hill[N]; // saturated adstocked media effects
row_vector[5] beta_medias; // channel coefficients stacked for dot_product
row_vector[1] cum_effects_linear[N]; // control regressors per observation
row_vector[1] gamma_ctrl; // control coefficients
beta_medias[1] = beta_tv;
beta_medias[2] = beta_radio;
beta_medias[3] = beta_ooh;
beta_medias[4] = beta_online;
beta_medias[5] = beta_incentives;
gamma_ctrl[1] = gamma_ctrl_nb_users;
// Carry-over weights: retain_rate ^ ((lag - 1)^2) — presumably the
// paper's delayed-adstock form with the peak at the first lag; TODO
// confirm against the referenced paper's delay parameterization.
for (lag in 1 : max_lag) {
lag_weights[1, lag] = pow(retain_rate_tv, (lag - 1) ^ 2);
lag_weights[2, lag] = pow(retain_rate_radio, (lag - 1) ^ 2);
lag_weights[3, lag] = pow(retain_rate_ooh, (lag - 1) ^ 2);
lag_weights[4, lag] = pow(retain_rate_online, (lag - 1) ^ 2);
lag_weights[5, lag] = pow(retain_rate_incentives, (lag - 1) ^ 2);
}
for (nn in 1:N) {
// Per channel: carry-over (Adstock) then saturation (Hill, slope fixed at 1).
cum_effect = Adstock(x_tv[nn], lag_weights[1]);
cum_effects_hill[nn, 1] = Hill(cum_effect, ec_tv, 1);
cum_effect = Adstock(x_radio[nn], lag_weights[2]);
cum_effects_hill[nn, 2] = Hill(cum_effect, ec_radio, 1);
cum_effect = Adstock(x_ooh[nn], lag_weights[3]);
cum_effects_hill[nn, 3] = Hill(cum_effect, ec_ooh, 1);
cum_effect = Adstock(x_online[nn], lag_weights[4]);
cum_effects_hill[nn, 4] = Hill(cum_effect, ec_online, 1);
cum_effect = Adstock(x_incentives[nn], lag_weights[5]);
cum_effects_hill[nn, 5] = Hill(cum_effect, ec_incentives, 1);
cum_effects_linear[nn, 1] = x_nb_users[nn];
// Linear predictor: intercept + saturated media effects + control effects.
mu[nn] = tau + dot_product(cum_effects_hill[nn], beta_medias) + dot_product(cum_effects_linear[nn], gamma_ctrl);
}
}
model {
// Priors. beta_* are effectively half-normal(0,1) because of their
// <lower=0> constraint; ec_* get a weakly-informative Beta(2,2)
// centered at 0.5.
// NOTE(review): the uniform(0,1) statements on retain_rate_* are
// redundant no-ops — the <lower=0,upper=1> constraint already implies
// a uniform prior over (0,1). Harmless, but they can be dropped.
beta_tv ~ normal(0,1);
ec_tv ~ beta(2, 2);
retain_rate_tv ~ uniform(0,1);
beta_radio ~ normal(0,1);
ec_radio ~ beta(2, 2);
retain_rate_radio ~ uniform(0,1);
beta_ooh ~ normal(0,1);
ec_ooh ~ beta(2, 2);
retain_rate_ooh ~ uniform(0,1);
beta_online ~ normal(0,1);
ec_online ~ beta(2, 2);
retain_rate_online ~ uniform(0,1);
beta_incentives ~ normal(0,1);
ec_incentives ~ beta(2, 2);
retain_rate_incentives ~ uniform(0,1);
gamma_ctrl_nb_users ~ normal(0,1);
tau ~ normal(0,1);
// Vague inverse-gamma prior on the noise VARIANCE (shape 0.01,
// scale 0.0001). NOTE(review): this is extremely heavy-tailed; with Y
// normalized to [0,1], a half-normal or half-Cauchy prior on the sd is
// a commonly recommended, better-behaved alternative — worth checking
// as one cause of prior-dominated posteriors.
noise_var ~ inv_gamma(0.01, 0.01 * 0.01);
// Likelihood: Gaussian observation model around the linear predictor mu
// built in transformed parameters.
Y ~ normal(mu, sqrt(noise_var));
}
I run the sampling with 4 chains, and 4000 iterations (2000 of warmup).
My N is 80. All my input data are normalized between 0 and 1.
# Compile the Stan program (triggers C++ compilation) and draw posterior
# samples with NUTS; `stan_model`, `mmm_data`, `iter`, `chains`, `seed`
# and `n_jobs` are defined elsewhere.
sm = pystan.StanModel(model_code=stan_model)
fit = sm.sampling(
data=mmm_data, # dict of model inputs, e.g. {"x_radio": [...], "Y": [...], ...}
iter=iter, # 4000 total iterations per chain
chains=chains, # 4
seed=seed, # 0
n_jobs=n_jobs, # -1 (presumably one process per chain — confirm pystan docs)
warmup=int(iter / 2) # first 2000 iterations per chain discarded as warmup
)
Up to that point, everything runs fine.
However, my problem is that all my posterior distributions look almost the same as the priors (with the exception of one marketing expense). It is like the model didn’t learn much.
I tried changing parameters such as the number of iterations, but it doesn't seem to change the results much.
What could be the cause? Could it be the way I define the residual error (noise_var)?
I know it may be difficult to advise without further information but any suggestion would be helpful :).