Hi All,
I am trying to predict whether the introduction of a paywall changes the anger level of comments on, say, a youtube chanel. We have already run a sentiment analysis and have data of the format:
y = # of angry comments
n = # of total comments
delta = 0 if post was published before paywall, 1 if post was poublished after
distPre = weeks before paywall (introduction is 0)
distPost = weeks after paywall (introduction is 0)
I am attempting to model this using a binomial_logit model and predicting the difference at the introduction of the paywall. I would like to account for possible pre- and post-treatment trends and thus included two variables that indicate how many weeks the current post is away from the introduction of the paywall. I then predict at the point where both distances are 0 and compare the predictive densities. Is this a reasonable approach for causal inference?
I’d be very greatful for any help/ suggestions (also other models?).
Thanks!
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
pewdat <- read.csv("./data.csv")
pewdat$post <- ifelse(as.Date(pewdat$created_at) > as.Date("2016-02-10"), 1, 0)
y <- pewdat$anger
n <- pewdat$total
T <- length(y)
delta <- pewdat$post
#distPost <- c(rep(0, T-sum(delta)),(1:sum(delta))/sum(delta))
#distPre <- c(((T-sum(delta)):1)/(T-sum(delta)), rep(0, sum(delta)))
distPost <- c(rep(0, T-sum(delta)),(1:sum(delta)))
distPre <- c(((T-sum(delta)):1), rep(0, sum(delta)))
mod <- stan(file = "./logbinom2.stan", thin = 5, iter = 20000)
predPre <- as.data.frame(mod, pars = "y_hat_pre")
predPost <- as.data.frame(mod, pars = "y_hat_post")
plot(density(as.matrix(predPre)), col = 'red', xlim = c(0, 1))
lines(density(as.matrix(predPost)), col = 'blue')
legend('topleft', legend = c("pre", "post"), col = c('red', 'blue'), lty = 1)
library(shinystan)
launch_shinystan(mod)
data {
int<lower=1> T;
// number of data points
int y[T];
// success
int n[T];
// total
vector<lower=0, upper=1>[T] delta;
// treatment
vector[T] distPre;
vector[T] distPost;
}
parameters {
real beta;
real alpha;
real gamma1;
real gamma2;
}
model {
beta ~ normal(0, 100);
alpha ~ normal(0, 100);
gamma1 ~ normal(0, 100);
gamma2 ~ normal(0, 100);
y ~ binomial_logit(n, alpha + delta * beta + distPre * gamma1 + distPost * gamma2);
}
generated quantities {
real y_hat_pre;
real y_hat_post;
y_hat_pre = inv_logit(alpha);
y_hat_post = inv_logit(alpha + beta);
}