Yes, sure.
ordered_groups.csv (40.5 KB) This assigns individual observations to their cluster
individual_level_variables.csv (191.0 KB) These are the individual-level variables
school_level_variables.csv (5.0 KB) These are the school-level variables
This is the R script to run the model:
# --- Setup ------------------------------------------------------------------
# NOTE(review): setwd() and rm(list = ls()) in scripts are discouraged
# (prefer an RStudio project / the `here` package); kept because this is a
# self-contained one-off analysis script.
setwd("...")
rm(list = ls())

library(StanHeaders)
library(rstan)
library(rstanarm)
# Bug fix: options() was previously passed as the SECOND ARGUMENT of
# library(rstanarm, ...), where library() silently misinterprets it, so the
# parallel-cores option never took effect. It must be a standalone call.
options(mc.cores = parallel::detectCores())

# Semicolon-separated input tables (European CSV convention).
individual_level_variables <- read.csv("individual_level_variables.csv", sep = ";")
school_level_variables <- read.csv("school_level_variables.csv", sep = ";")
# --- Assemble the data list passed to Stan ----------------------------------
N <- 5135  # num individuals
K <- 5     # num individual-level predictors
J <- 121   # num groups (schools)
L <- 12    # num group-level predictors

y <- individual_level_variables[, 8]    # outcome
w <- individual_level_variables[, 2]    # treatment assignment
x <- individual_level_variables[, 3:7]  # individual-level predictors
u <- school_level_variables[, 2:13]     # school-level predictors

ordered_groups <- read.csv("ordered_groups.csv", sep = ";")
# Vectorized replacement for the original element-by-element copy loop:
# column 2 holds each observation's group (cluster) id.
jj <- ordered_groups[, 2]

stan_data <- list(N = N, K = K, J = J, L = L, x = as.matrix(x), y = y,
                  g = jj, u = as.matrix(u), w = w)
# Fit the multilevel model; rstan also streams the draws to CSV via
# sample_file so the run can be recovered from disk afterwards.
fit_mod1 <- stan(
  file = "Cholensky Decomposition.stan",
  data = stan_data,
  iter = 200,
  chains = 4,
  warmup = 100,
  seed = 69,
  sample_file = file.path("mysimulation.csv"),
  control = list(adapt_delta = 0.95, max_treedepth = 12),
  cores = 8
)

# rstan writes one CSV per chain, suffixed _1 ... _4; rebuild the fit from them.
csvfiles <- paste0("mysimulation_", 1:4, ".csv")
fit_mod1 <- read_stan_csv(csvfiles)

# Summarise the treatment-effect quantities and the school intercepts.
param <- c("effect", "effect_fs", "effect_qte25", "effect_qte50",
           "effect_qte75", "alpha")
print(fit_mod1, pars = param, probs = c(0.1, 0.5, 0.9), digits = 3)
And lastly, this is the Stan model:
functions {
  /**
   * Linearly interpolated sample quantile of a vector:
   * q = (1 - h) * x_(lo) + h * x_(hi), with fractional index 1 + (n - 1) * p.
   * Assumes num_elements(x) >= 2 (hi = lo + 1 is read unconditionally).
   *
   * @param x vector of values
   * @param p quantile level in [0, 1]
   * @return interpolated p-th sample quantile of x
   */
  real quantile(vector x, real p){
    int n;                      // length of vector x
    real index;                 // fractional position of p in the sorted vector
    int lo;                     // lower integer bracket of the index
    int hi;                     // upper integer bracket of the index
    real h;                     // interpolation weight between lo and hi
    real qs;                    // weighted average of xs[lo] and xs[hi]
    vector[num_elements(x)] xs; // sorted copy — sort once instead of twice
    n = num_elements(x);
    index = 1 + (n - 1)*p;
    // Stan has no real-to-int cast, so compute floor(index) by counting up.
    lo = 1;
    while ((lo + 1) < index)
      lo = lo + 1;
    hi = lo + 1;
    h = index - lo;
    xs = sort_asc(x);
    qs = (1 - h)*xs[lo] + h*xs[hi];
    return qs;
  }
}
// Observed data: individual outcomes/predictors, school predictors, and the
// mapping of each individual to their school (group).
data {
int<lower=0> N; // num individuals
int<lower=1> K; // num individual-level predictors
int<lower=1> J; // num groups (schools)
int<lower=1> L; // num group-level predictors
int<lower=1,upper=J> g[N]; // group (school) index for each individual
matrix[N, K] x; // individual-level predictors
matrix[J,L] u; // group-level predictors
vector[N] y; // outcomes
vector[N] w; // treatment assigned (assumed binary 0/1 — TODO confirm)
}
transformed data{
// Transpose once here so the model/transformed-parameters blocks can take
// cheap column slices u_transp[, j] instead of row slices of u.
matrix[L,J] u_transp; // group predictors transposed
u_transp = u';
}
parameters {
real alpha[J]; // school-specific intercepts
matrix[K, J] z; // standard-normal basis for the non-centered
// transformation of the beta's
matrix[K,L] gamma; // school-level coefficients on u
// Upper bound pi/2 so that tan(tau_unif) (transformed parameters) yields a
// half-Cauchy-distributed scale.
vector<lower=0,upper=pi()/2>[K] tau_unif; // prior scale
vector[L] eta; // school-level coefficients for the alpha prior mean
real effect; // super-population average treatment effect
// NOTE(review): the "2" in sigma2_* suggests a variance, but these enter
// normal(m, d) in the model block, where the second argument is an SD —
// confirm which is intended.
real<lower=0> sigma2_t; // residual SD for the treated
real<lower=0> sigma2_c; // residual SD for the control
cholesky_factor_corr[K] L_Omega; // Cholesky factor of the beta correlation
}
transformed parameters{
vector<lower=0>[K] tau = tan(tau_unif); // half-Cauchy scales via tan of a uniform
matrix[K,J] beta; // school-specific coefficient vectors (one column per school)
// Here we define beta; note that since it is the transformation of a
// multivariate normal, we do not need to give a prior to beta, but rather
// to z and to the variance component (tau) and the correlation component
// of the Cholesky decomposition (L_Omega).
beta = gamma * u_transp + diag_pre_multiply(tau, L_Omega) * z;
}
model {
  vector[J] c;  // school-level linear predictor for the alpha prior mean
  vector[N] m;  // per-observation mean
  vector[N] d;  // per-observation residual scale (treated vs control)

  // Priors for the non-centered parameterization of beta. tau_unif is
  // already constrained to (0, pi/2), so the uniform statement only adds a
  // constant to the target; kept for readability.
  tau_unif ~ uniform(0,pi()/2);
  L_Omega ~ lkj_corr_cholesky(2);
  to_vector(z) ~ std_normal();

  // Priors on the treatment effect and residual scales.
  // NOTE(review): the scale is (9.11134)^2 — squaring a value that looks
  // like an SD; confirm this is the intended prior scale.
  effect ~ normal(0, (9.11134)^2);
  sigma2_c ~ inv_gamma(1, 0.01);
  sigma2_t ~ inv_gamma(1, 0.01);

  // Vectorized replacement of the per-school loop
  // (c[j] = eta' * u_transp[, j] for all j is exactly u * eta).
  c = u * eta;
  alpha ~ normal(c,(9.11134)^2);

  // Likelihood mean: school intercept + school-specific slopes + treatment shift.
  for (n in 1:N) {
    m[n] = alpha[g[n]] + x[n, ] * beta[, g[n]] + effect*w[n];
  }
  // Treated units get sigma2_t, controls sigma2_c (w assumed 0/1);
  // scalar-vector subtraction replaces the former rep_vector(1, N) helper.
  d = sigma2_t*w + sigma2_c*(1 - w);
  y ~ normal(m, d);
}
// Impute each unit's missing potential outcome from the fitted model, then
// derive finite-sample average and quantile treatment effects.
generated quantities{
real effect_fs; // finite-sample average treatment effect
real effect_qte25; // quantile treatment effect at p = 0.25
real effect_qte50; // quantile treatment effect at p = 0.50
real effect_qte75; // quantile treatment effect at p = 0.75
real y0[N]; // potential outcome if W = 0
real y1[N]; // potential outcome if W = 1
real effect_unit[N]; // unit-level treatment effect
for(n in 1:N){
// Model-implied means under control and under treatment for unit n.
real mu_c = alpha[g[n]] + x[n, ] * beta[, g[n]];
real mu_t = alpha[g[n]] + x[n, ] * beta[, g[n]] + effect;
// The observed outcome fills the factual arm; the counterfactual arm is
// drawn from the posterior predictive (sigma2_* used as SDs here, matching
// the likelihood in the model block).
if(w[n] == 1){
y0[n] = normal_rng(mu_c , sigma2_c);
y1[n] = y[n];
}else{
y0[n] = y[n];
y1[n] = normal_rng(mu_t, sigma2_t);
}
effect_unit[n] = y1[n] - y0[n];
}
effect_fs = mean(effect_unit);
// Quantile treatment effects via the user-defined quantile() function.
effect_qte25 = quantile(to_vector(y1), 0.25) - quantile(to_vector(y0), 0.25);
effect_qte50 = quantile(to_vector(y1), 0.50) - quantile(to_vector(y0), 0.50);
effect_qte75 = quantile(to_vector(y1), 0.75) - quantile(to_vector(y0), 0.75);
}
Thanks a lot for your interest in the issue and all your help :)
Update: there might be an issue in the ordered-groups file, as some clusters do not have any observations; I am working to fix it ASAP.