Hi,
With the random data below, I am testing if the difference between a parameter and a hierarchical population paramter is 0 using bayes factors. H0.stan is the null, mean =0, hypothesis and H1.stan is the non-zero hypothesis. I am getting a bayes factor that looks too extreme and I was wondering where something may be off. Thank you.
r code:
library(ggplot2) library(dplyr) library(rstan) library(bridgesampling) set.seed(1234) N= 100 Y = c(rnorm(N,1,5), rnorm(N,1,5), rnorm(N,1,5)) group = rep(c(1:3),each =N) data = data.frame(Y=Y, group = group) ggplot(data, aes(Y,color = as.factor(group)))+geom_density() input = list( N= length(group), N_group = 3, group = group, Y=Y) fit = stan(file = "group_stan.stan",data=input,seed=233,chains = 1 ,control =list(adapt_delta = .99))
alpha <- as.data.frame(extract(fit)$alpha) mu_alpha = as.numeric(extract(fit)$mu_alpha) means = data.frame( y = c(as.numeric(alpha[,1]),as.numeric(alpha[,2]),as.numeric(alpha[,3]),mu_alpha), group = rep(c("1","2","3","population"),each= length(mu_alpha) ) ) alpha_diff = alpha - mu_alpha #diff means vs. population params=as.data.frame(extract(fit,pars=c("alpha","mu_alpha","sigma","sigma_alpha"))) summary(fit,pars =c("alpha","mu_alpha","sigma_alpha", "sigma","mu_sigma","sigma_sigma"))
#plots pairs =pairs(params) means_plot = ggplot(means, aes(y, group= group,color = group))+geom_density()
## diffs vs. population
fitH0 = stan(file = “H0.stan”,data=input,seed=233,chains = 1 ,
iter = 40000, warmup = 2000,
control =list(adapt_delta = .95))
input=list(N=nrow(alpha_diff),Y = alpha_diff[,1] )
fitH1 = stan(file = “H1.stan”,data=input,seed=233,chains = 1 ,
iter = 40000, warmup = 2000,
control =list(adapt_delta = .95))
#bayes factor for group 1 mean vs population mean H0 = bridge_sampler(fitH0, silent = TRUE) H1= bridge_sampler(fitH1, silent = TRUE) bf(H1, H0)
error_measures(H0)$percentage
error_measures(H1)$percentage
bf(H1, H0)
Estimated Bayes factor in favor of H1 over H0: 768116452207877328404080484444020800642220886680040406022880082282868844846644220600428.00000
error_measures(H0)$percentage
[1] “0.0202%”
error_measures(H1)$percentage
[1] “0.0232%”
group_stan.stan
data{ int<lower=1> N; int<lower=1> N_group; int<lower=1, upper =N_group> group[N]; real Y[N]; } parameters{ vector[N_group] alpha; real mu_alpha; real<lower=0> sigma_alpha; vector<lower=0>[N_group] sigma; real mu_sigma; real<lower=0> sigma_sigma; } model{ target += normal_lpdf(mu_alpha | 1, 1); target += normal_lpdf(sigma_alpha | 10, 1); target += normal_lpdf(alpha | mu_alpha, sigma_alpha); target += normal_lpdf(mu_sigma | 1, 1); target += normal_lpdf(sigma_sigma | 10, 1); target += normal_lpdf(sigma | mu_sigma, sigma_sigma); for(n in 1:N){ target += normal_lpdf(Y[n] | alpha[group[n]], sigma[group[n]]); } } generated quantities{ vector[N] y_ppc; for(n in 1:N){ y_ppc[n] = normal_rng(alpha[group[n]],sigma[group[n]]); } }
H1.stan
data{ int<lower=1> N; real Y[N]; } parameters{ real alpha; real sigma; } model{ target += normal_lpdf(alpha | 0,10); target += normal_lpdf(sigma |5,5); for(n in 1:N){ target += normal_lpdf(Y[n] | alpha, sigma); } }
H0.stan
data{ int<lower=1> N; real Y[N]; } parameters{ real sigma; } model{ target += normal_lpdf(sigma | 5, 5); for(n in 1:N){ target += normal_lpdf(Y[n] | 0, sigma); } }