Hi Everyone,
I have some toy data below that simulates having 2 groups with heteroskedasticity. The variance of Y increases as the independent variable X increases:
I was looking to model the variances as mentioned here:
https://jrnold.github.io/bayesian_notes/heteroskedasticity.html
"It is common to model the log-transformation of the scale or variance to transform it to R, log \sigma_i = Normal(Z_i\gamma,\omega) where Z_i are covariates used to the model the variance, which may or may not be the same as X_i"
My attempt at modeling the variances is in the transformed parameters block.
-
Can you advise if the modeling of the variance in the transformed parameters block is correct?
-
When I look at the ppc, the median for group A is not near the observed. Any thoughts on iimproving the model to better capture group A’s median?
- Parameter plots are below and there are high correlations between the group intercepts and the slopes. Is this something to be concerned about?
Thank you in advance!
r code:
library(dplyr)
library(ggplot2)
library(rstan)
library(bayesplot)
library(gridExtra)
set.seed(45)
n = 2000
group= sample(c("A","B"),n,replace= TRUE)
x= sample(seq(1,30,.25),n,replace = TRUE)
y = rep(0,n)
table(group)
for(i in 1:n){
#i=1
if(x[i]<10){
a = 3
b=8
}else if(x[i]>10 & x[i]<20){
a=7
b=13
}else{
a=12
b=19
}
#a=5
#b=5
y[i] = ifelse(group[i]=="A",.8,.7)*x[i]+
ifelse(group[i]=="A",8,-2)+
rnorm(1,0,ifelse(group[i]=="A",a,b))
}
d = data.frame(x=x, y=y,group=group)
#plot
ggplot(d, aes(x=x, y = y,color = group))+geom_point()+geom_smooth(method ="lm")+facet_wrap(~group)
data = data.frame(x =x , y = y, group = group)
#stan input
input = list(X =data$x , Y= data$y,N= length(data$x), N_group=2, group = as.numeric(data$group))
fit = stan(file = "mystanfile.stan",data=input,seed=233,chains = 2)
fit
params = rstan::extract(fit)
y = data$y
yrep = params$y_ppc
grid.arrange(
ppc_stat_grouped(y, yrep, group = data$group, stat = "median"),
ppc_stat_grouped(y, yrep, group = data$group, stat = "mean")
)
post = as.array(fit)
mcmc_pairs( post, np =nuts_params(fit) ,
pars = c( #"mu_alpha[1]","mu_alpha[2]",
#"mu_beta[1]","mu_beta[2]",
# "sig_alpha[1]","sig_alpha[2]", "sig_beta[1]","sig_beta[2]","sig[1]","sig[2]"
"sig_beta","sig_group[1]","sig_group[2]"
),
off_diag_args = list(size = 0.7))
stan code:
data{
int<lower=1> N;
vector[N] X;
int N_group;
int<lower=1, upper =N_group> group[N];
real Y[N];
}
parameters{
vector[N_group] mu_alpha;
vector[N_group] mu_beta;
vector[N_group] sig_group;
vector[N_group] sig_beta;
}
transformed parameters{
vector[N] log_sig;
vector<lower=0>[N] sig;
for(n in 1:N){
log_sig[n] = sig_beta[group[n]]*X[n]+sig_group[group[n]];
sig[n] = exp(log_sig[n]);
}
}
model{
mu_alpha~normal(0,5);
mu_beta~normal(0,5);
sig~normal(0,20);
log_sig~normal(0,20);
//sig_group ~normal(0,20);
sig_beta ~normal(0,20);
sig_group ~normal(0,20);
for(n in 1:N){
Y[n] ~ normal(mu_alpha[group[n]]+mu_beta[group[n]]*X[n], sig[n] );
}
}
generated quantities{
vector[N] y_ppc;
for(n in 1:N){
y_ppc[n] = normal_rng( mu_alpha[group[n]]+mu_beta[group[n]]*X[n],sig[n] ) ;
}
}