I have an outcome, y, a continous predictor, x, and a categorical predictor “groups”. I am looking to build a hierarchical gaussian process regression so the output would be 5 nonlinear fits one for each group. I am trying to adapt the code from page 21 here:
# http://mc-stan.org/events/stancon2017-notebooks/stancon2017-trangucci-hierarchical-gps.pdf pg 21
In that code its a beta regression with more complicated parameters than my case. The code has
vector[??] sigma_group;
vars = n_comps * prop_var * tot_var;
for(i in 1:10)
sigma_group[i] = sqrt(vars[i+2]);
What should sigma_group be in this model? Can you offer any suggestions on how to get this working?
Thank you.
library(ggplot2)
library(rstan)
library(dplyr)
x = rep(seq(-10,10,1),each=5)
y = rep(0,length(x) )
for(i in 1:length(x)){
y[i] = sample( seq(-x[i],-x[i]+ ifelse(x[i]<0 ,
sample(seq(1,10,.1),1),
sample(seq(5,40,.1),1) ),1)
,1)
}
weights = sample( seq(1,20,1) ,length(x), replace = TRUE)
weights = weights/sum(weights)
groups = rep( letters[1:5], times =length(x)/5 )
x2 = rnorm(length(x))
lower_bound = -x
dat = data.frame(y = y ,x = x, x2 = x2, lower_bound = lower_bound, weights = weights, groups = groups)
library(ggplot2)
ggplot(data = dat, aes(x = x, y = y, color = groups))+geom_point( aes(size = weights))+
ylab("outcome")+xlab("predictor x1")+geom_line(data = dat, aes(x = x, y = lower_bound))
head(dat)
library(rstan)
standat <- list(
N = nrow(dat),
N_pred = nrow(dat),
y = dat$y,
x = dat$x,
x_pred = dat$x,
weights = dat$weights,
L = dat$lower_bound,
N_groups = length(unique(dat$groups)),
group_ind = match(dat$groups, levels(dat$groups))
)
str(standat)
model2_code = "data{
int<lower=1> N;
int<lower=1> N_pred;
real y[N];
real x[N];
real x_pred[N_pred];
int<lower=1> N_groups;
int<lower=1, upper=N_groups> group_ind[N];
}
transformed data{
}
parameters{
matrix[N,N_groups] GP_group_std;
real<lower=0> length_GP_group;
vector[N_groups] group_std;
real mu;
real<lower=0> sigma;
}
transformed parameters{
matrix[N, N_groups] GP_group;
vector[N_groups] group_re;
vector[??] sigma_group;
real sigma_GP_group;
vars = n_comps * prop_var * tot_var;
for(i in 1:10)
sigma_group[i] = sqrt(vars[i+2]);
group_re = sigma_group[group_ind] .* group_std;
{
matrix[N,N] = cov_group;
cov_group = cov_exp_quad(N, ,sigma_GP_group,length_GP_group )
for(n in 1:N){
cov_group[N,N] = cov_group[N,N]+1e-12;
}
L_cov_group = cholesky_decompose(cov_group);
GP_group = L_cov_group * GP_group_std;
}
}
model{
vector[N] obs_mu;
for(i in 1:N){
obs_mu[n] = mu + group_re[group_ind[n]]+GP_group[ x[n] , group_ind[n]];
}
y~normal(obs_mu, sigma );
to_vector(GP_group_std) ~ normal(0,1);
group_std ~ normal(0,1);
mu ~ normal(0,100);
sigma ~ normal(0,100);
length_GP_group ~ weibull(30,8);
} //end model
} //end data
"
fit = stan(model_code=model2_code, data=standat, iter=2500, warmup=500, chains = 1)
f <- extract(fit)