Display predicted values of gp


#1

following the stan documentation a gaussian process is fit below for the function x^2. How do you display the predicted values for the data points?

set.seed(5)
x = seq(-10,10,.25)
y = x^2
plot(x, y)

library(rstan)
standat = list(
  N=length(x),
  x = x,
   y = y
)
stancode = 'data {
  int<lower=1> N;
  real x[N];
  vector[N] y;
}
transformed data {
  real delta = 1e-9;
}
parameters {
  real<lower=0> rho;
  real<lower=0> alpha;
  real<lower=0> sigma;
  vector[N] eta;
}
model {
  vector[N] f;
  {
    matrix[N, N] K = cov_exp_quad(x, alpha, rho);
    matrix[N, N] L_K;
    // diagonal elements
    for (k in 1:N)
      K[k, k] = K[k, k] + delta;
    L_K = cholesky_decompose(K);
    f = L_K * eta;
  }
  rho ~ gamma(4, 4);
  alpha ~ normal(0, 1);
  sigma ~ normal(0, 1);
  eta ~ normal(0, 1);
  y ~ normal(f, sigma);
}

generated quantities {
    vector[N] f;
    matrix[N, N] K = cov_exp_quad(x, alpha, rho);
    matrix[N, N] L_K;
    // diagonal elements
    for (k in 1:N)
      K[k, k] = K[k, k] + delta;
    L_K = cholesky_decompose(K);
    f = L_K * eta;
}
'

fit3 = stan(model_code=stancode, data=standat, iter=2500, warmup=500, chains = 1)
plot (  fit3@inits[[1]]$f , type = "l" )

#2

extract(fit3, pars = c("f")) should give you access to the samples of f. It can be a bit of a bear to get everything lined up right for plotting. You can wrestle with ggplot yourself, or there’s at least this library: https://github.com/mjskay/tidybayes#fit-curves (from @mjskay) out there to make it easier.

You can make your model a little cleaner by building f in a transformed parameters block (variables in there are accessible by model and generated quantities blocks).

Something like:

transformed parameters {
  vector[N] f;
  {
    matrix[N, N] K = cov_exp_quad(x, alpha, rho);
    matrix[N, N] L_K;
    // diagonal elements
    for (k in 1:N)
      K[k, k] = K[k, k] + delta;
    L_K = cholesky_decompose(K);
    f = L_K * eta;
  }
}

model {
  rho ~ gamma(4, 4);
  alpha ~ normal(0, 1);
  sigma ~ normal(0, 1);
  eta ~ normal(0, 1);
  y ~ normal(f, sigma);
}

generated quantities {
  vector[N] yhat;
  for(n in 1:N)
    yhat[n] = normal_rng(f[n], sigma);
}

edits: fixed a couple code typos

Or is it making predictions at new points that you want?


#3

Not sure if this is what you wanted, but this should give you the fit curve and uncertainty bands with data overlaid:

library(dplyr)
# you can install tidybayes using devtools::install_github("mjskay/tidybayes")
library(tidybayes)

df = data.frame(
  i = seq_along(x), 
  x = x, 
  y = y
)

fit3 %>%
  spread_samples(f[i]) %>%
  inner_join(df, by = "i") %>%
  ggplot(aes(x = x, y = y)) +
  stat_lineribbon(aes(y = f), .prob = c(.50, .80, .95)) +
  geom_point(data = df) +
  scale_fill_brewer()