Display predicted values of gp

following the stan documentation a gaussian process is fit below for the function x^2. How do you display the predicted values for the data points?

set.seed(5)
x = seq(-10,10,.25)
y = x^2
plot(x, y)

library(rstan)
standat = list(
  N=length(x),
  x = x,
   y = y
)
stancode = 'data {
  int<lower=1> N;
  real x[N];
  vector[N] y;
}
transformed data {
  real delta = 1e-9;
}
parameters {
  real<lower=0> rho;
  real<lower=0> alpha;
  real<lower=0> sigma;
  vector[N] eta;
}
model {
  vector[N] f;
  {
    matrix[N, N] K = cov_exp_quad(x, alpha, rho);
    matrix[N, N] L_K;
    // diagonal elements
    for (k in 1:N)
      K[k, k] = K[k, k] + delta;
    L_K = cholesky_decompose(K);
    f = L_K * eta;
  }
  rho ~ gamma(4, 4);
  alpha ~ normal(0, 1);
  sigma ~ normal(0, 1);
  eta ~ normal(0, 1);
  y ~ normal(f, sigma);
}

generated quantities {
    vector[N] f;
    matrix[N, N] K = cov_exp_quad(x, alpha, rho);
    matrix[N, N] L_K;
    // diagonal elements
    for (k in 1:N)
      K[k, k] = K[k, k] + delta;
    L_K = cholesky_decompose(K);
    f = L_K * eta;
}
'

fit3 = stan(model_code=stancode, data=standat, iter=2500, warmup=500, chains = 1)
plot (  fit3@inits[[1]]$f , type = "l" )

extract(fit3, pars = c("f")) should give you access to the samples of f. It can be a bit of a bear to get everything lined up right for plotting. You can wrestle with ggplot yourself, or there’s at least this library: https://github.com/mjskay/tidybayes#fit-curves (from @mjskay) out there to make it easier.

You can make your model a little cleaner by building f in a transformed parameters block (variables in there are accessible by model and generated quantities blocks).

Something like:

transformed parameters {
  vector[N] f;
  {
    matrix[N, N] K = cov_exp_quad(x, alpha, rho);
    matrix[N, N] L_K;
    // diagonal elements
    for (k in 1:N)
      K[k, k] = K[k, k] + delta;
    L_K = cholesky_decompose(K);
    f = L_K * eta;
  }
}

model {
  rho ~ gamma(4, 4);
  alpha ~ normal(0, 1);
  sigma ~ normal(0, 1);
  eta ~ normal(0, 1);
  y ~ normal(f, sigma);
}

generated quantities {
  vector[N] yhat;
  for(n in 1:N)
    yhat[n] = normal_rng(f[n], sigma);
}

edits: fixed a couple code typos

Or is it making predictions at new points that you want?

Not sure if this is what you wanted, but this should give you the fit curve and uncertainty bands with data overlaid:

library(dplyr)
# you can install tidybayes using devtools::install_github("mjskay/tidybayes")
library(tidybayes)

df = data.frame(
  i = seq_along(x), 
  x = x, 
  y = y
)

fit3 %>%
  spread_samples(f[i]) %>%
  inner_join(df, by = "i") %>%
  ggplot(aes(x = x, y = y)) +
  stat_lineribbon(aes(y = f), .prob = c(.50, .80, .95)) +
  geom_point(data = df) +
  scale_fill_brewer()
2 Likes