So for the non-Gaussian outcome I think you would need to use the latent variable formulation (e.g. a latent Gaussian process where f would be computed from the HSGP basis instead of from the Cholesky decomposition of the full K), which doesn't require those matrix inversions.
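For concreteness, here is a minimal sketch of that latent (non-centered) formulation under the exponential covariance used below. The names m_lat, L_lat, x_lat, and beta are just illustrative choices for this snippet; in an actual non-Gaussian model beta would be the sampled parameter and f would feed into the likelihood through a link function.
m_lat <- 32                                    # number of basis functions (arbitrary for illustration)
L_lat <- 1.1 * 5                               # boundary: scaling factor times half-width of the domain
x_lat <- seq(-5, 5, length.out = 200)
sigma_lat <- 1; a_lat <- 2                     # same covariance parameters as the example below
lambda_lat <- ((1:m_lat) * pi / (2 * L_lat))^2                  # Laplacian eigenvalues
S_lat <- 2 * a_lat * sigma_lat^2 / (1 + a_lat^2 * lambda_lat)   # spectral density at sqrt(lambda)
Phi_lat <- sapply(1:m_lat, function(j) L_lat^(-1/2) * sin(j * pi * (x_lat + L_lat) / (2 * L_lat)))
beta <- rnorm(m_lat)                           # beta ~ N(0, I); a model parameter, not data
f_lat <- Phi_lat %*% (sqrt(S_lat) * beta)      # f = Phi diag(sqrt(S)) beta; no n x n Cholesky or inverse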
However, for a noise-free Gaussian outcome we would want to use the analytical form of the joint distribution, and I think it works just fine. I'm unsure why \Phi_1 \Lambda \Phi_1^T would necessarily be singular: it has dimension (n_1 \times M) \times (M \times M) \times (M \times n_1) = n_1 \times n_1, and as long as M \geq n_1 (its rank is at most \min(M, n_1)) it should be full rank; there is a quick numerical check of this after the approximate covariances are built below. I don't think the matrix inversion lemma applies when we don't have a nugget.
Below is some code implementing the HSGP and making predictions under known parameters. Note that I use the following notation for the block covariances:
\boldsymbol{\Sigma}_{joint} = \begin{bmatrix}
\boldsymbol{\Sigma}_{obs,obs} & \boldsymbol{\Sigma}_{obs,pred} \\
\boldsymbol{\Sigma}_{pred,obs} & \boldsymbol{\Sigma}_{pred,pred}
\end{bmatrix}
= \begin{bmatrix}
\boldsymbol{\Sigma} & \boldsymbol{\Sigma}_{0}^T \\
\boldsymbol{\Sigma}_{0} & \boldsymbol{\Omega}
\end{bmatrix}
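Under this notation, the code below just applies the standard noise-free conditional (kriging) equations
\boldsymbol{\mu}_{pred} = \boldsymbol{\Sigma}_{0} \boldsymbol{\Sigma}^{-1} \mathbf{Z}_{obs}, \qquad \boldsymbol{\Sigma}_{pred} = \boldsymbol{\Omega} - \boldsymbol{\Sigma}_{0} \boldsymbol{\Sigma}^{-1} \boldsymbol{\Sigma}_{0}^T,
first with the exact covariances and then with their HSGP approximations.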
library(fields)     # rdist() for pairwise distances
library(ggplot2)    # plotting
library(patchwork)  # combining plots
c_light <- c("#DCBCBC") #colors
c_light_trans <- adjustcolor( c_light, alpha.f = 0.7)
c_light_highlight <- c("#C79999")
c_mid <- c("#B97C7C")
c_mid_trans <- adjustcolor(c_mid, alpha.f = 0.7)
c_mid_highlight <- c("#A25050")
c_dark <- c("#8F2727")
c_dark_trans <- adjustcolor(c_dark, alpha.f = 0.7)
c_dark_highlight <- c("#7C0000")
c_light_teal <- c("#6B8E8E")
c_mid_teal <- c("#487575")
c_dark_teal <- c("#1D4F4F")
set.seed(420)
n_obs <- 50
n_pred <- 500
jitter <- 1E-6
x_latent <- seq(-5,5,length.out = 1000)
obs_idx <- sort(sample(1:1000, n_obs))
x_obs <- x_latent[obs_idx]
sigma <- 1   # marginal standard deviation
a <- 2       # range parameter of the exponential covariance
dists_latent <- rdist(x_latent, x_latent)
Sigma_latent <- sigma^2*exp(-dists_latent/a)        # exponential covariance over the latent grid
Z_latent <- t(chol(Sigma_latent)) %*% rnorm(1000)   # one draw from the exact GP
Z_obs <- Z_latent[obs_idx]
x_pred <- seq(-5,5,length.out = n_pred)
dists_pred <- rdist(x_pred, x_pred)
dists_obs <- rdist(x_obs, x_obs)
dists_0 <- rdist(x_pred, x_obs)
Sigma_0 <- sigma^2*exp(-dists_0/a)     # pred x obs cross-covariance
Sigma <- sigma^2*exp(-dists_obs/a)     # obs covariance
Omega <- sigma^2*exp(-dists_pred/a)    # pred covariance
mu_pred <- Sigma_0 %*% solve(Sigma) %*% Z_obs                     # conditional mean
Sigma_pred <- Omega - Sigma_0 %*% solve(Sigma) %*% t(Sigma_0)     # conditional covariance
sigma_pred <- sqrt(diag(Sigma_pred)+jitter)
uppers <- mu_pred+1.96*sigma_pred
lowers <- mu_pred-1.96*sigma_pred
f <- ggplot() +
  geom_ribbon(data = data.frame(x = x_pred, uppers, lowers),
              aes(x = x, ymin = lowers, ymax = uppers), fill = c_light_trans) +
  geom_line(data = data.frame(x = x_latent, y = Z_latent), aes(x = x, y = y), color = "gray") +
  geom_line(data = data.frame(x = x_pred, y = mu_pred), aes(x = x, y = y), color = c_dark) +
  geom_point(data = data.frame(x = x_obs, y = Z_obs), aes(x = x, y = y)) +
  xlab("t") + ylab("Z(t)") + labs(title = "Exact")
# Spectral density of the exponential covariance sigma^2 * exp(-|r|/a), evaluated at frequency omega
exp_cov_sdens <- function(omega, a, sigma){
  return(2*a*sigma^2/(1+a^2*omega^2))
}
# Basis functions evaluated at a single point x: returns an m x 1 matrix
phi_vec <- function(m, x, L){
  temp <- matrix(NA_real_, nrow = m, ncol = 1)
  temp[, 1] <- L^(-1/2)*sin((1:m)*pi/(2*L)*(x+L))
  return(temp)
}
# Basis function matrix: n x m, with phi_j(x_i) = L^(-1/2) * sin(j * pi * (x_i + L) / (2L))
phi_mat <- function(m, x, L){
  n <- length(x)
  temp <- matrix(NA_real_, nrow = n, ncol = m)
  for (i in 1:n){
    temp[i, ] <- L^(-1/2)*sin((1:m)*pi/(2*L)*(x[i]+L))
  }
  return(temp)
}
# Diagonal matrix Lambda: spectral density evaluated at the square roots of the Laplacian eigenvalues
Delta_mat_exp_cov <- function(m, L, a, sigma){
  lambda <- ((1:m)*pi/(2*L))^2   # Dirichlet Laplacian eigenvalues on [-L, L]
  d <- exp_cov_sdens(sqrt(lambda), a, sigma)
  return(diag(d, m))
}
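For reference, these helpers implement the standard HSGP building blocks, i.e. the approximation
K(x, x') \approx \sum_{j=1}^{m} S\!\left(\sqrt{\lambda_j}\right) \phi_j(x)\, \phi_j(x'), \qquad \lambda_j = \left(\frac{j \pi}{2L}\right)^2, \qquad \phi_j(x) = L^{-1/2} \sin\!\left(\frac{j \pi (x + L)}{2L}\right),
which in matrix form is \mathbf{K} \approx \boldsymbol{\Phi} \boldsymbol{\Lambda} \boldsymbol{\Phi}^T with \boldsymbol{\Lambda} = \mathrm{diag}\big(S(\sqrt{\lambda_1}), \ldots, S(\sqrt{\lambda_m})\big) and S the spectral density above.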
m <- 512     # number of basis functions
c <- 1.1     # boundary factor
L <- c*5     # boundary factor times half-width of the domain
Phi_obs <- phi_mat(m, x_obs, L)
Lambda <- Delta_mat_exp_cov(m = m, L = L, a = a, sigma = sigma)
Phi_pred <- phi_mat(m, x_pred, L)
Sigma_0 <- Phi_pred %*% Lambda %*% t(Phi_obs)                      # approximate pred x obs cross-covariance
Sigma <- Phi_obs %*% Lambda %*% t(Phi_obs) + diag(jitter, n_obs)   # approximate obs covariance plus jitter
Omega <- Phi_pred %*% Lambda %*% t(Phi_pred)                       # approximate pred covariance
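As a quick numerical check of the full-rank point from earlier (here m = 512 \geq n_obs = 50, so the approximate covariance should be invertible even before the jitter is added):
qr(Phi_obs %*% Lambda %*% t(Phi_obs))$rank   # numerical rank; should come out as n_obs = 50 (up to tolerance)
kappa(Phi_obs %*% Lambda %*% t(Phi_obs))     # condition number estimate; large values hint at the instability noted at the end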
mu_pred <- Sigma_0 %*% solve(Sigma) %*% Z_obs
Sigma_pred <- Omega - Sigma_0 %*% solve(Sigma) %*% t(Sigma_0)
sigma_pred <- sqrt(diag(Sigma_pred)+jitter)
uppers <- mu_pred+1.96*sigma_pred
lowers <- mu_pred-1.96*sigma_pred
g <- ggplot() +
  geom_ribbon(data = data.frame(x = x_pred, uppers, lowers),
              aes(x = x, ymin = lowers, ymax = uppers), fill = c_light_trans) +
  geom_line(data = data.frame(x = x_latent, y = Z_latent), aes(x = x, y = y), color = "gray") +
  geom_line(data = data.frame(x = x_pred, y = mu_pred), aes(x = x, y = y), color = c_dark) +
  geom_point(data = data.frame(x = x_obs, y = Z_obs), aes(x = x, y = y)) +
  xlab("t") + ylab("Z(t)") + labs(title = "Approximate")
f + g + plot_layout(nrow = 2)
I noticed that the lack of a nugget effect adds quite a bit of instability to the linear algebra.
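One way to mitigate that, at least in principle, is to replace the explicit solve() with Cholesky-based triangular solves. This is just a sketch of the same computation on the jittered approximate Sigma from above (a generic numerical tweak, not anything HSGP-specific):
R <- chol(Sigma)                                  # Sigma = t(R) %*% R, with R upper triangular
alpha <- backsolve(R, forwardsolve(t(R), Z_obs))  # alpha = Sigma^{-1} Z_obs via two triangular solves
mu_pred_chol <- Sigma_0 %*% alpha                 # same conditional mean as above
V <- forwardsolve(t(R), t(Sigma_0))               # V = R^{-T} Sigma_0^T
Sigma_pred_chol <- Omega - t(V) %*% V             # Omega - Sigma_0 Sigma^{-1} Sigma_0^T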