# SDE formulation of Gaussian Processes with Kalman filtering in Stan

I wrote a GP model for time series using the sde formulation as proposed by Särkkä & Solin. The covariance function is a sum of short-term, longer term and (weekly) periodic covariance fucntions. It turned out to be more complicated than I expected, but I would appreciate comments re eg the efficiency of code. As it is, 4x400 samples with warmup = 150 takes about 2 minutes for n=300, so for that small n the speed up compared with the batch approach isn’t that great.

``````    functions{
matrix Fmatrix(real ell, int D) {
// creates matrix F corresponding to a Matern kernel with nu = D - 1/2
matrix[D,D] F;
real lambda = sqrt(2*(D-0.5))/ell;
for (j in 1:D) {
F[D,j] = -choose(D,j-1)*lambda^(D-j+1);
for (i in 1:(D-1)) {
if(j == i+1)
F[i,j] = 1;
else
F[i,j] = 0;
}
}
return F;
}
matrix Pmatrix(real s2, matrix F){
// Creates steady state covariance matrix corresponding to F
int D = rows(F);
real lambda = -F[D,D]/D;
matrix[D,D] P;
vector[D*D] Pvec;
matrix[D*D,D*D] XX;
matrix[D,D] ID = diag_matrix(rep_vector(1,D));
vector[D*D] Q = rep_vector(0,D*D);
Q[D*D] = -s2 * tgamma(D)^2 / tgamma(2*D-1) * (2*lambda)^(2*D-1);
for (i in 1:D) {
for (k in 1:D) {
XX[(i-1)*D+1:i*D,(k-1)*D+1:k*D] = F[i,k]*ID;
}
XX[(i-1)*D+1:i*D,(i-1)*D+1:i*D] = XX[(i-1)*D+1:i*D,(i-1)*D+1:i*D] + F;
}
Pvec = XX\Q;
for (j in 1:D) {
P[1:D,j] = Pvec[(j-1)*D+1:j*D];
}
return P;
}

matrix blockdiag(matrix m1, matrix m2){
// creates a block diagonal matrix from 2 square matrices
int d1 = rows(m1);
int d2 = rows(m2);
matrix[d1+d2,d1+d2] mm;
mm[1:d1,1:d1] = m1;
mm[(d1+1):(d1+d2),(d1+1):(d1+d2)] = m2;
mm[d1+1:d1+d2,1:d1] = rep_matrix(0,d2,d1);
mm[1:d1,d1+1:d1+d2] = rep_matrix(0,d1,d2);
return mm;
}
matrix Fseasonal(real omega,int J){
matrix[2*J,2*J] F = rep_matrix(0,2*J,2*J);
for(j in 1:J){
F[2*j,2*j-1] = omega*j;
F[2*j-1,2*j] = -F[2*j,2*j-1];
}
return F;
}
matrix Pseas(int J, real ell){
matrix[2*J,2*J] P= rep_matrix(0,2*J,2*J);
for(i in 1:J){
P[2*i,2*i] = 2*modified_bessel_first_kind(i,square(1/ell))/exp(square(1/ell));
P[2*i-1,2*i-1] = P[2*i,2*i];
}
return P;
}
}
data{
int N; // n of time points
int Nobs; // n of observations
int np; // n of prediction steps
real y[N]; // observations including dummies
real  tt[N]; // vector of time points
int obs[N]; // binary indicator for observations
int obsind[Nobs]; //time indices of observations
real pred_step; // prediction time step
}
parameters{
real mu;
real<lower=0> ellshort;
real<lower=0> elllong;
real<lower=0> ellseas;
real<lower=0> s2short;
real<lower=0> s2long;
real<lower=0> epsilon;
}
transformed parameters{
matrix[2,2] Fshort = Fmatrix(ellshort,2);
matrix[3,3] Flong = Fmatrix(elllong,3);
matrix[4,4] Fseas = Fseasonal(2*pi()/(7), 2);
matrix[9,9] F = blockdiag(blockdiag(Fshort,Flong),Fseas);
matrix[9,9] Pinf = blockdiag(blockdiag(Pmatrix(s2short,Fshort), Pmatrix(s2long,Flong)),Pseas(2,ellseas));
matrix[9,9] A;
matrix[9,9] QQ;
vector[N] muvec;
vector[N] Pvec;
vector mp;
matrix[9,9] Pp;
vector K;
vector m = rep_vector(0,9);
matrix[9,9] P = Pinf;
row_vector H = [1,0,1,0,0,1,0,1,0];
real dt = 0;

// Kalman filter
for (k in 1:N) {
if(k==1){
mp = m;
Pp = P;
} else {
if( dt != (tt[k]-tt[k-1]) ){
dt = tt[k]-tt[k-1];
A = matrix_exp(F*dt);
QQ = Pinf-A*Pinf*A';
}
mp = A*m;
Pp = A*P*A' + QQ;
}
muvec[k] = H*mp;
Pvec[k] = H*Pp*H' + epsilon;
if(obs[k]==1){
K = Pp*H'/Pvec[k];
m = mp + K*((y[k]-mu) - muvec[k]);
P = Pp - K*H*Pp; }
else {
m = mp;
P = Pp;
}
}
}
model{
mu ~ normal(0,20);
ellshort ~ inv_gamma(3,2);
elllong ~ normal(50,10);
ellseas ~ inv_gamma(3,2);
epsilon ~ inv_gamma(3,2);
s2short ~ inv_gamma(3,2);
s2long ~ inv_gamma(3,2);
y[obsind] ~ normal(mu + muvec[obsind], sqrt(Pvec[obsind]));
}
generated quantities {
vector[np] preds;
vector[np] simul;
vector mb = m;
matrix[9,9] Pb = P;
matrix[9,9] Ap = matrix_exp(F*pred_step);
matrix[9,9] Qp = Pinf-Ap*Pinf*Ap';
vector mpb;
matrix[9,9] Ppb;
for(j in 1:np) {
mpb = Ap*mb;
Ppb = Ap*Pb*Ap' + Qp;
preds[j] = mu + H*mpb;
simul[j] = normal_rng(preds[j], sqrt(H*Ppb*H' + epsilon) );
mb = mpb;
Pb = Ppb;
}
}``````

Create a index vector `obsidx` which has the indeces for observations and then write

`````` y[obsidx] ~ normal(mu + muvec[obsidx], sqrt(Pvec[obsidx]))
``````
3 Likes

Thanks. I edited the model in my original post as you suggested. I also added a variable pred_step to the data block for defining the prediction step in the generated quantities block. In the previous version the prediction step would always be the last value of dt.

Also, a correction to my first post: inference using this model is much faster than the naive approach. The performance stats are from rstan in Windows.

1 Like

Did it change the speed?

It’s known that autodiffing through the Kalman filter loop is not the most efficient way, and with special code it would be possible to compute gradients more efficiently with one forward and one backward pass. Currently this can be implemented only in C++, but maybe someday we’ll have user defined gradients also for functions defined in Stan program with Stan language. This same thing holds for ODE and other solvers, which have C++ gradient implementations to make them faster.