Hi,
Long-time reader first-time poster. I’m trying to reproduce the categorical data model found in the paper below; the Stan code can all be found in the appendix.
http://sethrf.com/files/fast-hierarchical-GPs.pdf
I’ve copied the model pretty much verbatim, I only cleaned up some deprecated syntax. My error on sampling is;
Rejecting initial value:
Error evaluating the log probability at the initial value.
Exception: lkj_corr_lpdf: y is not positive definite.
This repeats for 100 attempts until it errors out. I understand why a correlation matrix needs to be positive-def but don’t understand why it fails to initialize properly here.
Full Stan Code:
functions {
// return (A \otimes B) v where:
// A is n1 x n1, B = n2 x n2, V = n2 x n1 = reshape(v,n2,n1)
matrix kron_mvprod(matrix A, matrix B, matrix V) {
return transpose(A * transpose(B * V));
}
// A is a length n1 vector, B is a length n2 vector.
// Treating them as diagonal matrices, this calculates:
// v = (A \otimes B + sigma2)ˆ{-1}
// and returns the n1 x n2 matrix V = reshape(v,n1,n2)
matrix calculate_eigenvalues(vector A, vector B, int n1, int n2, real sigma2) {
matrix[n1,n2] e;
for(i in 1:n1) {
for(j in 1:n2) {
e[i,j] = (A[i] * B[j] + sigma2);
} }
return(e); }
}
data {
int<lower=1> n1;
int<lower=1> n2; // categories for learning cross-type correlations
vector[n2] x1; // observation locations (e.g. timestamps)
matrix[n2,n1] y; // NB: this should be reshape(y, n2, n1),
}
transformed data {
matrix[n1, n1] xd;
vector[2] one;
// where y corresponds to expand.grid(x2,x1). // To double-check, make sure that y[i,j] is // the observation from category x2[i]
// at location x1[j]
one[1] = 1;
one[2] = 1;
for (i in 1:n1) {
xd[i, i] = 0;
for (j in (i+1):n1) {
xd[i, j] = -((x1[i]-x1[j]) ^ 2);
xd[j, i] = xd[i, j];
}
}
}
parameters {
real<lower=0> var1; // signal variance
real<lower=0> bw1; // this is equivalent to 1/sqrt(length-scale)
corr_matrix[n2] L;
real<lower=0.00001> sigma1;
}
model {
matrix[n1, n1] Sigma1;
matrix[n1, n1] Q1;
vector[n1] R1;
matrix[n2, n2] Q2;
vector[n2] R2;
matrix[n2,n1] eigenvalues;
Sigma1 = var1 * exp(xd * bw1);
for(i in 1:n1)
Sigma1[i,i] = Sigma1[i,i] + .00001;
L ~ lkj_corr(2);
Q1 = eigenvectors_sym(Sigma1);
R1 = eigenvalues_sym(Sigma1);
Q2 = eigenvectors_sym(L);
R2 = eigenvalues_sym(L);
eigenvalues = calculate_eigenvalues(R2,R1,n2,n1,sigma1);
var1 ~ lognormal(0,1);
bw1 ~ cauchy(0,2.5);
sigma1 ~ lognormal(0,1);
target +=
-0.5 * sum(y .* kron_mvprod(Q1,Q2, // calculates -0.5 * y’ (K1 \otimes K2) y
kron_mvprod(transpose(Q1),transpose(Q2),y) ./ eigenvalues))
-0.5 * sum(log(eigenvalues)); // calculates logdet(K1 \otimes K2)
}
R Code to generate data (also from referenced paper)
library(kernlab)
eps = 1e-8
n = 200
space = seq(-2,2,length.out=n)
time = space
K1 = kernelMatrix(rbfdot(4), as.matrix(space))
K2 = kernelMatrix(rbfdot(1), as.matrix(time))
L1 = chol(K1 + eps * diag(n))
L2 = chol(K2 + eps * diag(n))
v = rnorm(n*n)
y = as.numeric(matrix(t(t(L2) %*% matrix(t(t(L1) %*% matrix(v,n,n)),n,n)),n*n,1))
y = y + rnorm(n*n,sd=.1) # Add spherical noise
y = matrix(as.numeric(y), ncol = n, byrow = FALSE) ## NOT in original R code, but the Stan code expects an n2 x n1 matrix, as noted in the Stan data block.
data = list(n1=length(space),n2=length(time), x1=space, x2=time, y=y)
Thanks,
Simon