Hi,
I’m using PyStan and fitting a factor model. To avoid funnel-like behaviour, I have reparameterised my model according to the manual and it runs pretty quickly. However, I am getting lots of warnings :
Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
Exception: std_normal_lpdf: Random variable[1] is -nan, but must not be nan! (in 'infer.stan' at line 76)
If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,
but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.
which refers to
z_y[i] ~ std_normal();
in
data {
int <lower=1> P; // number of dimensions
int <lower=1> N; // number of
int <lower=1> D; // number of latent factors
vector[P] y[N];
}
transformed data {
vector[D] fac_mu = rep_vector(0.0, D);
vector[D] fac_sd = rep_vector(1.0, D);
int<lower=1> beta_l; // number of lower-triangular, non-zero loadings
beta_l = P * (P - D) + D * (D - 1) / 2;
}
parameters {
// Parameters
vector[D] z_fac[N]; // reparameterised latent factors
vector[D] z_log_f_sd; // normalised log latent factor standard deviations
// vector[D] log_f_sd; // log latent factor standard deviations
// vector[D] log_f_sd_beta; // log latent factor standard deviation transform
// vector[D] log_f_sd_bias; // log latent factor standard deviation bias
vector[beta_l] L_lower; // lower diagonal loadings
// vector[beta_l] L_lower_beta; // lower diagonal transform
// vector[beta_l] L_lower_bias; // lower diagonal bias
// real<lower=0> L_lower_sd; // lower diagonal standard deviations
vector<lower=0>[D] L_diag; // positive diagonal loadings
real log_y_sd; // y standard deviations
// Hyperparameters
vector<lower=0>[D] tau; // scale
cholesky_factor_corr[D] corr_log_f_sd; //prior correlation
}
transformed parameters {
matrix[P, D] beta;
vector[D] log_f_sd;
vector[D] fac[N]; // latent factors
// beta[T] and Q[T]
{
int idx;
idx = 1;
for (j in 1:D) {
beta[j, j] = L_diag[j]; // set positive diagonal loadings
for (k in (j+1):D){
beta[j, k] = 0; // set upper triangle values to 0
}
for (i in (j+1):P){
beta[i, j] = L_lower[idx]; // set lower diagonal loadings
idx = idx + 1;
}
}
}
log_f_sd = tau .* (corr_log_f_sd * z_log_f_sd);
for (i in 1:N){
fac[i] = exp(log_f_sd) .* z_fac[i];
}
}
model {
vector[P] z_y[N]; // reparameterised ys
// priors
tau ~ normal(0, 1);
corr_log_f_sd ~ lkj_corr_cholesky(1);
L_lower ~ normal(0, 1);
L_diag ~ gamma(2, 0.5);
// L_lower_sd ~ gamma(2, 0.1);
log_y_sd ~ normal(0, 1);
// log_f_sd ~ multi_normal_cholesky(log_f_sd, diag_pre_multiply(tau, corr_log_f_sd));
// L_lower ~ normal(L_lower_bias + (L_lower_beta .* L_lower), L_lower_sd);
// L_lower ~ normal(L_lower, L_lower_sd);
z_log_f_sd ~ std_normal();
for (i in 1:N){
// fac[i] ~ multi_normal_cholesky(fac_mu, diag_matrix(exp(log_f_sd)));
// z_fac[i] ~ multi_normal_cholesky(fac_mu, diag_matrix(fac_sd));
z_fac[i] ~ std_normal();
// y[i] ~ multi_normal_cholesky(beta * fac[i], diag_matrix(exp(rep_vector(log_y_sd, P))));
z_y[i] = (y[i] - beta * fac[i]) / exp(log_y_sd);
z_y[i] ~ std_normal();
// target += z_y[i];
}
}
My data-generating process and pystan code is:
import numpy as np
import matplotlib.pyplot as plt
import pystan
import gzip
import os
import pdb
import pickle
seed = 1
np.random.seed(seed)
threads = 4
os.environ['STAN_NUM_THREADS'] = str(threads)
n_N = 100
n_F = 5
n_Y = 10
n_D = 5000
n_W = 1000
n_C = 4
AD = 0.95
Y = np.zeros((n_N, n_Y))
B = np.zeros((n_Y, n_F))
log_F_sigma = np.zeros((n_F))
# y hyperparameters
log_y_sigma = np.random.normal(0, 1) * np.ones(n_Y)
# f hyperparameters
log_f_sigma_ = np.random.normal(-1, 0.25, n_F)
# diag chol
chol_f_sigma = np.diag(np.abs(np.random.normal(0, 0.1, n_F)))
# full chol
# chol_f_sigma = np.random.normal(0, 0.25, n_F * n_F).reshape(n_F, n_F)
# row, col = np.diag_indices(n_F)
# chol_f_sigma[row, col] = np.abs(chol_f_sigma[row, col])
# B
base_order = 0.05
bias_order = 0.5
p_connect = 0.3
n_connect = np.int(0.3 * n_Y * n_F)
add = (2 * np.random.binomial(1, 0.5, n_connect) - 1) * (bias_order + np.abs(np.random.standard_normal(n_connect)))
B_ = base_order * np.random.standard_normal(n_Y * n_F)
B_[:n_connect] += add
B_ = np.random.permutation(B_).reshape(n_Y, n_F)
row, col = np.triu_indices(n_Y, 0, n_F)
B_[row, col] = 0
np.fill_diagonal(B_, 1)
# Initialise
# log_F_sigma[0] = np.random.multivariate_normal(log_f_sigma_, chol_f_sigma ** 2)
log_F_sigma = log_f_sigma_ + chol_f_sigma @ np.random.standard_normal(n_F)
B = B_ * base_order * np.tril(np.random.standard_normal(n_Y * n_F).reshape(n_Y, n_F), k=-1)
for i in range(1, n_N):
Y[i] = B @ np.random.multivariate_normal(np.zeros(n_F), np.diag(np.exp(2 * log_F_sigma))) + np.exp(log_y_sigma) * np.random.standard_normal(n_Y)
dat = {
'P': n_Y,
'D': n_F,
'N': n_N,
# 'fac_mu': np.zeros(n_F),
'y': Y
}
extra_compile_args = ['-pthread', '-DSTAN_THREADS']
model = pystan.StanModel(file='infer.stan', extra_compile_args=extra_compile_args)
fit = model.sampling(data=dat, iter=n_D, warmup=n_W, seed=seed, chains=n_C, verbose=True, control={'adapt_delta':AD}, n_jobs=threads)
with gzip.open('pystan_non_tv_fit_{}_{}_{}_{}_{}.gz'.format(n_D, n_W, seed, n_C, AD), 'wb') as f:
pickle.dump({'model' : model, 'fit' : fit}, f)
Could someone tell me if this is normal behaviour or should I be concerned?
Cheers,
Sky