[edit: escaped code]
Dear Stan-community,
I have a model with a covariance matrix as a function of another variable. The calculation time is high and I would like to reduce it.
The model consists of a univariate variable Z (genralized normally distributed).
Z~GN(mean, alpba, beta),
and W is multivariate normally distributed
W~N(mean, COV)
with mean and covariance COV, where COV is a function of Z:
COV[W_i, W,j] = rho_ij * D_i( Z) * D_j( Z)
D_i(Z) is the standard dev and is a linear function of Z.
In the case, when rho_ij is the diagonal matrix (rho_ii=1, rest 0, i.e. un-corellated W), then the run time is 5.5h. If rho is a correlation matrix with rho_ij in [-1,1], then the calculation time is 62h.
Some remarks:
Number of records is 15’000,
pystan uses one core only,
Number of chain is set to chain=1 (increasing the number of chains did not reduce time)
My main question is how can the calculation time be reduced.
Thank you in advance for your helpful comments!
MODEL
functions{
real func_linear(real z, real zmin, real zmax, real rho_zmin, real rho_zmax ){
// linear interpolation between zmin and zmax
real y;
y = rho_zmin + (z - zmin) * (rho_zmax - rho_zmin) / (zmax - zmin) ;
return y;
}
real gen_normal_lpdf( vector x, real mu_, real alpha_, real beta_ ) {
vector[rows(x)] lp;
int N;
N = rows(x);
for(i in 1:N){
// pdf according to wikipedia
// https://en.wikipedia.org/wiki/Generalized_normal_distribution
lp[ i ] = log(beta_) - (log(2*alpha_)+lgamma(1/beta_)) - pow( fabs(x[i]-mu_)/alpha_, beta_ );
}
// returns loglikelihood to maximise it (not neg. likelihood!)
return sum( lp );
}
}
data {
int < lower = 1 > data_n; // number of values
int < lower = 1 > axes_n; //
matrix[data_n, axes_n] W; // data of ratios values
vector[data_n] Z; // data of total
real < upper = 1 > prior_W_mean; // mean priordistribution
real < upper = 2 > prior_W_std; // std dev prior
real < upper = 1 > prior_W_cov; // CoV for priors
real < lower = 0 > Z_0; // start value for Z (Z_min * 0.95), used e.g. for D[A_i(Z)]
real < lower = 0 > Z_1; // end value for Z (Z_max * 1.05)
real < lower = 0 > Z_beta; // beta parameter for the Z variable as input
real < lower = 0, upper = 100 > prior_Z_mean; // prior for mean
real < lower = 0, upper = 10 > prior_Z_std; // prior for std
}
transformed data {
vector < upper = 35> [data_n] W1;
vector < upper = 35> [data_n] W2;
vector < upper = 35> [data_n] W3;
vector < upper = 35> [data_n] W4;
W1 = W[:, 1];
W2 = W[:, 2];
W3 = W[:, 3];
W4 = W[:, 4];
}
parameters {
real < lower = 0, upper = axes_n * 35 > Z_mean; // mean of total
real < upper = 1 > W1_mean; // mean
real < upper = 1 > W2_mean; //
real < upper = 1 > W3_mean; //
real < upper = 1 > W4_mean; //
real < lower = 0, upper = 1 > W1_std_c; // Wi_std for Z0
real < lower = 0, upper = 1 > W2_std_c; //
real < lower = 0, upper = 1 > W1_std_d; // Wi_std for Z1
real < lower = 0, upper = 1 > W2_std_d; //
real < lower = -1, upper = 1 > rho_12; // rho
real < lower = -1, upper = 1 > rho_13; //
real < lower = -1, upper = 1 > rho_14; //
real < lower = -1, upper = 1 > rho_23; //
real < lower = -1, upper = 1 > rho_24; //
real < lower = -1, upper = 1 > rho_34; //
real < lower = 0, upper = axes_n * 20 > Z_alpha; // parameter alpha for Z
}
transformed parameters{
matrix < lower = -1000, upper = 1000 > [axes_n, axes_n] COVMAT [data_n]; // covariance matrix
cholesky_factor_cov [axes_n] L_COVMAT [data_n] ; // choleski of COVMAT
vector < upper = 1 > [axes_n] W_mean; // vector of mean values
vector < lower = 0, upper = 2 > [data_n] W1_std; // std dev
vector < lower = 0, upper = 2 > [data_n] W2_std; //
vector < lower = 0, upper = 2 > [data_n] W3_std; //
vector < lower = 0, upper = 2 > [data_n] W4_std; //
W_mean[1]= W1_mean; // building the vector with mean values
W_mean[2]= W2_mean;
W_mean[3]= W3_mean;
W_mean[4]= W4_mean;
for (i in 1:data_n){
// calculate STD as a function of Z
W1_std[i] = func_linear( Z[i], Z_0, Z_1, W1_std_c, W1_std_d) ;
W2_std[i] = func_linear( Z[i], Z_0, Z_1, W2_std_c, W2_std_d) ;
W3_std[i] = W2_std[i] ; // use W2 values
W4_std[i] = W1_std[i] ; // use W1 values
COVMAT[i,1,1] = 1 * W1_std[i] * W1_std[i] + 1e-10;
COVMAT[i,1,2] = rho_12 * W1_std[i] * W2_std[i];
COVMAT[i,1,3] = rho_13 * W1_std[i] * W3_std[i];
COVMAT[i,1,4] = rho_14 * W1_std[i] * W4_std[i];
COVMAT[i,2,1] = COVMAT[i,1,2];
COVMAT[i,2,2] = 1 * W2_std[i] * W2_std[i] + 1e-10;
COVMAT[i,2,3] = rho_23 * W2_std[i] * W3_std[i];
COVMAT[i,2,4] = rho_24 * W2_std[i] * W4_std[i];
COVMAT[i,3,1] = COVMAT[i,1,3];
COVMAT[i,3,2] = COVMAT[i,2,3];
COVMAT[i,3,3] = 1 * W3_std[i] * W3_std[i] + 1e-10;
COVMAT[i,3,4] = rho_34 * W3_std[i] * W4_std[i];
COVMAT[i,4,1] = COVMAT[i,1,4];
COVMAT[i,4,2] = COVMAT[i,2,4];
COVMAT[i,4,3] = COVMAT[i,3,4];
COVMAT[i,4,4] = 1 * W4_std[i] * W4_std[i] + 1e-10;
L_COVMAT[i,:,:] = cholesky_decompose( COVMAT[i,:,:] );
} // end loop over data
}
model {
// priors
target += normal_lpdf( W1_std_c | prior_W_std, 1 );
target += normal_lpdf( W2_std_c | prior_W_std, 1 );
target += normal_lpdf( W1_std_d | prior_W_std, 1 );
target += normal_lpdf( W2_std_d | prior_W_std, 1 );
// priors Wi_std
for (i in 1: data_n) {
target += normal_lpdf( W1_std[i] | prior_W_std, 1 );
target += normal_lpdf( W2_std[i] | prior_W_std, 1 );
target += normal_lpdf( W3_std[i] | prior_W_std, 1 );
target += normal_lpdf( W4_std[i] | prior_W_std, 1 );
}
// priors Wi_mean
target += normal_lpdf( W1_mean | prior_W_mean, fabs( prior_W_mean ) * prior_W_cov );
target += normal_lpdf( W2_mean | prior_W_mean, fabs( prior_W_mean ) * prior_W_cov );
target += normal_lpdf( W3_mean | prior_W_mean, fabs( prior_W_mean ) * prior_W_cov );
target += normal_lpdf( W4_mean | prior_W_mean, fabs( prior_W_mean ) * prior_W_cov );
// priors Z
target += normal_lpdf( Z_mean | prior_Z_mean, prior_W_cov * prior_Z_mean );
target += normal_lpdf( Z_alpha | prior_Z_std * tgamma(1/Z_beta) / tgamma(3/Z_beta), prior_W_cov*sqrt( axes_n ) * prior_Z_std * tgamma(1/Z_beta) / tgamma(3/Z_beta) );
// Likelihood
target += gen_normal_lpdf( Z | Z_mean, Z_alpha, Z_beta );
for (i in 1: data_n) {
W[i,:] ~ multi_normal_cholesky_lpdf(W_mean, L_COVMAT[i,:,:] );
}
}