Normal distribution with matrix mean and vector variance divergent transitions issues

hi guys, there are some issues in my code, the normal distribution with mean as matrix and variance as vactor

dat<-list(rt=c( 1,  9,  2, 1, 10, 1, 1,  90,2216),
          nt=c(40,135,200,48,150,59,25,1159,29011),   
          rc=c( 2, 23,  7, 1,  8, 9, 3, 118,2103),      
          nc=c(36,135,200,46,148,56,23,1157,29039)
)

options(mc.cores = parallel::detectCores()) 

dataList = list(
  Ntotal = 9,
  y = log(  ((dat$rt+.5) / (dat$nt-dat$rt+.5) )   /  ((dat$rc+.5) / (dat$nc-dat$rc+.5) )  ),   # log -odds-ratios
  sigma2 = 1/(dat$rt+.5) + 1/(dat$nt-dat$rt+.5) + 1/(dat$rc+.5)  + 1/ (dat$nc-dat$rc+.5),     # variances
  s02 = 1/mean( (1/( 1/(dat$rt+.5) + 1/(dat$nt-dat$rt+.5) + 1/(dat$rc+.5)  + 1/ (dat$nc-dat$rc+.5)))[1:8]),
  yy1= log(  ((dat$rt+.5) / (dat$nt-dat$rt+.5) )   /  ((dat$rc+.5) / (dat$nc-dat$rc+.5) )  ),
  yy2= log(  ((dat$rt+.5) / (dat$nt-dat$rt+.5) )   /  ((dat$rc+.5) / (dat$nc-dat$rc+.5) )  ),
  yy3= log(  ((dat$rt+.5) / (dat$nt-dat$rt+.5) )   /  ((dat$rc+.5) / (dat$nc-dat$rc+.5) )  ),
  yy4= log(  ((dat$rt+.5) / (dat$nt-dat$rt+.5) )   /  ((dat$rc+.5) / (dat$nc-dat$rc+.5) )  ),
  yy5= log(  ((dat$rt+.5) / (dat$nt-dat$rt+.5) )   /  ((dat$rc+.5) / (dat$nc-dat$rc+.5) )  ),
  yy6= log(  ((dat$rt+.5) / (dat$nt-dat$rt+.5) )   /  ((dat$rc+.5) / (dat$nc-dat$rc+.5) )  ),
  sigma29=(1/(dat$rt+.5) + 1/(dat$nt-dat$rt+.5) + 1/(dat$rc+.5)  + 1/ (dat$nc-dat$rc+.5))[9]
)

modelString1 = "
data{
  int<lower=0>    Ntotal;
  vector[Ntotal]   y;
  real<lower=0>   sigma2[Ntotal];
  real<lower=0>   s02;
  real<lower=0>   sigma29;
}
parameters {
  real<lower=0,upper=1> pfix[Ntotal];
  real<lower=-10,upper=10>  mu[6];
  matrix[6, 8]  theta;
  vector[6] thetanew_raw;
  vector[6] ynew_raw;
  real<lower=0> tau21;
  real<lower=0,upper=50> tau22;
  real<lower=0,upper=50> tau23;
  real<lower=0,upper=1> B0;
  real<lower=0,upper=1> D0;
  real<lower=0> tau26;
}
transformed parameters {
   real<lower=0> tau[6];
   matrix[6, 8]  yy;
   vector[6] thetanew;
   vector[6] ynew;
   tau[1] = 1/sqrt(tau21);
   
  tau[2] = sqrt(tau22);
  tau[3] = tau23;
  tau[4] = sqrt(s02 * (1-B0)/B0);
  tau[5] = sqrt(s02) * (1-D0) / D0;
  tau[6] = tau26;
  
 for(j in 1:6){
    for( k in 1 : 8 ) {    
       yy[j,k]=y[k];
    } 
     thetanew = mu[j] + thetanew_raw*tau[j]  ;
 }
     ynew = thetanew + ynew_raw*sqrt(sigma29);
}

model
{
   tau21  ~ gamma(0.001,0.001);  

   tau22 ~ uniform(0,50);                                    

   tau23 ~ uniform(0,50);                                       

   B0 ~ uniform(0,1);  

   D0 ~ uniform(0,1);

   tau26 ~ normal(0,sqrt(3.84))T[0,];
   
   thetanew_raw ~ std_normal();
   ynew_raw  ~ std_normal();
   
   
 for(j in 1:6){
    for( k in 1 : 8 ) {   
       target += normal_lpdf(theta[j,k] | mu[j],tau[j]);
       target += normal_lpdf(yy[j,k] | theta[j,k],sigma2[k]) ; 
    } 
   target += normal_lpdf( thetanew[j]|mu[j],tau[j]);
   target += normal_lpdf( ynew[j] |thetanew[j],sqrt(sigma29)); 
   
 }   
 
}

generated quantities {
  real<lower=0> OR[6];

  for (j in 1:6) {
     OR[j]=exp(mu[j]);
   }
}
"


writeLines( modelString1 , con="TEMPmodel.stan" )
stanModel <- stan(
  data = dataList ,
  file = "TEMPmodel.stan",  
  chains = 4,            
  warmup = 5000,
  iter = 25000,
  control = list(adapt_delta = 0.99)
  
)

There are several variance divergent transitions issues which I have no idea how to fix, I think the manual only mention the variance as matrix not the mean as matirx

2: There were 9 divergent transitions after warmup. See
http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
3: Examine the pairs() plot to diagnose sampling problems

Have you considered setting sane priors on your parameters? :)

what is sane priors?thanks

Hi,

I usually think about what prior knowledge I have or if I can find some in the literature, and then I conduct prior predictive checks :)

1 Like

This is likely the same issue you encountered in your other thread here, because you have a hierarchical model.

That is, the hyperparameters for theta (mu and tau) are themselves parameters:

target += normal_lpdf(theta[j,k] | mu[j],tau[j]);

Have a look at the User’s guide chapter that I linked in that topic and its discussion of the non-centered parameterisation for these models.

Also, if thetanew and ynew are predictions from the model, rather than model parameters, then it’s more efficient to compute those in the generated quantities block

1 Like

I cannot do the same thing as the other post because if I have
theta[j,k]= mu[j]+theta_raw[j,k]*tau[j];
yy[j,k]= theta[j,k]+yy_raw[j,k]*sigma2[k];

There will be issues as follows:

No matches for:

matrix ~ std_normal()

Available argument signatures for std_normal:

real ~ std_normal()
real[ ] ~ std_normal()
vector ~ std_normal()
row_vector ~ std_normal()

Looks like there are other tricks for the matrix?

You can either loop over each row of the matrix:

for(r in 1:R) {
  your_matrix[r] ~ std_normal();
}

Or use to_vector:

to_vector(your_matrix) ~ std_normal();
1 Like

There are more issues than before. 936 variance divergent…

modelString1 = "
data{
int<lower=0> Ntotal;
vector[Ntotal] y;
real<lower=0> sigma2[Ntotal];
real<lower=0> s02;
real<lower=0> sigma29;
}
parameters {
real<lower=0,upper=1> pfix[Ntotal];
real<lower=-10,upper=10> mu[6];

matrix[6, 8] theta_raw;

vector[6] thetanew_raw;
vector[6] ynew_raw;

real<lower=0> tau21;
real<lower=0,upper=50> tau22;
real<lower=0,upper=50> tau23;
real<lower=0,upper=1> B0;
real<lower=0,upper=1> D0;
real<lower=0> tau26;
}
transformed parameters {
real<lower=0> tau[6];

vector[6] thetanew;
vector[6] ynew;
matrix[6,8] theta;
matrix[6, 8] yy;
matrix[6, 8] yy_raw;
tau[1] = 1/sqrt(tau21);
tau[2] = sqrt(tau22);
tau[3] = tau23;
tau[4] = sqrt(s02 * (1-B0)/B0);
tau[5] = sqrt(s02) * (1-D0) / D0;
tau[6] = tau26;

for(j in 1:6){
for( k in 1 : 8 ) {
yy_raw[j,k]=y[k];
theta[j,k]= mu[j]+theta_raw[j,k]tau[j];
yy[j,k]= theta[j,k]+yy_raw[j,k]sigma2[k];
}
thetanew = mu[j] + thetanew_raw
tau[j] ;
}
ynew = thetanew + ynew_raw
sqrt(sigma29);
}

model
{
tau21 ~ gamma(0.001,0.001);

tau22 ~ uniform(0,50);

tau23 ~ uniform(0,50);

B0 ~ uniform(0,1);

D0 ~ uniform(0,1);

tau26 ~ normal(0,sqrt(3.84))T[0,];

thetanew_raw ~ std_normal();
ynew_raw ~ std_normal();

target += normal_lpdf(to_vector(theta_raw) | 0,1);
target += normal_lpdf(to_vector(yy_raw)| 0,1);

for(j in 1:6){
for( k in 1 : 8 ) {
target += normal_lpdf(theta[j,k] | mu[j],tau[j]);
target += normal_lpdf(yy[j,k] | theta[j,k],sigma2[k]) ;
}
target += normal_lpdf( thetanew[j]|mu[j],tau[j]);
target += normal_lpdf( ynew[j] |thetanew[j],sqrt(sigma29));

}

}

generated quantities {
real<lower=0> OR[6];

for (j in 1:6) {
OR[j]=exp(mu[j]);
}
}
"

I’m having a bit of trouble following your model.

First you declare yy as a transformed parameter:

   matrix[6, 8] yy;

Then you assign every column of a given row to have the same value:

       yy[j,k]=y[k];

Then you also give it a prior:

       target += normal_lpdf(yy[j,k] | theta[j,k],sigma2[k]) ; 

Can you explain your model a little more?

1 Like

There are x=8 trials, for each trials use the same data and y=6 priors.

yy[j,k]=y[k] is just to get the data for the xth studies and yth priors.

I think I cannot assign the data at the beginning, if I do that, I will have yy[j,k]= theta[j,k]+yy_raw[j,k]*sigma2[k] directly, but with SYNTAX ERROR, MESSAGE(S) FROM PARSER:
Cannot assign to variable outside of declaration block; left-hand-side variable origin=data
Illegal statement beginning with non-void expression parsed as
yy[[j, k]]

Ah, so yy is the outcome. In that case, you want to construct it in the transformed data block instead. Below is an updated version of your model, where:

  • yy is constructed in transformed data
  • Non-centered parameterisation for theta is used
  • ynew is generated in the generated quantities block:

This runs with no divergences given the options: control = list(adapt_delta = 0.99,max_treedepth=15)

data{
  int<lower=0>    Ntotal;
  vector[Ntotal]   y;
  real<lower=0>   sigma2[Ntotal];
  real<lower=0>   s02;
  real<lower=0>   sigma29;
}
transformed data {
  matrix[6,8] yy;

 for(j in 1:6){
    for( k in 1 : 8 ) {    
       yy[j,k]=y[k];
    }
  }
}
parameters {
  vector<lower=-10,upper=10>[6]  mu;
  matrix[6, 8]  theta_raw;
  real<lower=0> tau21;
  real<lower=0,upper=50> tau22;
  real<lower=0,upper=50> tau23;
  real<lower=0,upper=1> B0;
  real<lower=0,upper=1> D0;
  real<lower=0> tau26;
}
transformed parameters {
  real<lower=0> tau[6];
  matrix[6, 8] theta;

  tau[1] = 1/sqrt(tau21);
  tau[2] = sqrt(tau22);
  tau[3] = tau23;
  tau[4] = sqrt(s02 * (1-B0)/B0);
  tau[5] = sqrt(s02) * (1-D0) / D0;
  tau[6] = tau26;

  for(j in 1:6){
    for(k in 1:8 ) {    
       theta[j,k] = mu[j] + theta_raw[j,k] * tau[j];
    }
  }
}

model {
  tau21  ~ gamma(0.001,0.001);  
  tau22 ~ uniform(0,50);                                    
  tau23 ~ uniform(0,50);                                       
  B0 ~ uniform(0,1);  
  D0 ~ uniform(0,1);
  tau26 ~ normal(0,sqrt(3.84));

  to_vector(theta_raw) ~ std_normal();

  for(j in 1:6){
    for(k in 1:8 ){   
      target += normal_lpdf(yy[j,k] | theta[j,k], sigma2[k]); 
    } 
  }
}

generated quantities {
  vector<lower=0>[6] OR = exp(mu);
  real thetanew[6] = normal_rng(mu, tau);
  real ynew[6] = normal_rng(to_vector(thetanew),sqrt(sigma29));
}

1 Like

Make sense. But when I ran this on my computer, still one divergent transitions after warmup.
Is this what we can ignore?

2: There were 1 divergent transitions after warmup. See
http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
3: Examine the pairs() plot to diagnose sampling problems

With only 1 divergence it should be fine to use adapt_delta=0.999. But I would re-emphasise @torkar’s advice that you should investigate the appropriateness of your priors, and whether they imply realistic values for your data.

2 Likes

Thanks!!!