Latent variable model - marginalising out discrete parameter

CerulloE · August 22, 2020, 1:35pm

I am trying to convert a latent variable model from BUGS code to Stan. The model runs in BUGS however I am sampling directly from a latent discrete variable D1[i]. I know that to do this in Stan you need to marginalise out the discrete latent variable and cannot sample this directly as the code below attempts to do.

I was wondering if anybody had any idea of how to code this up? I have seen some examples on marginalising but still cannot figure it out.

The data is:
res1 <- c(rep(1, 262), rep(2, 24), rep(3, 26))
res2 <- c(rep(1, 183), rep(2, 61), rep(3, 18), rep(1, 13), rep(2, 5),
rep(3, 6), rep(1, 3), rep(2, 4), rep(3, 19))
res3 <- c(rep(1, 181), rep(2, 1), rep(3, 1), rep(1, 56), rep(2, 5),
rep(1,16), rep(2, 1), rep(3, 1), rep(1, 10), rep(3, 3),
rep(1, 5), rep(1, 2), rep(2, 2), rep(3, 2),
rep(1,1), rep(2, 2), rep(3, 4), rep(2, 1), rep(3, 18))

data = list(res1=res1, res2=res2, res3=res3, N = 312, nt=3)

Code: (need to marginalise out the D1[i])

  data {
  int<lower=0> nt; // # of diagnostic tests
  int<lower=0> N;	// # of subjects
    int res1[N];  // results of test 1
  int res2[N];  // results of test 2 
  int res3[N]; // results of test 3
}

parameters {
  vector<lower = 0>[nt] a11;
  vector<lower = 0>[nt] a10;
  
  vector<lower = 0>[nt] c11;
  vector<lower = 0>[nt] c10;
  vector[nt] b0;
  vector[nt] b1;
  vector[N] t;
  int<lower=0,upper=1> D1[N];
  real<lower=0,upper=1> p; 
}

transformed parameters {
  vector[nt] b11;
  vector[nt] b10;
  vector<lower=0,upper=1>[nt] Se; 
  vector<lower=0,upper=1>[nt] Sp; 
  vector<lower=0,upper=1>[nt] int_d; 
  vector<lower=0,upper=1>[nt] int_nd; 
  real<lower=1,upper=2> D[N];
  real<lower=0,upper=1> p1[nt,N,2,3];
  real<lower=0,upper=1> probs[nt,N,3];
  
  for (j in 1:nt) {
    b11[j] = 0;
    b10[j] = 0;
    Se[j]   = 1-Phi( c11[j] - a11[j] );
    int_d[j] =  Phi( c11[j] - a11[j] ) - Phi( b11[j] - a11[j] );  
    int_nd[j] = Phi( c10[j] - a10[j] ) - Phi( b10[j] - a10[j] );
    Sp[j]     = Phi( b10[j] - a10[j] );
    
    
    for (i in 1:N) { 
     // diseased 
      p1[j,i,2,1] =  Phi(b11[j] - a11[j]) ;
      p1[j,i,2,2] = Phi(c11[j] - a11[j] ) - Phi(b11[j] - a11[j])  ;
      p1[j,i,2,3] = 1-Phi(c11[j] - a11[j] )  ;    
      // non-diseased 
      p1[j,i,1,1] = Phi(b10[j] - a10[j]) ;
      p1[j,i,1,2] = Phi(c10[j] - a10[j]) - Phi(b10[j] - a10[j]) ;
      p1[j,i,1,3] =  1-Phi(c10[j] - a10[j] ) ;

      probs[j,i,1] = p1[j,i,D[i],1];
      probs[j,i,2] = p1[j,i,D[i],2]; 
      probs[j,i,3] = p1[j,i,D[i],3];
      
      D[i]<-D1[i]+1;
      
    }
  }
  
}

model {
  p~uniform(0,1) ;
  for (j in 1:nt) {
    a11[j] ~normal(0,1);
    a10[j] ~normal(0,1);
    c11[j] ~normal(0,1);
    c10[j] ~normal(0,1);
    
    
    for (i in 1:N) { 
      

      D1[i] ~ bernoulli(p)
      
      res1[i]~categorical(probs[1,i,1:3]) ;
      res2[i]~categorical(probs[2,i,1:3]) ;
      res3[i]~categorical(probs[3,i,1:3]) ;
    }
    
    
  }

Background: The model and data are the probit latent class model (assuming conditional dependence between tests) described in Xu et al (doi: 10.1002/sim.5695). I wrote the model in BUGS first, by extending some code I had for a simpler version, where each test has dichotomous results rather than three categories as is the case here (this model: https://pubmed.ncbi.nlm.nih.gov/8805757/).

nhuurre · August 22, 2020, 3:28pm

model {
  p~uniform(0,1) ;
  for (j in 1:nt) {
    a11[j] ~normal(0,1);
    a10[j] ~normal(0,1);
    c11[j] ~normal(0,1);
    c10[j] ~normal(0,1);
  }
    
  for (i in 1:N) { 
    real lps[2];

    // logprob conditinal on D[i]=0
    lps[1] = bernoulli_lpmf(0| p)
             + categorical_lpmf(res1[i]| p1[1,i,1,1:3])
             + categorical_lpmf(res2[i]| p1[2,i,1,1:3])
             + categorical_lpmf(res3[i]| p1[3,i,1,1:3]) ;

    // logprob conditional on D[i]=1
    lps[2] = bernoulli_lpmf(1| p)
             + categorical_lpmf(res1[i]| p1[1,i,2,1:3])
             + categorical_lpmf(res2[i]| p1[2,i,2,1:3])
             + categorical_lpmf(res3[i]| p1[3,i,2,1:3]) ;

    // marginalize
    target += log_sum_exp(lps);
  }
}

Also, parameters b0, b1 and t need priors and maybe you could use Stan’s ordered probit distribution.

CerulloE · August 22, 2020, 5:09pm

Thanks nhuurre

The code is now:

data {
                int<lower=0> nt;
		int<lower=0> N;	// # of subjects
                int res1[N]; 
                int res2[N]; 
                int res3[N]; 
	}

	parameters {
	  vector<lower = 0>[nt] a11;
          vector[nt] a10;
          vector<lower = 0>[nt] c11;
          vector<lower = 0>[nt] c10;
          vector[nt] b0;
          vector[nt] b1;
          vector[N] t;
          real<lower=0,upper=1> p; 
          simplex[N] D;
	}

	transformed parameters {
        vector[nt] b11;
        vector[nt] b10;
        vector<lower=0,upper=1>[nt] Se; 
        vector<lower=0,upper=1>[nt] Sp; 
        vector<lower=0,upper=1>[nt] int_d; 
        vector<lower=0,upper=1>[nt] int_nd; 
      //  vector[nt,N,2,3] p1;
         matrix<lower=0,upper=1>[N,nt]  p1[2,3];

         for (j in 1:nt) {
          b11[j] = 0;
          b10[j] = 0;
          Se[j]   = 1-Phi( c11[j] - a11[j] );
          int_d[j] =  Phi( c11[j] - a11[j] ) - Phi( b11[j] - a11[j] );  
          int_nd[j] = Phi( c10[j] - a10[j] ) - Phi( b10[j] - a10[j] );
          Sp[j]     = Phi( b10[j] - a10[j] );
         
         for (i in 1:N) { 
        // Non-diseased
        p1[1,1,i,j] =  1-Phi(c10[j] - a10[j] ) ;
        p1[1,2,i,j] =  Phi(c10[j] - a10[j]) - Phi(b10[j] - a10[j]) ;
        p1[1,3,i,j] =  Phi(b10[j] - a10[j]) ;   
        // diseased
        p1[2,1,i,j] = 1-Phi(c11[j] - a11[j] )  ;     
        p1[2,2,i,j] = Phi(c11[j] - a11[j] ) - Phi(b11[j] - a11[j])  ;
        p1[2,3,i,j] = Phi(b11[j] - a11[j]) ;
       
	               }
        }
     }

model {
  p~uniform(0,1) ;
  for (j in 1:nt) {
    a11[j] ~normal(0,1);
    a10[j] ~normal(0,1);
    c11[j] ~normal(0,1);
    c10[j] ~normal(0,1);
  }
    
  for (i in 1:N) { 
    real lps[2];
 
    // logprob conditinal on D[i]=0
    lps[1] = bernoulli_lpmf(0| p)
             + categorical_lpmf(res1[i]| p1[1,1:3,i,1])
             + categorical_lpmf(res2[i]| p1[1,1:3,i,2])
             + categorical_lpmf(res3[i]| p1[1,1:3,i,3]) ;

    // logprob conditional on D[i]=1
    lps[2] = bernoulli_lpmf(1| p)
             + categorical_lpmf(res1[i]| p1[2,1:3,i,1])
             + categorical_lpmf(res2[i]| p1[2,1:3,i,2])
             + categorical_lpmf(res3[i]| p1[2,1:3,i,3]) ;

    // marginalize
    target += log_sum_exp(lps);
  }
}

I define p1 as a matrix but I get this error:

No matches for:

categorical_lpmf(int, real[ ])

Available argument signatures for categorical_lpmf:

categorical_lpmf(int, vector)
categorical_lpmf(int[ ], vector)

nhuurre · August 22, 2020, 5:45pm

I guess you need to_vector(...)

categorical_lpmf(res1[i]| to_vector(p1[1,1:3,i,1]))

etc

CerulloE · August 23, 2020, 12:53am

Thanks nhuure!

This version of the model works now with good n_eff, although I do get a very high number of divergent transitions after warmup, despite the results matching JAGS and being correct.

I am now trying to run a version of the model with conditional dependence between tests, by introducing a latent variable t. When I run this, even with a treedepth of 18 and adapt_delta=0.999 I get very low n_eff’s - many parameters with n_eff < 10

data {
  int<lower=0> nt; // number of tests
  int<lower=0> N;	// # of subjects
    int res1[N]; // result of test 1
  int res2[N]; // result of test 2
  int res3[N]; // result of test 3
}
parameters {
  vector[nt] a11;
  vector[nt] a10;
  vector[nt] c11; // upper threshold in diseased
  vector[nt] c10; // upper threshold in non-diseased
  vector[nt] b0; // coefficient for conditional dependence in non-diseased
  vector[nt] b1; // coefficient for conditional dependence in diseased
  vector[N] t; //  induces conditional dependence between tests
  real<lower=0,upper=1> p; // disease prevalence 
}

transformed parameters {
  vector[nt] b11; // lower threshold for diseased, set to 0 
  vector[nt] b10; // lower threshold for non-diseased, set to 0 
  vector<lower=0,upper=1>[nt] Se; 
  vector<lower=0,upper=1>[nt] Sp; 
  vector<lower=0,upper=1>[nt] int_d; 
  vector<lower=0,upper=1>[nt] int_nd; 
  matrix<lower=0,upper=1>[N,nt]  p1[2,3];
  
  for (j in 1:nt) {
    b11[j] = 0;
    b10[j] = 0;
    // overall test accuracy estimates 
    Se[j]   = 1-Phi_approx( (c11[j] - a11[j])/sqrt(1+b1[j]^2) );
    int_d[j] =  Phi_approx( (c11[j] - a11[j])/sqrt(1+b1[j]^2) ) - Phi_approx( (b11[j] - a11[j])/sqrt(1+b1[j]^2) );  
    int_nd[j] = Phi_approx( (c10[j] - a10[j])/sqrt(1+b0[j]^2) ) - Phi_approx( (b10[j] - a10[j])/sqrt(1+b0[j]^2) );
    Sp[j]     = Phi_approx( (b10[j] - a10[j])/sqrt(1+b0[j]^2) );
    
    for (i in 1:N) { 
      // Non-diseased
      p1[1,1,i,j] =  Phi_approx(b10[j] - a10[j] + t[i]*b0[j]) ; // prob of testing negative given non-diseased 
      p1[1,2,i,j] =  Phi_approx(c10[j] - a10[j]+ t[i]*b0[j]) - Phi_approx(b10[j] - a10[j]+ t[i]*b0[j]) ; // intermedtiate result given non-diseased
      p1[1,3,i,j] =  1-Phi_approx(c10[j] - a10[j] + t[i]*b0[j]) ; // prob of testing positive given non-diseased 
      // diseased
      p1[2,1,i,j] = Phi_approx(b11[j] - a11[j] + t[i]*b1[j]) ;  
      p1[2,2,i,j] = Phi_approx(c11[j] - a11[j] + t[i]*b1[j] ) - Phi_approx(b11[j] - a11[j]+ t[i]*b1[j])  ;
      p1[2,3,i,j] = 1-Phi_approx(c11[j] - a11[j] + t[i]*b1[j])  ;   
    }
  }
}

model {
  p~uniform(0,1) ;
  for (j in 1:nt) {
    a11[j] ~normal(0,10)T[0,];
    a10[j] ~normal(0,10);
    c11[j] ~normal(0,1)T[0,];
    c10[j] ~normal(0,1)T[0,];
    b1[j] ~normal(0,1);
    b0[j] ~normal(0,1);
  }
  for (i in 1:N) { 
    t[i] ~ normal(0,1); 
  }
  for (i in 1:N) { 
    real lps[2];
    
    // logprob conditinal on non-diseased
    lps[1] = bernoulli_lpmf(0| p)
    + categorical_lpmf(res1[i]| to_vector(p1[1,1:3,i,1]))
    + categorical_lpmf(res2[i]| to_vector(p1[1,1:3,i,2]))
    + categorical_lpmf(res3[i]| to_vector(p1[1,1:3,i,3])) ;
    
    // logprob conditional on diseased
    lps[2] = bernoulli_lpmf(1| p)
    + categorical_lpmf(res1[i]| to_vector(p1[2,1:3,i,1]))
    + categorical_lpmf(res2[i]| to_vector(p1[2,1:3,i,2]))
    + categorical_lpmf(res3[i]| to_vector(p1[2,1:3,i,3])) ;
    
    // marginalize
    target += log_sum_exp(lps);
  }
}

EDIT:@maxbiostat edited this post for syntax highlighting.

CerulloE · August 23, 2020, 7:55pm

Should I make a new thread for this question, as it is quite different than the original one which started the thread? I am new to this forum

maxbiostat · August 23, 2020, 10:52pm

Please do.

Topic		Replies	Views
Marginalising out many discrete latent variables / modelling augmented count data Modeling techniques	12	1465	July 17, 2018
Latent variables, divergent transitions and prior selection Modeling rstan , techniques , fitting-issues	22	1422	September 19, 2021
Discrete parameter in Stan Developers	14	5619	February 7, 2019
Estimation, not marginalisation, of discrete parameters: a solution? General discrete-parameters	3	724	May 18, 2022
Guidance Needed on Comparing Models with Latent Discrete Variables When Using Marginalized Log-Likelihood Modeling	0	142	March 22, 2024

Latent variable model - marginalising out discrete parameter

Related topics