/* I am coding a likelihood which is the product of a negative binomial and a beta binomial connected by the parameter bj. I also have linear regressors for the negative binomial (betas). When I run the code simulating one beta (intercept) it takes 46 seconds. When I add a covariate it takes 1920 seconds. The results are what I expect based on my simulations, but I wonder how I can reduce the running time.
Many thanks in advance!!!
*/
functions{ // Y is array and arrays are declared as shown in manual p.296)
real help_log(int [] Y, matrix ynmgencov, vector betas, real bj, real phi, real theta){
vector[rows(ynmgencov)] mu;//the linear predictor
real meane; // mu adjusted by genotype
real lprob; // collects each term of the log.likelihood (per individual)
real lbetaprob; // prob for the beta binomial estimation
real p; // proportion of ASE
real x; // to collect information
real y; // to collect info
real z; // to collect info
real t; // to collect info
real q; // to swap haps when genotype =-1
real w; // to deal with complete allelic imbalance
real j; // to deal with complete allelic imbalance opposite hap
real k; // while loop
lprob=0; // collects log likelihood terms
mu = exp(ynmgencov[,5:cols(ynmgencov)] * betas); //using the log link, default for geno=0
for(i in 1:rows(ynmgencov)){ // loop through individuals
meane=mu[i];
if(ynmgencov[i,4]==1){
meane = exp(log(mu[i]) +log(1+exp(bj))-log(2));
}
if(ynmgencov[i,4]==2){
meane=exp(log(mu[i]) + bj);
}
lprob=neg_binomial_2_lpmf(Y[i] | meane, phi) + lprob;
if(ynmgencov[i,3]>5) { // at least 5 total ASE counts to use ASE info
w=ynmgencov[i,2];
j=ynmgencov[i,3]-ynmgencov[i,2];
if(fabs(ynmgencov[i,4])==1) { //hets diff from hom (p=0.5)
p=exp(bj)/(1+exp(bj));
} else {
p=0.5;
}
if(!(ynmgencov[i,4]==-1)) { // no hap swap
t=j;
q=w;
} else { // hap swap
t=w;
q=j;
}
x=0;
y=0;
z=0;
k=1;
while(k<ynmgencov[i,3]){// need to do while loop due to matrix definition of real values
x=x+log(1+k*theta);
k=k+1;
}
k=0;
while(k<t){
z=z+log(1-p+k*theta);
k=k+1;
}
k=0;
while(k<q){
y=y+log(p+k*theta);
k=k+1;
}
lbetaprob=lchoose(ynmgencov[i,3],q)+y+z-x;
lprob = lprob + lbetaprob;
}
}
return(lprob);
}
}
data {
int<lower=0> N; // number of samples
int<lower=0> K; // number of covariates (including intercept)
int Y[N]; // the response
matrix[N,4+K] ynmgencov; // matrix with columns as: counts (y) ,n counts, m counts, genotype rSNP (gen) and covariates (cov)
}
parameters {
vector[K] betas; //the regression param
real bj; // log fold change ASE
real<lower=0> phi; //the overdispersion parameter for negative binomial
real<lower=0> theta; //the overdispersion parameter for beta binomial
}
model {
Y ~ help(ynmgencov, betas, bj,phi,theta);
}
[edit: escaped code]