Adapt a model to multithreading

Hi there! I am a new user to Stan, and I am currently trying to adapt a model to enable multithreading. The original model compiles well and runs without any problem, but the adapted model could not compile successfully and reports error:

PARSER FAILED TO PARSE INPUT COMPLETELY
STOPPED AT LINE 27: 

Here is orginal model:
functions.stan:

functions {
   real gammaModeSD_lpdf(real parm,real m,real sd) {
      real r=(m+sqrt(m^2+4*sd^2))/(2*sd^2);
      real s=1+m*r;
      return gamma_lpdf(parm|r,s);
   }

   // This function can be used via: p~betaModeConc(mode,concentration);
   real betaModeConc_lpdf(real parm,real m,real c) {
      return beta_lpdf(parm|m*(c-2)+1, (1-m)*(c-2)+1);
   }

   real betaMeanConc_lpdf(real parm,real m,real c) {
      return beta_lpdf(parm|m*(c-2), (1-m)*(c-2));
   }

   real dirichMultinom_lpmf(int[] x,int K,vector alpha) {
      real alpha0=sum(alpha);
      int n=sum(x);
      real logP=log(n)+lbeta(alpha0,n);
      for(k in 1:K)
         if(x[k]>0) logP-=(x[k]+lbeta(alpha[k],x[k]));
      return logP; }
}

model.stan:

#include /functions.stan
data {
   int<lower=1> N_VARIANTS;             
   real<lower=0,upper=1> v[N_VARIANTS]; 
   int<lower=1> N_DNA;            
   int<lower=0> a[N_VARIANTS,N_DNA];   
   int<lower=0> b[N_VARIANTS,N_DNA];   
   int<lower=1> N_RNA;             
   int<lower=0> k[N_VARIANTS,N_RNA];   
   int<lower=0> m[N_VARIANTS,N_RNA];   
}

parameters {
   real<lower=0,upper=1> p[N_VARIANTS]; 
   real<lower=0,upper=1> qi[N_VARIANTS,N_RNA]; 
   real<lower=0> theta[N_VARIANTS];
   real<lower=2> c1; // concentration parameter of beta prior for qi
   real<lower=2> c2; // concentration parameter of beta prior for p
   real<lower=0> s; // std.dev. parameter of lognormal prior for theta
}

transformed parameters { // ORDER MATTERS!
   real<lower=0,upper=1> q[N_VARIANTS]; 
   for(j in 1:N_VARIANTS)
      q[j]=theta[j]*p[j]/(1.0-p[j]+theta[j]*p[j]);
}

model {
   // Parameters
   c1 ~ gamma(1.1, 0.005);
   c2 ~ gamma(1.1, 0.005);
   s ~ gammaModeSD(1,1);
   for(j in 1:N_VARIANTS) {
      p[j] ~ betaModeConc(v[j],c2);
      log(theta[j])/s ~ normal(0,1); // theta~lognormal(0,s)
      target+=-log(theta[j])-log(s); // Jacobian
      for(i in 1:N_RNA)
         qi[j,i] ~ betaModeConc(q[j],c1);
      for(i in 1:N_DNA)
         a[j,i] ~ binomial(a[j,i]+b[j,i],p[j]);
      for(i in 1:N_RNA)
         k[j,i] ~ binomial(k[j,i]+m[j,i],qi[j,i]);
   }
}

And here is my naive version of adapted model with map_rect() function (I think I might have done it completely wrong :< ) basically I just packed all data and parameters into 3-d arrays and unpack them inside the function.

#include /functions.stan

functions{
  vector f(vector alpha , vector beta , real[] xr , int[] xi ){
    int n = size(xr);
    int a = xi[1:M,1:N_DNA];
    int b = xi[1:M,(N_DNA+1):(2*N_DNA)];
    int k = xi[1:M,(2*N_DNA+1):(2*N_DNA+N_RNA)];
    int m = xi[1:M,(2*N_DNA+N_RNA+1):2*(N_DNA+N_RNA)];
    int p = alpha[1,1:M];
    int theta = alpha[2,1:M];
    int q = alpha[3,1:M];
    int qi = alpha[4:(N_RNA+2), 1:M]';
    vector[n] l;
    for(j in 1:N_VARIANTS) {
      p[j] ~ betaModeConc(v[j],beta[2]);
      log(theta[j])/beta[3] ~ normal(0,1); // theta~lognormal(0,s)
      l +=-log(theta[j])-log(beta[3]); // Jacobian
      for(i in 1:N_RNA)
         qi[j,i] ~ betaModeConc(q[j],beta[1]);
      for(i in 1:N_DNA)
         a[j,i] ~ binomial(a[j,i]+b[j,i],p[j]);
      for(i in 1:N_RNA)
         k[j,i] ~ binomial(k[j,i]+m[j,i],qi[j,i]);
   }
   return [l]';
  }
}

data {
   int<lower=2> N_VARIANTS;             // number of variants in the model
   real<lower=0,upper=1> v[N_VARIANTS]; // alt allele freq in VCF file
   int<lower=1> N_DNA;                  // number of DNA replicates
   int<lower=0> a[N_VARIANTS,N_DNA];   // DNA alt read counts
   int<lower=0> b[N_VARIANTS,N_DNA];   // DNA ref read counts
   int<lower=1> N_RNA;                  // number of RNA replicates
   int<lower=0> k[N_VARIANTS,N_RNA];   // RNA alt read counts
   int<lower=0> m[N_VARIANTS,N_RNA];   // RNA ref read counts
}


transformed data {
   int N_P = 6;
   int N_THREAD = 2;
   int M = N_VARIANTS/N_THREAD;
   matrix[M, 2*(N_DNA + N_RNA)] xi[2]; 
   real xr[N_THREAD, M];
   for ( i in 1:N_THREAD ) {
    int j = 1 + (i-1)*M;
    int k = i*M;
    xi[i, 1:M, 1:N_DNA] = a[j:k,N_DNA];
    xi[i, 1:M, N_DNA+1:2*N_DNA] = b[j:k,N_DNA];
    xi[i, 1:M, 2*N_DNA+1:2*N_DNA+N_RNA] = k[j:k,N_RNA];
    xi[i, 1:M,2*N_DNA+N_RNA+1:2*(N_DNA + N_RNA)] = m[j:k,N_RNA];
    xr[i] = v[j:k];
   }
}


parameters {
   real<lower=0,upper=1> p[N_VARIANTS]; // alt allele freq in DNA
   real<lower=0,upper=1> qi[N_VARIANTS,N_RNA]; // alt freqs in RNA
   real<lower=0> theta[N_VARIANTS];
   real<lower=2> c1; // concentration parameter of beta prior for qi
   real<lower=2> c2; // concentration parameter of beta prior for p
   real<lower=0> s; // std.dev. parameter of lognormal prior for theta
   vector[3] beta[N_THREAD]; ////Universal Parameter for 2 shards 
   for (i in 1:N_THREAD){
      beta[i, 1] = c1;
      beta[i, 2] = c2;
      beta[i, 3] = s;
   }
   matrix[N_RNA+3, M] alpha[N_THREAD]
   for (i in 1:N_THREAD){
      int j = 1 + (i-1)*M;
      int k = i*M;
      alpha[i, 1, 1:M] = p[j:k];
      alpha[i, 2, 1:M] = theta[j:k];
      alpha[i, 4:(N_RNA+2), 1:M] = qi[j:k,1:N_RNA]';
   }
}


transformed parameters { // ORDER MATTERS!
   real<lower=0,upper=1> q[M]; // alt allele freq in RNA
   for(j in 1:M)
      q[j]=theta[j]*p[j]/(1.0-p[j]+theta[j]*p[j]);
/////////////////////////////////
   for (i in 1:N_THREAD){
      int j = 1 + (i-1)*M;
      int k = i*M;
      alpha[i, 3, 1:M] = q[j:k];
   }
}


model {
   // Parameters
   c1 ~ gamma(1.1, 0.005);
   c2 ~ gamma(1.1, 0.005);
   s ~ gammaModeSD(1,1);
   target += sum(map_rect(f, beta , alpha , xr , xi)));
   }

Any advise would be helpful! Thank you all!

The error points to line 27, which seems to be return [l]'; and those square brackets look strange to me. Try removing them and see if you get any farther.

I tried removing [], but it still reports the same error.

What if you also remove the transposition? The function returns a vector, and l is already a vector.

I removed the transposition and it reports the same error at the same place. I think it does not matter if I transpose the final output or not.

N_DNA & N_VARIANTS are all not defined inside the f. Their definition in transformed parameters does not help you - you have to define them again inside f.

Maybe start there and post again…this is just my very quick look over it.

Your functions.stan defines a functions block, then in your main model file you have another functions block, and this is what confuses the parser. Just put the function definitions inside functions.stan and include that file inside the functions block of your main model file as:

functions {
#include /functions.stan
  vector f(...) {
     <..
  }
}

After you fix that, you’ll see that the parser will point you to other errors such as those that @wds15 pointed out.

@seantalts: does stanc3 report a more helpful message in the case of the opening post?

Should I just redefine them inside the function or I need to pack them into the xi[] and then unpack them. Another problem I just find out is that when I unpack xi[] in the function. I just did int a = a matrix instead of specify the size and dimensions of a.

Yes!!! I just relocate #include /functions.stan inside the functions{} and it starts to report error which @wds15 mentioned! Thank you so much!

@wds15 @mcol
Another problem I have right now is that I actually pack all data into an array of matrix (matrix[M+1, 2*(N_DNA + N_RNA)] xi[N_THREAD])and then input them inside function. Then map_rec function would call f(xi[1]).f(xi[2]). But I need to specify the type of xi in vector f(vector alpha, vector beta , real[] xr , matrix[] xi ){. What type should I define an array of matrix here for xi?

When I define it as matrix vector f(vector alpha, vector beta , real[] xr , matrix [] xi ){
it reports the error that

SYNTAX ERROR, MESSAGE(S) FROM PARSER:
Variable definition base type mismatch, variable declared as base type int variable definition has base type row_vectorVariable definition dimensions mismatch, definition specifies 0, declaration specifies 1 error in 'modelfb696de0da21_multiple_2shards' at line 4, column 26
  -------------------------------------------------
     3:     int M = size(xr);
     4:     int N_DNA = xi[M+1, 1];
                                 ^
     5:     int N_RNA = xi[M+1, 2];
  -------------------------------------------------

Error in rstan::stanc(file = "multiple_2shards.stan") : 
  failed to parse Stan model 'multiple_2shards' due to the above error.

For me that when the map_rec call f(xi[1]), xi[1] should become a matrix and enter the function, and then inside a function I define int N_RNA to be xi[M+1,1] should be fine.

When I definit vector f(vector alpha, vector beta , real[] xr , int[] xi ){, it reports

SYNTAX ERROR, MESSAGE(S) FROM PARSER:
Too many indexes, expression dimensions=1, indexes found=2
 error in 'modelfb692998b5fa_multiple_2shards' at line 4, column 26
  -------------------------------------------------
     3:     int M = size(xr);
     4:     int N_DNA = xi[M+1, 1];
                                 ^
     5:     int N_RNA = xi[M+1, 2];
  -------------------------------------------------