Is defining a large number of transformed parameters better than using local variables in the model block?

I am attempting to simulate a model where users search products and, as a result, learn (update expectations) about other products on their way to making a choice. To model this learning process, I assume that the value of an option is a Gaussian process, and users update their prior on the mean and variance of the process as they go. Let's say there are N users, T_max periods and J products per period.
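
Concretely, each time a user searches an item s and observes its utility u (with noise variance sig_e), the prior over every remaining item j is conditioned via the standard rank-one Gaussian-process update

\mu_j' = \mu_j + K_{js}\,(u - \mu_s) / (K_{ss} + \sigma_e)
K_{j_1 j_2}' = K_{j_1 j_2} - K_{j_1 s}\,K_{s j_2} / (K_{ss} + \sigma_e)

which is what the inner loops at the end of the model block below implement.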

The specifics are not important, but the key issue is that for every draw of my parameters, my model block defines and updates many (N) large (T_max*J by T_max*J) covariance matrices over and over again (as many as T_max*J times). Technically, all of this updating could be done in the transformed parameters block, since the updated matrices themselves are not being fit against data. However, that would mean defining, and therefore outputting, a very large number of objects I have no interest in.

Would I gain any efficiency by defining all of these as transformed parameters instead of as local variables within my model block? Apologies for the length and messiness of the code.

data {
  
  int <lower=1> N;
  int <lower=1> T_max;
  int <lower=1> J;
  
  int K1;
  int K2;
  
  matrix[T_max*J,K1+K2] X[N];
  int Searches[N,T_max*J];
  int Purchase[N*T_max];
  vector[T_max*J] Position[N];
  
  int S_i[N];
  int S_it[N*T_max];
  matrix[sum(S_i),K1+K2] X_search;

  
  }
  
transformed data {

  int K=K1+K2;
  int Purchase_out[N*T_max];
  for (nt in 1:N*T_max){Purchase_out[nt]=Purchase[nt]+1;}
} 

parameters{

  vector[sum(S_i)] U;
  
  real alpha;
  // real alpha_i[N];
  // real<lower=0.001,upper=2> w_alpha; 
  vector[K] beta;
  // vector[K] beta_i[N];
  // vector<lower=0.001,upper=2>[K] w;
  
  real c;
  // real c_i[N];
  // real<lower=0.001,upper=2> w_c;
  
  real<lower=0.001,upper=10> lambda;
  vector<lower=0.001,upper=20>[K] rho;
  real<lower=0.001,upper=2> sig_e;
  
  real delta_p;
  real<lower=0.001,upper=20> sig_p;

}  
  
model{

  int pos=1;

  ////////////////////// PRIORS //////////////////////

  alpha ~ normal(5,1);
  c ~ normal(0.2,1);
  delta_p ~ normal(2,1);
  
  // w_alpha ~ uniform(0.001,2);
  // w_c ~ uniform(0.001,2);
  sig_e ~ uniform(0.001,2);
  lambda ~ uniform(0.001,10);
  sig_p ~ uniform(0.001,20);
  
  for (k in 1:K){
  //  beta[k] ~ normal(0,4);
  //  w[k] ~ uniform(0.001,2);
    rho[k] ~ uniform(0.001,20);
  }

  for (i in 1:N){
  
  //  alpha_i[i] ~ normal(alpha,w_alpha);
  //  for (k in 1:K1){
  //    beta_i[i][k] ~ normal(beta[k],w[k]);
  //  c_i[i] ~ normal(c,w_c) ;
    
  //  }
    
    ////////////////////// UTILITY DIST OF SEARCHED ITEMS //////////////////////
  
    int s_i=S_i[i]; // Number of searches for user
  
    matrix[s_i,K] X_si=block(X_search,pos,1,s_i,K);  // Data on searches for user
    
    matrix[s_i, s_i] Kappa_s_p[2];                    //Cont and Cat cov matrices for searches, combine later
    matrix[s_i, s_i] Kappa_s;                             // Combination of parts for searches
    vector[s_i] mu_s=alpha+X_si*beta;            //alpha_i[i]  beta_i[i];   // Expected value for user searches
    
    
    Kappa_s_p[2]=rep_matrix(1,s_i, s_i);              //Initialize Cat part as 1's instead of lambdas
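    // The loop below fills Kappa_s_p[1] with an ARD squared-exponential kernel on the continuous covariates (plus sig_e noise on the diagonal) and Kappa_s_p[2] with a matching kernel on the categorical covariates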
    
    for (s1 in 1:(s_i-1)){
    
      Kappa_s_p[1][s1,s1] = lambda+sig_e;
      
      for (s2 in (s1+1):s_i) {
      
        Kappa_s_p[1][s1, s2] = lambda*exp(-0.5*dot_product(square(X_si[s1][1:K1] - X_si[s2][1:K1]), inv(rho[1:K1])));
        Kappa_s_p[1][s2, s1] = Kappa_s_p[1][s1, s2];
        
        for (k in (K1+1):K){
          Kappa_s_p[2][s1, s2]=Kappa_s_p[2][s1, s2]*exp(-0.5 *int_step(abs(X_si[s1][k] - X_si[s2][k]))/rho[k]);
        }
        
        Kappa_s_p[2][s2, s1] = Kappa_s_p[2][s1, s2];
      }
    }
    Kappa_s_p[1][s_i, s_i] = lambda+sig_e;
    Kappa_s=Kappa_s_p[1].*Kappa_s_p[2];        //Combining by multiplication
    
    
    U[pos:(pos+s_i-1)] ~ multi_normal(mu_s,Kappa_s);   // Draws of searched utilities

    ////////////////////// INITIAL KAPPA OF ALL ITEMS //////////////////////
    
    matrix[T_max*J, T_max*J] Kappa_cur_p[2]; 
    vector[T_max*J] mu_cur=alpha+X[i]*beta;         // alpha_i[i] beta_i[i];
    matrix[T_max*J,T_max*J] Kappa_cur;
    
    Kappa_cur_p[1]=rep_matrix(0,T_max*J, T_max*J);
    Kappa_cur_p[2]=rep_matrix(1,T_max*J, T_max*J);              //Initialize Cat part as 1's instead of lambdas
    
    for (j1 in 1:(T_max*J-1)){
    
      Kappa_cur_p[1][j1,j1] = lambda;
      
      if (Searches[i,j1]==0){continue;}
      
      for (j2 in (j1+1):(T_max*J)) {
        
        Kappa_cur_p[1][j1,j2] = lambda*exp(-0.5*dot_product(square(X[i][j1][1:K1] - X[i][j2][1:K1]), inv(rho[1:K1])));
        Kappa_cur_p[1][j2,j1] = Kappa_cur_p[1][j1,j2];
        
        for (k in (K1+1):K){
        Kappa_cur_p[2][j1,j2]=Kappa_cur_p[2][j1,j2]*exp(-0.5*int_step(abs(X[i][j1][k] - X[i][j2][k]))/rho[k]);
        }
        Kappa_cur_p[2][j2,j1] = Kappa_cur_p[2][j1,j2];
      }
    }
    Kappa_cur_p[1][T_max*J,T_max*J] = lambda;
    Kappa_cur=Kappa_cur_p[1].*Kappa_cur_p[2];        //Combining by multiplication
    
    int pos_t=1;
    
    for (t in 1:T_max){
    
    ////////////////////// PROBABILITY OF PURCHASES //////////////////////
  
      int s_it=S_it[(i-1)*T_max+t];
      
      vector[s_it] U_it=delta_p+U[(pos+(pos_t-1)):(pos+(pos_t-1)+(s_it-1))]*sig_p;
      
      vector[s_it+1] opt=append_row(0,U_it);
      
      Purchase_out[(i-1)*T_max+t] ~ categorical_logit(opt);
    
      
    ////////////////////// PROBABILITY OF SEARCHES //////////////////////
      
      real z_star=1;
      real u_hat=0;
      int count=0;
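      // Step through this period's observed search sequence: at each step, score the items not yet searched and model the observed search/stop decision as a categorical logit (category 1 = stop searching)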
      
      while (count<=s_it){
          
        count=count+1;
        
        if (count>J){break;}
        
        int j_S=J*(t-1)+count;
        
        vector[J-(count-1)] s_var = diagonal(Kappa_cur)[(j_S):(J*t)]+rep_vector(sig_e,J-(count-1));      // Total variance of playlist
        vector[J-(count-1)] a = (rep_vector(u_hat,J-(count-1))-mu_cur[(j_S):(J*t)])./(sqrt(s_var));      // Normalized current utility
        
        vector[J-(count-1)] z1 = Phi(a)*u_hat+(1-Phi(a)).*mu_cur[(j_S):(J*t)];               // First component of score
        vector[J-(count-1)] z2 = exp(-0.5*square(a)) .* sqrt(s_var) / sqrt(2*pi());         // Second component of score: standard normal density at a, times the sd
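        // z1+z2 is the closed form of E[max(u_hat, U_j)] for U_j ~ normal(mu_cur[j], sqrt(s_var[j]))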
        
        vector[J-(count-1)] z=z1+z2-c*Position[i,(j_S):(J*t)];    // c_i[i] // Combining into final score.
        vector[J-(count-1)+1] z_cur=append_row(0,z);
        
        int search;
        if (Searches[i,j_S]>0){search=Searches[i,j_S]-(count-1)+1;}
        else {search=1;}
    
        search ~ categorical_logit(z_cur);

        if (count>s_it){break;}
        
        int j_star=(pos-1)+(pos_t-1)+count;
        
        if (U[j_star]>u_hat){u_hat=U[j_star];}   // Update current utility
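        // Rank-one GP update of the posterior mean, conditioning on the searched utility U[j_star] observed with noise variance sig_e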
        
        vector[T_max*J] mu_new;
              
        for (j1 in (j_S):(T_max*J)){
          mu_new[j1]=mu_cur[j1]+Kappa_cur[j1,j_S]*(U[j_star]-mu_cur[j_S])/(Kappa_cur[j_S,j_S]+sig_e);
        }
        
        mu_cur=mu_new;
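        // Matching rank-one update of the posterior covariance; off-diagonal rows/columns are only propagated for items that get searched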
        
        matrix[T_max*J,T_max*J] Kappa_new = rep_matrix(0,T_max*J, T_max*J);
        
        for (j1 in j_S:(T_max*J-1)){
          Kappa_new[j1,j1]=Kappa_cur[j1,j1]-(Kappa_cur[j1,j_S]*Kappa_cur[j_S,j1])/(Kappa_cur[j_S,j_S]+sig_e);
          if (Searches[i,j1]>0){
            for (j2 in (j1+1):(T_max*J)){
                Kappa_new[j1,j2]=Kappa_cur[j1,j2]-(Kappa_cur[j1,j_S]*Kappa_cur[j_S,j2])/(Kappa_cur[j_S,j_S]+sig_e);
                Kappa_new[j2,j1]=Kappa_new[j1,j2];
            }
          }
        }
        Kappa_new[T_max*J,T_max*J]=Kappa_cur[T_max*J,T_max*J]-(Kappa_cur[T_max*J,j_S]*Kappa_cur[j_S,T_max*J])/(Kappa_cur[j_S,j_S]+sig_e);
        
        Kappa_cur=Kappa_new;
        z_star=u_hat+1;
      }
      pos_t=pos_t+s_it;
    }
    pos = pos+s_i;
  }
} 

I haven’t gone through all the code, but in this case the answer doesn’t depend on your particular code. The same computations would be performed under the hood either way; the only difference between the model block and transformed parameters is whether the variables are saved or not. So I think you’re right to keep them in the model block if you don’t want them in the output.
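
As a minimal illustration (a toy program, not your model; tau and Sigma are hypothetical names), these two versions evaluate exactly the same log density, and the only difference is that the first writes Sigma to the output at every iteration:

parameters {
  real<lower=0> tau;
}
transformed parameters {
  matrix[2,2] Sigma = diag_matrix(rep_vector(tau,2));   // computed and saved
}
model {
  tau ~ normal(0,1);
}

parameters {
  real<lower=0> tau;
}
model {
  matrix[2,2] Sigma = diag_matrix(rep_vector(tau,2));   // computed, never saved
  tau ~ normal(0,1);
}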

If you want help looking for potential speedups to your model feel free to open another topic about specific parts of your code.
