Using 2D Gaussian process predictions within model

hi all,

I’m still stuck on this so any insights would be helpful (tagging a few people: @mike-lawrence @avehtari, any one else?). Besides running slowly, the above model can’t recover the parameters, particularly the length-scale, at all. I tried re-writing it based on Hierarchical spatial GP with uneven sampling coordinates to first calculate all values and then fit to the values of the observed tiles, see below, but this version runs even slower.

data {
  int<lower=1> nS; //number of participants
  int<lower=1> nB; //number of blocks per participant
  int<lower=1> nT; //total possible trials per block
  int<lower=1> nL; //number of tiles in grid row/column (assume square)
  
  int<lower=1> b_length [nS,nB]; //trials per block per participant
  int<lower=0,upper=1> block_type [nS,nB]; //0=safe,1=risky
  
  int selected_x [nS,nB,nT];
  int selected_y [nS,nB,nT];
  int<lower=0,upper=nL*nL> selected_row [nS,nB,nT]; //X+Y coord of selected tile
  //calculate as (X-1)*nL+Y
  real points [nS,nB,nT]; //outcome of each choice
  
  int all_x[nL*nL]; //X coordinates of all tiles
  int all_y[nL*nL]; //Y coordinates of all tiles
}

transformed data {
  real delta = 1e-9;
  real sigma = 0.01; //assume variance
  
  //transformed coordinates
  row_vector[2] all_coords[nL*nL];
  for (ll in 1:(nL*nL)) {
     all_coords[ll,1] = all_x[ll];
     all_coords[ll,2] = all_y[ll];
  }
}

parameters {
  real<lower=0> length_scale; //aka rho or generalization parameter
  // real<lower=0> sigma; //variance
  real<lower=0> beta; //inverse temperature for choice function
 
  vector [nL*nL] eta; //latent variable GP
}
  
model {
  
  //priors
  
  length_scale ~ inv_gamma(5,5);
  // sigma ~ std_normal();
  beta ~ normal(0,10);
  
 eta ~ std_normal();
  
  //loop through subjects, blocks, & trials
  for (s in 1:nS) {
    for (b in 1:nB) {
      if (block_type[s,b]==0) { //only calculate safe blocks for now to help with debugging
        
        //first set up GP-inferred values for all tiles in grid
        matrix[nL*nL,nL*nL] K_all;
        matrix[nL*nL,nL*nL] L_all;
        vector[nL*nL] f_all;
        K_all = cov_exp_quad(all_coords,1,length_scale);
        K_all = add_diag(K_all, 1e-9);
        L_all = cholesky_decompose(K_all);
        f_all = L_all * eta;
        
        for (t in 1:(b_length[s,b])) { //loop through trials in block
          
          //predict points for current trial from GP-inferred values
          points[s,b,t] ~ normal(f_all[selected_row[s,b,t]],sigma);
        
          //softmax predicting next tile chosen
          if (t<b_length[s,b]) {
            selected_row[s,b,t+1] ~ categorical_logit(beta*f_all); 
          }
          
        }
      }
    }
  }
}