Hi! I am trying to use reduce_sum
to speed up my model. In my case N = 50181
, but I got a error message IndexError("Exception: Exception: array[uni, ...] index: accessing element out of range. index 31363 out of range; expecting index to be between 1 and 24 (in '/scratch/httpstan_8i6uk942/model_c5ende56.stan', line 34, column 6 to column 43)
. I guess this is from my X_slice but I can not see where 1 and 24
comes from. Should I specify the start
and end
somewhere? Thanks you for your help in advance!
functions {
int mapping(int D, int S, int U){
int I = (S - 1)*16 + (D - 1)*4 + U;
return I;
}
real partial_sum_log_term(int[] X_slice,
int start, int end,
matrix theta,
vector[,,,] u,
vector[,,,] d,
vector[] r,
int K,
int T,
int[] doc,
int[,] D_array,
int[] S_array,
int[,] U_array,
int[] I_array
){
real base_rate;
real mutation_rate = 0;
real log_term = 0;
print("start is ", start);
print("end is ", end);
for (n in start:end){
for (k in 1:K){
base_rate = 1;
for (t in 1:T){
base_rate *= d[t, k, D_array[n, t], S_array[n], D_array[n, t+1]]*u[t, k, U_array[n, t], S_array[n], U_array[n, t+1]];
}
mutation_rate += theta[k, doc[n]] * r[k, I_array[n]] * base_rate;
}
log_term += X_slice[n]*mutation_rate;
}
return log_term;
}
}
data {
int<lower = 1> K; //number of sigs
int<lower = 2> C; //number of pairs of flanking bases excluding single-based substitutions
int<lower = 1> N; //Total number of non-zero entries
int<lower = 1> doc[N]; //sample ID
int<lower = 1> X[N]; // mutational counts for each substition in each sample
int<lower = 1, upper = 4> D_array[N, C]; //downstream index array starting from d_0
int<lower = 1, upper = 6> S_array[N]; // single-base substitution array
int<lower = 1, upper = 4> U_array[N, C]; //upstream index array starting from u_0
real<lower = 0> alpha; // hyperparam for signature rates
real<lower = 0> beta; //hyperparam for up/downstream bases rates
real<lower = 0> gamma0; // hyperparam for shape parameters
real<lower = 0> gamma1; // hyperparam for shape parameters
real<lower = 0> delta0; // hyperparam for mean loadings
real<lower = 0> delta1; // hyperparam for mean loadings
}
transformed data{
int<lower = 1> grainsize = 1;
vector<lower = 0>[96] alpha_array = rep_vector(alpha, 96); //Dirichlet params for signatures
vector<lower = 0>[4] beta_array = rep_vector(beta, 4);
int<lower = 1> J = max(doc); // Total number of samples
int<lower = 1> T = C - 1; //number of pairs of flanking bases excluding d_0 and u_0
int<lower = 1, upper = 96> I_array[N]; //tri-nucleotide index array
for (n in 1:N) {
I_array[n] = mapping(D_array[n, 1], S_array[n], U_array[n, 1]);
}
}
parameters {
vector<lower=0>[K] nu; //inferred shaped parameters for loadings
vector<lower=0>[K] mu; // inferred mean loadings
matrix<lower=0>[K, J] theta; //inferred loadings
simplex[4] u[T, K, 4, 6]; //inferred upstream bases
simplex[4] d[T, K, 4, 6]; //inferred downstream bases
simplex[96] r[K]; //inferred sigs K by I
}
model {
for (k in 1:K) {
nu[k] ~ inv_gamma(gamma0, gamma1);
mu[k] ~ inv_gamma(delta0, delta1);
r[k] ~ dirichlet(alpha_array);
}
for (j in 1:J){
for (k in 1:K){
theta[k, j] ~ gamma(nu[k], nu[k]/mu[k]);
}
}
for (t in 1:T)
for (k in 1:K)
for (u0 in 1:4)
for (s in 1:6){
d[t, k, u0, s] ~ dirichlet(beta_array);
u[t, k, u0, s] ~ dirichlet(beta_array);
}
target += reduce_sum(partial_sum_log_term, X, grainsize, theta, u, d, r, K, T, doc, D_array, S_array, U_array, I_array);
target += -sum(theta);
}