Little question about the error of int ( In a large longitudinal model)

Steve · May 22, 2023, 10:51pm

this is my stan code:

data {
  int<lower=1> I; // Total number of observations
  int<lower=1> Q; // Total numbers of covariates
  int<lower=1> J; // Numbers of repeated measurements in time dimension
  int<lower=1> S; // Dimension of alpha
  int<lower=1> L; // Length of each beta
  int<lower=1> K; // Total number of classes
  int<lower=1, upper=K> g[I]; // Class of each observation
  int<lower=1> nz[I, Q]; // calculate the total count of non-zero elements 
  array[I, Q] vector[J] y;  // observed data
  array[I, Q] matrix[J, S] Z;  // covariates for alpha
  array[I, Q] matrix[J, L] B;  // covariates for beta
  int<lower=0, upper=1> data_index[I, Q, J]; // Indicator for non-missing data
  vector[J] data_index_nonzero[I, Q]; // Nonzero indices for each observation and covariate
}

parameters {
  array[Q] vector[S] alpha;
  array[K, Q] vector[L] beta;
  matrix[Q, Q] Sigma_omega;
  array[I] vector[Q] omega;
  vector<lower=0>[Q] sigma2;
}

model {
  for (q in 1:Q) {
    alpha[q] ~ multi_normal(rep_vector(0, S), diag_matrix(rep_vector(100, S))); // Prior for alpha
    sigma2[q] ~ inv_gamma(1, 1); // Prior for sigma2
  }
  for (k in 1:K) {
    for (q in 1:Q) {
      beta[k, q] ~ multi_normal(rep_vector(0, L), diag_matrix(rep_vector(100, L))); // Prior for beta
    }
  }
  for (i in 1:I) {
    omega[i] ~ multi_normal(rep_vector(0, Q), Sigma_omega); // Prior for omega
  }
  
  for (q in 1:Q) {
    vector[S] alpha_q = alpha[q];
    for (i in 1:I) {
      int g_i = g[i];
      vector[L] beta_giq = beta[g_i, q];
      vector[L] beta_Kq = beta[K, q]; // this is the beta_0 in the formula
      vector[J] data_index_iq = data_index_nonzero[i, q];
      int nz_iq = nz[i, q]; 
      vector[nz_iq] y_iq = y[i, q, data_index_iq];
      matrix[nz_iq, S] Z_iq = Z[i, q, data_index_iq, :];
      matrix[nz_iq, L] B_iq = B[i, q, data_index_iq, :];
      vector[nz_iq] mu;
      
      if (g_i == K) {
        // case when g[i] == K
        mu = Z_iq * alpha_q + B_iq * beta_Kq + rep_vector(omega[i, q], nz);
      } else {
        // case when g[i] != K
        mu = Z_iq * alpha_q + B_iq * beta_giq + B_iq * beta_Kq + rep_vector(omega[i, q], nz);
      }
      
      y_iq ~ normal(mu, sqrt(sigma2[q]));
    }
  }
}

The R code to simulate is:

library(MASS)
library(Matrix)
library("rstan")
rstan_options(auto_write = TRUE)
# Set dimensions
I <- 1000
Q <- 10
J <- 10
S <- 3
L <- 3
K <- 5

# Create storage for data
g <- integer(I)
y <- array(NA, c(I, Q, J))
Z <- array(NA, c(I, Q, J, S))
B <- array(NA, c(I, Q, J, L))
data_index <- array(0, c(I, Q, J))

# Generate data
set.seed(1234) # Set seed for reproducibility
for (i in 1:I) {
  g[i] <- ((i-1) %% K) + 1
  for (q in 1:Q) {
    # Generate Z and B
    Z[i, q, , ] <- MASS::mvrnorm(J, rep(0, S), diag(S))
    B[i, q, , ] <- MASS::mvrnorm(J, rep(0, L), diag(L))
    
    # Generate y
    y_mu <- Z[i, q, , ] %*% runif(S) + B[i, q, , ] %*% runif(L)
    y_sigma <- matrix(runif(J), J, J)
    y_sigma <- nearPD(y_sigma)$mat  # Make y_sigma positive definite
    y[i, q, ] <- MASS::mvrnorm(1, y_mu, y_sigma)
    
    # Generate data_index
    data_index[i, q, ] <- ifelse(y[i, q, ] == 0, 0, 1)
  }
}

# Generate data_index_nonzero
data_index_nonzero <- array(0, c(I, Q, J))
for(i in 1:I) {
  for(q in 1:Q) {
    data_index_nonzero[i, q, ] <- which(data_index[i, q, ] == 1)
  }
}

nz <- matrix(NA, I, Q)
for (i in 1:I) {
  for (q in 1:Q) {
    nz[i, q] <- sum(data_index[i, q, ])
  }
}



# List the data to be passed to Stan
stan_data <- list(I = I, Q = Q, J = J, S = S, L = L, K = K, g = g, y = y, Z = Z, B = B, 
                  data_index = data_index, data_index_nonzero = data_index_nonzero,
                  nz = nz)
fit <- stan(file  = 'D:\\Research\\May\\trajectory\\stancode\\try1\\522.stan',
            data = stan_data, iter = 10000, warmup = 1000, chains = 4)

for last line fit, it reports that:

Semantic error in 'string', line 45, column 35 to column 48:

Index must be of type int or int[] or must be a range. Instead found type vector.

The line 45 is :
vector[J] data_index_iq = data_index_nonzero[i, q];
or maybe this line:
int nz_iq = nz[i, q];

I really appreciate that if someone could help!

kholsinger · May 23, 2023, 3:21pm

I think the problem is a bit lower down. You use data_index_iq as an index in the lines following int nz_iq = nz[i, q].

From Section 5.3 of the Stan Reference manual:

Vectors are intrinsically one-dimensional collections of real or complex values (emphasis added)

I think the problem arises because the indices need to be integers when Stan is treating them as a real, because data_index_iq is defined as a vector.

Steve · May 23, 2023, 3:40pm

Really appreciate it!

Topic		Replies	Views
Initialization error Modeling	5	2463	June 11, 2017
Partial sum error Modeling rstan	2	24	August 28, 2024
A little question about the Identifier error General rstan , specification	2	735	June 12, 2023
Mixed effects model Modeling	4	1038	May 29, 2017
Errors when doing compilation in RStan model with integration Modeling rstan , techniques , fitting-issues	9	739	July 23, 2020

Little question about the error of int ( In a large longitudinal model)

Related topics