I defined the data for Stan as below:
data_csv<-read.csv("generice.csv",header=TRUE,sep=",")
data_csv<-na.omit(data_csv)
offset <- 2 # column number we ignore to start with in data.frame
imo <- as.integer(27)
jmo <- array(NA, imo)
for(i in 1:imo){
jmo[i] <- max(data_csv[,i + offset]) ## i+1 as the first column is idno
}
nth <- as.integer(6)
gen_con_simo <- function(length){
return(rep(c(2), length))
}
######
data <-list (
N = nrow(data_csv),
Y = as.array(data_csv[, "e4"]),
nthres = nth,
K = imo +1,
X = as.matrix(data_csv[, (1 + offset):(imo + offset + 1)], rownames.force =
NA),
Ksp = imo,
Imo = imo,
Jmo = jmo,
disc =as.integer(1),
prior_only = 1
)
for(i in 1:imo) {
ind <- i + offset
data[[paste("Xmo_", as.character(i), sep = '')]] = array(data_csv[, ind])
data[[paste("con_simo_", as.character(i), sep = '')]] = gen_con_simo(jmo[i])
}
functions {
/* cumulative-logit log-PDF for a single response
* Args:
* y: response category
* mu: latent mean parameter
* disc: discrimination parameter
* thres: ordinal thresholds
* Returns:
* a scalar to be added to the log posterior
*/
real cumulative_logit_lpmf(int y, real mu, real disc, vector thres) {
int nthres = num_elements(thres);
real p;
if (y == 1) {
p = inv_logit(disc * (thres[1] - mu));
} else if (y == nthres + 1) {
p = 1 - inv_logit(disc * (thres[nthres] - mu));
} else {
p = inv_logit(disc * (thres[y] - mu)) -
inv_logit(disc * (thres[y - 1] - mu));
}
return log(p);
}
/* cumulative-logit log-PDF for a single response and merged thresholds
* Args:
* y: response category
* mu: latent mean parameter
* disc: discrimination parameter
* thres: vector of merged ordinal thresholds
* j: start and end index for the applid threshold within 'thres'
* Returns:
* a scalar to be added to the log posterior
*/
real cumulative_logit_merged_lpmf(int y, real mu, real disc, vector thres, int[] j) {
return cumulative_logit_lpmf(y | mu, disc, thres[j[1]:j[2]]);
}
/* compute monotonic effects
* Args:
* scale: a simplex parameter
* i: index to sum over the simplex
* Returns:
* a scalar between 0 and 1
*/
real mo(vector scale, int i) {
if (i == 0) {
return 0;
} else {
return rows(scale) * sum(scale[1:i]);
}
}
}
data {
int a;
int b[1];
int<lower=1> N; // number of observations
int Y[N]; // response variable
int<lower=2> nthres; // number of thresholds
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> Ksp; // number of special effects terms
int<lower=1> Imo; // number of monotonic variables
int<lower=1> Jmo[Imo]; // length of simplexes
// monotonic variables
int Xmo_1[N];
int Xmo_2[N];
int Xmo_3[N];
int Xmo_4[N];
int Xmo_5[N];
int Xmo_6[N];
// prior concentration of monotonic simplexes
vector[Jmo[1]] con_simo_1;
vector[Jmo[2]] con_simo_2;
vector[Jmo[3]] con_simo_3;
vector[Jmo[4]] con_simo_4;
vector[Jmo[5]] con_simo_5;
vector[Jmo[6]] con_simo_6;
real<lower=0> disc; // discrimination parameters
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc = K;
matrix[N, Kc] Xc; // centered version of X
vector[Kc] means_X; // column means of X before centering
for (i in 1:K) {
means_X[i] = mean(X[, i]);
Xc[, i] = X[, i] - means_X[i];
}
}
parameters {
vector[Kc] b; // population-level effects
ordered[nthres] Intercept; // temporary thresholds for centered predictors
vector[Ksp] bsp; // special effects coefficients
// simplexes of monotonic effects
simplex[Jmo[1]] simo_1;
simplex[Jmo[2]] simo_2;
simplex[Jmo[3]] simo_3;
simplex[Jmo[4]] simo_4;
simplex[Jmo[5]] simo_5;
simplex[Jmo[6]] simo_6;
}
transformed parameters {
}
model {
// initialize linear predictor term
vector[N] mu = Xc * b;
for (n in 1:N) {
// add more terms to the linear predictor
mu[n] += (bsp[1]) * mo(simo_1, Xmo_1[n]) + (bsp[2]) * mo(simo_2, Xmo_2[n]) + (bsp[3]) * mo(simo_3, Xmo_3[n]) + (bsp[4]) * mo(simo_4, Xmo_4[n]) + (bsp[5]) * mo(simo_5, Xmo_5[n]) + (bsp[6]) * mo(simo_6, Xmo_6[n]));
}
// priors including all constants
target += normal_lpdf(b | 0,1);
target += normal_lpdf(Intercept | 0,10);
target += normal_lpdf(bsp | 0,1);
target += dirichlet_lpdf(simo_1 | con_simo_1);
target += dirichlet_lpdf(simo_2 | con_simo_2);
target += dirichlet_lpdf(simo_3 | con_simo_3);
target += dirichlet_lpdf(simo_4 | con_simo_4);
target += dirichlet_lpdf(simo_5 | con_simo_5);
target += dirichlet_lpdf(simo_6 | con_simo_6);
// likelihood including all constants
if (!prior_only) {
for (n in 1:N) {
target += ordered_logistic_lpmf(Y[n] | mu[n], Intercept);
}
}
}
generated quantities {
// compute actual thresholds
vector[nthres] b_Intercept = Intercept + dot_product(means_X, b);
}
I ran the ordinal regression in brms and tried to convert the syntax to stan to apply time-series autoregressive with 4 time points.
Here are the syntax I used without time-series.
Thanks,
Shabnam