Hi,
I’m trying to code an ordered probit, which estimates the latent variable. (I don’t want to create it in generated quantities, as I want to use the latent variable as predictor for another variable in my model in the HMC process.)
The Stan code is the following
data{
int<lower=2> K; //number of bins
int<lower=0> N;
int<lower=1> D;
int<lower=1,upper=K> Y[N];
row_vector[D] X[N];
}
parameters{
vector[D] beta_1;
vector[N] M_cont; //latent variable
ordered[K+1-3] c; //vector of variable cutoffs (3 cutoffs are fixed for identification)
}
model{
vector[K] theta;
//likelihood
for (n in 1:N) {
real eta;
eta = X[n] * beta_1;
if(Y[n]==1) M_cont[n] ~ normal(eta, 1) T[-1000,0];
if(Y[n]==2) M_cont[n] ~ normal(eta, 1) T[0,c[1]];
if((2<Y[n])&&(K>Y[n])) M_cont[n] ~ normal(eta, 1) T[c[Y[n]-2],c[Y[n]-1]];
if(Y[n]==K) M_cont[n] ~ normal(eta, 1) T[c[K-2],1000];
}
}
My data generating process is as per the example in bayesm package in R, and I initialize the sampler as follows:
#Generating Data
simordprobit=function(X, betas, cutoff){
z = X%*%betas + rnorm(nobs)
y = cut(z, br = cutoff, right=TRUE, include.lowest = TRUE, labels = FALSE)
return(list(y = y, X = X, k=(length(cutoff)-1), betas= betas, cutoff=cutoff ))
}
nobs=10
X=cbind(rep(1,nobs),runif(nobs, min=0, max=5),runif(nobs,min=0, max=5))
k=7
betas=c(0.5, 1, -0.5)
cutoff=c(-100, 0, 1, 2, 3, 4, 5, 100)
simout=simordprobit(X, betas, cutoff)
Data=list(X=simout$X,y=simout$y, k=k)
#estimation
R=2000
M_cont_init=ifelse(Data$y==1, -.3,ifelse(Data$y==2, .05,ifelse(Data$y==3,.15,ifelse(Data$y==4,.25,ifelse(Data$y==5,.35,ifelse(Data$y==6,.45,.55))))))
init_f <- function() { list(beta_1= rep(0,3),c=c(.1,.2,.3,.4,.5),M_cont=M_cont_init); }
stanfit_ordprobit = stan(file="Ordprobit_Aug.stan",
data = list(K = k, N = nobs, D = dim(Data$X)[2], Y = Data$y, X = Data$X),
pars = c("beta_1","c","M_cont"),chains = 1,iter = R,warmup = 100, init = init_f)
However I get the error message:
Rejecting initial value:
** Gradient evaluated at the initial value is not finite.**
** Stan can’t start sampling from this initial value.**
[1] "Error in sampler$call_sampler(args_list[[i]]) : Initialization failed."
I would appreciate if you could help me pull this off without facing gradient problems!
Thank you!