Dear All stan user:
I am a Stan novice, and have trouble in recovering the model parameters. Specifically, when using my model (a class of mixture hierarchical model) to fit the simulation data, two estimated parameters (phi and gamma parameters) are way off from the true parameters, but other model parameters can successfully be recovered. I have spent a couple of weeks working on this with no success. Could you please give me some suggestions, I am very grateful.
Simulated data
set.seed(123)
library("MASS")
library("rstan")
library("rstudioapi")
rstan_options(javascript=FALSE)
rstan_options(auto_write=TRUE)
pacman::p_load(rstan,loo)
options(mc.cores=parallel::detectCores())
K = 30 # number of items
N = 500 # number of examinees
### Person parameters simulation
Cov <- matrix(c(1,-0.6,0,-0.6,1,0.25,0,0.25,0.25),3,3)
ture_person<-mvrnorm(N,rep(0,3),Cov)
ture_phi <- ture_person[,1]
true_theta <- ture_person[,2]
ture_tua <- ture_person[,3]
### Item parameters simulation
ture_gamma <- rnorm(K,2.25,1)
ture_slip <- runif(K,0,0.1)
ture_b <- rnorm(K,0,1)
true_a <- runif(K,1,2.5)
ture_beta <- runif(K,-0.2,0.2)
ture_alpha <- 1/runif(K,1.5,2.5)
ture_beta_c <- -1
ture_alpha_c <- sqrt(0.1)
### Simulating responses data and responses time
responses<-matrix(data=NA,nrow=N,ncol=K)
logRT <- matrix(data=NA,nrow=N,ncol=K)
for(i in 1:N){
for(j in 1:K){
pDelta<- 1/(1+exp(-ture_phi[i]+ture_gamma[j]))
pCorre<- 1/(1+exp(-1.7*true_a[j]*(true_theta[i]-ture_b[j]))) # item responses model: 2PL model
p_sum<-(1-pDelta)*pCorre+pDelta*(1-ture_slip[j])
random_numer<-runif(1,min=0,max=1)
if(random_numer<=p_sum){
responses[i,j] = 1 # responses
}else{
responses[i,j] = 0} # responses
log_norm.time <- rnorm(1,ture_beta[j]-ture_tua[i],ture_alpha[j]) # lognormal model for response times
log_cheat.time <- rnorm(1,ture_beta_c,ture_alpha_c)
logRT[i,j] = (1-pDelta)*log_norm.time + pDelta*log_cheat.time # logRT
}
}
y = as.vector(responses)
logRT = as.vector(logRT)
D = 3
Ntot = K*N
ii = rep(1,K)%x%c(1:N)
jj = c(1:K)%x%rep(1,N)
data_list = list(D=D,N=N,K=K,Ntot=Ntot,jj=jj,ii=ii,y=y,logRT=logRT)
fit<-stan(file = "H_ICP.stan", data=data_list,iter=5000,chains=3,
warmup = 2500,cores = 4)
Stan code
data{
int<lower = 1> D; // number of dimensions of person parameters
int<lower = 1> N; // number of examinees
int<lower = 1> K; // number of items
int<lower = 1> Ntot; // number of data points
int<lower = 1> jj[Ntot]; // item id
int<lower = 1> ii[Ntot]; // person id
int<lower = 0, upper = 1> y[Ntot]; // responses
real logRT[Ntot]; // log response times
}
parameters{
matrix[D,N] z_trait;
corr_matrix[D] cor_P;
vector<lower=0>[D-1] sigma_phi_tua;
vector[K] gamma;
vector<lower=0,upper=1>[K] slip;
vector<lower=0>[K] a;
vector[K] b;
vector<lower=0>[K] diff_beta;
vector<lower=0>[K] alpha;
real beta_c;
real<lower=0> alpha_c;
}
transformed parameters{
cov_matrix[D] var_P;
vector[D] sigma_P;
matrix[D,D] L;
matrix[N,D] PersPar; // PersPar[,1]=phi, PersPar[,2]=theta, PersPar[,3]=tua,
vector[K] beta;
sigma_P[2]=1;
sigma_P[1]=sigma_phi_tua[1];
sigma_P[3]=sigma_phi_tua[2];
var_P=quad_form_diag(cor_P,sigma_P);
L = cholesky_decompose(var_P);
PersPar = (L*z_trait)';
for(j in 1:K) beta[j] = beta_c+diff_beta[j];
}
model{
// prior person parameter
cor_P ~ lkj_corr(1);
sigma_phi_tua ~ cauchy(0,2.5);
to_vector(z_trait)~std_normal();
// prior item parameter
gamma ~ normal(0,5);
slip~ beta(1,10);
a ~ cauchy(0,2.5);
b ~ normal(0,5);
diff_beta ~ normal(0,5);
beta_c ~ normal(0,5);
alpha ~ cauchy(0,2.5);
alpha_c ~ cauchy(0,2.5);
// target distribution
for (n in 1:Ntot) {
real pDelta;
real pCorre;
pDelta = 1/(1+exp(-PersPar[ii[n],1]+gamma[jj[n]]));
pCorre = 1/(1+exp(-1.7*a[jj[n]]*(PersPar[ii[n],2]-b[jj[n]])));
target += log_sum_exp(log1m(pDelta)+bernoulli_lpmf(y[n]|pCorre)+
normal_lpdf(logRT[n]|(beta[jj[n]]-PersPar[ii[n],3]),alpha[jj[n]]),
log(pDelta)+bernoulli_lpmf(y[n]|(1-slip[jj[n]]))+
normal_lpdf(logRT[n]|beta_c,alpha_c));
}
}