Edit: I’ve heavily reworked the model and cleared all error messages, but I’m still getting back NaN values from MatlabStan and empty .csv files. I’m updating the code below in this edit
Hey all,
I am really new at Stan and have taken on the challenging project of converting an MLE-specified model into a Bayesian hierarchical model. Debugging had been going smoothly, with my code eventually compiling, and with it throwing exceptions when indices or dimensions of the data were off, but now I’ve come up against the wall of this error:
output-1.csv: Initialization failed.
output-2.csv: Initialization failed.
output-3.csv: Initialization failed.
output-4.csv: Initialization failed.
I don’t have the expertise to recognize obviously wrong code, and, for all I know, I’ve really mucked things up. But, hopefully, this is a series of relatively minor fixes in my syntax or model specification. I’m including my code below. The logic of the code is: the log likelihood model is specified with the parameters and stimuli the participant was presented with, each of these parameters having a prior and a hyperprior. The only participant data is the response matrix.
My version of Stan is 2.26.1, and I’m interfacing through MatlabStan-2.15.1.0
I’ve checked the data and it is free of NaNs, and the length of the orientation vector matches the length of the response matrix.
I appreciate any help or advice.
functions {
//custom function for lognormal inverse CDF
row_vector logncdfinv(row_vector x, real m, real s) {
int sz; //declaring size variable
sz = size(x); //calculating size variable
row_vector[sz] y; //declaring return variable
y = exp( s .* sqrt(2) .* inv_erfc( -2 .* x + 2 ) + m ); //calculating result
return y; //returning result
}
//likelihood model for CASANDRE
real casandre_log(matrix z, real j, real k, real l, real m, real n, vector o, int p) {
vector[p] sm; //declaring the rescaled sensitivity vector
real sc; //declaring the rescaled confidence criterion variable
sm = k.*o; //calculating the rescaled stimulus sensivity vector (stimulus sensitivity * orientation vector)
sc = k*n; //calculating the rescaled confidence criterion variable (stimulus sensitivity * confidence criterion)
row_vector[100] xtrans; //declaring the row vector for the transformed x-axis
xtrans = logncdfinv( linspaced_row_vector(100,.5/100,1-.5/100),
log( 1 / sqrt( m^2 + 1 ) ),
sqrt( log( m^2 + 1 ) ) ); //calculating the lognormal inverse cdf for 100 equally spaced x-axis intervals
matrix[p,4] llhC; //declaring the matrix of choice likelihoods
for (tr in 1:p){ //for loop through every trial for this subject
row_vector[3] confs; //declaring the confidence level vector
confs = [-n, 0, n]; //defining the confidence level vector
vector[100] avgs; //declaring the mu vector
avgs = (1./xtrans').*(sm[tr]-sc); //calculating the mu vector
vector[100] sds; //declaring the sigma vector
sds = 1./xtrans'; //calculating the sigma vector
matrix[100,3] raws; //declaring the normal cdf matrix of the transformed data
for (cls in 1:3) { //for loop through each confidence criterion (i.e. output column)
for (rws in 1:100) { //for loop through each sampled normal distribution (i.e. output row)
raws[rws,cls] = normal_cdf(confs[cls],avgs[rws],sds[rws]); //calculating the normal cdf matrix of the transformed data
}
}
vector[100] raw1; //declaring vectors for the rows of the normal cdf matrix
vector[100] raw2;
vector[100] raw3;
raw1 = raws[:,1]; //extracting vectors for the rows of the normal cdf matrix
raw2 = raws[:,2];
raw3 = raws[:,3];
real avg1; //declaring the variables for the row averages
real avg2;
real avg3;
avg1 = mean(raw1); //calculating the row averages
avg2 = mean(raw2);
avg3 = mean(raw3);
vector[3] ratiodist; //declaring the vector for the means of the lognormally scaled normal distributions
ratiodist[1] = avg1; //collecting the means into the vector
ratiodist[2] = avg2;
ratiodist[3] = avg3;
llhC[tr,1] = ((j/4) + ((1-j)*ratiodist[1])); //likelihood of high confidence clockwise judgment
llhC[tr,2] = ((j/4) + ((1-j)*(ratiodist[2]-ratiodist[1]))); //likelihood of low condidence clockwise judgment
llhC[tr,3] = ((j/4) + ((1-j)*(ratiodist[3]-ratiodist[2]))); //likelihood of low confidence counter-clockwise judgment
llhC[tr,4] = ((j/4) + ((1-j)*(1-ratiodist[3]))); //likelihood of high confidence counter-clockwise judgment
}
real ll; //declare log likelihood sum for these choices, given these orientations, and these parameter choices
ll = sum(z.*log(llhC)); //calculate the log likelihood sum for these choices, given these orientations, and these parameter choices
return ll; //return the log likelihood value
}
}
data {
int ns; //number of subjects
int nt; //number of trials
matrix[4,ns*nt] respslong; //responses for all subjects as one long matrix
row_vector[ns*nt] orislong; //orientations for all subjects as one long vector
}
transformed data {
matrix[ns*nt,4] respstall; //declare variable for tall matrix of responses
vector[ns*nt] oristall; //declare variable for tall vector of orientations
int nth; //declare index for breaking up subjects into separate matrices
respstall = respslong'; //make tall matrix of responses
oristall = orislong'; //make tall vector of vectors
array[ns] matrix[nt,4] resps; //declare final data array for responses
array[ns] vector[nt] oris; //declare final data array for orientations
nth = 1; //begin index
for (sub in 1:ns) { //for loop to build final data arrays
resps[sub] = respstall[nth:(nth+nt-1),1:4]; //build each matrix of responses for final data array
oris[sub] = oristall[nth:(nth+nt-1)]; //build each vector of orientations for final data array
nth = nth + nt; //increment the indices
}
}
parameters {
//Parameters
vector<lower=0,upper=1>[ns] guess; //guess rate
vector<lower=0>[ns] sens; //stimulus sensitivity
vector[ns] crit; //decision criterion
vector<lower=0>[ns] meta; //meta-uncertainty
vector<lower=0>[ns] conf; //confidence criterion
//Hyperparameters
real<lower=0> ga;
real<lower=0> gb;
real snm;
real<lower=0> ss;
real cm;
real<lower=0> cs;
real mm;
real<lower=0> ms;
real ccm;
real<lower=0> ccs;
}
model {
//Hyperpriors
ga ~ lognormal(0,1); //alpha hyperparameter for beta distribution of guess rate
gb ~ lognormal(0,1); //beta hyperparameter for beta distribution of guess rate
snm ~ normal(0,1); //mu hyperparameter for lognormal distribution of stimulus sensitivity
ss ~ lognormal(0,1); //sigma hyperparameter for lognormal distribution of stimulus sensitivity
cm ~ normal(0,1); //mu hyperparameter for normal distribution of decision criterion
cs ~ lognormal(0,1); //sigma hyperparameter for normal distribution of decision criterion
mm ~ normal(0,1); //mu hyperparameter for lognormal distribution of meta-uncertainty
ms ~ lognormal(0,1); //sigma hyperparameter for lognormal distribution of meta-uncertainty
ccm ~ normal(0,1); //mu hyperparameter for lognormal distribution of confidence criterion
ccs ~ lognormal(0,1); //sigma hyperparameter for lognormal distribution of confidence criterion
//Loop through the pariticpants
for (i in 1:ns) {
//Priors
guess[i] ~ beta(ga,gb); //guess rate is a parameter bounded between 0 and 1
sens[i] ~ lognormal(snm,ss); //stimulus sensitivity is a strictly positive parameter
crit[i] ~ normal(cm,cs); //decision criterion is an unconstrained parameter
meta[i] ~ lognormal(mm,ms); //meta-uncertainty is a strictly positive parameter
conf[i] ~ lognormal(ccm,ccs); //strictly positive parameter
//Likelihood Model
resps[i] ~ casandre(guess[i],sens[i],crit[i],meta[i],conf[i],oris[i],nt); //likelihood model for this this participant
}
}