Hi. I’m trying to fit a model implementing a cognitive Bayesian hypothesis-testing model for a learning task. The cognitive learning model is written in a function (with a few additional auxiliary functions), and after checking it (by exporting the function) it works well. The main output of the model is called pRight and it is a vector specifying the probability that the participant will choose the ‘right’ option out of two (left/right).
However, when trying to fit the model in Stan to simulated data, it constantly results in the error “Gradient evaluated at the initial value is not finite”. What is even stranger about it is that when I add the statement print(target()) to the model, it gives a finite number, suggesting that the lp is OK. Moreover, when I try to run the same model with a subset of the data including only 2 trials (out of 88), the model runs fine — no matter which 2 trials I randomly sample. 3 trials already create an error! Does this imply some sort of floating-point overflow/underflow problem?
I attach the Stan model, as well as an Rdata file with a data list to be used in the model (it has an .R extension, but it can be loaded with the load function in R — sorry for that).
any help would be much appreciated.
functions{
row_vector na_to0r(row_vector X){
  // Return a copy of X in which every NaN entry is replaced by 0.
  // All finite entries pass through untouched.
  row_vector[cols(X)] cleaned;
  for (idx in 1:cols(X)){
    if (is_nan(X[idx])){
      cleaned[idx] = 0;
    } else {
      cleaned[idx] = X[idx];
    }
  }
  return cleaned;
}
vector na_to0c(vector X){
  // Column-vector counterpart of na_to0r: NaN entries become 0,
  // everything else is copied through unchanged.
  vector[rows(X)] cleaned;
  for (idx in 1:rows(X)){
    if (is_nan(X[idx])){
      cleaned[idx] = 0;
    } else {
      cleaned[idx] = X[idx];
    }
  }
  return cleaned;
}
vector col_sums(matrix X) {
  // Column-wise totals of X, treating NaN cells as 0 (via na_to0c).
  vector[cols(X)] totals;
  for (j in 1:cols(X)) {
    totals[j] = sum(na_to0c(col(X, j)));
  }
  return totals;
}
vector row_sums(matrix X) {
  // Row-wise totals of X, treating NaN cells as 0 (via na_to0r).
  vector[rows(X)] totals;
  for (i in 1:rows(X)) {
    totals[i] = sum(na_to0r(row(X, i)));
  }
  return totals;
}
row_vector normalize(row_vector X){
  // Rescale X so its entries sum to 1.
  // BUG FIX: the original returned X unchanged when sum(X) <= 0. When the
  // running probability products upstream underflow to exactly 0 over many
  // trials, that returned an all-zero "probability" vector, which propagates
  // into the likelihood as log(0) and produces the reported
  // "Gradient evaluated at the initial value is not finite" error.
  // Here we instead fall back to a uniform simplex in that degenerate case,
  // so the output is always a valid probability row-vector.
  real sm = sum(X);
  if (sm > 0){
    return X / sm;
  }
  // Degenerate input (all-zero after underflow, or non-positive sum):
  // return the uniform distribution over the same number of entries.
  return rep_row_vector(1.0 / cols(X), cols(X));
}
// Sequential Bayesian hypothesis-testing model for a 3-cue learning task.
// Returns pRight[t]: the probability that the agent chooses the 'right'
// option on trial t, marginalised over the agent's latent belief state.
//   cues     - nTrials x 3 matrix of cue values shown on each trial
//   target   - per-trial feedback/target value compared against each cue
//   response - per-trial chosen value compared against each cue
//   nTrials  - number of trials
//   gamma    - feedback reliability: weight given to a cue matching target
//   tau      - per-trial persistence of the latent belief (see pIdent/pDiff)
//   w        - choice determinism: match probability is w + 0.5*(1-w)
//   lambda0  - initial belief value used to seed lambda at each restart
// NOTE(review): lt and x are built from running products of probabilities
// and rescaled with normalize(); over many trials these products can
// underflow to exactly 0, in which case normalize() (as written) leaves them
// at 0 and the downstream likelihood hits log(0) -- a plausible cause of the
// reported "Gradient evaluated at the initial value is not finite" error.
// Consider carrying these quantities on the log scale (log_sum_exp).
vector SAmodel(matrix cues, vector target, vector response, int nTrials, real gamma, real tau, real w, real lambda0){
// nL indexes the "lag": number of trials since the latent state last reset.
int nL=nTrials;
// NOTE(review): RES is declared but never used; consider removing it.
matrix[nTrials,92] RES;
// ll[t,c]: likelihood of trial-t feedback under cue c.
matrix[nTrials,3] ll;
// xCondL[t]: belief over the 3 cues, conditional on each possible lag l.
matrix[nL,3] xCondL[nTrials];
// x[t]: marginal belief over the 3 cues entering trial t
// (row t+1 is filled in while processing trial t).
matrix[nTrials+1,3] x;
// lt[t]: posterior over the lag at trial t; A and B are its update factors.
matrix[nTrials,nL] lt;
matrix[nTrials,nL]A;
matrix[nL,nL]B;
// Model output: per-trial probability of choosing 'right'.
vector[nTrials] pRight;
// pRightCond / pRespCond [t,c]: probability of choosing 'right' / of the
// observed response, conditional on cue c being the relevant one.
matrix[nTrials,3] pRightCond;
matrix[nTrials,3] pRespCond;
// Per-lag belief-update quantities: lambda is the updated belief,
// LR its log-odds, g a steep sigmoid "switch" gate applied to LR.
matrix[nL,3] pIdent[nTrials];
matrix[nL,3] pDiff[nTrials];
matrix[nL,3] lambda[nTrials];
matrix[nL,3] LR[nTrials];
matrix[nL,3] g[nTrials];
vector[3] x0;
vector[nL]lt0;
vector[3] lambda0a;
// Precompute per-trial, per-cue feedback likelihoods and choice probabilities.
for (t in 1:nTrials){
for (c in 1:3){
ll[t,c]=(cues[t,c]==target[t])?gamma:(1-gamma);
pRespCond[t,c]=(cues[t,c]==response[t])?(w+0.5*(1-w)):(0.5*(1-w));
pRightCond[t,c]=(cues[t,c]==1)?(w+0.5*(1-w)):(0.5*(1-w));
}
}
// Uniform prior over the 3 cues; lag prior puts all mass on lag 1.
x0=rep_vector(1,3);
x0=x0/3;
lambda0a=rep_vector(lambda0,3);
lt0=rep_vector(0,nL);
lt0[1]=1;
///////for t=1//////
//response model
pRight[1]=dot_product(pRightCond[1,1:3],x0[1:3]');
//Experimenter's model - after response and before feedback
// Only lag 1 is possible at t=1; higher lags get zero mass.
xCondL[1,1,]=pRespCond[1,1:3].*x0[1:3]';
xCondL[1,2:nL,1:3]=rep_matrix(0,nL-1,3);
A[1,]=row_sums(xCondL[1,,])';
for (l in 1:nL) xCondL[1,l,]=normalize(xCondL[1,l,]);
lt[1,]=A[1,].*lt0';
lt[1,]=normalize(lt[1,]);
// Marginal cue belief for trial 2: mix xCondL over the lag posterior.
x[2,1:3]=col_sums(xCondL[1,,].*rep_matrix(lt[1,]',3))';
x[2,]=normalize(x[2,]);
//Agent's learning model after feedback at t=1
pIdent[1,1,1:3]=ll[1,1:3].*lambda0a';
pDiff[1,1,1:3]=0.5*(1-lambda0a)';
lambda[1,1,1:3]=pIdent[1,1,1:3]./(pIdent[1,1,1:3]+pDiff[1,1,1:3]);
// LR is the log-odds of lambda; g squashes it with a steep (slope 20)
// logistic, acting as a soft step function on the sign of LR.
LR[1,1,1:3]=log(lambda[1,1,1:3]./(1-lambda[1,1,1:3]));
g[1,1,1:3]=1.0./(1.0+exp(20*(LR[1,1,1:3]-0)));
// Main recursion over trials 2..nTrials.
for (t in 2:nTrials){
//Response model
pRight[t]=dot_product(pRightCond[t,1:3],x[t,]);
//Experimenter's model (after response is made but before feedback is given)
// Lag 1 restarts from the prior x0; lag l>1 propagates the previous
// trial's lag l-1 belief forward.
xCondL[t,1,]=pRespCond[t,1:3].*x0[1:3]';
for (l in 2:nL)xCondL[t,l,1:3]=pRespCond[t,1:3].*xCondL[t-1,l-1,1:3];
A[t,]=row_sums(xCondL[t,,])';
for (l in 1:nL) xCondL[t,l,]=normalize(xCondL[t,l,]);
// B is the lag transition matrix: column 1 collects "switch back to lag 1"
// mass (gated by g); the superdiagonal carries "stay" mass (1-g).
B=rep_matrix(0,nL,nL);
B[,1]=row_sums(xCondL[t-1,,].*(g[t-1,,]));
for (i in 1:nL){
for (j in 1:nL){
if(j==(i+1)){
B[i,j]=sum(xCondL[t-1,i,1:3].*(1-g[t-1,i,1:3]));
}
}
}
lt[t,]=A[t,].*col_sums(B .* rep_matrix(lt[t-1,]',nL))';
lt[t,]=normalize(lt[t,]);
// Marginal cue belief for the next trial.
x[t+1,1:3]=col_sums(xCondL[t,,].*rep_matrix(lt[t,]',3))';
x[t+1,]=normalize(x[t+1,]);
//Agent's learning model (after feedback is given)
// Lag 1 reseeds from lambda0; lag l>1 blends the previous belief with its
// complement using persistence parameter tau.
pIdent[t,1,1:3]=ll[t,1:3].*lambda0a';
pDiff[t,1,1:3]=0.5*(1-lambda0a)';
for (l in 2:nL){
pIdent[t,l,1:3]=ll[t,1:3].*(lambda[t-1,l-1,1:3]*tau+((1-lambda[t-1,l-1,1:3])*(1-tau)/2));
pDiff[t,l,1:3]=0.5*(lambda[t-1,l-1,1:3]*(1-tau)+((1-lambda[t-1,l-1,1:3])*tau));
}
lambda[t,,1:3]=pIdent[t,,1:3]./(pIdent[t,,1:3]+pDiff[t,,1:3]);
LR[t,,1:3]=log(lambda[t,,1:3]./(1-lambda[t,,1:3]));
g[t,,1:3]=1.0./(1.0+exp(20*(LR[t,,1:3]-0)));
}
return(pRight);
}
}
data{
// Number of trials in the task.
int nTrials;
// Cue values shown on each trial (3 cues per trial).
matrix [nTrials,3] cues;
// Per-trial target/feedback value compared against the cues in SAmodel.
vector [nTrials] targets;
// Observed choices as integers, used in the bernoulli sampling statement.
int response [nTrials];
// Choices as reals, passed into SAmodel (which expects a vector argument).
// NOTE(review): presumably response and response1 encode the same choices --
// confirm they are kept in sync when building the data list.
vector [nTrials] response1;
}
parameters{
// Belief-persistence parameter of the agent's learning model (see SAmodel).
real<lower=0.5, upper=1> tau;
// Feedback-reliability parameter: weight on a cue matching the target.
real<lower=0.5, upper=1> gamma;
// Choice-determinism weight: 0 = random responding, 1 = fully deterministic.
real<lower=0, upper=1> w;
}
model{
// Likelihood: per-trial bernoulli on the model-predicted choice probability.
vector [nTrials] pRight;
pRight=SAmodel(cues,targets,response1,nTrials,gamma,tau,w,0.5);
// BUG FIX: guard against pRight reaching exactly 0 or 1 (e.g. from
// underflow in SAmodel's running probability products). bernoulli_lpmf at
// a hard 0/1 yields -inf (and a non-finite gradient), which matches the
// reported "Gradient evaluated at the initial value is not finite" error
// appearing once enough trials are included. Clamping into the open unit
// interval keeps the log-probability and its gradient finite.
for (t in 1:nTrials){
pRight[t] = fmin(fmax(pRight[t], 1e-9), 1 - 1e-9);
}
response~bernoulli(pRight);
}
"
<a class="attachment" href="//cdck-file-uploads-canada1.s3.dualstack.ca-central-1.amazonaws.com/flex030/uploads/mc_stan/original/2X/b/b657c4f63818a99b9e36ddc7740361a2252f8144.R">datalist.R</a> (367 Bytes)