Hey all,
I’m having slightly new coding woes trying to implement the conditional version of our model.
Version: cmdstan-2.30.1-mpi
I showed my working paralellized code for the single condition model here. However, this is only built to handle part of our data. Our real data has multiple conditions that the parameters depend on. Specifically, there are four contexts for our three confidence variables (meta, nconf, pconf), and six contrast levels that serve as contexts for our psychometric variables (sens, crit), and our lapse rate parameter spans all conditions (guess). The problem is that the conditions each have different numbers of contrast levels, which means that they also have a different numbers of trials. We also remove subjects with low variability in the confidence parameters. So we have a very ragged data set. To fix this, I pad all of the data sets: zero rows for missing trials, zero matrices for missing subjects, and entire arrays of zero matrices for missing contrasts. We do not want all of these zeros to be running through the model, greatly diminishing its efficiency. To combat this, I had hoped to implement conditionals at different check points to not run data sets that are entirely zero (there doesn’t seem to be much we can do about the padded trials, but that’s small fry compared to the entirely empty contrasts). I tried to implement as follows:
functions {
//Custom function for lognormal inverse CDF
matrix logncdfinv(vector x, vector m, vector s) {
int szx;
int szm;
szx = size(x);
szm = size(m);
matrix[szx,szm] y;
y = exp( rep_matrix(s',szx) * sqrt(2) .* rep_matrix(inv_erfc( -2 .* x + 2 ),szm) + rep_matrix(m',szx));
return y;
}
//Likelihood model for CASANDRE
real casandre_log(array[] matrix resps, vector guess, matrix sds, matrix se, vector nconf, vector pconf) {
real ll;
ll = 0.0;
for (n in 1:size(resps)) {
if (resps[n][:] == 0) {
} else {
matrix[size(se[:,n]),4] llhC;
matrix[size(se[:,n]),size(sds[:,n])] avgs;
avgs = rep_matrix(se[:,n],size(sds[:,n])).*rep_matrix(sds[:,n]',size(se[:,n]));
for (tr in 1:size(se[:,n])){
matrix[3,size(sds[:,n])] raws;
for (rws in 1:size(sds[:,n])) {
raws[1,rws] = normal_cdf(-nconf[n],avgs[tr,rws],sds[rws,n]);
}
for (rws in 1:size(sds[:,n])) {
raws[2,rws] = normal_cdf(0,avgs[tr,rws],sds[rws,n]);
}
for (rws in 1:size(sds[:,n])) {
raws[3,rws] = normal_cdf(pconf[n],avgs[tr,rws],sds[rws,n]);
}
vector[3] ratiodist;
ratiodist[1] = mean(raws[1,:]);
ratiodist[2] = mean(raws[2,:]);
ratiodist[3] = mean(raws[3,:]);
llhC[tr,1] = ((guess[n]/4) + ((1-guess[n])*ratiodist[1]));
llhC[tr,2] = ((guess[n]/4) + ((1-guess[n])*(ratiodist[2]-ratiodist[1])));
llhC[tr,3] = ((guess[n]/4) + ((1-guess[n])*(ratiodist[3]-ratiodist[2])));
llhC[tr,4] = ((guess[n]/4) + ((1-guess[n])*(1-ratiodist[3])));
}
}
if (resps[n][:] == 0) {
ll += 0;
} else {
ll += sum(columns_dot_product(resps[n], log(llhC)));
}
}
return ll;
}
//Partial sum function
real partial_sum_casandre_log(array[] matrix slice_n_resps, int start, int end, vector guess, matrix sds, matrix se, vector nconf, vector pconf) {
return casandre_log(slice_n_resps, guess[start:end], sds[:,start:end], se[:,start:end], nconf[start:end], pconf[start:end]);
}
}
data {
int ncond;
int ncont;
int ns;
int nt;
array[ncond,ncont,ns] matrix[nt,4] resps;
array[ncond,ncont] matrix[nt,ns] oris;
int sampn;
vector[sampn] sampx;
}
parameters {
//Parameters
vector<lower=0,upper=1>[ns] guess;
matrix<lower=0>[ns,6] sens;
matrix[ns,6] crit;
matrix<lower=0>[ns,4] meta;
matrix<lower=0>[ns,4] nconf;
matrix<lower=0>[ns,4] pconf;
//Hyperparameters
real snm;
real<lower=0> sns;
real cm;
real<lower=0> cs;
real mm;
real<lower=0> ms;
real nccm;
real<lower=0> nccs;
real pccm;
real<lower=0> pccs;
}
model {
//Hyperpriors
snm ~ normal(0,1);
sns ~ lognormal(0,1);
cm ~ normal(0,1);
cs ~ lognormal(0,1);
mm ~ normal(0,1);
ms ~ lognormal(0,1);
nccm ~ normal(0,1);
nccs ~ lognormal(0,1);
pccm ~ normal(0,1);
pccs ~ lognormal(0,1);
//Priors
guess ~ beta(1,193.0/3.0);
sens ~ lognormal(snm,sns);
crit ~ normal(cm,cs);
meta ~ lognormal(mm,ms);
nconf ~ lognormal(ccm,ccs);
pconf ~ lognormal(pccm,pccs);
//Likelihood model (with local variable calculations)
for (cond in 1:4) {
xtrans = logncdfinv(sampx,log(1/sqrt(meta[:,cond].^2+1)),sqrt(log(meta[:,cond].^2+1)));
sds = 1./xtrans;
for (cont in 1:6) {
if (oris[cond][cont][:] == 0) {
} else {
sm = oris[cond][cont].*rep_matrix(sens[:,cont]',nt);
sc = crit.*sens[:,cont];
se = sm-rep_matrix(sc',nt);
target += reduce_sum(partial_sum_casandre_log, resps[cond][cont], guess, sds, nconf[:,cond], pcond[:,cond]);
}
}
}
}
However, I’m running into an error where the conditional is not allowed to compare a matrix to an integer. I tried using rep_matrix to create an equal-sized matrix of zeros to compare it to, but received an error that you cannot compare matrices to matrices either. I’m not sure how to procede and am hoping someone knows how I can wrangle with conditionals to prevent empty data sets from needlessly running through the likelihood function. Even better if someone has a better idea for how to deal with our very ragged data set. Thanks!