Hi, slowly transitioning over to Stan(or rather, trying to do so quickly, but achieving it slowly) and have been really impressed so far (yes, BUGS convert). I’m having trouble solving a problem that seems as if it should be extremely straightforward to solve, an invalid simplex where one value is negative. The model is a two state occupancy model where pr(occurrence) is derived using an icloglog, and the two states can be misclassified, although false negative and false positive probabilities have slightly different structures. The error that I’m getting is "pi[k0__][k1__] is not a valid simplex. pi[k0__][k1__][1] = -0.640618, but should be greater than or equal to 0 (in [model] at line 76). I’d gotten a similar error at a similar spot previously (pi = nan) that I guess relates to overflow that I’ve tried to constrain with fmax and fmin arguments. I’ve thought that I was manually f-ing up the inverse cloglog link, but using that Stan function leads to the same results. I guess my primary question is whether this error (which is one that happens at initialization) is related to bad starting values, or essentially invalid parameter values? Sloppy and inefficient code is below–thanks for any insight!
data {
int<lower=1> ncells; // number of cells in lattice for prediction
int<lower=1> ncams; //number of point samples in a subset of cells
int<lower=1> nbatches; //number of distinct batches--data organization artifact
int<lower=1> nyear; //number of years
real<lower=0> ndays[nbatches]; //effort associated with samples
int y[nbatches]; // observation vector
int npix[nbatches]; //number of observations at each site
int cameranum[nbatches]; //for indexing
int Cell[nbatches]; //for indexing
int Year[nbatches]; //for indexing
vector[ncells] Forest;
vector[ncells] Conifer;
vector[ncells] AggIndex;
vector[ncells] INTEVI;
vector[ncells] PeakEVISD;
vector[ncams] PtForest;
int Maintained[ncams];
int Water[ncams];
vector[ncells] Area; //offset term for Pois intensity
}
parameters {
vector[nyear] a0_psi[ncells];
real a1_psi;
real a2_psi;
real a3_psi;
real a4_psi;
real a5_psi;
real mu_psi;
real<lower=0> var_psi;
real mu_p;
real<lower=0> var_p;
vector[nyear] b0_p[ncams];
real b1_p;
real b2_p;
real b3_p;
real<lower=0, upper=1> s10; //per sample fp prob
}
transformed parameters {
vector[nyear] log_lam[ncells]; //
vector<lower=0>[nyear] lam[ncells]; //E(N)
vector<lower=0, upper=1>[nyear] psi[ncells]; // occupancy probability
vector<lower=0, upper=1>[nyear] pbase[ncams]; //prob of true detection per unit given covs
vector<lower=0, upper=1>[nbatches] pstar; //prob of (true) detection given effort
vector[nyear] Nanimals;
simplex[2] phi[ncells, nyear]; //state array
simplex[2] pi[nbatches, 2]; //observation array
vector<lower=0, upper=1>[nbatches] p10; //prob all obs are fp
for (year in 1:nyear){
for (i in 1:ncells) {
log_lam[i, year]= a0_psi[i, year]+a1_psi*Forest[i]+a2_psi*Conifer[i]+a3_psi*AggIndex[i]+
a4_psi*INTEVI[i]+a5_psi*PeakEVISD[i];
lam[i, year] = exp(log_lam[i, year]);
psi[i, year] = 1-exp(-1*(lam[i, year]*Area[i]));
This is (phi), I think, where the error is being thrown based on the line number:
//categorical state matrix--site, year, state
phi[i,year, 1]=1-psi[i, year];
phi[i,year, 2]=psi[i, year];
Nanimals[year] = sum(lam[, year]);
}
for (j in 1:ncams){
pbase[j, year] = inv_logit(b0_p[j, year]+b1_p*PtForest[j]+b2_p*Maintained[j]+
b3_p*Water[j]);
}
}
There had been a numerical overflow issue (e.g., values within a simplex = nan, etc.) at this point. The max function argument got rid of that.
for (k in 1:nbatches){
//tP prob
pstar[k]= fmin(1-(1-pbase[cameranum[k],Year[k]])^ndays[k], .9999);
//fp prob
p10[k]=fmax(s10^npix[k], .0001);
//observation matrix
//batch, state(not occu, occu), obs(not seen, seen)
pi[k,1,1] = 1-p10[k];
pi[k,1,2] = p10[k];
pi[k,2,1] = 1-(pstar[k]+p10[k]);
pi[k,2,2] = pstar[k]+p10[k];
}
}
model {
// priors
mu_psi ~ normal(0, 1);
var_psi ~ uniform (0, 1);
a1_psi ~ normal (0,2.5);
a2_psi ~ normal (0,2.5);
a3_psi ~ normal (0,2.5);
a4_psi ~ normal (0,2.5);
a5_psi ~ normal (0,2.5);
mu_p ~ normal (0,2.5);
var_p ~ uniform (0, 2);
b1_p ~ normal (0,2.5);
b2_p ~ normal (0,2.5);
b3_p ~ normal (0,2.5);
for (year in 1:nyear){
for (i in 1:ncells){
a0_psi[i, year]~normal(mu_psi, var_psi);
}
for (j in 1:ncams){
b0_p[j, year]~normal(mu_p, var_p);
}
}
s10 ~ beta(3,34);
// likelihood
for (i in 1:nbatches) {
vector[2] lp;
for (x in 1:2){
lp[x] = categorical_lpmf(x | phi[Cell[i],Year[i],])+categorical_lpmf(y[i]|pi[i, x,]);
}
target += log_sum_exp(lp);
}
}