Hi all,
I want to check my Stan model with simulated data in R. To this end, I generated data in R as follows:
rm(list=ls())
set.seed(998)
simfun <- function(n,f,g, beta0, beta1, beta2, beta3,scale,shape){
data <- list()
m=31
mi=20
mis = rep(mi,m)
id=rep(1:m,mis)
n = length(id)
data$status <- rep(NA,n)
data$iid0=rtnorm(n=31, mean = 0, sd = 1, lower = 0)
data$iid = rep(data$iid0,mis)
c <- runif(n,f,g)
data$U <- runif(n)
data$x1 <- rbinom(n, size=1, prob=0.5)
data$x2 <- rnorm(n,0,2)
data$model <- RMgauss(var=4,scale=2)
data$x=seq(1,31,1)
data$sim.spatcov=RFsimulate(data$model , x=data$x)
data$spatcov0=as.data.frame(data$sim.spatcov)
data$spatcov=rep(data$spatcov0[,1],mis )
data$constant=rep(1,n)
data$eta <- (exp((-beta0 - beta1*data$x1 - beta2*data$x2 - beta3*data$spatcov)*shape))* (data$iid)
data$y<-(((1 / data$U)-1)^(1/shape)) * (1 / (data$eta ^ (1/shape)))
for (i in 1:n){
if(data$y[i] <= c[i]) {data$status[i]=1} else {data$status[i]=0}
}
data$T <- data$status*data$y+(1-data$status)* c
data<- data.frame(cbind(id,data$status, data$U, data$x1,data$x2,data$spatcov, data$constant,data$eta,data$y,c,data$T ))
names(data)= c("id", "status","U", "x1","x2","spatcov","constant","eta","y","c","T")
return (data)
}
After generating the data, I wrote the following codes to run the model in the RStan as follows:
kernel=read.csv("D:\\attachments\\kernel.1001grid.csv",sep=';',header=F)
kernel=as.matrix(kernel)
dim(kernel)
stanfunc<-function(stan_model1,data3,kernel, chains,warmup,iter01,thin,adapt_delta,max_treedepth,num,number,m,n_grid){
Onicescu.model <- stan_model(file = stan_model1)
data3=simfun(n=620,f=10,g=150,beta0=1, beta1=1, beta2=1,beta3=-0.50, scale=0.80,shape=0.90)
county <- data3$id
n = length(data3$id)
status <- data3$status
spatcov=data3$spatcov
cr <- 1-(sum(status)/n)
p=c()
for (j in 1:m){
p[j] <- sum(data3$status[data3$id==j]) / length(data3$status[data3$id==j])
}
spatcovmean=c()
for (k in 1:m){
spatcovmean[k] <- sum(data3$spatcov[data3$id==k]) / length(data3$spatcov[data3$id==k])
}
da<-list(data3=data3,number=number,kernel=kernel,p=p,n_grid=n_grid,num=num,m=m, county= county, n=n)
fit <- sampling(object = Onicescu.model,data = da,iter = iter01,warmup=warmup, cores =8,chains = chains, thin = thin,control=list(adapt_delta=adapt_delta, max_treedepth=max_treedepth),open_progress = F,verbose = F)
estimate <- as.data.frame(extract(fit, pars = c("beta","sigma"," r_county")))
spdf1 <- extract(fit)$spdf
estimate.mean <- apply(estimate,2,mean)
spdf1.mean <- apply(spdf1,2,mean)
return(list("fit"=fit,"p"=p,"cr"=cr,"spdf1.mean"=spdf1.mean,"estimate.mean"=estimate.mean,"spatcovmean"=spatcovmean))
}
###run the model with iterat=50
iterat <- 50
estimate.mean1<- matrix(NA, ncol=36, nrow=iterat)
ID<-rep(NA,iterat)
p1 <- matrix(NA, ncol=31, nrow=iterat)
cr<- rep(NA,iterat)
spatcovmean1 <- matrix(NA, ncol=31, nrow=iterat)
spdf11 <- matrix(NA, ncol=31, nrow=iterat)
boots <- function(iterat,result){
for (i in 1:iterat){
print(i)
result = stanfunc("sim.depen model.20nd oct.stan",data3=data3,kernel=kernel , chains=1, warmup=10000 , iter01=20000,thin=10,adapt_delta=0.99,max_treedepth=15,num=4,number=11,m=31,n_grid=1001)
ID[i]=i
p1[i,] <- result$p
cr[i] <- result$cr
spdf11 [i,] <- result$spdf1.mean
spatcovmean1[i,] <- result$spatcovmean
estimate.mean1[i,] <- result$estimate.mean
}
return(list("ID"=ID,"p1"=p1,"cr"=cr,"spdf11"=spdf11,"estimate.mean1"=estimate.mean1,"spatcovmean1"=spatcovmean1))
}
### running the model with iteration=50
result00 <- boots(iterat, result)
When I consider the iterations=50, my model runs only up to 6 iterations, and then I got the following error:
Chain 1: Rejecting initial value:
Chain 1: Log probability evaluates to log(0), i.e. negative infinity.
Chain 1: Stan can't start sampling from this initial value.
Chain 1: Rejecting initial value:
Chain 1: Log probability evaluates to log(0), i.e. negative infinity.
Chain 1: Stan can't start sampling from this initial value.
Chain 1:
Chain 1: Initialization between (-2, 2) failed after 100 attempts.
Chain 1: Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model.
[1] "Error in sampler$call_sampler(args_list[[i]]) : Initialization failed."
[2] "In addition: Warning messages:"
[3] "1: In readLines(file, warn = TRUE) :"
[4] " incomplete final line found on 'D:\\Eisa Nazar\\generate spatial data using spat covariate.right\\December\\sim.depen model with spat cov.normalprior.right.final.stan'"
[5] "2: In readLines(file, warn = TRUE) :"
[6] " incomplete final line found on 'D:\\Eisa Nazar\\generate spatial data using spat covariate.right\\December\\sim.depen model with spat cov.normalprior.right.final.stan'"
[7] "3: In readLines(file, warn = TRUE) :"
[8] " incomplete final line found on 'D:\\generate spatial data .right\\December\\sim.depen model.20nd oct.stan'"
[1] "error occurred during calling the sampler; sampling not done"
Stan model 'sim.depen model' does not contain samples.
Error in estimate.mean1[i, ] <- result$estimate.mean :
replacement has length zero
Can someone help me to solve this problem?
Thank you!
Here are my RStan codes (sim.depen model.20nd oct.stan):
functions{
vector a (vector p,matrix kernel, vector exp_x){
vector[num_elements(p)] a;
vector[num_elements(p)] landa;
for (k in 1 : num_elements(p)) {
landa[k] = kernel[k,] * exp_x * p[k];
}
a = landa / sum(landa); // assign `a`
return a;
}
// defines the log survival
vector log_S (vector t,vector p, matrix kernel, vector exp_x, int[] county, real shape,vector scale){
vector[num_elements(t)] log_S;
vector[num_elements(p)] spdf;
spdf = a (p,kernel, exp_x);
for (i in 1:num_elements(t)){
log_S[i] = log(spdf[county[i]]) + log(1- ((scale [i]*(t[i])^shape)/ (1+(scale [i]*(t[i])^shape)))); // county is a vector that shows the location of patients with dim = num_elements(t)
}
return log_S;
}
//defines the log pdf
vector log_f (vector t,vector p, matrix kernel, vector exp_x, int[] county, real shape,vector scale){
vector[num_elements(t)] log_f;
vector[num_elements(p)] spdf;
spdf = a (p,kernel, exp_x);
for (i in 1:num_elements(t)){
log_f[i] = log(spdf[county[i]]) + log((scale[i]*shape*t[i]^(shape-1))/((1+(scale[i]*t[i]^shape))^2));
}
return log_f;
}
//defines the log-likelihood for right censored data
real surv_loglogistic_lpdf(vector t,vector d,vector p, matrix kernel, vector exp_x, int[] county, real shape,vector scale){
vector[num_elements(t)] log_lik;
real prob;
vector[num_elements(t)] ls_s;
vector[num_elements(t)] ls_f;
ls_s = log_S( t, p, kernel, exp_x, county, shape, scale);
ls_f = log_f( t, p, kernel, exp_x, county, shape, scale);
log_lik = d .* log_f(t, p, kernel, exp_x, county, shape, scale)+ (1 - d) .* log_S(t, p, kernel, exp_x, county, shape, scale);
prob = sum(log_lik);
return prob;
}
}
//data block
data{
int <lower=0> m;
int <lower=0> n_grid;
int <lower=0> n;
int <lower=0> num;
int <lower=0> number;
int <lower =0 , upper = m> county[n];
vector <lower=0> [m] p;
matrix [m,n_grid] kernel;
matrix [n,number] data3;
}
//parameters block
parameters{
vector [num] beta;
real <lower=0> sigma;//scale parameter sigma=1/shape
vector [n_grid] x;
vector <lower=0> [m] r_county;
}
// transformed parameters block
transformed parameters{
vector [num] time_ratio;
vector[n] lambdaa;
real <lower=0> alpha = 1 / sigma;
vector [n_grid] exp_x;
time_ratio = exp(beta);
exp_x = exp(x);
lambdaa = exp (((-1 * data3[,4:7] * beta) / sigma)+log(r_county [county]));
}
// model block
model{
target += normal_lpdf(x|0,5);
target += normal_lpdf(beta | 0, 1000);
target += gamma_lpdf(alpha | 0.001, 0.001);
target += normal_lpdf(r_county| 0, 1);
target += surv_loglogistic_lpdf (data3[,11] | data3[,2],p, kernel, exp_x, county, alpha, lambdaa);
//model for the data
}
// generated quantities block
generated quantities{
vector[n] log_lik;//log-likelihood
vector[num_elements(p)] spdf;
spdf = a (p,kernel, exp_x);
{ for(j in 1:n){
log_lik[j] = data3[j,2] * (log((exp (((-1 * data3[j,4:7] * beta) * alpha)+log(r_county [county[j]]))*alpha*data3[j,11]^(alpha-1)) / ((1 + (exp (((-1 * data3[j,4:7] * beta) * alpha) + log(r_county [county[j]]))*data3[j,11]^alpha))^2))) + (1 - data3[j,2]) * (log(1- ((exp (((-1 * data3[j,4:7] * beta) * alpha) + log(r_county [county[j]]))*(data3[j,11])^alpha) / (1+(exp (((-1 * data3[j,4:7] * beta) * alpha)+log(r_county [county[j]]))*(data3[j,11])^alpha)))));}
}
}
EDIT: @maxbiostat edited this post for syntax highlighting.