## Usage: "Rscript sim_bayesinf.R compile" to make the require stan models ## Author: Daniel Lawson (dan.lawson@bristol.ac.uk),modified by Daniel Trejo Banos ## Date: August 2020 ## Licence: GPLv3 library(cmdstanr) library(rstan) #rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) with_cmdstan=FALSE set_cmdstan_path(path = "/Users/admin/repo/cmdstan/") args=commandArgs(TRUE) S <- -0.5 Fst <- 0.1 seed <- 1234 set.seed(seed) print(seed) if(length(args)>0) { if(args[[1]]=="cmdstan"){ with_cmdstan=TRUE print("compiling with cmdstan") } } ## Create the stan model compiled object if(with_cmdstan){ sm <- cmdstan_model("simple.stan") smd <- cmdstan_model("drift.stan") }else{ sm <- stan_model("simple.stan") smd <- stan_model("drift.stan") } ##################### ## create data in the form stan likes, from a 2 column data frame of beta and f data_frame_to_stan_list=function(data,use="obs",Fst=NULL){ if(use=="obs"){ ret=list(J=dim(data)[1],beta=data[,"beta"],f=data[,"f"]) }else if(use=="true"){ ret=list(J=dim(data)[1],beta=data[,"b"],f=data[,"p"]) }else stop("invalid \"use\"") if(!is.null(Fst)) ret$Fst=Fst ret } ## Make example test data make_test_data=function(N,sigma_b=0.01,sigma_beta=0,sigma_f=0.1,S=-1){ p=runif(2*N,0.01,0.5) b=rnorm(2*N,0,sigma_b*(p*(1-p))^(S/2)) if(sigma_f==0){ f=p }else f=rbeta(length(p),(1-sigma_f)*p/sigma_f,(1-sigma_f)*(1-p)/sigma_f) # Balding and Nichols model ok=which((f>0.01)&(p>0.01)) if(sum(ok)>=N) { ok=sample(ok,N) }else ok=sample(ok,N,replace=TRUE) f[f>0.5]=1-f[f>0.5] f[f<0.01]=0.01 if(sigma_beta==0){ beta=b }else{ beta=rnorm(N,b,sigma_beta) } r=data.frame(f=f,beta=beta,p=p,b=b) r[ok,] } ######### ######### ######### ## START OF DATA GENERATION ## Some parameters we don't need to change thin=500 # Report mcmc samples after this many steps N=10000 # Number of SNPs iter=4000 # Number of MCMC iterations (increase if you have convergence problems) ## Make appropriate data test=make_test_data(N,sigma_b=0.01,sigma_f=Fst,S=S) ## Where the f and beta are taken from the "true" simulated f and beta's, "in Africa" These are for when we have "direct access" to the data. data_direct=data_frame_to_stan_list(test,"true") ## Where the f and beta are taken from the drifted f. data_obs=data_frame_to_stan_list(test,"obs",Fst=Fst) ######### ## START OF INFERENCE ## Infer in the drift model, observed and direct datasets if(with_cmdstan){ smd_obs<-smd$sample(data_obs,chains=2,iter_warmup =ceiling(iter/2), iter_sampling = ceiling(iter/2),thin=thin) # test_obs<-sm$sample(data_obs,chains=2,iter_warmup =ceiling(iter/2), iter_sampling = ceiling(iter/2),thin=thin) # test_direct<-sm$sample(data=data_direct,chains=2,iter=iter,thin=thin) }else{ smd_obs<-sampling(smd,data_obs,chains=2,iter=iter,thin=thin) # test_obs<-sampling(sm,data_obs,chains=2,iter=iter,thin=thin) # test_direct<-sampling(sm,data=data_direct,chains=2,iter=iter,thin=thin) }