Successive sampling without chkptstanr

LLcod.stan (2.1 KB)
ppcod.stan (2.0 KB)
do successive sampling runs after a full warmup, without chkptstanr


#here is a reproducible example of some issues re bypassing chkptstanr.
#Anyone with R 4.1.0 can just install chkptstanr and ignore all this
#I'm postponing that for now. Comments will help!
#The questions are contained further below, and concern:

#1) how is stepsize actually computed from the warmup
#2) are stepsize, the inv_metric, and the inital parameter values the only things #needed to initialise the sampling
#3) when doing successive runs, should the initial parameter values be set as the final #values from the previous run
#or as an average over some tail of the previous draws
#4) is there a way to use the draws assembled from successive runs, along with the #model and data to construct a brmsfit object?

#Below is in four parts. A) concerns a small synthetic data set and model which is run #in mgcv and brms.
#The remaining sections are in cmdstanr. B) concerns the warmup period. 
#C) concerns successive sampling, which is then used to assemble a full sample, with #accompanying nuts parameters. 
#D) concerns a few tests of whether the output from C) is comparable to that from #brms in A).

R
#setwd() as desired
path<-getwd()
library(mgcv)
library(brms)
library(cmdstanr)
options(mc.cores=2, brms.normalize=TRUE,brms.backend="cmdstanr")
#set_cmdstan_path() as approprate
library(abind)
library(reshape2)

#save the attached ppcod.stan and LLcod.stan to the working directory

#A) data, model, mgcv and brms

set.seed(123)
x1<-runif(400)
x2<-runif(400)
f<-function(t) sin(10*t)*exp(-2*t)
g<-function(t) cos(2*pi*t)
u<-sample(1:10,400, replace=T)
U<-factor(u)
r<-0.4*rnorm(10)

y<--1.8+f(x1)+g(x2)+r[u]+0.2*rnorm(400)

par(mfrow=c(1,2))
plot(x1,y,pch=u)
plot(x2,y,pch=u)
par(mfrow=c(1,1))

m1<-gam(y~s(U,bs="re")+s(x1,k=7)+s(x2,k=7))
plot(m1,pages=1)
summary(m1)
gam.check(m1)

dat2<-data.frame(x1,x2,U,y)
bf2<-bf(y~(1|U)+s(x1,k=7)+s(x2,k=7))
bf2
brm2<-brm(bf2,data=dat2,iter=2000,seed=456,control=list(adapt_delta=0.9),file="brm2")

summary(brm2)
np<-nuts_params(brm2)

plot(fitted(brm2)[,1],m1$fit)
lines(c(-5,5),c(-5,5),col=2)

#B) in cmdstanr, do warmup and one sample
#pretending we don't have brm2, just dat2 and bf2

scod2<-make_stancode(bf2,data=dat2)
write_stan_file(scod2,dir=path,basename="scod2.stan")
cmd2<-cmdstan_model(stan_file="scod2.stan")
sdat2<-make_standata(bf2,dat2)

fitcmd2w.s1<-cmd2$sample(data=sdat2,seed=789,iter_warmup=2000,iter_sampling=1,output_dir=path,
output_basename="out_cmd2w.s1",save_warmup=TRUE, save_latent_dynamics=TRUE,adapt_delta=0.9)

fitcmd2w.s1$save_object(file="fitcmd2w.s1.RDS")
#fitcmd2w.s1<-readRDS("fitcmd2w.s1.RDS")

#warmup data for chain 1
wm1<-read.csv("out_cmd2w.s1-1.csv",header=T,comment.char="#")
dim(wm1)
colnames(wm1)[1:7]
plot(1:2000,wm1[-1,3])
plot(1:2000,wm1[-1,3],ylim=c(0,0.1),col=rgb(0,0,1,0.3),xlab="",ylab="stepsize")

ss1<-fitcmd2w.s1$sampler_diagnostics()[,1,5]
print(ss1,8)

abline(h=ss1,col=2)

#Q1
#how was this estimate of stepsize obtained from the output for Chain 1?
#is it an average over the final window?
mean(wm1[1977:2001,3])
#not exactly

ss2<-fitcmd2w.s1$sampler_diagnostics()[,2,5]
ss3<-fitcmd2w.s1$sampler_diagnostics()[,3,5]
ss4<-fitcmd2w.s1$sampler_diagnostics()[,4,5]

im1<-fitcmd2w.s1$inv_metric(matrix=F)[[1]]
length(im1)
#the model has 27 parameters, see scod2 parameters block where some are defined #as vectors or arrays

vari<-c("Intercept","bs","zs_1_1","sds_1_1","zs_2_1","sds_2_1","sigma","sd_1","z_1")
attributes(fitcmd2w.s1$draws()[1,1,])$dimnames$variable

im2<-fitcmd2w.s1$inv_metric(matrix=F)[[2]]
im3<-fitcmd2w.s1$inv_metric(matrix=F)[[3]]
im4<-fitcmd2w.s1$inv_metric(matrix=F)[[4]]

par1<-fitcmd2w.s1$draws()[1,1,2:28]
par2<-fitcmd2w.s1$draws()[1,2,2:28]
par3<-fitcmd2w.s1$draws()[1,3,2:28]
par4<-fitcmd2w.s1$draws()[1,4,2:28]

#Q2
#are ss1...ss4, im1...im4, par1...par4 the only info needed to initialise the further #sampling (coming next)

#C) sampling using ss1...ss4, im1...im4, and for the first run par1...par4.

list1.1<-list(Intercept=as.numeric(par1[1,1,1]),bs=as.numeric(par1[1,1,2:3]),
zs_1_1=as.numeric(par1[1,1,4:8]),sds_1_1=as.numeric(par1[1,1,9]),
zs_2_1=as.numeric(par1[1,1,10:14]),sds_2_1=as.numeric(par1[1,1,15]),
sigma=as.numeric(par1[1,1,16]),sd_1=as.numeric(par1[1,1,17]),
z_1=as.numeric(par1[1,1,18:27]))

list2.1<-list(Intercept=as.numeric(par2[1,1,1]),bs=as.numeric(par2[1,1,2:3]),
zs_1_1=as.numeric(par2[1,1,4:8]),sds_1_1=as.numeric(par2[1,1,9]),
zs_2_1=as.numeric(par2[1,1,10:14]),sds_2_1=as.numeric(par2[1,1,15]),
sigma=as.numeric(par2[1,1,16]),sd_1=as.numeric(par2[1,1,17]),
z_1=as.numeric(par2[1,1,18:27]))

list3.1<-list(Intercept=as.numeric(par3[1,1,1]),bs=as.numeric(par3[1,1,2:3]),
zs_1_1=as.numeric(par3[1,1,4:8]),sds_1_1=as.numeric(par3[1,1,9]),
zs_2_1=as.numeric(par3[1,1,10:14]),sds_2_1=as.numeric(par3[1,1,15]),
sigma=as.numeric(par3[1,1,16]),sd_1=as.numeric(par3[1,1,17]),
z_1=as.numeric(par3[1,1,18:27]))

list4.1<-list(Intercept=as.numeric(par4[1,1,1]),bs=as.numeric(par4[1,1,2:3]),
zs_1_1=as.numeric(par4[1,1,4:8]),sds_1_1=as.numeric(par4[1,1,9]),
zs_2_1=as.numeric(par4[1,1,10:14]),sds_2_1=as.numeric(par4[1,1,15]),
sigma=as.numeric(par4[1,1,16]),sd_1=as.numeric(par4[1,1,17]),
z_1=as.numeric(par4[1,1,18:27]))

fitcmd2ini.s500.1<-cmd2$sample(data=sdat2,init=list(list1.1,list2.1,list3.1,list4.1),
step_size=c(ss1,ss2,ss3,ss4),inv_metric=list(im1,im2,im3,im4),seed=1011,
iter_warmup=0,adapt_engaged=FALSE,iter_sampling=500,adapt_delta=0.9,output_dir=path,
output_basename="out_cmd2ini.s500.1",save_warmup=FALSE, save_latent_dynamics=TRUE)

fitcmd2ini.s500.1$save_object(file="fitcmd2ini.s500.1.RDS")
#fitcmd2ini.s500.1<-readRDS("fitcmd2ini.s500.1.RDS")

#now take the last draw to obtain starting parameters for the next run
#Q3
#is this sensible?
#or should we average over the last 25 draws, say?

par1.1<-fitcmd2ini.s500.1$draws()[500,1,2:28]
par2.1<-fitcmd2ini.s500.1$draws()[500,2,2:28]
par3.1<-fitcmd2ini.s500.1$draws()[500,3,2:28]
par4.1<-fitcmd2ini.s500.1$draws()[500,4,2:28]

list1.2<-list(Intercept=as.numeric(par1.1[1]),bs=as.numeric(par1.1[2:3]),
zs_1_1=as.numeric(par1.1[4:8]),sds_1_1=as.numeric(par1.1[9]),
zs_2_1=as.numeric(par1.1[10:14]),sds_2_1=as.numeric(par1.1[15]),
sigma=as.numeric(par1.1[16]),sd_1=as.numeric(par1.1[17]),
z_1=as.numeric(par1.1[18:27]))

list2.2<-list(Intercept=as.numeric(par2.1[1]),bs=as.numeric(par2.1[2:3]),
zs_1_1=as.numeric(par2.1[4:8]),sds_1_1=as.numeric(par2.1[9]),
zs_2_1=as.numeric(par2.1[10:14]),sds_2_1=as.numeric(par2.1[15]),
sigma=as.numeric(par2.1[16]),sd_1=as.numeric(par2.1[17]),
z_1=as.numeric(par2.1[18:27]))

list3.2<-list(Intercept=as.numeric(par3.1[1]),bs=as.numeric(par3.1[2:3]),
zs_1_1=as.numeric(par3.1[4:8]),sds_1_1=as.numeric(par3.1[9]),
zs_2_1=as.numeric(par3.1[10:14]),sds_2_1=as.numeric(par3.1[15]),
sigma=as.numeric(par3.1[16]),sd_1=as.numeric(par3.1[17]),
z_1=as.numeric(par3.1[18:27]))

list4.2<-list(Intercept=as.numeric(par4.1[1]),bs=as.numeric(par4.1[2:3]),
zs_1_1=as.numeric(par4.1[4:8]),sds_1_1=as.numeric(par4.1[9]),
zs_2_1=as.numeric(par4.1[10:14]),sds_2_1=as.numeric(par4.1[15]),
sigma=as.numeric(par4.1[16]),sd_1=as.numeric(par4.1[17]),
z_1=as.numeric(par4.1[18:27]))

fitcmd2ini.s500.2<-cmd2$sample(data=sdat2,init=list(list1.2,list2.2,list3.2,list4.2),
step_size=c(ss1,ss2,ss3,ss4),inv_metric=list(im1,im2,im3,im4),seed=1213,
iter_warmup=0,adapt_engaged=FALSE,iter_sampling=500,adapt_delta=0.9,output_dir=path,
output_basename="out_cmd2ini.s500.2",save_warmup=FALSE, save_latent_dynamics=TRUE)

fitcmd2ini.s500.2$save_object(file="fitcmd2ini.s500.2.RDS")
#fitcmd2ini.s500.2<-readRDS("fitcmd2ini.s500.2.RDS")

fitcmd2ini.s500.1$diagnostic_summary()
fitcmd2ini.s500.2$diagnostic_summary()

#now combine the two runs

drw1<-fitcmd2ini.s500.1$draws()
class(drw1)
dim(drw1)
attributes(drw1)

drw2<-fitcmd2ini.s500.2$draws()
attributes(drw2)$dimnames$iteration<-as.character(501:1000)

drw<-abind(drw1,drw2,along=1)
attributes(drw)
adrw<-as_draws_array(drw)
identical(as.numeric(drw),as.numeric(adrw))
#ok

#use generated quantities to make ppx from adrw

#make ppcod.stan by deleting the model block and priors but reinstating mu
#then generate y_tilde as normal_rng(mu,sigma)
#see attached

ppxcod<-cmdstan_model(stan_file="ppcod.stan")
fit_gq<-ppxcod$generate_quantities(fitted_params=adrw,data=sdat2,seed=2001)
ppx<-fit_gq$draws()
dim(ppx)
class(ppx)
mppx<-as.matrix(ppx)
dim(mppx)<-c(4000,400)
identical(as.numeric(ppx[537,3,]),mppx[2537,])
#ok

#nuts

np1<-fitcmd2ini.s500.1$sampler_diagnostics()
np2<-fitcmd2ini.s500.2$sampler_diagnostics()
attributes(np2)$dimnames$iteration<-as.character(501:1000)
npx<-abind(np1,np2,along=1)
npx<-melt(npx)
npx<-npx[,c(2,1,3,4)]
colnames(npx)<-c("Chain","Iteration","Parameter","Value")

#include more runs as desired

#D) compare drw with the sampling output of brm2

#Q4
#is there a way to use drw along with the model and data to construct a brmsfit #object? without that, things can be done by hand

pp<-posterior_predict(brm2)
sam<-sample(1:4000,50)
p1<-ppc_dens_overlay(y,pp[sam,])+ggtitle("using draw2")
p2<-ppc_dens_overlay(y,mppx[sam,])+ggtitle("using drw")
grid.arrange(p1,p2,nrow=2)

#to compare the mcmc_interval plots etc we need to adjust the variable names
draw2<-as.array(brm2)
dim(draw2)

names(draw2[1,1,])
names(drw[1,1,])
#note reordering and renaming
#adjust naming

attributes(drw)$dimnames[[3]][3:4]<-attributes(draw2)$dimnames[[3]][2:3]
attributes(drw)$dimnames[[3]][c(10,16)]<-attributes(draw2)$dimnames[[3]][5:6]
attributes(drw)$dimnames[[3]][18]<-attributes(draw2)$dimnames[[3]][4]
attributes(drw)$dimnames[[3]][29:38]<-attributes(draw2)$dimnames[[3]][19:28]
attributes(drw)$dimnames[[3]][39:48]<-attributes(draw2)$dimnames[[3]][9:18]

which(!attributes(drw)$dimnames[[3]]%in%attributes(draw2)$dimnames[[3]])
#ok

vars<-variables(brm2)
mtc<-match(vars,attributes(drw)$dimnames[[3]])

p<-mcmc_intervals(brm2,pars=vars[1:10])
for (i in 1:10)
{
p<-p+annotate("point",x=mean(drw[,,mtc[i]]),y=11.2-i,size=4,pch=1,colour="red")
p<-p+annotate("segment",x=quantile(drw[,,mtc[i]],0.25),xend=quantile(drw[,,mtc[i]],0.75),y=11.2-i,yend=11.2-i,size=1)
p<-p+annotate("segment",x=quantile(drw[,,mtc[i]],0.05),xend=quantile(drw[,,mtc[i]],0.95),y=11.2-i,yend=11.2-i,size=0.1)
}
p

p<-mcmc_intervals(brm2,pars=vars[11:20])
for (i in 11:20)
{
p<-p+annotate("point",x=mean(drw[,,mtc[i]]),y=21.2-i,size=4,pch=1,colour="red")
p<-p+annotate("segment",x=quantile(drw[,,mtc[i]],0.25),xend=quantile(drw[,,mtc[i]],0.75),y=21.2-i,yend=21.2-i,size=1)
p<-p+annotate("segment",x=quantile(drw[,,mtc[i]],0.05),xend=quantile(drw[,,mtc[i]],0.95),y=21.2-i,yend=21.2-i,size=0.1)
}
p

#etc

graphics.off()
mcmc_scatter(brm2,pars=vars[1:2])+ggtitle("from draw2")+xlim(c(-2.5,-0.5))+ylim(c(0,5))
dev.new()
mcmc_scatter(drw,pars=vars[1:2])+ggtitle("from drw")+xlim(c(-2.5,-0.5))+ylim(c(0,5))

graphics.off()
mcmc_scatter(brm2,pars=vars[3:4])+ggtitle("from draw2")+xlim(c(-3,3))+ylim(c(0,1.5))
dev.new()
mcmc_scatter(drw,pars=vars[3:4])+ggtitle("from drw")+xlim(c(-3,3))+ylim(c(0,1.5))

np<-nuts_params(brm2)

graphics.off()
mcmc_pairs(brm2,pars=vars[c(4:6,30)],np=np,np_style=pairs_style_np())
dev.new()
mcmc_pairs(drw,pars=vars[c(4:6,30)],np=npx,np_style=pairs_style_np())


#for loo, need pointwise loglik

loo1<-loo(brm2,save_psis=TRUE)

#see attached LLcod.stan

LLcod<-cmdstan_model(stan_file="LLcod.stan")
ptLL<-LLcod$generate_quantities(fitted_params=adrw,data=sdat2,seed=2002)
LLx<-ptLL$draws()
dim(LLx)
class(LLx)
#LLx is the array of pointwise log-likelihoods

loo2<-loo(LLx,save_psis=TRUE)

loo1
loo2

graphics.off()
loop1<-ppc_loo_pit_overlay(y,yrep=pp,lw=weights(loo1$psis_object))+ggtitle("from brm2")
loop2<-ppc_loo_pit_overlay(y,yrep=mppx,lw=weights(loo2$psis_object))+ggtitle("from drw")
grid.arrange(loop1,loop2)

* Operating System: linux mint 19.3
* brms Version: 2.17.0
* cmdstanr 0.5.2