Below is the script to do the (translated) loggamma and the raw elpd calculations. It tales the loglik1 input from the stan run.
I have also done bootstrap estimates of the mcse, also below, for one of my actuarial models. I donât want to inflict the actuarial model itself unless you really want it. but just below are the bootstrap results. Note that the translated loggamma (tlg) has the lowest mcse
Just for the record, in the paper I am writing for actuaries, I will be using the psis (and noting the pareto k issue) I consider you guys to be the gold standard on this topic. But I am interested in the mcse error, as some of my model comparisons are close calls.
print(co_model$mcse)
$mcse_test_boot
[1] 0.331
$mcse_psis_boot
[1] 0.215
$mcse_tlg_boot
[1] 0.111
$mcse_raw_boot
[1] 0.208
function to fit loggamma distribution by the method of moments
reference - p227 of Loss Distributions - Hogg and Klugman 1984
fit_loggamma=function(x){
minlogx=min(log(x))
tlogx=log(x)-minlogx
m1=mean(tlogx) # translate to make all numbers positive
m2=mean(tlogx^2)
alpha=m1^2/(m2-m1^2)
lambda=m1/(m2-m1^2)
mean_tlg=1/(1-1/lambda)^alpha*exp(minlogx) #use exp(minlogx) to untranslate
output=list(mean_tlg = mean_tlg,
alpha_tlg = alpha,
lambda_tlg = lambda)
return(output)
}
function to get elpd by three different formulas
elpd_calc=function(loglik){
calc #1 with the âlooâ package
loo_psis â loo(loglik)
calculate importance ratios
impratio=matrix(0,dim(loglik)[1],dim(loglik)[2])
for (i in 1:dim(loglik)[2]){
impratio[,i]=1/exp(loglik[,i])
}
calculate raw elpd
prloo_raw=rep(0,dim(loglik)[2])
varloo_raw=rep(0,dim(loglik)[2])
for (i in 1:dim(loglik)[2]){
prloo_raw[i]=1/mean(impratio[,i])
varloo_raw[i]=var(impratio[,i])/dim(loglik)[1]/mean(impratio[,i])^2
}
elpd_raw=sum(log(prloo_raw))
se_elpd_raw=sum(sqrt(varloo_raw))
calculate elpd_tlg
prloo_tlg=rep(0,dim(loglik)[2])
lambda_tlg=NULL
for(i in 1:dim(loglik)[2]){
tlg=fit_loggamma(impratio[,i])
prloo_tlg[i]=1/tlg$mean_tlg
lambda_tlg=c(lambda_tlg,tlg$lambda_tlg)
}
elpd_tlg=sum(log(prloo_tlg))
output=list(elpd_psis = round(loo_psis$estimates[1,1],3),
elpd_raw = round(elpd_raw,3),
elpd_tlg = round(elpd_tlg,3),
p_psis = round(loo_psis$estimates[2,1],3),
max_k_psis = round(max(loo_psis$diagnostics$pareto_k),3),
min_lam_tlg = round(min(lambda_tlg),3))
return(output)
}
bootstrap to get an estimate of the mcmc sample variance
nboot=100
set.seed(12345)
cl â makePSOCKcluster(4)
registerDoParallel(cl)
boot.se=foreach (nb=1:nboot,.combine=rbind,.export=ls(.GlobalEnv)) %dopar%{
test_elpd_boot=0
sam=sample(1:num.mcmc,num.mcmc,replace=T)
for (i in 1:dim(test_data)[1]){
mu.test=log(Premium[test_data$w[i]])+logelr[sam]+
alpha[sam,test_data$w[i]]+
beta[sam,test_data$d[i]]*(1-gamma[sam])^(test_data$w[i]-1)
test_elpd_boot=test_elpd_boot+
log(mean(dnorm(log(test[i]),mu.test,sig[sam,test_data$d[i]],log=F)))
}
temp=elpd_calc(loglik1[sam,])
elpd_psis_boot=temp$elpd_psis
elpd_tlg_boot=temp$elpd_tlg
elpd_raw_boot=temp$elpd_raw
boot=c(test_elpd_boot,elpd_psis_boot,elpd_tlg_boot,elpd_raw_boot)
return(boot)
}
stopCluster(cl)
mcse=list(mcse_test_boot=round(sd(boot.se[,1]),3),
mcse_psis_boot=round(sd(boot.se[,2]),3),
mcse_tlg_boot =round(sd(boot.se[,3]),3),
mcse_raw_boot =round(sd(boot.se[,4]),3))