nickreich/coarseDataTools

implement WAIC and LOOIC metrics

Opened this issue · 0 comments

Per request from users who were asked to provide WAIC and LOOIC metrics in a paper, they created the script below. It would be nice if this were integrated into the package directly.

# Calculate WAIC and LOOIC for serial interval distributions
# November 30, 2022

library(loo)

# Load likelihood function from coarseDataTools
source("estimate_params/serial interval/likelihood_function.R")

# Load data (pick symptom onset or rash)
#symp <- readRDS("estimate_params/serial interval/symp_si_data_for_loo.RDS") # EARLIEST SYMPTOM ONSET
symp <- readRDS("estimate_params/serial interval/rash_si_data_for_loo.RDS") #RASH

###### GAMMA

# Load in MCMC samples (pick symptom onset or rash)
#symp_si <- readRDS("estimate_params/serial interval/MCMC_samples_symp_gamma.RDS")
symp_si <- readRDS("estimate_params/serial interval/MCMC_samples_rash_gamma.RDS")

# Prepare data and parameters for loo package
# Need: an S by N matrix, where S is the size of the posterior sample (with all chains
# merged) and N is the number of data points
mat <- matrix(NA, nrow = length(symp_si@samples$var1), ncol = nrow(symp)) 

# diclik2 is used by coursedatatools to calculate the likelihood

for (i in 1:nrow(symp)) {
	for (j in 1:length(symp_si@samples$var1)){
L <- diclik2(par1 = symp_si@samples$var1[j],
			  par2 = symp_si@samples$var2[j], 
			  EL = symp$EL[i], ER = symp$ER[i], SL = symp$SL[i], SR = symp$SR[i], 
			  dist = "G")
mat[j,i] <- L
}
}

waic(log(mat)) # now we have to take the log to get log likelihood
loo(log(mat))

###### LOG NORMAL

# Load in MCMC samples (pick symptom onset or rash)
#symp_si <- readRDS("estimate_params/serial interval/MCMC_samples_symp_lognormal.RDS")
symp_si <- readRDS("estimate_params/serial interval/MCMC_samples_rash_lognormal.RDS")

# Prepare data and parameters for loo package
# Need: an S by N matrix, where S is the size of the posterior sample (with all chains
# merged) and N is the number of data points
mat <- matrix(NA, nrow = length(symp_si@samples$var1), ncol = nrow(symp)) 

for (i in 1:nrow(symp)) {
	for (j in 1:length(symp_si@samples$var1)){
		L <- diclik2(par1 = symp_si@samples$var1[j],
					  par2 = symp_si@samples$var2[j], 
					  EL = symp$EL[i], ER = symp$ER[i], SL = symp$SL[i], SR = symp$SR[i], 
					  dist = "L")
		mat[j,i] <- L
	}
}
beepr::beep()
waic(log(mat))
loo(log(mat))

###### Weibull

# Load in MCMC samples (pick symptom onset or rash)
#symp_si <- readRDS("estimate_params/serial interval/MCMC_samples_symp_weibull.RDS")
symp_si <- readRDS("estimate_params/serial interval/MCMC_samples_rash_weibull.RDS")

# Prepare data and parameters for loo package
# Need: an S by N matrix, where S is the size of the posterior sample (with all chains
# merged) and N is the number of data points
mat <- matrix(NA, nrow = length(symp_si@samples$var1), ncol = nrow(symp)) 

for (i in 1:nrow(symp)) {
	for (j in 1:length(symp_si@samples$var1)){
		L <- diclik2(par1 = symp_si@samples$var1[j],
					  par2 = symp_si@samples$var2[j], 
					  EL = symp$EL[i], ER = symp$ER[i], SL = symp$SL[i], SR = symp$SR[i], 
					  dist = "W")
		mat[j,i] <- L
	}
}
beepr::beep()
waic(log(mat))
loo(log(mat))