coda.samples not working with observed data
Closed this issue · 4 comments
I'm having some trouble. I set up a network for coronary artery disease, it compiled fine, and I was able to run coda.samples to get the posterior samples.
Then I fed it some data, and I got an error. What's going on?
g <- HydeNetwork(
~ riskScore | female*age*black*totchol*hdl*antihypertensives*sbp*smoke*diabetes
+ cad.0 | riskScore*black*female
)
g <- setNode(g, female, "dbern", p = 0.40)
g <- setNode(g, black, "dbern", p = 0.13)
g <- setNode(g, smoke, "dbern", p = 0.30)
g <- setNode(g, diabetes, "dbern", p = 0.10)
g <- setNode(g, antihypertensives, "dbern", p = 0.20)
g <- setNode(g, totchol, "dnorm", mu = 220, tau = 1/(20^2))
g <- setNode(g, hdl, "dnorm", mu = 55, tau = 1/( 5^2))
g <- setNode(g, age, "dnorm", mu = 60, tau = 1/(10^2))
g <- setNode(g, sbp, "dnorm", mu = 145, tau = 1/(10^2))
g <- setNode(g, riskScore, "determ", define=fromFormula(),
nodeFormula =
riskScore ~
ifelse(black==1,
ifelse(female==1,
17.114*log(age) + 0.94*log(totchol) - 18.92*log(hdl) + 4.475*log(age)*log(hdl) +
ifelse(antihypertensives==1,29.291-6.432*log(age),27.820-6.087*log(age))*log(sbp) +
0.691*smoke + 0.874*diabetes -86.61,
2.469*log(age) + 0.302*log(totchol) - 0.307*log(hdl) +
ifelse(antihypertensives==1,1.916,1.809)*log(sbp) + 0.549*(smoke==1) +
0.645*(diabetes==1) - 19.54
),
ifelse(female==1,
-29.799*log(age) + 4.884*pow(log(age),2) + 13.54*log(totchol) -
3.114*log(age)*log(totchol) - 13.578*log(hdl) + 3.149*log(age)*log(hdl) +
ifelse(antihypertensives==1,2.019,1.957)*log(sbp) + 7.574*smoke +
-1.665*log(age)*smoke + 0.661*diabetes + 29.18,
12.344*log(age) + 11.853*log(totchol) - 2.664*log(age)*log(totchol) -
7.99*log(hdl) + 1.769*log(age)*log(hdl) +
ifelse(antihypertensives==1,1.797,1.764)*log(sbp) + 7.837*(smoke==1) -
1.795*log(age)*(smoke==1) + 0.658*(diabetes==1) - 61.18
)
)
)
g <- setNode(g, cad.0, nodeType = "dbern",
p = "1.0-pow(ifelse(black==1,ifelse(female==1,0.9533,0.8954),ifelse(female==1,0.9665,0.9144)),exp(riskScore))")
plot(g)
writeNetworkModel(g, pretty=TRUE)
model{
riskScore <- ifelse(black == 1, ifelse(female == 1, 17.114 * log(age) + 0.94 * log(totchol) - 18.92 * log(hdl) + 4.475 * log(age) * log(hdl) + ifelse(antihypertensives == 1, 29.291 - 6.432 * log(age), 27.82 - 6.087 * log(age)) * log(sbp) + 0.691 * smoke + 0.874 * diabetes - 86.61, 2.469 * log(age) + 0.302 * log(totchol) - 0.307 * log(hdl) + ifelse(antihypertensives == 1, 1.916, 1.809) * log(sbp) + 0.549 * (smoke == 1) + 0.645 * (diabetes == 1) - 19.54), ifelse(female == 1, -29.799 * log(age) + 4.884 * pow(log(age),
2) + 13.54 * log(totchol) - 3.114 * log(age) * log(totchol) - 13.578 * log(hdl) + 3.149 * log(age) * log(hdl) + ifelse(antihypertensives == 1, 2.019, 1.957) * log(sbp) + 7.574 * smoke + -1.665 * log(age) * smoke + 0.661 * diabetes + 29.18, 12.344 * log(age) + 11.853 * log(totchol) - 2.664 * log(age) * log(totchol) - 7.99 * log(hdl) + 1.769 * log(age) * log(hdl) + ifelse(antihypertensives == 1, 1.797, 1.764) * log(sbp) + 7.837 * (smoke == 1) - 1.795 * log(age) * (smoke == 1) + 0.658 * (diabetes ==
1) - 61.18))
female ~ dbern(0.4)
age ~ dnorm(60, 0.01)
black ~ dbern(0.13)
totchol ~ dnorm(220, 0.0025)
hdl ~ dnorm(55, 0.04)
antihypertensives ~ dbern(0.2)
sbp ~ dnorm(145, 0.01)
smoke ~ dbern(0.3)
diabetes ~ dbern(0.1)
cad.0 ~ dbern(1.0-pow(ifelse(black==1,ifelse(female==1,0.9533,0.8954),ifelse(female==1,0.9665,0.9144)),exp(riskScore)))
}
##########################################################################
# Prediction without any information whatsoever
##########################################################################
monitoredNodes <- c("female","age","black","totchol","hdl","antihypertensives","sbp","smoke","diabetes","riskScore","cad.0")
g0 <- compileJagsModel(g, n.chains = 3, n.adapt = 5000)
S0 <- coda.samples(g0$jags, monitoredNodes, n.iter=16667)
S0 <- as.data.frame(do.call("rbind", S0))
#dplyr::sample_n(S0, 20)
p <- data.frame(stage = "0: No information entered", p = mean(S0$cad.0))
p
stage p
1 0: No information entered 0.1398172
##########################################################################
# Enter some baseline demographic information
##########################################################################
g1 <- compileJagsModel(g, data=list(age=74), n.chains = 3, n.adapt = 5000)
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph Size: 150
Initializing model
S1 <- coda.samples(g1$jags, c("cad.0"), n.iter=16667)
Error in coda.samples(g1$jags, c("cad.0"), n.iter = 16667) :
attempt to apply non-function
I also get the error message when I use HydePosterior()
:
g1 <- compileJagsModel(g, data = list(age=73, black=1, smoke=1), n.chains = 3, n.adapt = 5000)
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph Size: 149
Initializing model
S1 <- HydePosterior(g1, variable.names = c("riskScore", "cad.0", "black", "age"), n.iter=16667)
Error in rjags::coda.samples(cHN[[j]]$jags[[1]], variable.names = variable.names, :
attempt to apply non-function
okay - so I completely hacked the HydePosterior()
function and seem to have gotten it working.
But I did this mindlessly and don't want to commit it to the development branch. In fact I think it's wrong. But it works for my current use (no policy matrices).
(I also have a prior version of the package sitting on my laptop and don't want to sync/reinstall until you've been able to handle the stuff with cpt()
and inputCPT()
).
So I'll paste the function below.
The issue seems to be the fact that the original (correct) version of the function referred to the cHN
object as a list with multiple compiled HydeNet objects within. But when I ran compileJagsModel()
, it seems the structure had changed - instead of cHN[[j]]$jags[[1]]
, what I think the HydePosterior()
function really wanted in my case was cHN$jags[[1]]
. Same with cHN$observed
and cHN$factorRef
.
#' @name HydePosterior
#' @export HydePosterior
#'
#' @title Posterior Distributions of a Decision Network
#' @description The posterior distributions of the decision network can be
#' evaluated to determine the probabilistic outcomes based on the decision
#' inputs in the model as well as subject specific factors.
#'
#' @param cHN A \code{compiledHydeNetwork} object as returned by
#' \code{compileJagsNetwork}.
#' @param variable.names a character vector giving the names of variables to be monitored.
#' @param n.iter number of iterations to monitor.
#' @param thin thinning interval for monitors.
#' @param ... options arguments that are passed to the update method for
#' jags model objects.
#' @param monitor_observed If TRUE, the observed or fixed variables (those
#' passed to the \code{data} argument in \code{compileJagsNetwork}) are
#' forced into \code{variable.names} if not already provided. This is
#' recommended, especially if you will be binding multiple JAGS runs
#' together.
#'
#' @details This is essentially a wrapper around \code{coda.samples} that
#' returns in a list the output for each run of \code{coda.samples} over
#' the rows of the policy/decision matrix given in the \code{data} argument
#' of \code{compileJagsNetwork}.
#'
#' @return A list of class \code{HydePosterior} with elements \code{codas}
#' (the MCMC matrices from \code{coda.samples}), \code{observed} (the values
#' of the variables that were observed), \code{dag} (the dag object for
#' convenience in displaying the network), and \code{factorRef} (giving the
#' mappings of factor levels to factor variables).
#'
#' The only rationale for giving this object its own class was because it
#' produces an enormous amount of material to be printed. A distinct
#' \code{print} method has been written for this object.
#'
#' @author Jarrod Dalton and Benjamin Nutter
#'
#' @examples
#' data(PE, package="HydeNet")
#' Net <- HydeNetwork(~ wells +
#' pe | wells +
#' d.dimer | pregnant*pe +
#' angio | pe +
#' treat | d.dimer*angio +
#' death | pe*treat,
#' data = PE)
#'
#'
#' compiledNet <- compileJagsModel(Net, n.chains=5)
#'
#' #* Generate the posterior distribution
#' Posterior <- HydePosterior(compiledNet,
#' variable.names = c("d.dimer", "death"),
#' n.iter = 1000)
#'
#' #* Posterior Distributions for a Decision Model
#' Net <- setDecisionNodes(Net, angio, treat)
#' decisionNet <- compileDecisionModel(Net, n.chains=5)
#' decisionsPost <- HydePosterior(decisionNet,
#' variable.names = c("d.dimer", "death"),
#' n.iter = 1000)
#'
#'
HydePosterior <- function(cHN, variable.names, n.iter, thin=1, ...,
monitor_observed=TRUE){
if (monitor_observed){
variable.names <-
if (class(cHN$jags) == "jags")
unique(c(variable.names, names(cHN$observed)))
else unique(c(variable.names, names(cHN[[1]]$observed)))
}
if (class(cHN$jags) == "jags"){
codas <- coda.samples(cHN$jags, variable.names, n.iter, thin, ...)
HydePost <- list(codas = codas,
observed = cHN$observed,
dag = cHN$dag,
factorRef = cHN$factorRef)
}
else{
codas <- rjags::coda.samples(cHN$jags[[1]],
variable.names = variable.names,
n.iter = n.iter,
thin = thin,
...)
observed <- do.call("rbind", cHN$observed)
HydePost <- list(codas = codas,
observed = observed,
dag = cHN$dag,
factorRef = cHN$factorRef)
}
class(HydePost) <- "HydePosterior"
HydePost
}
It looks like the problem is that sometimes compileJagsModel
is creating the jags
element as a JAGS model, and sometimes it's creating the jags
element as a length 1 list of JAGS models. It shouldn't be a list.
So the problem is actually in compileJagsModel
. I'm working on it now.
Found it. I had tried to implement something to make compileDecisionModel
more efficient and then decided the place to make that work was compileDecisionModel
, but I forgot to restore the original code to compileJagsModel
. All should be right with the world now. HydePosterior
is also working. I'd recommend you not commit your changes.
Sorry for the error. Thanks for catching it.