jacobbien/simulator

Add Model Component to existing simulation

sahirbhatnagar opened this issue · 2 comments

In the Benjamini-Hochberg Vignette you show how to add more draws to an existing simulation. But I'm trying to add more Model Components to a simulation. The reprex below is my attempt. But it looks like the draws are being regenerated for the existing Model Components. Is there a way such that only the newly added Model Component gets new draws?

library(simulator)
library(mvtnorm)
make_correlated_pvalues <- function(n, pi0, rho) {
  # Gaussian copula model...
  #
  # n pvalues, the first n*pi0 of which are null, coming from a multivariate
  # normal with all correlations rho.
  sigma <- matrix(rho, n, n)
  diag(sigma) <- 1
  n0 <- round(n * pi0)
  delta <- 2 # size of signal
  mu <- rep(c(0, delta), c(n0, n - n0)) # n0 are null
  new_model(name = "correlated-pvalues",
            label = sprintf("pi0 = %s, rho = %s", pi0, rho),
            params = list(n = n, rho = rho, sigma = sigma,
                          pi0 = pi0, mu = mu, delta = delta,
                          nonnull = which(mu != 0)),
            simulate = function(n, mu, sigma, nsim) {
              # this function must return a list of length nsim
              x <- rmvnorm(nsim, mean = mu, sigma = sigma)
              pvals <- 1 - pnorm(x)
              return(split(pvals, row(pvals))) # make each row its own list element
            })
}


make_bh <- function(q) {
  # q is the desired level of control for the FDR
  new_method(name = paste0("bh", q),
             label = sprintf("BH (q = %s)", q),
             settings = list(q = q),
             method = function(model, draw, q) {
               p <- sort(draw)
               cutline <- seq(model$n) * q / model$n
               threshold <- max(p[p <= cutline], 0)
               list(rejected = which(draw <= threshold))
             })
}

qvalues <- c(0.05, 0.1, 0.2)
bh_methods <- sapply(qvalues, make_bh)



fdp <- new_metric(name = "fdp",
                  label = "false discovery proportion",
                  metric = function(model, out) {
                    fp <- setdiff(out$rejected, model$nonnull)
                    nd <- max(length(out$rejected), 1)
                    return(length(fp) / nd)
                  })

nd <- new_metric(name = "nd",
                 label = "number of discoveries",
                 metric = function(model, out) length(out$rejected))


name_of_simulation <- "fdr"
sim <- new_simulation(name = name_of_simulation,
                      dir = "simulation/",
                      label = "False Discovery Rate") %>%
  generate_model(make_correlated_pvalues, seed = 123,
                 n = 100,
                 pi0 = 0.8,
                 rho = 0.1) %>%
  simulate_from_model(nsim = 2, index = 1:2) %>%
  run_method(bh_methods, parallel = list(socket_names = 4, libraries = "mvtnorm")) %>%
  evaluate(list(fdp, nd))
#> ..Created model and saved in correlated-pvalues/n_100/pi0_0.8/rho_0.1/model.Rdata
#> ..Simulated 2 draws in 0.08 sec and saved in correlated-pvalues/n_100/pi0_0.8/rho_0.1/r1.Rdata
#> ..Simulated 2 draws in 0.07 sec and saved in correlated-pvalues/n_100/pi0_0.8/rho_0.1/r2.Rdata
#> Shutting down cluster.
#> ..Performed BH (q = 0.05) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.05) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.1) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.1) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.2) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.2) in 0 seconds (on average over 2 sims)
#> ..Created 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 1, method_name = "bh0.05", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 2, method_name = "bh0.05", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 1, method_name = "bh0.1", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 2, method_name = "bh0.1", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 1, method_name = "bh0.2", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 2, method_name = "bh0.2", out_loc = "out", simulator.files = "files") 
#> in parallel.
#> ..Evaluated BH (q = 0.05) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.05) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.1) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.1) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.2) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.2) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)

sim %>% model()
#> Model Component
#>  name: correlated-pvalues/n_100/pi0_0.8/rho_0.1
#>  label: pi0 = 0.8, rho = 0.1
#>  params: n rho sigma pi0 mu delta nonnull
#>  (Add @params to end of this object to see parameters.)
#>  (Add @simulate to end of this object to see how data is simulated.)

sim <- sim %>% 
  generate_model(make_correlated_pvalues, seed = 123,
                 n = 100,
                 pi0 = 1,
                 rho = 0.9) %>%
  simulate_from_model(nsim = 2, index = 1:2) %>%
  run_method(bh_methods, parallel = list(socket_names = 4, libraries = "mvtnorm")) %>%
  evaluate(list(fdp, nd))
#> ..Created model and saved in correlated-pvalues/n_100/pi0_1/rho_0.9/model.Rdata
#> ..Simulated 2 draws in 0.03 sec and saved in correlated-pvalues/n_100/pi0_0.8/rho_0.1/r1.Rdata
#> ..Simulated 2 draws in 0.02 sec and saved in correlated-pvalues/n_100/pi0_0.8/rho_0.1/r2.Rdata
#> ..Simulated 2 draws in 0.03 sec and saved in correlated-pvalues/n_100/pi0_1/rho_0.9/r1.Rdata
#> ..Simulated 2 draws in 0.04 sec and saved in correlated-pvalues/n_100/pi0_1/rho_0.9/r2.Rdata
#> Shutting down cluster.
#> ..Performed BH (q = 0.05) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.05) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.1) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.1) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.2) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.2) in 0 seconds (on average over 2 sims)
#> ..Created 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 1, method_name = "bh0.05", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 2, method_name = "bh0.05", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 1, method_name = "bh0.1", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 2, method_name = "bh0.1", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 1, method_name = "bh0.2", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_0.8/rho_0.1", index = 2, method_name = "bh0.2", out_loc = "out", simulator.files = "files") 
#> in parallel.
#> Shutting down cluster.
#> ..Performed BH (q = 0.05) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.05) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.1) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.1) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.2) in 0 seconds (on average over 2 sims)
#> ..Performed BH (q = 0.2) in 0 seconds (on average over 2 sims)
#> ..Created 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_1/rho_0.9", index = 1, method_name = "bh0.05", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_1/rho_0.9", index = 2, method_name = "bh0.05", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_1/rho_0.9", index = 1, method_name = "bh0.1", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_1/rho_0.9", index = 2, method_name = "bh0.1", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_1/rho_0.9", index = 1, method_name = "bh0.2", out_loc = "out", simulator.files = "files") 
#> new("OutputRef", dir = "/tmp/RtmpVM9xW2/reprex153e551eed9f/simulation", model_name = "correlated-pvalues/n_100/pi0_1/rho_0.9", index = 2, method_name = "bh0.2", out_loc = "out", simulator.files = "files") 
#> in parallel.
#> ..Evaluated BH (q = 0.05) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.05) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.1) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.1) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.2) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.2) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.05) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.05) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.1) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.1) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.2) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)
#> ..Evaluated BH (q = 0.2) in terms of 
#> false discovery proportion, number of discoveries, Computing time (sec)

sim %>% model()
#> [[1]]
#> Model Component
#>  name: correlated-pvalues/n_100/pi0_0.8/rho_0.1
#>  label: pi0 = 0.8, rho = 0.1
#>  params: n rho sigma pi0 mu delta nonnull
#>  (Add @params to end of this object to see parameters.)
#>  (Add @simulate to end of this object to see how data is simulated.)
#> 
#> [[2]]
#> Model Component
#>  name: correlated-pvalues/n_100/pi0_1/rho_0.9
#>  label: pi0 = 1, rho = 0.9
#>  params: n rho sigma pi0 mu delta nonnull
#>  (Add @params to end of this object to see parameters.)
#>  (Add @simulate to end of this object to see how data is simulated.)
#> 
#> attr(,"class")
#> [1] "listofModels" "list"

Created on 2019-12-04 by the reprex package (v0.3.0)

Thanks, Sahir, for this question!  Indeed, this is possible. The key thing to keep in mind is that one doesn't have to always pass simulation objects as the first argument to simulate_from_model (and the other main simulator functions).  Instead one can pass a reference to a model (or a list of such references).  In such a case, simulate_from_model will return a list of references to the generated draws, which can then be added to your simulation.  Here is a code snippet demonstrating how this works:

sim <- new_simulation(name = "fdr",
                      label = "False Discovery Rate") %>%
  generate_model(make_correlated_pvalues, seed = 123,
                 n = 100,
                 pi0 = 0.8,
                 rho = 0.1) %>%
  simulate_from_model(nsim = 2, index = 1:2) 

# add another model to the simulation:
sim <- sim %>%
  generate_model(make_correlated_pvalues, seed = 123,
                 n = 100,
                 pi0 = 1,
                 rho = 0.9)

# to simulate from this model without touching other model's draws...

# 1) Get a reference to the specific model we want to simulate from
model_ref <- sim %>% model(subset = 2, reference = TRUE)
# 2) Simulate from this model: 
newdraws_ref <- simulate_from_model(model_ref, nsim = 2, index = 1:2)
# 3) Add these new draws to simulation:
sim <- sim %>% add(newdraws_ref)

Thanks Jacob. This is very helpful