skgallagher/EpiCompare

Weird issue with geom_prediction_band and/or simulate_sir

Closed this issue · 7 comments

@skgallagher @benjaminleroy

I create two aggregate data frames using two sets of initial parameter values from simulate_agents. When I rowbind these two data frames together, the simulations will not plot in geom_prediction_band()

See the test in test-prediction-band.R

@skgallagher, 51b96ef attempts to solve this problem. Given the code was in ggplot class object style (meaning devtools::check() doesn't really looking into them much), there were a few dplyr:: were missing.

In the future, issues should include reproducible examples like the following (just alternation of what you put in the test-prediction-band.R file):

  ## This is the SIR representation
  trans_mat <- matrix(c("X0 * (1 - X1 * par1 / N)", "X0 * X1  * par1 / N", "0",
                        "0", "X1 * (1 - par2)", "par2 * X1",
                        "0", "0", "X2"), byrow = TRUE, nrow = 3)
  rownames(trans_mat) <- c("S", "I", "R")
  init_vals <- c(187, 1, 0)
  par_vals <- c("par1" = .2, "par2" = .1)
  max_T <- 55
  n_sims <- 20
  
  B <- 5
  
  
  sigma <- matrix(c(6.401576e-04, 2.706480e-15,
                    2.706480e-15, 7.437683e-05),
                  nrow = 2, byrow = T)
  mu <- c(0.362442, 0.126167)
  
  par_val_mat <- mvrnorm(n = B, mu = mu, Sigma = sigma)
  
  
  set.seed(1)
  sim_list <- vector(mode = "list", length = B)
  for(bb in 1:B){
    
    par_vals <-  c("par1" = par_val_mat[bb, 1],
                   "par2" = par_val_mat[bb, 2])
    
    
    abm <- simulate_agents(trans_mat = trans_mat,
                           init_vals = init_vals,
                           par_vals = par_vals,
                           max_T = max_T,
                           n_sims = 2,
                           verbose = FALSE)
    agg_model <- abm %>% dplyr::group_by(sim) %>% 
      agents_to_aggregate(states = c(I, R)) %>%
      ungroup()
    agg_model$batch <- bb
    agg_model$beta <- par_vals[1]
    agg_model$gamma <- par_vals[2]
    sim_list[[bb]] <- agg_model
    
  }
  
  sim_df <- dplyr::bind_rows(sim_list)
  sim_df$id <- paste0(sim_df$batch, ".",
                      sim_df$sim)
  
  
  
  plot_df <- sim_df %>% dplyr::filter(t != 0) %>%
    dplyr::select(id, t, X0, X1, X2)
  
  
  ggplot() +
    geom_prediction_band(data = plot_df,
                         aes(x = X0, y = X1, z = X2, 
                             sim_group = as.numeric(id)), alpha = .5,
                         fill = "cornflowerblue") +
    coord_tern() + theme_sir() +
    labs(title = "Prediction band for best parameters")
  

I've found that

trans_mat <- matrix(c("X0 * (1 - X1 * par1 / N)", "X0 * X1  * par1 / N", "0",
                        "0", "X1 * (1 - par2)", "par2 * X1",
                        "0", "0", "X2"), byrow = TRUE, nrow = 3)
  rownames(trans_mat) <- c("S", "I", "R")
  init_vals <- c(187, 1, 0)
  par_vals <- c("par1" = .2, "par2" = .1)
  max_T <- 55
  n_sims <- 20
  
  B <- 5
  
  
  sigma <- matrix(c(6.401576e-04, 2.706480e-15,
                    2.706480e-15, 7.437683e-05),
                  nrow = 2, byrow = T)
  mu <- c(0.362442, 0.126167)
  
  par_val_mat <- mvrnorm(n = B, mu = mu, Sigma = sigma)
  
  
  set.seed(11)
  sim_list <- vector(mode = "list", length = B)
  for(bb in 1:B){
    
    par_vals <-  c("par1" = par_val_mat[bb, 1],
                   "par2" = par_val_mat[bb, 2])
    
    
    abm <- simulate_agents(trans_mat = trans_mat,
                           init_vals = init_vals,
                           par_vals = par_vals,
                           max_T = max_T,
                           n_sims = 2,
                           verbose = FALSE)
    agg_model <- abm %>% dplyr::group_by(sim) %>% 
      agents_to_aggregate(states = c(I, R)) %>%
      ungroup()
    agg_model$batch <- bb
    agg_model$beta <- par_vals[1]
    agg_model$gamma <- par_vals[2]
    sim_list[[bb]] <- agg_model
    
  }
  
  sim_df <- dplyr::bind_rows(sim_list)
  table(sim_df$batch, sim_df$sim) # shouldn't all be the same for check
  sim_df$id <- paste0(sim_df$batch, ".",
                      sim_df$sim)
  
  
  
  plot_df <- sim_df %>% dplyr::filter(t != 0) %>%
    dplyr::select(id, t, X0, X1, X2)
  
  
  
  pb_type = c("delta_ball", "kde", "spherical_ball", "convex_hull")[1]
  ggplot() +
    geom_prediction_band(data = plot_df,
                         aes(x = X0, y = X1, z = X2, 
                             sim_group = as.numeric(id)), alpha = .5,
                         fill = "cornflowerblue",
                         pb_type = pb_type) +
    coord_tern() + theme_sir() +
    labs(title = "Prediction band for best parameters")

  pb_type = c("delta_ball", "kde", "spherical_ball", "convex_hull")[2]
  ggplot() +
    geom_prediction_band(data = plot_df,
                         aes(x = X0, y = X1, z = X2, 
                             sim_group = as.numeric(id)), alpha = .5,
                         fill = "cornflowerblue",
                         pb_type = pb_type) +
    coord_tern() + theme_sir() +
    labs(title = "Prediction band for best parameters")
  
  
  pb_type = c("delta_ball", "kde", "spherical_ball", "convex_hull")[3]
  ggplot() +
    geom_prediction_band(data = plot_df,
                         aes(x = X0, y = X1, z = X2, 
                             t = as.numeric(t)), alpha = .5,
                         fill = "cornflowerblue",
                         pb_type = pb_type) +
    coord_tern() + theme_sir() +
    labs(title = "Prediction band for best parameters")
  
  pb_type = c("delta_ball", "kde", "spherical_ball", "convex_hull")[4]
  ggplot() +
    geom_prediction_band(data = plot_df,
                         aes(x = X0, y = X1, z = X2, 
                             sim_group = as.numeric(id)), alpha = .5,
                         fill = "cornflowerblue",
                         pb_type = pb_type) +
    coord_tern() + theme_sir() +
    labs(title = "Prediction band for best parameters")

Fails for the delta_ball and convex_hull because not all simulations are the same length (specifically the 3rd and 4th simulation has both batch tracks that are less than length 56. This is an assumption underlying both the "uniform" prediction bands as they both calculate the distance between curves using

This equation requires either some compression of the representation of each path or the same number of time points for each observation. @skgallagher - can we discuss this relative to our time-invariant approach?

> table(sim_df$batch, sim_df$sim) # shouldn't all be the same for check
   
     1  2
  1 56 56
  2 56 56
  3 55 55
  4  5  5
  5 56 56

7afdd98 corrects the above problems

Still having an issue when I move to larger groups. Try this:

set.seed(2020)
## This is the SIR representation
trans_mat <- matrix(c("X0 * (1 - X3 * par1 / N)", "0", "0", "X0 * X3  * par1 / N", "0",
                      "0", "X1 * (1 - X3 * par2 / N)", "0", "X1 * X3  * par2 / N", "0",
                      "0", "0" ,"X2 * (1 - X3 * par3 / N)",  "X2 * X3  * par3 / N", "0",
                      "0", "0", "0", "X3 * (1 - par4)", "par4 * X3",
                      "0", "0", "0", "0", "X4"), byrow =  TRUE, nrow = 5)
rownames(trans_mat) <- c("S0","S1", "S2", "I", "R")
init_vals <- c(90, 30, 67, 1, 0)
par_vals <- c(0.27, .999, 0.43, 0.13)
##par_vals <- c(par1 = best_params$par[1], par2 = best_params$par[2],
##              par3 = best_params$par[3], par4 = best_params$par[4])
max_T <- 55
n_sims <- 100

abm <- simulate_agents(trans_mat,
                       init_vals,
                       par_vals,
                       max_T,
                       n_sims,
                       verbose = FALSE)

agg_model <- abm %>% 
  pivot_longer(cols = c(S0, S1, S2),
               names_to = "class",
               values_to = "X0",
               values_drop_na = TRUE) %>%
  group_by(sim, class) %>%
  agents_to_aggregate(states = c(I, R))


## Delta ball

ggplot() +
    geom_prediction_band(data = agg_model %>% filter(t != 0),
         aes(x = X0, y = X1, z = X2, fill = class,
              sim_group = sim), alpha = .5,
                         conf_level = .95,
         pb_type = "delta_ball") +
    coord_tern() + theme_sir() 

Convex hull is not erroring/showing anything

ggplot() +
    geom_prediction_band(data = agg_model %>% filter(t != 0),
         aes(x = X0, y = X1, z = X2, fill = class,
               sim_group = sim), alpha = .5,
                         conf_level = .95,
         over_delta = .02,
         pb_type = "convex_hull") +
    coord_tern() + theme_sir() 

@skgallagher definitely another good error. I believe this is related to a path not actually moving anywhere (I need to check in with filament_compression and equa_dist_points_direction to think about how to deal with this). Try a different seed right now?

cd60619 should fix this last problem (relating to paths with a cumulative length of 0).