tlverse/tmle3

problem with exercise 2 in TMLE chapter, stratified tmle

jbriggs7 opened this issue · 1 comment

`## ----tmle3-ex2----------------------------------------------------------------
# Reproduction script for the stratified TMLE failure reported below.
# NOTE: this chunk attaches no packages itself; fread(), `%>%`/mutate(),
# make_learner()/Lrnr_* and tmle3()/tmle_ATE()/tmle_stratified() must already
# be available in the session.
ist_data <- fread(
  paste0(
    "https://raw.githubusercontent.com/tlverse/deming2019-workshop/",
    "master/data/ist_sample.csv"
  )
)

# The freshly read table is `ist_data`; the original line piped from `ist`,
# which does not exist yet at this point.
ist <- ist_data %>% mutate(REGION = as.factor(REGION))

## ----tmle3-node-list----------------------------------------------------------

# Column names assigned to the W, A and Y nodes handed to tmle3().
node_list <- list(
  W = c(
    "RDELAY", "RCONSC", "SEX", "AGE",
    "RSLEEP", "RATRIAL", "RCT", "RVISINF",
    "RHEP24", "RASP3", "RSBP", "RDEF1",
    "RDEF2", "RDEF3", "RDEF4", "RDEF5",
    "RDEF6", "RDEF7", "RDEF8", "STYPE",
    "RXHEP", "REGION", "MISSING_RATRIAL_RASP3", "MISSING_RHEP24"
  ),
  A = "RXASP",
  Y = "DRSISC"
)

## ----tmle3-ate-spec-----------------------------------------------------------

ate_spec <- tmle_ATE(
  treatment_level = 1,
  control_level = 0
)

## ----tmle3-learner-list-------------------------------------------------------

lrnr_mean <- make_learner(Lrnr_mean)
lrnr_glmfast <- make_learner(Lrnr_glm_fast)

# define metalearner appropriate to data types
metalearner <- make_learner(
  Lrnr_solnp,
  loss_function = loss_loglik_binomial,
  learner_function = metalearner_logistic_binomial
)

# The same two-learner library and metalearner are used for every node.
sl_Y <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)
sl_A <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)

sl_Delta <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)

learner_list <- list(A = sl_A, delta_Y = sl_Delta, Y = sl_Y)

## ----tmle3-spec-fit-----------------------------------------------------------

tmle_fit <- tmle3(ate_spec, ist, node_list, learner_list)
print(tmle_fit)

## ----tmle3-spec-summary-------------------------------------------------------

# Move REGION out of the covariates W and into its own node V.
node2 <- node_list
node2$V <- "REGION"
node2$W <- setdiff(node_list$W, node2$V)

ist2 <- ist

tmle_spec <- tmle_stratified(ate_spec)
# This is the call that raises the errors listed under ERROR(S); the data are
# intentionally left unprocessed here so the failure still reproduces.
stratified_fit <- tmle3(tmle_spec, ist2, node2, learner_list)

`

ERROR(S):
stratified_fit <- tmle3(tmle_spec, ist2, node2, learner_list)
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 44 of j is 47 which is outside the column number range [1,ncol=46]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 44 of j is 47 which is outside the column number range [1,ncol=46]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 44 of j is 47 which is outside the column number range [1,ncol=46]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 40 of j is 43 which is outside the column number range [1,ncol=42]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 41 of j is 44 which is outside the column number range [1,ncol=43]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 41 of j is 44 which is outside the column number range [1,ncol=43]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 41 of j is 44 which is outside the column number range [1,ncol=43]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 39 of j is 42 which is outside the column number range [1,ncol=40]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 40 of j is 43 which is outside the column number range [1,ncol=41]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 40 of j is 43 which is outside the column number range [1,ncol=41]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 40 of j is 43 which is outside the column number range [1,ncol=41]
Error in ind_ref_mat[as.numeric(x), , drop = FALSE] :
incorrect number of dimensions
Error in ind_ref_mat[as.numeric(x), , drop = FALSE] :
incorrect number of dimensions
Error in ind_ref_mat[as.numeric(x), , drop = FALSE] :
incorrect number of dimensions
Error in ind_ref_mat[as.numeric(x), , drop = FALSE] :
incorrect number of dimensions
Error in ind_ref_mat[as.numeric(x), , drop = FALSE] :
incorrect number of dimensions
Error in ind_ref_mat[as.numeric(x), , drop = FALSE] :
incorrect number of dimensions
Error in ind_ref_mat[as.numeric(x), , drop = FALSE] :
incorrect number of dimensions
Error in ind_ref_mat[as.numeric(x), , drop = FALSE] :
incorrect number of dimensions
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 36 of j is 37 which is outside the column number range [1,ncol=36]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 37 of j is 38 which is outside the column number range [1,ncol=37]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 37 of j is 38 which is outside the column number range [1,ncol=37]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 37 of j is 38 which is outside the column number range [1,ncol=37]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) :
Item 43 of j is 46 which is outside the column number range [1,ncol=45]
Error in self$subset_covariates(task) :
Task missing the following covariates expected by Lrnr_solnp_TRUE_TRUE_FALSE_1e-05: Lrnr_glm_fast_TRUE_Cholesky
Failed on predict
Error in self$compute_step() : Error in self$subset_covariates(task) :
Task missing the following covariates expected by Lrnr_solnp_TRUE_TRUE_FALSE_1e-05: Lrnr_glm_fast_TRUE_Cholesky

Cleaned up example with fix:

# Self-contained version of the stratified TMLE example, including the
# workaround: character columns are converted to factors before the
# stratified fit.
library(data.table) # provides fread(), set() and copy() used below
library(sl3)
library(tmle3)
library(tidyverse)

ist_data <- fread(
  paste0(
    "https://raw.githubusercontent.com/tlverse/deming2019-workshop/",
    "master/data/ist_sample.csv"
  )
)

ist <- ist_data %>% mutate(REGION = as.factor(REGION))

# Column names assigned to the W, A and Y nodes handed to tmle3().
node_list <- list(
  W = c(
    "RDELAY", "RCONSC", "SEX", "AGE",
    "RSLEEP", "RATRIAL", "RCT", "RVISINF",
    "RHEP24", "RASP3", "RSBP", "RDEF1",
    "RDEF2", "RDEF3", "RDEF4", "RDEF5",
    "RDEF6", "RDEF7", "RDEF8", "STYPE",
    "RXHEP", "REGION", "MISSING_RATRIAL_RASP3", "MISSING_RHEP24"
  ),
  A = "RXASP",
  Y = "DRSISC"
)

ate_spec <- tmle_ATE(
  treatment_level = 1,
  control_level = 0
)

lrnr_mean <- make_learner(Lrnr_mean)
lrnr_glmfast <- make_learner(Lrnr_glm_fast)

# define metalearner appropriate to data types
metalearner <- make_learner(
  Lrnr_solnp,
  loss_function = loss_loglik_binomial,
  learner_function = metalearner_logistic_binomial
)

# The same two-learner library and metalearner are used for every node.
sl_Y <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)
sl_A <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)

sl_Delta <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)

learner_list <- list(A = sl_A, delta_Y = sl_Delta, Y = sl_Y)

tmle_fit <- tmle3(ate_spec, ist, node_list, learner_list)

# Move REGION out of the covariates W and into its own node V.
node2 <- node_list
node2$V <- "REGION"
node2$W <- setdiff(node_list$W, node2$V)

# copy() so that the by-reference set() calls below do not also alter `ist`.
ist2 <- copy(ist)

# Workaround: convert every character column to a factor in place.
char_cols <- names(ist2)[vapply(ist2, is.character, logical(1))]
for (char_col in char_cols) {
  set(ist2, j = char_col, value = as.factor(ist2[[char_col]]))
}

# This definition was missing from the example, leaving `tmle_spec` undefined
# at the tmle3() call below.
tmle_spec <- tmle_stratified(ate_spec)
stratified_fit <- tmle3(tmle_spec, ist2, node2, learner_list)

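Adding a preprocessing step to tmle3_task, analogous to sl3::process_data, would resolve this: character columns would then be converted to factors automatically, as the loop in the example above does by hand.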