tlverse/tmle3

CATE estimation

juandavidgutier opened this issue · 0 comments

Hi, I am new to tmle3, and I am trying to estimate the effect of coca crops (variable `tertile_coca`) on the incidence of leishmaniasis (variable `sir`). Currently, I can estimate the average treatment effect (ATE), but I would like to know whether tmle3 can also estimate the conditional average treatment effect (CATE) by forest coverage (variable `forest`), together with a confidence interval for the CATE.

Note: I transformed the continuous variables to standard-deviation units (z-scores) to help the models converge.

Here are my dataset and code:
coca.csv

`library(data.table)
library(dplyr)
library(tmle3)
library(sl3)
library(MKdescr)
library(tidyr)

# Load the raw data set from disk.
data_all <- read.csv("D:/coca.csv")

# z-score: for every continuous variable, append a standardized copy named
# "z_<variable>" to data_all (missing values are ignored when standardizing).
# Columns are appended in the order listed here.
for (raw_name in c("sir", "misery", "forest", "mining", "fire", "deforest")) {
  z_name <- paste0("z_", raw_name)
  z_column <- setNames(
    as.data.frame(zscore(data_all[[raw_name]], na.rm = TRUE)),
    z_name
  )
  data_all <- cbind(data_all, z_column)
}

# dataset zomac: keep only the outcome, the exposure and the adjustment
# columns, then discard every row that has a missing value in any of them.
data_jd <- data_all %>%
  dplyr::select(
    z_sir, tertile_coca, zomac,
    z_misery, z_forest, z_mining, z_fire, z_deforest
  ) %>%
  drop_na()

# nodes: assign the columns of the analysis data set to their roles.
covariate_nodes <- c(
  "zomac", "z_misery", "z_forest", "z_mining", "z_fire", "z_deforest"
)
node_list <- list(
  W = covariate_nodes, # covariates
  A = "tertile_coca", # exposure
  Y = "z_sir" # outcome
)

# ate: average treatment effect contrasting exposure level 1 with level 0.
# NOTE(review): the exposure is named "tertile_coca"; confirm that it is
# actually coded 0/1 so that these two levels exist in the data.
ate_spec <- tmle_ATE(treatment_level = 1, control_level = 0)

# Candidate learners for the continuous outcome and the binomial treatment.
rf_lrnr <- Lrnr_ranger$new(num.trees = 1000)
hal_lrnr <- Lrnr_hal9001$new(max_degree = 3, n_folds = 3)
pols_lrnr <- Lrnr_polspline$new(cv = 5)

# GAM for the continuous outcome. The outcome (z_sir) is a z-score, so it
# contains zero and negative values; the Gamma family only accepts strictly
# positive responses and would make this learner fail. Use gaussian instead.
Cgam_lrnr <- Lrnr_gam$new(family = "gaussian")

# GAM for the binomial treatment.
Bgam_lrnr <- Lrnr_gam$new(family = "binomial")

rfst_lrnr <- Lrnr_randomForest$new(ntree = 1000)

# xgboost counts boosting iterations with `nrounds`; `ntree` is not an
# xgboost argument, so the requested 1000 iterations were never applied.
xgb_lrnr <- Lrnr_xgboost$new(nrounds = 1000)

# Metalearner (non-negative least squares) used to combine the candidates in
# both super learners.
# NOTE(review): the same metalearner is used for the outcome and for the
# treatment model; confirm that this is intended for the treatment.
ls_metalearner <- make_learner(Lrnr_nnls)

# Candidate libraries: identical except for the GAM family.
outcome_candidates <- list(
  rf_lrnr, hal_lrnr, pols_lrnr, Cgam_lrnr, rfst_lrnr, xgb_lrnr
)
treatment_candidates <- list(
  rf_lrnr, hal_lrnr, pols_lrnr, Bgam_lrnr, rfst_lrnr, xgb_lrnr
)

# Super learner for the outcome (Y) and for the treatment (A).
sl_Y <- Lrnr_sl$new(
  learners = outcome_candidates,
  metalearner = ls_metalearner
)
sl_A <- Lrnr_sl$new(
  learners = treatment_candidates,
  metalearner = ls_metalearner
)

# Learners keyed by the node they estimate.
learner_list <- list(A = sl_A, Y = sl_Y)

# fit: run tmle3 with the ATE specification, the analysis data, the node
# roles and the super learners, then print the fitted object.
tmle_fit <- tmle3(
  tmle_spec = ate_spec,
  data = data_jd,
  node_list = node_list,
  learner_list = learner_list
)
print(tmle_fit)`