tagteam/riskRegression

Brier-Score reproduction error: riskRegression::Score()

Closed this issue · 4 comments

We wish to apply the brier score to a model that is not supported by riskRegression (casebase).

In an attempt to recreate a right-censored brier score, I repurposed code posted by Patrick Breheny from his course BIOS:7210, using the same data from the IPA vignette in riskRegression. We are using a cox model for this to insure that our code is correctly calculating the right-censored brier score. https://cran.r-project.org/web/packages/riskRegression/vignettes/IPA.html

There appears to be a disagreement after censoring begins to impact our brier-scores. I'm not sure why this is, as the calculated IPCW between both our estimation and yours from riskRegression are the same. Any help in understanding the discrepancy is appreciated.

#library(casebase)
#> See example usage at http://sahirbhatnagar.com/casebase/

library(survival)
library(riskRegression)
#> riskRegression version 2019.11.03

library(reprex)
library(reshape2)
library(ggplot2)

## code repurposed from https://myweb.uiowa.edu/pbreheny/7210/f18/notes/11-15.R
##using data from IPA vignette in riskRegression


#set seed and simulate data
set.seed(18)
astrain <- simActiveSurveillance(278)
astest <- simActiveSurveillance(208)

#fit a cox model
coxfit <- coxph(Surv(time,event!=0)~.,data=astrain,x=TRUE)

#specify prediction times of interest
times=sort(unique(astest$time))
# calculate the IPA/brier scores

X2 <- Score(list("PredictionModel"=coxfit),data=astest,            formula=Surv(time,event!=0)~1,summary="ipa",se.fit=0L,metrics="brier",contrasts=FALSE,times=times)



#predictions using cox model
predSurvs=summary(survfit(coxfit,newdata=astest),times=times)$surv
#inverse probability censoring weights
#note that we remove the last time, as this would give us 0. I shift all times by 1.
IPCW=summary(survfit(Surv(time, event==0)~1, astest),c(min(times),head(times,-1)))$surv


#Empty matrix that will be filled in with the following loop
Score <- matrix(NA, nrow(predSurvs), ncol(predSurvs))

#for each point in time we have predicted
for (i in 1:length(times)) {
    #get number of censored individuals so long as their survival time is less than t[i]
    #these individuals do not have an effect on right-censored brier score.
    CensBefore <- astest$event==0 & astest$time < times[i]
    #y encompasses all survival times larger than t[i] with a 1, 0 otherwise
    y <- drop(t(astest$time > times[i]))
    #above permits the two parts of right-censored brier score to be calculated, without IPCW, in one line
    Score[i,] <- (y - predSurvs[i,])^2
    # above calculated individuals who don't have an effect on brier score at specfied time. they're scores are set to 0.
    Score[i, CensBefore] <- 0
}
#apply IPCW to all scores
Err <- sweep(Score, 1, IPCW, '/')
#Average curve demonstrating right-censored brier averaged over test-set, for each time of interest.
Err=apply(Err, 1, mean)

#restructuring to make plotting easier
results=data.frame(riskRegression=X2$Brier$score$Brier[X2$Brier$score$model=="PredictionModel"],CustomBrier=Err,times=times)
results <- melt(results, id.vars="times")

ggplot(data=results,mapping=aes(x=times,y=value,col=variable))+
    geom_line()+
    geom_rug(sides = 'b')+
    xlab("Times")+
    ylab("Brier-Score")

sessionInfo()
#> R version 3.6.0 (2019-04-26)
#> Platform: x86_64-apple-darwin15.6.0 (64-bit)
#> Running under: macOS High Sierra 10.13.6
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] ggplot2_3.2.1             reshape2_1.4.3           
#> [3] reprex_0.3.0              riskRegression_2019.11.03
#> [5] survival_3.1-7            casebase_0.2.1.9001      
#> 
#> loaded via a namespace (and not attached):
#>  [1] VGAM_1.1-2          splines_3.6.0       foreach_1.4.7      
#>  [4] prodlim_2019.11.13  Formula_1.2-3       assertthat_0.2.1   
#>  [7] highr_0.8           stats4_3.6.0        latticeExtra_0.6-28
#> [10] yaml_2.2.0          timereg_1.9.4       numDeriv_2016.8-1.1
#> [13] pillar_1.4.3        backports_1.1.5     lattice_0.20-38    
#> [16] quantreg_5.52       glue_1.3.1          digest_0.6.23      
#> [19] RColorBrewer_1.1-2  checkmate_1.9.4     colorspace_1.4-1   
#> [22] sandwich_2.5-1      cmprsk_2.2-9        rms_5.1-4          
#> [25] htmltools_0.4.0     Matrix_1.2-17       plyr_1.8.5         
#> [28] pkgconfig_2.0.3     SparseM_1.77        purrr_0.3.3        
#> [31] mvtnorm_1.0-11      scales_1.1.0        lava_1.6.6         
#> [34] MatrixModels_0.4-1  htmlTable_1.13.2    tibble_2.1.3       
#> [37] mgcv_1.8-31         farver_2.0.3        TH.data_1.0-10     
#> [40] withr_2.1.2         nnet_7.3-12         lazyeval_0.2.2     
#> [43] magrittr_1.5        crayon_1.3.4        polspline_1.1.17   
#> [46] evaluate_0.14       fs_1.3.1            nlme_3.1-142       
#> [49] MASS_7.3-51.4       foreign_0.8-72      tools_3.6.0        
#> [52] data.table_1.12.8   lifecycle_0.1.0     multcomp_1.4-10    
#> [55] stringr_1.4.0       munsell_0.5.0       cluster_2.1.0      
#> [58] compiler_3.6.0      rlang_0.4.4         grid_3.6.0         
#> [61] iterators_1.0.12    rstudioapi_0.10     htmlwidgets_1.5.1  
#> [64] base64enc_0.1-3     labeling_0.3        rmarkdown_2.1      
#> [67] gtable_0.3.0        codetools_0.2-16    R6_2.4.1           
#> [70] gridExtra_2.3       zoo_1.8-6           knitr_1.27         
#> [73] dplyr_0.8.3         Hmisc_4.3-0         stringi_1.4.5      
#> [76] Rcpp_1.0.3          rpart_4.1-15        acepack_1.4.1      
#> [79] tidyselect_0.2.5    xfun_0.12

Created on 2020-03-02 by the reprex package (v0.3.0)

Thank you for the insight! The two seem much more comparable now, but there seems to be an overweighting for some reason. I'm now using prodlim per your suggestion and treat the left hand and right had side of the equation separately when re-weighting. The left hand side uses the IPCW as before, but I fix re-weighting to be the IPCW at time t, when survival time > t.

Breheny's code can be found here:
https://myweb.uiowa.edu/pbreheny/7210/f18/notes.html
along with slides, for date 11-15. (Quantifying predictive accuracy in Cox models)

I was unable to find an attached document, perhaps it was not attached? may you send it again?

Thank you for your time.

#library(casebase)
# See example usage at http://sahirbhatnagar.com/casebase/

library(survival)

library(riskRegression)
#> riskRegression version 2019.11.03



library(reprex)
library(reshape2)
library(ggplot2)
library(prodlim)


#set seed and simulate data
set.seed(18)
astrain <- simActiveSurveillance(278)
astest <- simActiveSurveillance(208)

#fit a cox model
coxfit <- coxph(Surv(time,event!=0)~.,data=astrain,x=TRUE)

#specify prediction times of interest
times=sort(unique(astest$time))
# calculate the IPA/brier scores

X2 <- Score(list("PredictionModel"=coxfit),data=astest,formula=Surv(time,event!=0)~1,summary="ipa",se.fit=0L,metrics="brier",contrasts=FALSE,times=times)



#predictions using cox model
predSurvs=summary(survfit(coxfit,newdata=astest),times=times)$surv
#inverse probability censoring weights
#note that we remove the last time, as this would give us 0. I shift all times by 1.
#IPCW=summary(survfit(Surv(time, event==0)~1, astest),c(min(times),head(times,-1)))$surv
fitCens=prodlim::prodlim(Hist(time, event!=0)~1,astest,reverse = TRUE)
IPCW.subject.times=prodlim::predictSurvIndividual(fitCens,lag=1)#G(t-|X)

#Empty matrix that will be filled in with the following loop
Score <- matrix(NA, nrow(predSurvs), ncol(predSurvs))
IPCWMatrix<-matrix(NA, nrow(predSurvs), ncol(predSurvs))
#for each point in time we have predicted
for (i in 1:length(times)) {
    #get number of censored individuals so long as their survival time is less than times[i]
    #these individuals do not have an effect on right-censored brier score.
    CensBefore <- astest$event==0 & astest$time < times[i]
    #y encompasses all survival times larger than t[i] with a 1, 0 otherwise
    y <- drop(t(astest$time > times[i]))
    #above permits the two parts of right-censored brier score to be calculated, without IPCW, in one line
    Score[i,] <- (y - predSurvs[i,])^2
    
    #Generate IPCWMatrix
    IPCWMatrix[i,y==0]<-IPCW.subject.times[y==0]#G(t-|X) filled in corresponding positions
    IPCW.time <- predict(fitCens,newdata=astest,times=times[i],level.chaos=1,mode="matrix",type="surv",lag=1)
    IPCWMatrix[i,y==1]<-IPCW.time#G(t) filled, same value, for remaining positions.
    # above calculated individuals who don't have an effect on brier score at specfied time. they're scores are set to 0.
    Score[i, CensBefore] <- 0
}

#apply IPCW to all scores
Err<-Score/IPCWMatrix

#Average curve demonstrating right-censored brier averaged over test-set, for each time of interest.
Err<-apply(Err,1,mean)
#restructuring to make plotting easier
results=data.frame(riskRegression=X2$Brier$score$Brier[X2$Brier$score$model=="PredictionModel"],CustomBrier=Err,times=times)
results <- melt(results, id.vars="times")

ggplot(data=results,mapping=aes(x=times,y=value,col=variable))+
    geom_line()+
    geom_rug(sides = 'b')+
    xlab("Times")+
    ylab("Brier-Score")

sessionInfo()
#> R version 3.6.0 (2019-04-26)
#> Platform: x86_64-apple-darwin15.6.0 (64-bit)
#> Running under: macOS High Sierra 10.13.6
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] prodlim_2019.11.13        ggplot2_3.2.1            
#> [3] reshape2_1.4.3            reprex_0.3.0             
#> [5] riskRegression_2019.11.03 survival_3.1-7           
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_1.0.3          mvtnorm_1.0-11      lattice_0.20-38    
#>  [4] zoo_1.8-6           assertthat_0.2.1    digest_0.6.23      
#>  [7] foreach_1.4.7       plyr_1.8.5          R6_2.4.1           
#> [10] backports_1.1.5     acepack_1.4.1       MatrixModels_0.4-1 
#> [13] evaluate_0.14       highr_0.8           pillar_1.4.3       
#> [16] rlang_0.4.4         lazyeval_0.2.2      multcomp_1.4-10    
#> [19] rstudioapi_0.10     data.table_1.12.8   SparseM_1.77       
#> [22] rpart_4.1-15        Matrix_1.2-17       checkmate_1.9.4    
#> [25] rmarkdown_2.1       labeling_0.3        splines_3.6.0      
#> [28] stringr_1.4.0       foreign_0.8-72      htmlwidgets_1.5.1  
#> [31] munsell_0.5.0       numDeriv_2016.8-1.1 compiler_3.6.0     
#> [34] xfun_0.12           pkgconfig_2.0.3     base64enc_0.1-3    
#> [37] htmltools_0.4.0     nnet_7.3-12         tidyselect_0.2.5   
#> [40] tibble_2.1.3        gridExtra_2.3       htmlTable_1.13.2   
#> [43] Hmisc_4.3-0         rms_5.1-4           codetools_0.2-16   
#> [46] withr_2.1.2         crayon_1.3.4        dplyr_0.8.3        
#> [49] timereg_1.9.4       MASS_7.3-51.4       grid_3.6.0         
#> [52] nlme_3.1-142        polspline_1.1.17    gtable_0.3.0       
#> [55] lifecycle_0.1.0     magrittr_1.5        scales_1.1.0       
#> [58] stringi_1.4.5       farver_2.0.3        fs_1.3.1           
#> [61] latticeExtra_0.6-28 sandwich_2.5-1      Formula_1.2-3      
#> [64] TH.data_1.0-10      lava_1.6.6          RColorBrewer_1.1-2 
#> [67] iterators_1.0.12    tools_3.6.0         cmprsk_2.2-9       
#> [70] glue_1.3.1          purrr_0.3.3         yaml_2.2.0         
#> [73] colorspace_1.4-1    cluster_2.1.0       knitr_1.27         
#> [76] quantreg_5.52

Created on 2020-03-04 by the reprex package (v0.3.0)

Your solution
data.table::setorder(astrain,time,-event) and
data.table::setorder(astest,time,-event)

worked! There's a minimal difference but I'm sure I can figure that out.

thank you for your time!

#library(casebase)
# See example usage at http://sahirbhatnagar.com/casebase/

library(survival)

library(riskRegression)
#> riskRegression version 2019.11.03



library(reprex)
library(reshape2)
library(ggplot2)
library(prodlim)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:reshape2':
#> 
#>     dcast, melt

#set seed and simulate data
set.seed(18)
astrain <- simActiveSurveillance(278)
data.table::setorder(astrain,time,-event)

astest <- simActiveSurveillance(208)
data.table::setorder(astest,time,-event)
#fit a cox model
coxfit <- coxph(Surv(time,event!=0)~.,data=astrain,x=TRUE)

#specify prediction times of interest
times=sort(unique(astest$time))
# calculate the IPA/brier scores

X2 <- Score(list("PredictionModel"=coxfit),data=astest,formula=Surv(time,event!=0)~1,summary="ipa",se.fit=0L,metrics="brier",contrasts=FALSE,times=times)



#predictions using cox model
predSurvs=summary(survfit(coxfit,newdata=astest),times=times)$surv
#inverse probability censoring weights
#note that we remove the last time, as this would give us 0. I shift all times by 1.
#IPCW=summary(survfit(Surv(time, event==0)~1, astest),c(min(times),head(times,-1)))$surv
fitCens=prodlim::prodlim(Hist(time, event!=0)~1,astest,reverse = TRUE)
IPCW.subject.times=prodlim::predictSurvIndividual(fitCens,lag=1)#G(t-|X)

#Empty matrix that will be filled in with the following loop
Score <- matrix(NA, nrow(predSurvs), ncol(predSurvs))
IPCWMatrix<-matrix(NA, nrow(predSurvs), ncol(predSurvs))
#for each point in time we have predicted
for (i in 1:length(times)) {
    #get number of censored individuals so long as their survival time is less than times[i]
    #these individuals do not have an effect on right-censored brier score.
    CensBefore <- astest$event==0 & astest$time < times[i]
    #y encompasses all survival times larger than t[i] with a 1, 0 otherwise
    y <- drop(t(astest$time > times[i]))
    #above permits the two parts of right-censored brier score to be calculated, without IPCW, in one line
    Score[i,] <- (y - predSurvs[i,])^2
    
    #Generate IPCWMatrix
    IPCWMatrix[i,y==0]<-IPCW.subject.times[y==0]#G(t-|X) filled in corresponding positions
    IPCW.time <- predict(fitCens,newdata=astest,times=times[i],level.chaos=1,mode="matrix",type="surv",lag=1)
    IPCWMatrix[i,y==1]<-IPCW.time#G(t) filled, same value, for remaining positions.
    # above calculated individuals who don't have an effect on brier score at specfied time. they're scores are set to 0.
    Score[i, CensBefore] <- 0
}

#apply IPCW to all scores
Err<-Score/IPCWMatrix

#Average curve demonstrating right-censored brier averaged over test-set, for each time of interest.
Err<-apply(Err,1,mean)
#restructuring to make plotting easier
results=data.frame(riskRegression=X2$Brier$score$Brier[X2$Brier$score$model=="PredictionModel"],CustomBrier=Err,times=times)
results <- melt(results, id.vars="times")
#> Warning in melt(results, id.vars = "times"): The melt generic in data.table has
#> been passed a data.frame and will attempt to redirect to the relevant reshape2
#> method; please note that reshape2 is deprecated, and this redirection is now
#> deprecated as well. To continue using melt methods from reshape2 while both
#> libraries are attached, e.g. melt.list, you can prepend the namespace like
#> reshape2::melt(results). In the next version, this warning will become an error.

ggplot(data=results,mapping=aes(x=times,y=value,col=variable))+
    geom_line()+
    geom_rug(sides = 'b')+
    xlab("Times")+
    ylab("Brier-Score")

sessionInfo()
#> R version 3.6.0 (2019-04-26)
#> Platform: x86_64-apple-darwin15.6.0 (64-bit)
#> Running under: macOS High Sierra 10.13.6
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] data.table_1.12.8         prodlim_2019.11.13       
#> [3] ggplot2_3.2.1             reshape2_1.4.3           
#> [5] reprex_0.3.0              riskRegression_2019.11.03
#> [7] survival_3.1-7           
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_1.0.3          mvtnorm_1.0-11      lattice_0.20-38    
#>  [4] zoo_1.8-6           assertthat_0.2.1    digest_0.6.23      
#>  [7] foreach_1.4.7       plyr_1.8.5          R6_2.4.1           
#> [10] backports_1.1.5     acepack_1.4.1       MatrixModels_0.4-1 
#> [13] evaluate_0.14       highr_0.8           pillar_1.4.3       
#> [16] rlang_0.4.4         lazyeval_0.2.2      multcomp_1.4-10    
#> [19] rstudioapi_0.10     SparseM_1.77        rpart_4.1-15       
#> [22] Matrix_1.2-17       checkmate_1.9.4     rmarkdown_2.1      
#> [25] labeling_0.3        splines_3.6.0       stringr_1.4.0      
#> [28] foreign_0.8-72      htmlwidgets_1.5.1   munsell_0.5.0      
#> [31] numDeriv_2016.8-1.1 compiler_3.6.0      xfun_0.12          
#> [34] pkgconfig_2.0.3     base64enc_0.1-3     htmltools_0.4.0    
#> [37] nnet_7.3-12         tidyselect_0.2.5    tibble_2.1.3       
#> [40] gridExtra_2.3       htmlTable_1.13.2    Hmisc_4.3-0        
#> [43] rms_5.1-4           codetools_0.2-16    withr_2.1.2        
#> [46] crayon_1.3.4        dplyr_0.8.3         timereg_1.9.4      
#> [49] MASS_7.3-51.4       grid_3.6.0          nlme_3.1-142       
#> [52] polspline_1.1.17    gtable_0.3.0        lifecycle_0.1.0    
#> [55] magrittr_1.5        scales_1.1.0        stringi_1.4.5      
#> [58] farver_2.0.3        fs_1.3.1            latticeExtra_0.6-28
#> [61] sandwich_2.5-1      Formula_1.2-3       TH.data_1.0-10     
#> [64] lava_1.6.6          RColorBrewer_1.1-2  iterators_1.0.12   
#> [67] tools_3.6.0         cmprsk_2.2-9        glue_1.3.1         
#> [70] purrr_0.3.3         yaml_2.2.0          colorspace_1.4-1   
#> [73] cluster_2.1.0       knitr_1.27          quantreg_5.52

Created on 2020-03-04 by the reprex package (v0.3.0)