ropensci-books/drake

Document ability of file_in() and file_out() to handle directories

gregrs-uk opened this issue · 2 comments

It would be great if the ability of file_in() and file_out() to handle directories (thanks to this pull request) could be documented under "drake plans" > "External files".

I needed to track a directory containing a changing number of input files, as well as a directory of output files whose names depend on the data (using dynamic branching). I didn't know these functions could handle directories, so I started writing my own hashing function before I came across the PR linked above!

Many thanks for this new functionality.

Below is an example of my use case. Feel free to use if helpful or to ignore if not!

library(drake)
library(readr)
suppressPackageStartupMessages(library(dplyr))
library(ggplot2)
library(purrr)
# Create the input directory on first run.
if (!dir.exists("input")) {
  dir.create("input")
}

# Export one CSV per iris species into input/, each file named after its
# species (the names of the split list).
iris %>%
  split(.$Species) %>%
  iwalk(~ write_csv(.x, file.path("input", paste0(.y, ".csv"))))

# Create the output directory on first run.
if (!dir.exists("output")) {
  dir.create("output")
}
# Read every file listed in `dir` with read_csv() and row-bind the results
# into a single data frame.
#
# dir: path of the directory whose files are read.
import_data <- function(dir) {
  csv_paths <- file.path(dir, list.files(dir))
  map_dfr(csv_paths, read_csv)
}

# Scatter plot of Petal.Width against Petal.Length.
#
# data: a list; only its first element is plotted, and that element must
#   provide the Petal.Length and Petal.Width columns.
create_plot <- function(data) {
  petal_measurements <- data[[1]]
  ggplot(petal_measurements, aes(Petal.Length, Petal.Width)) +
    geom_point()
}
# Workflow: load the per-species CSVs from input/, then write one scatter plot
# per species into output/ using dynamic branching.
plan <- drake_plan(
  # file_in() is given the whole input directory rather than a single file.
  species_data = import_data(file_in("input")) %>% split(f = .$Species),
  species_names = names(species_data),
  # Each sub-target pairs one element of species_data with the matching name;
  # file_out() declares the output directory as this target's output.
  plot = target(
    ggsave(
      filename = file.path(file_out("output"), paste0(species_names, ".png")),
      plot = create_plot(species_data)
    ),
    dynamic = map(species_data, species_names)
  )
)

# Build the plan; the `#>` lines that follow are the console output recorded
# for this run.
make(plan)
#> target species_data
#> Target species_data messages:
#>   Parsed with column specification:
#> cols(
#>   Sepal.Length = col_double(),
#>   Sepal.Width = col_double(),
#>   Petal.Length = col_double(),
#>   Petal.Width = col_double(),
#>   Species = col_character()
#> )
#>   Parsed with column specification:
#> cols(
#>   Sepal.Length = col_double(),
#>   Sepal.Width = col_double(),
#>   Petal.Length = col_double(),
#>   Petal.Width = col_double(),
#>   Species = col_character()
#> )
#>   Parsed with column specification:
#> cols(
#>   Sepal.Length = col_double(),
#>   Sepal.Width = col_double(),
#>   Petal.Length = col_double(),
#>   Petal.Width = col_double(),
#>   Species = col_character()
#> )
#> target species_names
#> dynamic plot
#> subtarget plot_f019abad
#> Target plot_f019abad messages:
#>   Saving 7 x 5 in image
#> subtarget plot_8bd5b0af
#> Target plot_8bd5b0af messages:
#>   Saving 7 x 5 in image
#> subtarget plot_cfee0958
#> Target plot_cfee0958 messages:
#>   Saving 7 x 5 in image
#> aggregate plot

# Remove one of the generated images; drake then reports `plot` as outdated.
unlink(file.path("output", "setosa.png"))
outdated(drake_config(plan))
#> [1] "plot"

# Running make() a second time rebuilds the plot target.
make(plan)
#> dynamic plot
#> subtarget plot_f019abad
#> Target plot_f019abad messages:
#>   Saving 7 x 5 in image
#> subtarget plot_8bd5b0af
#> Target plot_8bd5b0af messages:
#>   Saving 7 x 5 in image
#> subtarget plot_cfee0958
#> Target plot_cfee0958 messages:
#>   Saving 7 x 5 in image
#> aggregate plot

Created on 2020-01-02 by the reprex package (v0.3.0)

Session info
devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 3.6.1 (2019-07-05)
#>  os       macOS Sierra 10.12.6        
#>  system   x86_64, darwin15.6.0        
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_GB.UTF-8                 
#>  ctype    en_GB.UTF-8                 
#>  tz       Europe/London               
#>  date     2020-01-02                  
#> 
#> ─ Packages ──────────────────────────────────────────────────────────────
#>  package     * version    date       lib source        
#>  assertthat    0.2.1      2019-03-21 [1] CRAN (R 3.6.0)
#>  backports     1.1.5      2019-10-02 [1] CRAN (R 3.6.0)
#>  base64url     1.4        2018-05-14 [1] CRAN (R 3.6.0)
#>  callr         3.3.2      2019-09-22 [1] CRAN (R 3.6.1)
#>  cli           1.1.0      2019-03-19 [1] CRAN (R 3.6.0)
#>  colorspace    1.4-1      2019-03-18 [1] CRAN (R 3.6.0)
#>  crayon        1.3.4      2017-09-16 [1] CRAN (R 3.6.0)
#>  desc          1.2.0      2018-05-01 [1] CRAN (R 3.6.0)
#>  devtools      2.2.0      2019-09-07 [1] CRAN (R 3.6.0)
#>  digest        0.6.23     2019-11-23 [1] CRAN (R 3.6.1)
#>  dplyr       * 0.8.3      2019-07-04 [1] CRAN (R 3.6.0)
#>  drake       * 7.9.0.9000 2020-01-02 [1] local         
#>  DT            0.9        2019-09-17 [1] CRAN (R 3.6.0)
#>  ellipsis      0.3.0      2019-09-20 [1] CRAN (R 3.6.0)
#>  evaluate      0.14       2019-05-28 [1] CRAN (R 3.6.0)
#>  farver        2.0.1      2019-11-13 [1] CRAN (R 3.6.0)
#>  filelock      1.0.2      2018-10-05 [1] CRAN (R 3.6.0)
#>  fs            1.3.1      2019-05-06 [1] CRAN (R 3.6.0)
#>  ggplot2     * 3.2.1      2019-08-10 [1] CRAN (R 3.6.0)
#>  glue          1.3.1      2019-03-12 [1] CRAN (R 3.6.0)
#>  gtable        0.3.0      2019-03-25 [1] CRAN (R 3.6.0)
#>  highr         0.8        2019-03-20 [1] CRAN (R 3.6.0)
#>  hms           0.5.1      2019-08-23 [1] CRAN (R 3.6.0)
#>  htmltools     0.4.0      2019-10-04 [1] CRAN (R 3.6.0)
#>  htmlwidgets   1.3        2018-09-30 [1] CRAN (R 3.6.0)
#>  igraph        1.2.4.2    2019-11-27 [1] CRAN (R 3.6.0)
#>  knitr         1.25       2019-09-18 [1] CRAN (R 3.6.0)
#>  labeling      0.3        2014-08-23 [1] CRAN (R 3.6.0)
#>  lazyeval      0.2.2      2019-03-15 [1] CRAN (R 3.6.0)
#>  lifecycle     0.1.0      2019-08-01 [1] CRAN (R 3.6.0)
#>  magrittr      1.5        2014-11-22 [1] CRAN (R 3.6.0)
#>  memoise       1.1.0      2017-04-21 [1] CRAN (R 3.6.0)
#>  munsell       0.5.0      2018-06-12 [1] CRAN (R 3.6.0)
#>  pillar        1.4.2      2019-06-29 [1] CRAN (R 3.6.0)
#>  pkgbuild      1.0.5      2019-08-26 [1] CRAN (R 3.6.0)
#>  pkgconfig     2.0.3      2019-09-22 [1] CRAN (R 3.6.1)
#>  pkgload       1.0.2      2018-10-29 [1] CRAN (R 3.6.0)
#>  prettyunits   1.0.2      2015-07-13 [1] CRAN (R 3.6.0)
#>  processx      3.4.1      2019-07-18 [1] CRAN (R 3.6.0)
#>  ps            1.3.0      2018-12-21 [1] CRAN (R 3.6.0)
#>  purrr       * 0.3.2      2019-03-15 [1] CRAN (R 3.6.0)
#>  R6            2.4.1      2019-11-12 [1] CRAN (R 3.6.0)
#>  Rcpp          1.0.3      2019-11-08 [1] CRAN (R 3.6.0)
#>  readr       * 1.3.1      2018-12-21 [1] CRAN (R 3.6.0)
#>  remotes       2.1.0      2019-06-24 [1] CRAN (R 3.6.0)
#>  rlang         0.4.2      2019-11-23 [1] CRAN (R 3.6.1)
#>  rmarkdown     1.15       2019-08-21 [1] CRAN (R 3.6.0)
#>  rprojroot     1.3-2      2018-01-03 [1] CRAN (R 3.6.0)
#>  scales        1.1.0      2019-11-18 [1] CRAN (R 3.6.0)
#>  sessioninfo   1.1.1      2018-11-05 [1] CRAN (R 3.6.0)
#>  storr         1.2.1      2018-10-18 [1] CRAN (R 3.6.0)
#>  stringi       1.4.3      2019-03-12 [1] CRAN (R 3.6.0)
#>  stringr       1.4.0      2019-02-10 [1] CRAN (R 3.6.0)
#>  testthat      2.2.1      2019-07-25 [1] CRAN (R 3.6.0)
#>  tibble        2.1.3      2019-06-06 [1] CRAN (R 3.6.0)
#>  tidyselect    0.2.5      2018-10-11 [1] CRAN (R 3.6.0)
#>  txtq          0.2.0      2019-10-15 [1] CRAN (R 3.6.0)
#>  usethis       1.5.1      2019-07-04 [1] CRAN (R 3.6.0)
#>  vctrs         0.2.0      2019-07-05 [1] CRAN (R 3.6.0)
#>  withr         2.1.2      2018-03-15 [1] CRAN (R 3.6.0)
#>  xfun          0.9        2019-08-21 [1] CRAN (R 3.6.0)
#>  yaml          2.2.0      2018-07-25 [1] CRAN (R 3.6.0)
#>  zeallot       0.1.0      2018-01-28 [1] CRAN (R 3.6.0)
#> 
#> [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library

Sure, that's easy enough to address. Thanks for speaking up.