ropensci/stats19

Get multiple years

Closed this issue · 5 comments

library(purrr)
    library(stats19)
#> Data provided under OGL v3.0. Cite the source and link to:
#> www.nationalarchives.gov.uk/doc/open-government-licence/version/3/
    y = 2013:2017
    a = map_dfr(y, get_stats19, type = "accidents", ask = FALSE)
#> Files identified: DfTRoadSafety_Accidents_2013.zip
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/DfTRoadSafety_Accidents_2013.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/DfTRoadSafety_Accidents_2013/DfTRoadSafety_Accidents_2013.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/DfTRoadSafety_Accidents_2013/DfTRoadSafety_Accidents_2013.csv
#> Files identified: DfTRoadSafety_Accidents_2014.zip
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/DfTRoadSafety_Accidents_2014.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/DfTRoadSafety_Accidents_2014/DfTRoadSafety_Accidents_2014.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/DfTRoadSafety_Accidents_2014/DfTRoadSafety_Accidents_2014.csv
#> Files identified: RoadSafetyData_Accidents_2015.zip
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/RoadSafetyData_Accidents_2015.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/RoadSafetyData_Accidents_2015/Accidents_2015.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/RoadSafetyData_Accidents_2015/Accidents_2015.csv
#> Files identified: dftRoadSafety_Accidents_2016.zip
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafety_Accidents_2016.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/dftRoadSafety_Accidents_2016/dftRoadSafety_Accidents_2016.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/dftRoadSafety_Accidents_2016/dftRoadSafety_Accidents_2016.csv
#> Files identified: dftRoadSafetyData_Accidents_2017.zip
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2017.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/dftRoadSafetyData_Accidents_2017/Acc.csv
    a
#> # A tibble: 691,641 x 32
#>    accident_index location_eastin… location_northi… longitude latitude
#>    <chr>                     <int>            <int>     <dbl>    <dbl>
#>  1 201301BS70003            527060           177970    -0.171     51.5
#>  2 201301BS70005            526900           178940    -0.173     51.5
#>  3 201301BS70006            524240           181460    -0.211     51.5
#>  4 201301BS70007            524320           181290    -0.210     51.5
#>  5 201301BS70009            525450           178660    -0.194     51.5
#>  6 201301BS70010            526460           177470    -0.180     51.5
#>  7 201301BS70012            527580           179460    -0.163     51.5
#>  8 201301BS70013            526830           178800    -0.174     51.5
#>  9 201301BS70015            524490           179370    -0.208     51.5
#> 10 201301BS70018            525300           179430    -0.196     51.5
#> # … with 691,631 more rows, and 27 more variables: police_force <chr>,
#> #   accident_severity <chr>, number_of_vehicles <int>,
#> #   number_of_casualties <int>, date <dttm>, day_of_week <chr>,
#> #   time <chr>, local_authority_district <chr>,
#> #   local_authority_highway <chr>, first_road_class <chr>,
#> #   first_road_number <int>, road_type <chr>, speed_limit <int>,
#> #   junction_detail <chr>, junction_control <chr>,
#> #   second_road_class <chr>, second_road_number <int>,
#> #   pedestrian_crossing_human_control <chr>,
#> #   pedestrian_crossing_physical_facilities <chr>, light_conditions <chr>,
#> #   weather_conditions <chr>, road_surface_conditions <chr>,
#> #   special_conditions_at_site <chr>, carriageway_hazards <chr>,
#> #   urban_or_rural_area <chr>,
#> #   did_police_officer_attend_scene_of_accident <int>,
#> #   lsoa_of_accident_location <chr>

Created on 2019-07-15 by the reprex package (v0.3.0)

Session info
devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 3.6.0 (2019-04-26)
#>  os       Debian GNU/Linux 9 (stretch)
#>  system   x86_64, linux-gnu           
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  ctype    en_US.UTF-8                 
#>  tz       Etc/UTC                     
#>  date     2019-07-15                  
#> 
#> ─ Packages ──────────────────────────────────────────────────────────────
#>  package     * version date       lib source        
#>  assertthat    0.2.1   2019-03-21 [1] CRAN (R 3.6.0)
#>  backports     1.1.4   2019-04-10 [1] CRAN (R 3.6.0)
#>  callr         3.3.0   2019-07-04 [1] CRAN (R 3.6.0)
#>  cli           1.1.0   2019-03-19 [1] CRAN (R 3.6.0)
#>  crayon        1.3.4   2017-09-16 [1] CRAN (R 3.6.0)
#>  desc          1.2.0   2018-05-01 [1] CRAN (R 3.6.0)
#>  devtools      2.1.0   2019-07-06 [1] CRAN (R 3.6.0)
#>  digest        0.6.20  2019-07-04 [1] CRAN (R 3.6.0)
#>  dplyr         0.8.3   2019-07-04 [1] CRAN (R 3.6.0)
#>  evaluate      0.14    2019-05-28 [1] CRAN (R 3.6.0)
#>  fansi         0.4.0   2018-10-05 [1] CRAN (R 3.6.0)
#>  fs            1.3.1   2019-05-06 [1] CRAN (R 3.6.0)
#>  glue          1.3.1   2019-03-12 [1] CRAN (R 3.6.0)
#>  highr         0.8     2019-03-20 [1] CRAN (R 3.6.0)
#>  hms           0.4.2   2018-03-10 [1] CRAN (R 3.6.0)
#>  htmltools     0.3.6   2017-04-28 [1] CRAN (R 3.6.0)
#>  knitr         1.23    2019-05-18 [1] CRAN (R 3.6.0)
#>  magrittr      1.5     2014-11-22 [1] CRAN (R 3.6.0)
#>  memoise       1.1.0   2017-04-21 [1] CRAN (R 3.6.0)
#>  pillar        1.4.2   2019-06-29 [1] CRAN (R 3.6.0)
#>  pkgbuild      1.0.3   2019-03-20 [1] CRAN (R 3.6.0)
#>  pkgconfig     2.0.2   2018-08-16 [1] CRAN (R 3.6.0)
#>  pkgload       1.0.2   2018-10-29 [1] CRAN (R 3.6.0)
#>  prettyunits   1.0.2   2015-07-13 [1] CRAN (R 3.6.0)
#>  processx      3.4.0   2019-07-03 [1] CRAN (R 3.6.0)
#>  ps            1.3.0   2018-12-21 [1] CRAN (R 3.6.0)
#>  purrr       * 0.3.2   2019-03-15 [1] CRAN (R 3.6.0)
#>  R6            2.4.0   2019-02-14 [1] CRAN (R 3.6.0)
#>  Rcpp          1.0.1   2019-03-17 [1] CRAN (R 3.6.0)
#>  readr         1.3.1   2018-12-21 [1] CRAN (R 3.6.0)
#>  remotes       2.1.0   2019-06-24 [1] CRAN (R 3.6.0)
#>  rlang         0.4.0   2019-06-25 [1] CRAN (R 3.6.0)
#>  rmarkdown     1.13    2019-05-22 [1] CRAN (R 3.6.0)
#>  rprojroot     1.3-2   2018-01-03 [1] CRAN (R 3.6.0)
#>  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 3.6.0)
#>  stats19     * 0.2.1   2019-04-03 [1] CRAN (R 3.6.0)
#>  stringi       1.4.3   2019-03-12 [1] CRAN (R 3.6.0)
#>  stringr       1.4.0   2019-02-10 [1] CRAN (R 3.6.0)
#>  testthat      2.1.1   2019-04-23 [1] CRAN (R 3.6.0)
#>  tibble        2.1.3   2019-06-06 [1] CRAN (R 3.6.0)
#>  tidyselect    0.2.5   2018-10-11 [1] CRAN (R 3.6.0)
#>  usethis       1.5.1   2019-07-04 [1] CRAN (R 3.6.0)
#>  utf8          1.1.4   2018-05-24 [1] CRAN (R 3.6.0)
#>  vctrs         0.2.0   2019-07-05 [1] CRAN (R 3.6.0)
#>  withr         2.1.2   2018-03-15 [1] CRAN (R 3.6.0)
#>  xfun          0.8     2019-06-25 [1] CRAN (R 3.6.0)
#>  yaml          2.2.0   2018-07-25 [1] CRAN (R 3.6.0)
#>  zeallot       0.1.0   2018-01-28 [1] CRAN (R 3.6.0)
#> 
#> [1] /usr/local/lib/R/site-library
#> [2] /usr/local/lib/R/library
layik commented

Should we also apply the same solution to the dl_stats19, read and format functions?

It would also be great to change `year` to `years` without breaking other people's work.

I think keeping it as year is fine. year = 2017:2018 isn't too bad...

Should we also apply the same solution to the dl_stats19, read and format functions?

I don't think so: the format_*() functions already work on multi-year datasets, and dl_stats19() is subservient to get_stats19() and will rarely be used in practice, especially after #144.

layik commented

Then let's go with the quick solution. All good.

Great fix @layik - works well as shown below:

# Aim: test multi-year downloads
devtools::install_github("ropensci/stats19", "years")
#> Skipping install of 'stats19' from a github remote, the SHA1 (52d14833) has not changed since last install.
#>   Use `force = TRUE` to force installation
a1 = stats19::get_stats19(year = 2017)
#> Files identified: dftRoadSafetyData_Accidents_2017.zip
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2017.zip
#> Attempt downloading from:
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
a2 = stats19::get_stats19(year = 2018)
#> Files identified: dftRoadSafetyData_Accidents_2018.csv
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2018.csv
#> Attempt downloading from:
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
# a3 = stats19::get_stats19(year = 2017:2018)
# Multiple matches. Which do you want to download?
# 1: dftRoadSafetyData_Accidents_2017.zip
# 2: dftRoadSafetyData_Accidents_2018.csv
a3 = rbind(a1, a2)
a4 = stats19::get_stats19(year = 2017)
#> Files identified: dftRoadSafetyData_Accidents_2017.zip
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2017.zip
#> Data already exists in data_dir, not downloading
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
a5 = stats19::get_stats19(year = 2018)
#> Files identified: dftRoadSafetyData_Accidents_2018.csv
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2018.csv
#> Data already exists in data_dir, not downloading
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
a6 = stats19::get_stats19(year = 2017:2018)
#> Files identified: dftRoadSafetyData_Accidents_2017.zip
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2017.zip
#> Data already exists in data_dir, not downloading
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Files identified: dftRoadSafetyData_Accidents_2018.csv
#>    http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2018.csv
#> Data already exists in data_dir, not downloading
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
nrow(a3)
#> [1] 252617
nrow(a6)
#> [1] 252617
ncol(a3)
#> [1] 32
ncol(a6)
#> [1] 32
identical(a3, a6)
#> [1] TRUE
a3[1:9, 1:5]
#> # A tibble: 9 x 5
#>   accident_index location_easting_osgr location_northing_osgr longitude latitude
#>   <chr>                          <int>                  <int>     <dbl>    <dbl>
#> 1 2017010001708                 532920                 196330   -0.0801     51.7
#> 2 2017010009342                 526790                 181970   -0.174      51.5
#> 3 2017010009344                 535200                 181260   -0.0530     51.5
#> 4 2017010009348                 534340                 193560   -0.0607     51.6
#> 5 2017010009350                 533680                 187820   -0.0724     51.6
#> 6 2017010009351                 514510                 172370   -0.354      51.4
#> 7 2017010009353                 508640                 181870   -0.435      51.5
#> 8 2017010009354                 527880                 181950   -0.158      51.5
#> 9 2017010009357                 520940                 192820   -0.254      51.6
a6[1:9, 1:5]
#> # A tibble: 9 x 5
#>   accident_index location_easting_osgr location_northing_osgr longitude latitude
#>   <chr>                          <int>                  <int>     <dbl>    <dbl>
#> 1 2017010001708                 532920                 196330   -0.0801     51.7
#> 2 2017010009342                 526790                 181970   -0.174      51.5
#> 3 2017010009344                 535200                 181260   -0.0530     51.5
#> 4 2017010009348                 534340                 193560   -0.0607     51.6
#> 5 2017010009350                 533680                 187820   -0.0724     51.6
#> 6 2017010009351                 514510                 172370   -0.354      51.4
#> 7 2017010009353                 508640                 181870   -0.435      51.5
#> 8 2017010009354                 527880                 181950   -0.158      51.5
#> 9 2017010009357                 520940                 192820   -0.254      51.6

Created on 2020-01-15 by the reprex package (v0.3.0)