Get multiple years
Closed this issue · 5 comments
Robinlovelace commented
library(purrr)
library(stats19)
#> Data provided under OGL v3.0. Cite the source and link to:
#> www.nationalarchives.gov.uk/doc/open-government-licence/version/3/
y = 2013:2017
a = map_dfr(y, get_stats19, type = "accidents", ask = FALSE)
#> Files identified: DfTRoadSafety_Accidents_2013.zip
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/DfTRoadSafety_Accidents_2013.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/DfTRoadSafety_Accidents_2013/DfTRoadSafety_Accidents_2013.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/DfTRoadSafety_Accidents_2013/DfTRoadSafety_Accidents_2013.csv
#> Files identified: DfTRoadSafety_Accidents_2014.zip
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/DfTRoadSafety_Accidents_2014.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/DfTRoadSafety_Accidents_2014/DfTRoadSafety_Accidents_2014.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/DfTRoadSafety_Accidents_2014/DfTRoadSafety_Accidents_2014.csv
#> Files identified: RoadSafetyData_Accidents_2015.zip
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/RoadSafetyData_Accidents_2015.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/RoadSafetyData_Accidents_2015/Accidents_2015.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/RoadSafetyData_Accidents_2015/Accidents_2015.csv
#> Files identified: dftRoadSafety_Accidents_2016.zip
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafety_Accidents_2016.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/dftRoadSafety_Accidents_2016/dftRoadSafety_Accidents_2016.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/dftRoadSafety_Accidents_2016/dftRoadSafety_Accidents_2016.csv
#> Files identified: dftRoadSafetyData_Accidents_2017.zip
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2017.zip
#> Attempt downloading from:
#> Data saved at /tmp/Rtmp0f0UFy/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Reading in:
#> /tmp/Rtmp0f0UFy/dftRoadSafetyData_Accidents_2017/Acc.csv
a
#> # A tibble: 691,641 x 32
#> accident_index location_eastin… location_northi… longitude latitude
#> <chr> <int> <int> <dbl> <dbl>
#> 1 201301BS70003 527060 177970 -0.171 51.5
#> 2 201301BS70005 526900 178940 -0.173 51.5
#> 3 201301BS70006 524240 181460 -0.211 51.5
#> 4 201301BS70007 524320 181290 -0.210 51.5
#> 5 201301BS70009 525450 178660 -0.194 51.5
#> 6 201301BS70010 526460 177470 -0.180 51.5
#> 7 201301BS70012 527580 179460 -0.163 51.5
#> 8 201301BS70013 526830 178800 -0.174 51.5
#> 9 201301BS70015 524490 179370 -0.208 51.5
#> 10 201301BS70018 525300 179430 -0.196 51.5
#> # … with 691,631 more rows, and 27 more variables: police_force <chr>,
#> # accident_severity <chr>, number_of_vehicles <int>,
#> # number_of_casualties <int>, date <dttm>, day_of_week <chr>,
#> # time <chr>, local_authority_district <chr>,
#> # local_authority_highway <chr>, first_road_class <chr>,
#> # first_road_number <int>, road_type <chr>, speed_limit <int>,
#> # junction_detail <chr>, junction_control <chr>,
#> # second_road_class <chr>, second_road_number <int>,
#> # pedestrian_crossing_human_control <chr>,
#> # pedestrian_crossing_physical_facilities <chr>, light_conditions <chr>,
#> # weather_conditions <chr>, road_surface_conditions <chr>,
#> # special_conditions_at_site <chr>, carriageway_hazards <chr>,
#> # urban_or_rural_area <chr>,
#> # did_police_officer_attend_scene_of_accident <int>,
#> # lsoa_of_accident_location <chr>
Created on 2019-07-15 by the reprex package (v0.3.0)
Session info
devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#> setting value
#> version R version 3.6.0 (2019-04-26)
#> os Debian GNU/Linux 9 (stretch)
#> system x86_64, linux-gnu
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Etc/UTC
#> date 2019-07-15
#>
#> ─ Packages ──────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
#> backports 1.1.4 2019-04-10 [1] CRAN (R 3.6.0)
#> callr 3.3.0 2019-07-04 [1] CRAN (R 3.6.0)
#> cli 1.1.0 2019-03-19 [1] CRAN (R 3.6.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 3.6.0)
#> devtools 2.1.0 2019-07-06 [1] CRAN (R 3.6.0)
#> digest 0.6.20 2019-07-04 [1] CRAN (R 3.6.0)
#> dplyr 0.8.3 2019-07-04 [1] CRAN (R 3.6.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
#> fansi 0.4.0 2018-10-05 [1] CRAN (R 3.6.0)
#> fs 1.3.1 2019-05-06 [1] CRAN (R 3.6.0)
#> glue 1.3.1 2019-03-12 [1] CRAN (R 3.6.0)
#> highr 0.8 2019-03-20 [1] CRAN (R 3.6.0)
#> hms 0.4.2 2018-03-10 [1] CRAN (R 3.6.0)
#> htmltools 0.3.6 2017-04-28 [1] CRAN (R 3.6.0)
#> knitr 1.23 2019-05-18 [1] CRAN (R 3.6.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 3.6.0)
#> pillar 1.4.2 2019-06-29 [1] CRAN (R 3.6.0)
#> pkgbuild 1.0.3 2019-03-20 [1] CRAN (R 3.6.0)
#> pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.6.0)
#> pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.6.0)
#> prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.6.0)
#> processx 3.4.0 2019-07-03 [1] CRAN (R 3.6.0)
#> ps 1.3.0 2018-12-21 [1] CRAN (R 3.6.0)
#> purrr * 0.3.2 2019-03-15 [1] CRAN (R 3.6.0)
#> R6 2.4.0 2019-02-14 [1] CRAN (R 3.6.0)
#> Rcpp 1.0.1 2019-03-17 [1] CRAN (R 3.6.0)
#> readr 1.3.1 2018-12-21 [1] CRAN (R 3.6.0)
#> remotes 2.1.0 2019-06-24 [1] CRAN (R 3.6.0)
#> rlang 0.4.0 2019-06-25 [1] CRAN (R 3.6.0)
#> rmarkdown 1.13 2019-05-22 [1] CRAN (R 3.6.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.6.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
#> stats19 * 0.2.1 2019-04-03 [1] CRAN (R 3.6.0)
#> stringi 1.4.3 2019-03-12 [1] CRAN (R 3.6.0)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
#> testthat 2.1.1 2019-04-23 [1] CRAN (R 3.6.0)
#> tibble 2.1.3 2019-06-06 [1] CRAN (R 3.6.0)
#> tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.6.0)
#> usethis 1.5.1 2019-07-04 [1] CRAN (R 3.6.0)
#> utf8 1.1.4 2018-05-24 [1] CRAN (R 3.6.0)
#> vctrs 0.2.0 2019-07-05 [1] CRAN (R 3.6.0)
#> withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0)
#> xfun 0.8 2019-06-25 [1] CRAN (R 3.6.0)
#> yaml 2.2.0 2018-07-25 [1] CRAN (R 3.6.0)
#> zeallot 0.1.0 2018-01-28 [1] CRAN (R 3.6.0)
#>
#> [1] /usr/local/lib/R/site-library
#> [2] /usr/local/lib/R/library
layik commented
Should we also apply same solution to `dl_stats19`, read and format functions, too?
Also would be great to change `year` to `years` without breaking other people's work.
Robinlovelace commented
I think keeping it as `year` is fine. `year = 2017:2018` isn't too bad...
Robinlovelace commented
> Should we also apply same solution to dl_stats19, read and format functions, too?
I don't think so, `format_*()` functions already work on multi-year datasets and `dl_stats19()` is subservient to `get_stats19()` and will rarely be used in practice, especially after #144.
layik commented
Then quick solution. All good.
Robinlovelace commented
Great fix @layik - works well as shown below:
# Aim: test multi-year downloads
devtools::install_github("ropensci/stats19", "years")
#> Skipping install of 'stats19' from a github remote, the SHA1 (52d14833) has not changed since last install.
#> Use `force = TRUE` to force installation
a1 = stats19::get_stats19(year = 2017)
#> Files identified: dftRoadSafetyData_Accidents_2017.zip
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2017.zip
#> Attempt downloading from:
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
a2 = stats19::get_stats19(year = 2018)
#> Files identified: dftRoadSafetyData_Accidents_2018.csv
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2018.csv
#> Attempt downloading from:
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
# a3 = stats19::get_stats19(year = 2017:2018)
# Multiple matches. Which do you want to download?
# 1: dftRoadSafetyData_Accidents_2017.zip
# 2: dftRoadSafetyData_Accidents_2018.csv
a3 = rbind(a1, a2)
a4 = stats19::get_stats19(year = 2017)
#> Files identified: dftRoadSafetyData_Accidents_2017.zip
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2017.zip
#> Data already exists in data_dir, not downloading
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
a5 = stats19::get_stats19(year = 2018)
#> Files identified: dftRoadSafetyData_Accidents_2018.csv
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2018.csv
#> Data already exists in data_dir, not downloading
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
a6 = stats19::get_stats19(year = 2017:2018)
#> Files identified: dftRoadSafetyData_Accidents_2017.zip
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2017.zip
#> Data already exists in data_dir, not downloading
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2017/Acc.csv
#> Files identified: dftRoadSafetyData_Accidents_2018.csv
#> http://data.dft.gov.uk.s3.amazonaws.com/road-accidents-safety-data/dftRoadSafetyData_Accidents_2018.csv
#> Data already exists in data_dir, not downloading
#> Data saved at /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
#> Reading in:
#> /tmp/RtmpMoTpcP/dftRoadSafetyData_Accidents_2018.csv
nrow(a3)
#> [1] 252617
nrow(a6)
#> [1] 252617
ncol(a3)
#> [1] 32
ncol(a6)
#> [1] 32
identical(a3, a6)
#> [1] TRUE
a3[1:9, 1:5]
#> # A tibble: 9 x 5
#> accident_index location_easting_osgr location_northing_osgr longitude latitude
#> <chr> <int> <int> <dbl> <dbl>
#> 1 2017010001708 532920 196330 -0.0801 51.7
#> 2 2017010009342 526790 181970 -0.174 51.5
#> 3 2017010009344 535200 181260 -0.0530 51.5
#> 4 2017010009348 534340 193560 -0.0607 51.6
#> 5 2017010009350 533680 187820 -0.0724 51.6
#> 6 2017010009351 514510 172370 -0.354 51.4
#> 7 2017010009353 508640 181870 -0.435 51.5
#> 8 2017010009354 527880 181950 -0.158 51.5
#> 9 2017010009357 520940 192820 -0.254 51.6
a6[1:9, 1:5]
#> # A tibble: 9 x 5
#> accident_index location_easting_osgr location_northing_osgr longitude latitude
#> <chr> <int> <int> <dbl> <dbl>
#> 1 2017010001708 532920 196330 -0.0801 51.7
#> 2 2017010009342 526790 181970 -0.174 51.5
#> 3 2017010009344 535200 181260 -0.0530 51.5
#> 4 2017010009348 534340 193560 -0.0607 51.6
#> 5 2017010009350 533680 187820 -0.0724 51.6
#> 6 2017010009351 514510 172370 -0.354 51.4
#> 7 2017010009353 508640 181870 -0.435 51.5
#> 8 2017010009354 527880 181950 -0.158 51.5
#> 9 2017010009357 520940 192820 -0.254 51.6
Created on 2020-01-15 by the reprex package (v0.3.0)