crash-data-vis-demo

The goal of crash-data-vis-demo is to demonstrate how to get and visualise data on road traffic casualties and road geometries.

# Install the remotes package if not yet installed.
# requireNamespace() asks R directly whether the package can be loaded,
# instead of searching every cell of the installed.packages() matrix:
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
# Check you have the packages installed. tmap (attached below) and
# zonebuilder (called as zonebuilder::zb_zone() below) are listed too, so
# that a fresh R installation can run the whole script:
remotes::install_cran(c("tidyverse", "stats19", "tmap", "zonebuilder"))
#> Skipping install of 'tidyverse' from a cran remote, the SHA1 (2.0.0) has not changed since last install.
#>   Use `force = TRUE` to force installation
#> Skipping install of 'stats19' from a cran remote, the SHA1 (3.0.3) has not changed since last install.
#>   Use `force = TRUE` to force installation
library(tidyverse) # for data manipulation and visualisation
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr     1.1.4          ✔ readr     2.1.5     
#> ✔ forcats   1.0.0          ✔ stringr   1.5.1     
#> ✔ ggplot2   3.4.4.9000     ✔ tibble    3.2.1     
#> ✔ lubridate 1.9.3          ✔ tidyr     1.3.1     
#> ✔ purrr     1.0.2          
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stats19) # for getting road casualty data
#> Data provided under OGL v3.0. Cite the source and link to:
#> www.nationalarchives.gov.uk/doc/open-government-licence/version/3/
library(tmap) # for making maps
#> 
#> Attaching package: 'tmap'
#> 
#> The following object is masked from 'package:datasets':
#> 
#>     rivers
# Set tmap's mode to "view" for the map drawn at the end of this script
# (presumably the interactive mode; confirm in the tmap documentation):
tmap_mode("view")
#> tmap mode set to 'view'

Get road casualty data

# Get road casualty data for 2022
# (type = "cas" picks up the casualty file, as the output below shows)
cas = get_stats19(year = 2022, type = "cas")
#> Files identified: dft-road-casualty-statistics-casualty-2022.csv
#>    https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-casualty-2022.csv
#> Data already exists in data_dir, not downloading
#> Rows: 135480 Columns: 19
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr  (3): accident_index, accident_reference, lsoa_of_casualty
#> dbl (16): accident_year, vehicle_reference, casualty_reference, casualty_cla...
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Get the collision records for the same year
# (type = "col" picks up the collision file; a datetime column is added)
col = get_stats19(year = 2022, type = "col")
#> Files identified: dft-road-casualty-statistics-collision-2022.csv
#> 
#>    https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-collision-2022.csv
#> Data already exists in data_dir, not downloading
#> Reading in: 
#> ~/data/stats19/dft-road-casualty-statistics-collision-2022.csv
#> Rows: 106004 Columns: 36
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr   (6): accident_index, accident_reference, date, local_authority_ons_dis...
#> dbl  (29): accident_year, location_easting_osgr, location_northing_osgr, lon...
#> time  (1): time
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> date and time columns present, creating formatted datetime column

If you want to get more data, you can change `year = 2022` to `year = 1979`, which downloads all data from 1979 to the last year for which data is available.

We’ll get the data for Edinburgh in stages. First, we inspect the collision data and keep only the collisions recorded by Police Scotland:

# List the column names of the collision data
names(col)
#>  [1] "accident_index"                             
#>  [2] "accident_year"                              
#>  [3] "accident_reference"                         
#>  [4] "location_easting_osgr"                      
#>  [5] "location_northing_osgr"                     
#>  [6] "longitude"                                  
#>  [7] "latitude"                                   
#>  [8] "police_force"                               
#>  [9] "accident_severity"                          
#> [10] "number_of_vehicles"                         
#> [11] "number_of_casualties"                       
#> [12] "date"                                       
#> [13] "day_of_week"                                
#> [14] "time"                                       
#> [15] "local_authority_district"                   
#> [16] "local_authority_ons_district"               
#> [17] "local_authority_highway"                    
#> [18] "first_road_class"                           
#> [19] "first_road_number"                          
#> [20] "road_type"                                  
#> [21] "speed_limit"                                
#> [22] "junction_detail"                            
#> [23] "junction_control"                           
#> [24] "second_road_class"                          
#> [25] "second_road_number"                         
#> [26] "pedestrian_crossing_human_control"          
#> [27] "pedestrian_crossing_physical_facilities"    
#> [28] "light_conditions"                           
#> [29] "weather_conditions"                         
#> [30] "road_surface_conditions"                    
#> [31] "special_conditions_at_site"                 
#> [32] "carriageway_hazards"                        
#> [33] "urban_or_rural_area"                        
#> [34] "did_police_officer_attend_scene_of_accident"
#> [35] "trunk_road_flag"                            
#> [36] "lsoa_of_accident_location"                  
#> [37] "datetime"
# Count the collision records per police force
table(col$police_force)
#> 
#>   Avon and Somerset        Bedfordshire      Cambridgeshire            Cheshire 
#>                2404                1285                1485                1685 
#>      City of London           Cleveland             Cumbria          Derbyshire 
#>                 175                 674                 854                1974 
#>  Devon and Cornwall              Dorset              Durham         Dyfed-Powys 
#>                2752                1449                 621                 982 
#>               Essex     Gloucestershire  Greater Manchester               Gwent 
#>                2901                 930                2804                 587 
#>           Hampshire       Hertfordshire          Humberside                Kent 
#>                3485                1780                1913                3983 
#>          Lancashire      Leicestershire        Lincolnshire          Merseyside 
#>                2765                1076                1604                2267 
#> Metropolitan Police             Norfolk         North Wales     North Yorkshire 
#>               23327                1537                 802                1174 
#>    Northamptonshire         Northumbria     Nottinghamshire     Police Scotland 
#>                1229                1874                1889                4125 
#>         South Wales     South Yorkshire       Staffordshire             Suffolk 
#>                 944                2038                 507                1163 
#>              Surrey              Sussex       Thames Valley        Warwickshire 
#>                2783                3678                3256                 865 
#>         West Mercia       West Midlands      West Yorkshire           Wiltshire 
#>                1658                4943                4397                1380
# Keep only the collisions whose police force name contains "Scotland"
col_scotland = filter(col, str_detect(police_force, "Scotland"))

Let’s get a case study area:

# Build the case study area around "Edinburgh" with zonebuilder,
# asking for two circles
edinburgh_boundary = zonebuilder::zb_zone(
  "Edinburgh",
  n_circles = 2
)

We’ll convert the collisions to an sf object so we can do a spatial filter:

# Convert the Scottish collisions to an sf object
# (lonlat = TRUE is passed through to stats19's format_sf(); see ?format_sf)
col_sf = col_scotland |>
  format_sf(lonlat = TRUE)
#> 0 rows removed with no coordinates

Now we can do a spatial filter:

# Subset the collisions by the boundary geometry; op = sf::st_intersects is
# the default predicate of sf's `[` method, spelled out here for clarity
col_edinburgh = col_sf[edinburgh_boundary, , op = sf::st_intersects]

We can now plot these on a map:

# Map the Edinburgh collisions as dots coloured by severity
tm_shape(col_edinburgh) +
  tm_dots(
    col = "accident_severity",
    size = 0.1
  ) +
  # Legend position requested for the map
  tm_layout(
    legend.position = c("left", "bottom")
  )