ipeaGIT/gtfs2gps

Analysis of gtfs2gps function for the current and new snap method

Joaobazzo opened this issue · 7 comments

The total number of shape_ids actually processed changes substantially between the current and the new method. We might lose too much info @pedro-andrade-inpe .

> df_output
       city old_method new_method
 1: bra_cur        456        203
 2: bra_for        597        216
 3: bra_spo       2379       2130
 4: bra_poa        353        334
 5: can_win        627        108
 6: irl_dub        390         98
 7: usa_col        157        133
 8: usa_det        107         72
 9: bra_cam        389        172
10: aus_can        123          9
11: aus_syd       3987        665
12: aus_bri       1883        194
13: aus_mel       1501        347
14: aus_ade       1267        488
15: can_tor        442        223
16: can_mon        614         65
17: cze_prg       1580        172
18: fra_nan        356        193
19: fra_gre         67         46
20: ita_rom        992        489
21: usa_hou        393        124
22: usa_nyc          7          0
23: usa_chi        719        232
24: usa_sfo        179         61

Check the shape_ids that are processed by old_method but excluded by new_method, to analyse what usually happens.

rodar emissões para ambas as estratégias @Joaobazzo

> df_output
       city old_method new_method  diff
 1: bra_cur        451        456   1.1
 2: bra_for        595        597   0.3
 3: bra_spo       2106       2106   0.0
 4: bra_poa        353        353   0.0
 5: can_win        627        616  -1.8
 6: irl_dub        390        390   0.0
 7: usa_col        157        157   0.0
 8: usa_det        107        106  -0.9
 9: bra_cam        389        389   0.0
10: aus_can        123        123   0.0
11: aus_syd       3985       3970  -0.4
12: aus_bri       1883       1880  -0.2
13: aus_mel       1500       1501   0.1
14: aus_ade       1266       1260  -0.5
15: can_tor        442        433  -2.0
16: cze_prg       1578       1578   0.0
17: fra_nan        356        355  -0.3
18: fra_gre         65         65   0.0
19: ita_rom        991        968  -2.3
20: usa_hou        393        393   0.0
21: usa_nyc          7          7   0.0
22: usa_chi          1          1   0.0
23: usa_sfo        179        157 -12.3

olha, muito pouca diferença @pedro-andrade-inpe @rafapereirabr

Por que aquele 1º teste deu uma diferença tão grande, e agora a diferença está pequena?

This is the code to save every shape_id that cannot be snapped:

require(gtfs2gps)

# Write a set of shapefiles for every shape_id in `ids`, so that shapes one
# snapping strategy failed to process can be inspected next to the result of
# the other strategy.
#
# Arguments:
#   gtfs           GTFS object (the caller passes the result of read_gtfs()).
#   gps            GPS table with a `shape_id` column (the caller passes the
#                  output of gtfs2gps() computed with the other strategy).
#   ids            shape_ids to export.
#   prefix         prefix of every output file name.
#   other_strategy label of the strategy that produced `gps`; it becomes part
#                  of the GPS file names.
#
# For each id the files written are "<prefix>_<id>_gtfs_stop.shp",
# "<prefix>_<id>_gtfs_shape.shp" and, when `gps` has rows for that id,
# "<prefix>_<id>_<other_strategy>_gps_lines.shp" and
# "<prefix>_<id>_<other_strategy>_gps_points.shp".
save_data <- function(gtfs, gps, ids, prefix, other_strategy){
  # Return the stops (as sf points, ordered by stop_sequence) of the trip
  # with the most stop_times rows among the trips that use `shapeid` on its
  # first route. Returns NULL when none of those trips has stop_times.
  getStops <- function(gtfs_data, shapeid){
    routeid <- gtfs_data$trips[shape_id == shapeid]$route_id[1]

    all_tripids <- unique(gtfs_data$trips[shape_id == shapeid & route_id == routeid, ]$trip_id)
    stops_per_trip <- gtfs_data$stop_times[trip_id %chin% all_tripids, .N, by = "trip_id"]

    if(nrow(stops_per_trip) == 0) return(NULL)

    # Take the trip id from the counts table itself: grouped results follow
    # the order in which trips first appear in stop_times (and skip trips
    # without stop_times), so positions in the counts do not necessarily
    # match positions in all_tripids.
    longest_trip <- stops_per_trip$trip_id[which.max(stops_per_trip$N)]

    stops_seq <- gtfs_data$stop_times[trip_id == longest_trip, .(stop_id, stop_sequence, departure_time)]
    stops_seq[gtfs_data$stops, on = "stop_id", c('stop_lat', 'stop_lon') := list(i.stop_lat, i.stop_lon)] # add lat long info
    data.table::setorderv(stops_seq, "stop_sequence")

    sfheaders::sf_point(stops_seq, x = "stop_lon", y = "stop_lat", keep = TRUE)
  }

  for(i in ids){
    stops <- getStops(gtfs, i)

    if(is.null(stops)){
      cat(paste0("STOPS NULL FOR ID ", i, "\n"))
    }
    else{
      sf::write_sf(stops, paste0(prefix, "_", i, "_gtfs_stop.shp"))

      # shape geometry of this single shape_id
      sub_gtfs <- gtfs %>% filter_by_shape_id(i)
      gtfs_shapes_as_sf(sub_gtfs) %>% sf::write_sf(paste0(prefix, "_", i, "_gtfs_shape.shp"))

      sub_gps <- dplyr::filter(gps, shape_id == i)

      # the other strategy may have skipped this shape as well
      if(nrow(sub_gps) > 0){
        mls <- gps_as_sflinestring(sub_gps)
        mpts <- gps_as_sfpoints(sub_gps)

        mls %>% sf::write_sf(paste0(prefix, "_", i, "_", other_strategy, "_gps_lines.shp"))
        mpts %>% sf::write_sf(paste0(prefix, "_", i, "_", other_strategy, "_gps_points.shp"))
      }
    }
  }
}

# Run gtfs2gps() on a GTFS feed with both snapping methods ("nearest" and
# "restrictive"), find the shape_ids that are present in the feed but absent
# from each output, and export data about them through save_data().
#
# Arguments:
#   filename path of the GTFS zip file to read.
#   prefix   prefix used by save_data() for the names of the files it writes.
#
# A short summary of how many shapes each method missed is printed with cat().
save_shapes_with_problems <- function(filename, prefix){
  gtfs <- read_gtfs(filename)

  gps_n <- gtfs2gps::gtfs2gps(gtfs, method = "nearest")
  gps_r <- gtfs2gps::gtfs2gps(gtfs, method = "restrictive")

  ids <- unique(gtfs$shapes$shape_id)
  ids_n <- unique(gps_n$shape_id)
  ids_r <- unique(gps_r$shape_id)

  text <- ""

  # Shapes lost by "restrictive" are exported together with the "nearest"
  # result, so the two can be compared.
  missing_r <- setdiff(ids, ids_r)
  if(length(missing_r) > 0){
    text <- paste0(text, length(missing_r), " RESTRICTIVE ARE MISSING\n")
    save_data(gtfs, gps_n, missing_r, prefix, "nearest")
  }
  else{
    text <- paste0(text, "NO RESTRICTIVE IS MISSING\n")
  }

  # Shapes lost by "nearest" are exported together with the "restrictive"
  # result. The count reported here is the number of shapes missing from
  # "nearest" (missing_n), not from "restrictive".
  missing_n <- setdiff(ids, ids_n)
  if(length(missing_n) > 0){
    text <- paste0(text, length(missing_n), " NEAREST ARE MISSING\n")
    save_data(gtfs, gps_r, missing_n, prefix, "restrictive")
  }
  else{
    text <- paste0(text, "NO NEAREST IS MISSING\n")
  }

  cat(text)
}

# Process each sample feed in turn; the zip files are looked up under
# extdata/ of the installed gtfs2gps package, and the feed name doubles as
# the output file prefix.
invisible(lapply(
  c("poa", "saopaulo", "fortaleza", "berlin", "warsaw"),
  function(city){
    feed_path <- system.file(paste0("extdata/", city, ".zip"), package = "gtfs2gps")
    save_shapes_with_problems(feed_path, city)
  }
))

I believe we can close this issue now, right?