Allow passthrough of columns not used in the query
Opened this issue · 0 comments
krlmlr commented
From the rWCVP package. Ideally, we would detect that the `scientificName` column
is not used in the query (it is only passed through to the result), and never even load it into DuckDB.
lhs <- tibble::tibble(scientificName = "\xd7 ", a = 1L)
rhs <- tibble::tibble(a = 1:2)
duckplyr:::duckplyr_left_join(lhs, rhs, by = "a", na_matches = "never")
#> materializing:
#> ---------------------
#> --- Relation Tree ---
#> ---------------------
#> Projection [scientificName_x as scientificName, ___coalesce(lhs.a_x, rhs.a_y) as a]
#> Join REGULAR LEFT ==(lhs.a_x, rhs.a_y)
#> Projection [scientificName as scientificName_x, a as a_x]
#> r_dataframe_scan(0x12196c888)
#> Projection [a as a_y]
#> r_dataframe_scan(0x107aa4940)
#>
#> ---------------------
#> -- Result Columns --
#> ---------------------
#> - scientificName (VARCHAR)
#> - a (INTEGER)
#> Error: Error evaluating duckdb query: Invalid Input Error: Invalid unicode (byte sequence mismatch) detected in value construction
duckplyr:::duckplyr_left_join(lhs, rhs, by = "a")
#> materializing:
#> ---------------------
#> --- Relation Tree ---
#> ---------------------
#> Projection [scientificName_x as scientificName, ___coalesce(lhs.a_x, rhs.a_y) as a]
#> Join REGULAR LEFT ___eq_na_matches_na(lhs.a_x, rhs.a_y)
#> Projection [scientificName as scientificName_x, a as a_x]
#> r_dataframe_scan(0x107b9d088)
#> Projection [a as a_y]
#> r_dataframe_scan(0x120cd4678)
#>
#> ---------------------
#> -- Result Columns --
#> ---------------------
#> - scientificName (VARCHAR)
#> - a (INTEGER)
#>
#> # A tibble: 1 × 2
#> scientificName a
#> <chr> <int>
#> 1 "\xd7 " 1
Created on 2024-05-07 with reprex v2.1.0