How to specify column types for `df_from_csv()`

Question

How to specify column types for `df_from_csv()`

TimTaylor opened this issue 6 months ago · 1 comment

Is it possible to specify column types when reading a file with `df_from_csv()`? If so, would it be worth adding some examples to the documentation?

Answer 1 · 2024-06-22T21:25:40.000Z

I wanted to try this too, but passing the column types through the `options` argument does not seem to work (see the reprex below). Is there a way forward here?

library(duckplyr)
#> ✔ Overwriting dplyr methods with duckplyr methods.
#> ℹ Turn off with `duckplyr::methods_restore()`.
#> 
#> Attaching package: 'duckplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Create a simple two-column CSV fixture in a temporary file:
# column `a` holds the integers 1..3 and column `b` the letters "d", "e", "f".
# `row.names = FALSE` keeps the row-name column out of the file.
path <- tempfile("duckplyr_test_", fileext = ".csv")
write.csv(data.frame(a = 1:3, b = letters[4:6]), path, row.names = FALSE)

# Attempt 1: try to set the column types (integer, character) by passing a
# `col_types` entry through `options` to the "read_csv" table function.
# This fails: the binder error printed below rejects `col_types` as an
# invalid named parameter for read_csv.
# NOTE(review): "ic" looks like a readr-style compact column spec; the
# candidate list in the error includes `types`, `dtypes`, `column_types` and
# `columns`, one of which is presumably the supported way to declare column
# types -- confirm against the DuckDB CSV documentation.
df_from_file(
  path,
  "read_csv",
  options = list(delim = ",", col_types = c("ic"))
)
#> Error: {"exception_type":"Binder","exception_message":"Invalid named parameter \"col_types\" for function read_csv\nCandidates:\n    hive_types_autocast BOOLEAN\n    hive_types ANY\n    union_by_name BOOLEAN\n    filename BOOLEAN\n    dtypes ANY\n    null_padding BOOLEAN\n    parallel BOOLEAN\n    decimal_separator VARCHAR\n    buffer_size UBIGINT\n    all_varchar BOOLEAN\n    store_rejects BOOLEAN\n    names VARCHAR[]\n    compression VARCHAR\n    ignore_errors BOOLEAN\n    rejects_scan VARCHAR\n    quote VARCHAR\n    max_line_size VARCHAR\n    types ANY\n    skip BIGINT\n    column_types ANY\n    rejects_table VARCHAR\n    normalize_names BOOLEAN\n    nullstr ANY\n    auto_type_candidates ANY\n    sample_size BIGINT\n    auto_detect BOOLEAN\n    timestampformat VARCHAR\n    force_not_null VARCHAR[]\n    rejects_limit BIGINT\n    columns ANY\n    new_line VARCHAR\n    maximum_line_size VARCHAR\n    allow_quoted_nulls BOOLEAN\n    escape VARCHAR\n    header BOOLEAN\n    hive_partitioning BOOLEAN\n    sep VARCHAR\n    column_names VARCHAR[]\n    dateformat VARCHAR\n    delim VARCHAR\n"}

# Attempt 2: same `options` list, this time through the CSV convenience
# wrapper. Per the error printed below, the call is bound to `read_csv_auto`,
# which rejects `col_types` in the same way and offers the same candidate
# parameters.
duckplyr_df_from_csv(
  path,
  options = list(delim = ",", col_types = c("ic"))
)
#> Error: {"exception_type":"Binder","exception_message":"Invalid named parameter \"col_types\" for function read_csv_auto\nCandidates:\n    hive_types_autocast BOOLEAN\n    hive_types ANY\n    union_by_name BOOLEAN\n    filename BOOLEAN\n    dtypes ANY\n    null_padding BOOLEAN\n    parallel BOOLEAN\n    decimal_separator VARCHAR\n    buffer_size UBIGINT\n    all_varchar BOOLEAN\n    store_rejects BOOLEAN\n    names VARCHAR[]\n    compression VARCHAR\n    ignore_errors BOOLEAN\n    rejects_scan VARCHAR\n    quote VARCHAR\n    max_line_size VARCHAR\n    types ANY\n    skip BIGINT\n    column_types ANY\n    rejects_table VARCHAR\n    normalize_names BOOLEAN\n    nullstr ANY\n    auto_type_candidates ANY\n    sample_size BIGINT\n    auto_detect BOOLEAN\n    timestampformat VARCHAR\n    force_not_null VARCHAR[]\n    rejects_limit BIGINT\n    columns ANY\n    new_line VARCHAR\n    maximum_line_size VARCHAR\n    allow_quoted_nulls BOOLEAN\n    escape VARCHAR\n    header BOOLEAN\n    hive_partitioning BOOLEAN\n    sep VARCHAR\n    column_names VARCHAR[]\n    dateformat VARCHAR\n    delim VARCHAR\n"}

Created on 2024-06-22 with reprex v2.1.0