/r-polarssql

A Polars backend for R DBI and dbplyr

Primary LanguageROtherNOASSERTION

polarssql

polarssql status badge CRAN status

{polarssql} is an experimental DBI-compliant interface to Polars.

Polars is not an actual database, so does not support full {DBI} functionality. Please check the Polars User Guide for supported SQL features.

Installation

The polars R package and {polarssql} can be installed from R-universe:

Sys.setenv(NOT_CRAN = "true") # for installing the polars package with pre-built binary
install.packages("polarssql", repos = c("https://rpolars.r-universe.dev", getOption("repos")))

Example

library(DBI)

con <- dbConnect(polarssql::polarssql())
dbWriteTable(con, "mtcars", mtcars)

# We can fetch all results:
res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4")
dbFetch(res)
#>     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
#> 1  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
#> 2  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
#> 3  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
#> 4  32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
#> 5  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
#> 6  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
#> 7  21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
#> 8  27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
#> 9  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
#> 10 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
#> 11 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

# Clear the result
dbClearResult(res)

# Or a chunk at a time
res <- dbSendQuery(con, "SELECT * FROM mtcars WHERE cyl = 4")
while (!dbHasCompleted(res)) {
  chunk <- dbFetch(res, n = 5)
  print(nrow(chunk))
}
#> [1] 5
#> [1] 5
#> [1] 1

# Clear the result
dbClearResult(res)

# We can use table functions to read files directly:
tf <- tempfile(fileext = ".parquet")
on.exit(unlink(tf))
polars::as_polars_lf(mtcars)$sink_parquet(tf)

dbGetQuery(con, paste0("SELECT * FROM read_parquet('", tf, "') ORDER BY mpg DESC LIMIT 3"))
#>    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> 1 33.9   4 71.1  65 4.22 1.835 19.90  1  1    4    1
#> 2 32.4   4 78.7  66 4.08 2.200 19.47  1  1    4    1
#> 3 30.4   4 95.1 113 3.77 1.513 16.90  1  1    5    2

{polarssql} also provides functions that are simpler to use, inspired by the {duckdb} package,

library(polarssql)

# These functions use the built-in connection by default, so we don't need to specify connection

# Resgister a data.frame to the built-in connection
polarssql_register(df = mtcars)

# Get the query result as a polars LazyFrame
polarssql_query("SELECT * FROM df WHERE cyl = 4")
#> polars LazyFrame
#>  $describe_optimized_plan() : Show the optimized query plan.
#> 
#> Naive plan:
#>  SELECT [col("mpg"), col("cyl"), col("disp"), col("hp"), col("drat"), col("wt"), col("qsec"), col("vs"), col("am"), col("gear"), col("carb")] FROM
#>   FILTER [(col("cyl")) == (4)] FROM
#> 
#>   DF ["mpg", "cyl", "disp", "hp"]; PROJECT */11 COLUMNS; SELECTION: "None"

# Unregister the table
polarssql_unregister("df")

And, basic supports for {dbplyr} is also implemented.

library(dplyr, warn.conflicts = FALSE)

# Resgister a data.frame to the built-in connection, and query it via dbplyr
tbl_polarssql(mtcars) |>
  filter(cyl == 4) |>
  arrange(desc(mpg)) |>
  head(3) |>
  compute()
#> shape: (3, 11)
#> ┌──────┬─────┬──────┬───────┬───┬─────┬─────┬──────┬──────┐
#> │ mpg  ┆ cyl ┆ disp ┆ hp    ┆ … ┆ vs  ┆ am  ┆ gear ┆ carb │
#> │ ---  ┆ --- ┆ ---  ┆ ---   ┆   ┆ --- ┆ --- ┆ ---  ┆ ---  │
#> │ f64  ┆ f64 ┆ f64  ┆ f64   ┆   ┆ f64 ┆ f64 ┆ f64  ┆ f64  │
#> ╞══════╪═════╪══════╪═══════╪═══╪═════╪═════╪══════╪══════╡
#> │ 33.9 ┆ 4.0 ┆ 71.1 ┆ 65.0  ┆ … ┆ 1.0 ┆ 1.0 ┆ 4.0  ┆ 1.0  │
#> │ 32.4 ┆ 4.0 ┆ 78.7 ┆ 66.0  ┆ … ┆ 1.0 ┆ 1.0 ┆ 4.0  ┆ 1.0  │
#> │ 30.4 ┆ 4.0 ┆ 95.1 ┆ 113.0 ┆ … ┆ 1.0 ┆ 1.0 ┆ 5.0  ┆ 2.0  │
#> └──────┴─────┴──────┴───────┴───┴─────┴─────┴──────┴──────┘