duckdb/duckdb-rs

BUG: Panic on `SELECT * FROM arrow(?, ?)` when using `TimestampMillisecondArray` arrow type.

Jeadie opened this issue · 5 comments

Overview

When creating a table in DuckDB using an Arrow record batch, the duckDB panics when the RecordBatch has a column of type TimestampMillisecondArray (and true of other timestamp array types).

To reproduce

main.rs

use std::sync::Arc;
use duckdb::{arrow::{array::{ArrayRef, Int32Array, StringArray, TimestampMillisecondArray}, record_batch::RecordBatch}, vtab::arrow::{arrow_recordbatch_to_query_params, ArrowVTab}, Connection};

fn main() {
    let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "a"]));
    let c: ArrayRef = Arc::new(TimestampMillisecondArray::from(vec![11111111, 22222222, 33333333]).with_timezone("+10:00".to_string()));
    let batch = RecordBatch::try_from_iter(vec![
        ("x", a),
        ("y", b),
        ("z", c),
    ]).unwrap();

    let db = Connection::open_in_memory().unwrap();
    db.register_table_function::<ArrowVTab>("arrow").unwrap();
    let name = "my_table";  

    let mut stmt = db.prepare(format!(r#"CREATE TABLE "{name}" AS SELECT * FROM arrow(?, ?)"#).as_str()).unwrap();
    stmt.execute(arrow_recordbatch_to_query_params(batch)).unwrap();
    
    match db.query_row("SELECT count(1) FROM my_table", [], |x| x.get::<_, i32>(0),) {
        Ok(x) => println!("num_rows: {}", x),
        Err(e) => println!("Error: {}", e),
    }
}

cargo.toml

[package]
name = "example"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = "51.0.0"
duckdb = {path="../", features=["bundled",
    "r2d2",
    "vtab",
    "vtab-arrow"] 
}

Stacktrace

thread 'main' panicked at src/vtab/arrow.rs:334:13:
not yet implemented
stack backtrace:
   0: rust_begin_unwind
             at /rustc/7cf61ebde7b22796c69757901dd346d0fe70bd97/library/std/src/panicking.rs:647:5
   1: core::panicking::panic_fmt
             at /rustc/7cf61ebde7b22796c69757901dd346d0fe70bd97/library/core/src/panicking.rs:72:14
   2: core::panicking::panic
             at /rustc/7cf61ebde7b22796c69757901dd346d0fe70bd97/library/core/src/panicking.rs:144:5
   3: duckdb::vtab::arrow::primitive_array_to_vector
             at /Users/jeadie/Github/duckdb-rs/src/vtab/arrow.rs:334:13
   4: duckdb::vtab::arrow::record_batch_to_duckdb_data_chunk
             at /Users/jeadie/Github/duckdb-rs/src/vtab/arrow.rs:216:17
   5: <duckdb::vtab::arrow::ArrowVTab as duckdb::vtab::VTab>::func
             at /Users/jeadie/Github/duckdb-rs/src/vtab/arrow.rs:109:17
   6: duckdb::vtab::func
             at /Users/jeadie/Github/duckdb-rs/src/vtab/mod.rs:132:18
   7: _ZN6duckdb14CTableFunctionERNS_13ClientContextERNS_18TableFunctionInputERNS_9DataChunkE
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/main/capi/table_function-c.cpp:168:2
   8: _ZNK6duckdb17PhysicalTableScan7GetDataERNS_16ExecutionContextERNS_9DataChunkERNS_19OperatorSourceInputE
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/execution/operator/scan/physical_table_scan.cpp:74:2
   9: _ZN6duckdb16PipelineExecutor7GetDataERNS_9DataChunkERNS_19OperatorSourceInputE
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/parallel/pipeline_executor.cpp:466:26
  10: _ZN6duckdb16PipelineExecutor15FetchFromSourceERNS_9DataChunkE
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/parallel/pipeline_executor.cpp:492:13
  11: _ZN6duckdb16PipelineExecutor7ExecuteEy
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/parallel/pipeline_executor.cpp:203:21
  12: _ZN6duckdb12PipelineTask11ExecuteTaskENS_17TaskExecutionModeE
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/parallel/pipeline.cpp:40:33
  13: _ZN6duckdb12ExecutorTask7ExecuteENS_17TaskExecutionModeE
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/parallel/executor_task.cpp:32:10
  14: _ZN6duckdb8Executor11ExecuteTaskEb
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/parallel/executor.cpp:516:24
  15: _ZN6duckdb13ClientContext19ExecuteTaskInternalERNS_17ClientContextLockERNS_15BaseQueryResultEb
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/main/client_context.cpp:539:47
  16: _ZN6duckdb18PendingQueryResult19ExecuteTaskInternalERNS_17ClientContextLockE
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/main/pending_query_result.cpp:63:18
  17: _ZN6duckdb18PendingQueryResult15ExecuteInternalERNS_17ClientContextLockE
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/main/pending_query_result.cpp:73:22
  18: _ZN6duckdb18PendingQueryResult7ExecuteEv
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/main/pending_query_result.cpp:86:9
  19: _ZN6duckdb17PreparedStatement7ExecuteERNSt3__113unordered_mapINS1_12basic_stringIcNS1_11char_traitsIcEENS1_9allocatorIcEEEENS_5ValueENS_33CaseInsensitiveStringHashFunctionENS_29CaseInsensitiveStringEqualityENS6_INS1_4pairIKS8_S9_EEEEEEb
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/main/prepared_statement.cpp:77:18
  20: duckdb_execute_prepared_arrow
             at /Users/jeadie/Github/duckdb-rs/libduckdb-sys/duckdb/src/main/capi/arrow-c.cpp:165:36
  21: duckdb::raw_statement::RawStatement::execute
             at /Users/jeadie/Github/duckdb-rs/src/raw_statement.rs:227:22
  22: duckdb::statement::Statement::execute_with_bound_parameters
             at /Users/jeadie/Github/duckdb-rs/src/statement.rs:507:9
  23: duckdb::statement::Statement::execute
             at /Users/jeadie/Github/duckdb-rs/src/statement.rs:65:9
  24: example::main
             at ./src/main.rs:21:5
  25: core::ops::function::FnOnce::call_once
             at /rustc/7cf61ebde7b22796c69757901dd346d0fe70bd97/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
fatal runtime error: Rust panics must be rethrown
zsh: abort      cargo run

Appears to just be an unimplemented conversion in primitive_array_to_vector.

This commit 626a451 resolves the panic, but may not handle proper timezone conversion.

We're also having the same issue Date32.

Edit: Looks like you've already identified the issue, ill give it a shot.

There are multiple layers to this as well, we should return an error instead of panicking, and fix the pointer issue that causes the following segfault