Possible bug in mmap of complex types
Closed this issue · 2 comments
andy-thomason commented
I'm using columns with a schema of List(Struct(Dictionary(...))) to represent
indexed database tables with compact keys.
I'm getting the error:
Error: OutOfSpec("buffer's length is too small in mmap")
This happens when I use the mmap chunk reader; the same schema works fine with the regular reader.
I've managed to cut my failing example down to the following code, but I'll concede that
I may have cut too far!
The code below contains both a failing path (the mmap reader) and a working path (the regular file reader).
use arrow2::{datatypes::{Schema, DataType, Field, IntegerType}, array::{DictionaryArray, FixedSizeBinaryArray, Int32Array, ListArray, StructArray}, chunk::Chunk, offset::OffsetsBuffer};
/// Boxed, thread-safe (`Send + Sync`) dynamic error type; used as the error half of the
/// `Result` returned by `main`, so any error raised with `?` can be propagated.
type BDE = Box<dyn std::error::Error + Send + Sync + 'static>;
/// Reproduces the reported mmap failure.
///
/// Writes a single chunk holding an empty `List(Struct(Dictionary(Int32 -> FixedSizeBinary(20))))`
/// column to an in-memory IPC file and then reads it back. With `use_mmap` set to `true` the
/// bytes are read through `arrow2::mmap` (the path reported as failing); otherwise they are read
/// through the regular `FileReader` (the path reported as working).
fn main() -> Result<(), BDE> {
    // Serialize into an in-memory buffer instead of a file on disk.
    let sink = vec![];
    let mut writer =
        arrow2::io::ipc::write::FileWriter::try_new(sink, schema(), None, Default::default())?;

    // A dictionary array with no keys and no values; it is the only child of the struct.
    let empty_keys = Int32Array::new_empty(DataType::Int32);
    let empty_values = FixedSizeBinaryArray::new_empty(DataType::FixedSizeBinary(20));
    let address = DictionaryArray::<i32>::try_from_keys(empty_keys, empty_values.boxed())?;
    let struct_children = vec![address.boxed()];

    // Build the list column from the inside out, in the same order the arguments were
    // originally evaluated: list type, offsets, struct child, then the list itself.
    let list_type = DataType::List(Box::new(Field::new(
        "list",
        DataType::Struct(accounts_fields()),
        false,
    )));
    // NOTE(review): offsets `[0, 0]` presumably describe one list entry of length zero.
    let offsets = OffsetsBuffer::<i32>::try_from(vec![0, 0])?;
    let accounts = StructArray::try_new(
        DataType::Struct(accounts_fields()),
        struct_children,
        None,
    )?;
    let column = ListArray::try_new(list_type, offsets, accounts.boxed(), None)?;
    let chunk = Chunk::try_new(vec![column.boxed()])?;

    writer.write(&chunk, None)?;
    writer.finish()?;
    let bytes = writer.into_inner();

    // Both read paths need the file's metadata, so read it first.
    let mut cursor = std::io::Cursor::new(&bytes);
    let metadata = arrow2::io::ipc::read::read_file_metadata(&mut cursor)?;

    // Flip to `false` to exercise the regular reader instead of the mmap reader.
    let use_mmap = true;
    if use_mmap {
        let data = std::sync::Arc::new(bytes);
        // SAFETY: `data` holds exactly the bytes produced by the IPC `FileWriter` above and
        // `metadata` was read from those same bytes.
        // NOTE(review): confirm this meets the requirements documented for the `*_unchecked`
        // mmap functions.
        let dictionaries =
            unsafe { arrow2::mmap::mmap_dictionaries_unchecked(&metadata, data.clone())? };
        let _fails = unsafe { arrow2::mmap::mmap_unchecked(&metadata, &dictionaries, data, 0)? };
    } else {
        let mut reader = arrow2::io::ipc::read::FileReader::new(cursor, metadata, None, None);
        let _works = reader.next().ok_or("no chunks")??;
    }
    Ok(())
}
fn schema() -> Schema {
use DataType::*;
Schema::from(vec![
Field::new(
"accounts",
List(Box::new(Field::new(
"list",
Struct(accounts_fields()),
false,
))),
false,
),
])
}
fn accounts_fields() -> Vec<Field> {
use DataType::*;
use IntegerType::Int32;
vec![
// Address: 32 bits
Field::new(
"address",
Dictionary(Int32, Box::new(FixedSizeBinary(20)), false),
false,
),
]
}
andy-thomason commented
Happy to submit a PR if I get there first.
andy-thomason commented
The problem here may be that FixedSizeBinary with zero elements fails the round trip.