`unreachable!()` in rle_v2_decode_bit_width is reachable
Closed this issue · 2 comments
progval commented
The `unreachable!()`
statement in this function:
datafusion-orc/src/reader/decode/util.rs
Lines 225 to 236 in 1ee1df9
can be reached while reading files created with pyorc.
To reproduce:
wget https://softwareheritage.s3.amazonaws.com/graph/2023-09-06/orc/release/release-00d4739e-c723-4843-863e-e4a895c58005.orc
then run this code with ./release-00d4739e-c723-4843-863e-e4a895c58005.orc
as parameter:
use std::fs::File;
use std::path::PathBuf;
use anyhow::Result;
use orc_rust::arrow_reader::ArrowReaderBuilder;
use orc_rust::projection::ProjectionMask;
/// Reproduction driver: opens the ORC file named by the first command-line
/// argument, projects only the `date` root column, and iterates the reader
/// with a batch size of 10, printing the index of every chunk it yields.
///
/// # Errors
///
/// Returns an error if no path argument was supplied, if the file cannot be
/// opened, or if `ArrowReaderBuilder::try_new` fails.
pub fn main() -> Result<()> {
    // `args_os` (rather than `args`) keeps the path usable even when it is
    // not valid Unicode; `nth(1)` skips the program name in one step.
    let file_path = std::env::args_os()
        .nth(1)
        .map(PathBuf::from)
        .ok_or_else(|| {
            // Report a missing argument through the normal `?` path instead
            // of panicking on `unwrap`.
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "usage: repro <path-to-orc-file>",
            )
        })?;
    println!("reading {}", file_path.display());

    let file = File::open(&file_path)?;
    let reader_builder = ArrowReaderBuilder::try_new(file)?;

    // Restrict decoding to the single root column named "date".
    let projection = ProjectionMask::named_roots(
        reader_builder.file_metadata().root_data_type(),
        ["date"].as_slice(),
    );
    let reader = reader_builder
        .with_projection(projection)
        .with_batch_size(10)
        .build();

    // NOTE(review): every yielded item is discarded; if the reader yields
    // `Result` values, decode errors are silently ignored here — confirm
    // whether that is intended for this reproduction.
    for (i, _) in reader.enumerate() {
        println!("chunk {}", i);
    }
    Ok(())
}
with this small patch applied to datafusion-orc:
diff --git a/src/reader/decode/util.rs b/src/reader/decode/util.rs
index 468bd7d..2d5bad6 100644
--- a/src/reader/decode/util.rs
+++ b/src/reader/decode/util.rs
@@ -231,7 +231,7 @@ pub fn rle_v2_decode_bit_width(encoded: u8) -> usize {
29 => 48,
30 => 56,
31 => 64,
- _ => unreachable!(),
+ _ => unreachable!("rle_v2_decode_bit_width({})", encoded),
}
}
which prints:
[...]
chunk 1715
chunk 1716
chunk 1717
chunk 1718
chunk 1719
chunk 1720
chunk 1721
chunk 1722
chunk 1723
chunk 1724
thread 'main' panicked at /home/vlorentz/datafusion-orc/src/reader/decode/util.rs:234:14:
internal error: entered unreachable code: rle_v2_decode_bit_width(26)
stack backtrace:
0: rust_begin_unwind
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/std/src/panicking.rs:645:5
1: core::panicking::panic_fmt
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/panicking.rs:72:14
2: orc_rust::reader::decode::util::rle_v2_decode_bit_width
at /home/vlorentz/datafusion-orc/src/reader/decode/util.rs:234:14
3: orc_rust::reader::decode::rle_v2::patched_base::<impl orc_rust::reader::decode::rle_v2::RleReaderV2<N,R>>::read_patched_base
at /home/vlorentz/datafusion-orc/src/reader/decode/rle_v2/patched_base.rs:32:31
4: orc_rust::reader::decode::rle_v2::RleReaderV2<N,R>::decode_batch
at /home/vlorentz/datafusion-orc/src/reader/decode/rle_v2/mod.rs:38:42
5: <orc_rust::reader::decode::rle_v2::RleReaderV2<N,R> as core::iter::traits::iterator::Iterator>::next
at /home/vlorentz/datafusion-orc/src/reader/decode/rle_v2/mod.rs:53:19
6: <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/alloc/src/boxed.rs:1949:9
7: <&mut I as core::iter::traits::iterator::Iterator>::next
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/iter/traits/iterator.rs:4169:9
8: <core::iter::adapters::zip::Zip<A,B> as core::iter::adapters::zip::ZipImpl<A,B>>::next
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/iter/adapters/zip.rs:166:21
9: <core::iter::adapters::zip::Zip<A,B> as core::iter::traits::iterator::Iterator>::next
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/iter/adapters/zip.rs:85:9
10: <orc_rust::arrow_reader::column::timestamp::TimestampIterator as core::iter::traits::iterator::Iterator>::next
at /home/vlorentz/datafusion-orc/src/arrow_reader/column/timestamp.rs:31:13
11: <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/alloc/src/boxed.rs:1949:9
12: orc_rust::arrow_reader::decoder::PrimitiveArrayDecoder<T>::next_primitive_batch
at /home/vlorentz/datafusion-orc/src/arrow_reader/decoder/mod.rs:65:35
13: <orc_rust::arrow_reader::decoder::timestamp::TimestampOffsetArrayDecoder as orc_rust::arrow_reader::decoder::ArrayBatchDecoder>::next_batch
at /home/vlorentz/datafusion-orc/src/arrow_reader/decoder/timestamp.rs:109:21
14: orc_rust::arrow_reader::decoder::NaiveStripeDecoder::inner_decode_next_batch
at /home/vlorentz/datafusion-orc/src/arrow_reader/decoder/mod.rs:408:25
15: orc_rust::arrow_reader::decoder::NaiveStripeDecoder::decode_next_batch
at /home/vlorentz/datafusion-orc/src/arrow_reader/decoder/mod.rs:420:22
16: <orc_rust::arrow_reader::decoder::NaiveStripeDecoder as core::iter::traits::iterator::Iterator>::next
at /home/vlorentz/datafusion-orc/src/arrow_reader/decoder/mod.rs:288:26
17: <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/alloc/src/boxed.rs:1949:9
18: <orc_rust::arrow_reader::ArrowReader<R> as core::iter::traits::iterator::Iterator>::next
at /home/vlorentz/datafusion-orc/src/arrow_reader/mod.rs:159:23
19: <core::iter::adapters::enumerate::Enumerate<I> as core::iter::traits::iterator::Iterator>::next
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/iter/adapters/enumerate.rs:47:17
20: repro::main
at ./rust/src/bin/repro.rs:22:19
21: core::ops::function::FnOnce::call_once
at /rustc/07dca489ac2d933c78d3c5158e3f43beefeb02ce/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
Jefffrey commented
Thanks for finding this, I will take a look; should be a quick fix 🙏