[Question] Recommended way to read bytes
alexhallam opened this issue · 0 comments
alexhallam commented
Which part is this question about
This question is about the `arrow2::io::ipc::read` module.
Describe your question
I do not see a natural way to read an IPC file from in-memory bytes. In my use case I am trying to read IPC
files that I have put in a tar archive. I know that I can unpack the tar and read the files after unpacking, but I would like to read the Arrow data directly from the archive, and I am not sure how.
Additional context
use arrow2::io::ipc::read::deserialize_schema;
use arrow2::io::ipc::read::read_file_metadata;
use std::fs::File;
use std::io::prelude::*;
use std::str::from_utf8;
use tar::{Archive, Builder, Entry};
fn main() {
// write tar with file1 and file2
let file_to_read = "df/test.arrow";
let file = File::create("foo.tar").unwrap();
let mut a = Builder::new(file);
a.append_path("df/test.arrow").expect("file1 not found");
a.append_path("df/test2.arrow").expect("file2 not found");
// read file2 without unpack
let file = File::open("foo.tar").unwrap();
let mut a = Archive::new(file);
// file_contents
let mut file_contents: Option<Entry<'_, std::fs::File>> = Option::None;
for file in a.entries().unwrap() {
let file = file.unwrap();
let file_name = file.header().path().unwrap();
println!("File name: {}", file_name.display());
// get file contents if the file is file2
match file_name.to_str().unwrap() {
"df/test.arrow" => {
file_contents = Some(file);
break;
}
_ => continue,
};
}
let mut buffer = Vec::new();
file_contents.unwrap().read_to_end(&mut buffer).unwrap();
let buffer_slice = buffer.as_slice();
let schema = deserialize_schema(buffer_slice).unwrap();
println!("File contents: [{:?}]", schema);
}
Error
File name: df/test.arrow
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: OutOfSpec("Unable deserialize message: Error { source_location: ErrorLocation { type_: \"[MessageRef]\", method: \"read_as_root\", byte_offset: 0 }, error_kind: InvalidOffset }")', src/main.rs:37:51