Error UnspecifiedXRefEntry on reading pdf with only two images
SebastianRzk opened this issue · 1 comments
SebastianRzk commented
I tried to read a simple PDF (generated by a scanner with utsushi (ImageScanV3)) using the read.rs
example code from this repository.
Then the page is `Err` :(.
With other PDFs, everything works as expected.
Example-PDF:
Cargo.toml:
[package]
name = "pdf-image-extractor"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
pdf = "0.8.0"
Console-log:
Finished dev [unoptimized + debuginfo] target(s) in 0.62s
Running `target/debug/pdf-image-extractor`
read: /pathToProject/data/Unbenannt.pdf
PDF
page ok
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: Shared { source: Try { file: "/home/unbekannt/.cargo/registry/src/github.com-1ecc6299db9ec823/pdf-0.8.0/src/file.rs", line: 87, column: 27, context: Context([]), source: UnspecifiedXRefEntry { id: 10 } } }', src/main.rs:39:18
stack backtrace:
0: rust_begin_unwind
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:575:5
1: core::panicking::panic_fmt
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/panicking.rs:64:14
2: core::result::unwrap_failed
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/result.rs:1790:5
3: core::result::Result<T,E>::unwrap
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/result.rs:1112:23
4: pdf_image_extractor::main
at ./src/main.rs:39:13
5: core::ops::function::FnOnce::call_once
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
* The terminal process "cargo 'run', '--package', 'pdf-image-extractor', '--bin', 'pdf-image-extractor'" terminated with exit code: 101.
* Terminal will be reused by tasks, press any key to close it.
Example code:
extern crate pdf;
use std::time::SystemTime;
use std::fs;
use std::collections::HashMap;
use pdf::file::File;
use pdf::object::*;
use pdf::primitive::Primitive;
use pdf::error::PdfError;
use pdf::enc::StreamFilter;
/// Opens the PDF at a hard-coded path, prints a title/author description, collects fonts and
/// image XObjects from the pages, writes the images and embedded font data to files, lists
/// form fields, and finally prints the elapsed time.
///
/// Errors from `raw_image_data` are returned through the `Result`; every other failure in this
/// function goes through `unwrap()` and therefore panics.
fn main() -> Result<(), PdfError> {
let path = "/pathtoobject/data/Unbenannt.pdf";
println!("read: {}", path);
// Start of the measurement reported at the end of the function.
let now = SystemTime::now();
// A failure to open or parse the file panics here instead of being returned.
let file = File::<Vec<u8>>::open(&path).unwrap();
// Build a one-line description from the optional "Title" and "Author" info entries;
// entries that are missing or cannot be converted to a string count as absent.
if let Some(ref info) = file.trailer.info_dict {
let title = info.get("Title").and_then(|p| p.to_string_lossy().ok());
let author = info.get("Author").and_then(|p| p.to_string_lossy().ok());
let descr = match (title, author) {
(Some(title), None) => title,
(None, Some(author)) => format!("[no title] – {}", author),
(Some(title), Some(author)) => format!("{} – {}", title, author),
// Neither entry is available: fall back to a generic label.
_ => "PDF".into()
};
println!("{}", descr);
}
// Image XObjects and fonts gathered across all pages.
let mut images: Vec<_> = vec![];
let mut fonts = HashMap::new();
for page in file.pages() {
if page.is_err(){
// why is page err?
// Debugging aid: unwrap() on an Err panics and prints the underlying error, so the
// `continue` below is never reached.
// NOTE(review): presumably this is the unwrap behind the panic in the console log — confirm.
page.unwrap();
continue;
}
println!("page ok");
let page = page.unwrap();
let resources = page.resources().unwrap();
// Register every font of the page under its name, or under its index when it has no name.
for (i, font) in resources.fonts.values().enumerate() {
let name = match &font.name {
Some(name) => name.as_str().into(),
None => i.to_string(),
};
fonts.insert(name, font.clone());
// NOTE(review): this extend() is inside the per-font loop, so the page's XObjects are
// collected once per font and not at all when the page has no fonts. The commented-out
// variant after this loop runs once per page — confirm which placement is intended.
// XObjects whose lookup fails are dropped silently; only image XObjects are kept.
images.extend(resources.xobjects.iter().map(|(_name, &r)| file.get(r))
.filter( |o| o.is_ok())
.map(|o| o.unwrap())
.filter(|o| matches!(**o, XObject::Image(_)))
);
}
//images.extend(resources.xobjects.iter().map(|(_name, &r)| file.get(r).unwrap())
// .filter(|o| matches!(**o, XObject::Image(_)))
//);
}
// Write each collected image to "extracted_image_<index>.<ext>", choosing the extension
// from the stream filter returned alongside the raw data.
for (i, o) in images.iter().enumerate() {
let img = match **o {
XObject::Image(ref im) => im,
_ => continue
};
// The only fallible call in this function that propagates its error with `?`.
let (data, filter) = img.raw_image_data(&file)?;
let ext = match filter {
Some(StreamFilter::DCTDecode(_)) => "jpeg",
Some(StreamFilter::JBIG2Decode) => "jbig2",
Some(StreamFilter::JPXDecode) => "jp2k",
// Any other filter (or none) is skipped without writing a file.
_ => continue,
};
let fname = format!("extracted_image_{}.{}", i, ext);
fs::write(fname.as_str(), data).unwrap();
println!("Wrote file {}", fname);
}
// Counts every collected image, including those skipped above because of their filter.
println!("Found {} image(s).", images.len());
// Write the embedded data of each font to "font_<name>"; fonts for which embedded_data()
// returns None or an error are skipped.
for (name, font) in fonts.iter() {
let fname = format!("font_{}", name);
if let Some(Ok(data)) = font.embedded_data(&file) {
fs::write(fname.as_str(), data).unwrap();
println!("Wrote file {}", fname);
}
}
println!("Found {} font(s).", fonts.len());
// Print each form field as "<name> = <value>" when the document root has forms.
if let Some(ref forms) = file.get_root().forms {
println!("Forms:");
for field in forms.fields.iter() {
print!(" {:?} = ", field.name);
match field.value {
Primitive::String(ref s) => {
// Prefer the lossily decoded text; fall back to the Debug form of the raw string.
match s.to_string_lossy() {
Ok(s) => println!("{:?}", s),
Err(_) => println!("{:?}", s),
}
}
Primitive::Integer(i) => println!("{}", i),
Primitive::Name(ref s) => println!("{}", s),
// Every other primitive kind is printed with its Debug representation.
ref p => println!("{:?}", p),
}
}
}
// Report the elapsed time in seconds (whole seconds plus the fractional nanosecond part);
// nothing is printed if elapsed() returns an error.
if let Ok(elapsed) = now.elapsed() {
println!("Time: {}s", elapsed.as_secs() as f64
+ elapsed.subsec_nanos() as f64 * 1e-9);
}
Ok(())
}
SebastianRzk commented
Full debug log
Finished dev [unoptimized + debuginfo] target(s) in 0.01s
Finished dev [unoptimized + debuginfo] target(s) in 0.01s
Running `/pathToProjectgit/pdf-image-extractor/target/debug/pdf-image-extractor`
Running `/pathToProjectgit/pdf-image-extractor/target/debug/pdf-image-extractor`
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: Shared { source: Try { file: "/pathToProject.cargo/registry/src/github.com-1ecc6299db9ec823/pdf-0.8.0/src/file.rs", line: 87, column: 27, context: Context([]), source: UnspecifiedXRefEntry { id: 10 } } }', src/main.rs:39:18
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: Shared { source: Try { file: "/pathToProject.cargo/registry/src/github.com-1ecc6299db9ec823/pdf-0.8.0/src/file.rs", line: 87, column: 27, context: Context([]), source: UnspecifiedXRefEntry { id: 10 } } }', src/main.rs:39:18
stack backtrace:
stack backtrace:
0: 0x5601e498c08a - 0: 0x5601e498c08a - std::backtrace_rs::backtrace::libunwind::trace::hba70c054c9cdbd74
std::backtrace_rs::backtrace::libunwind::trace::hba70c054c9cdbd74
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/../../backtrace/src/backtrace/libunwind.rs: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5
1: 93:5
1: 0x5601e498c08a - std::backtrace_rs::backtrace::trace_unsynchronized::hfff24a4d77b00fef
0x5601e498c08a - std::backtrace_rs::backtrace::trace_unsynchronized::hfff24a4d77b00fef
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
2: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
2: 0x5601e498c08a - std::sys_common::backtrace::_print_fmt::h6fb3e9652d3b4f4e
0x5601e498c08a - std::sys_common::backtrace::_print_fmt::h6fb3e9652d3b4f4e
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:65:5
3: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:65:5
3: 0x5601e498c08a - <std::sys_common::backtrace::_print 0x5601e498c08a - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::DisplayBacktrace as core::fmt::Display>::fmt::h254ba81a1e20fed0
::fmt::h254ba81a1e20fed0
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:44:22
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:44:22
4: 0x5601e49acc4e - core:: 4: 0x5601e49acc4e - core::fmt::write::h232ccd94259bfe24
fmt::write::h232ccd94259bfe24
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/fmt/mod.rs:1213:17
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/fmt/mod.rs:1213:17
5: 0x5601e4989ef5 - std::io 5: 0x5601e4989ef5 - std::io::Write::write_fmt::h963cfaecfdd596f7
::Write::write_fmt::h963cfaecfdd596f7
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/io/mod.rs:1682:15
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/io/mod.rs:1682:15
6: 0x5601e498be55 - std:: 6: 0x5601e498be55 - std::sys_common::backtrace::_print::h6fbc4343523214ce
sys_common::backtrace::_print::h6fbc4343523214ce
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:47:5
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:47:5
7: 0x5601e498be55 - std::sys_common::backtrace 7: 0x5601e498be55 - std::sys_common::backtrace::print::h55ab07cec21aacd5
::print::h55ab07cec21aacd5
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:34:9
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:34:9
8: 0x5601e498d63f - std::panicking 8: 0x5601e498d63f - std::panicking::default_hook::{{closure}}::hc10df65206eee69e
::default_hook::{{closure}}::hc10df65206eee69e
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:267:22
267:22
9: 0x5601e498d37b - std:: 9: 0x5601e498d37b - std::panicking::default_hook::hdd684731d8d78925
panicking::default_hook::hdd684731d8d78925
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:286:9
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:286:9
10: 0x5601e498dd49 - std::panicking 10: 0x5601e498dd49 - std::panicking::rust_panic_with_hook::h58681788b2d08dc0
::rust_panic_with_hook::h58681788b2d08dc0
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:688:13
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:688:13
11: 0x5601e498dae9 - std:: 11: 0x5601e498dae9 - std::panicking::begin_panic_handler::{{closure}}::he6d9da406579493c
panicking::begin_panic_handler::{{closure}}::he6d9da406579493c
at at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:579:13
/rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:579:13
12: 0x5601e498c53c - std::sys_common::backtrace::12: 0x5601e498c53c - std::sys_common::backtrace::__rust_end_short_backtrace::h5b1f3b233c047d47
__rust_end_short_backtrace::h5b1f3b233c047d47
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:137:18
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:137:18
13: 0x5601e498d7f2 - rust_begin_unwind 13: 0x5601e498d7f2 - rust_begin_unwind
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:575:5
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:575:5
14: 0x5601e4696783 - 14: 0x5601e4696783 - core::panicking::panic_fmt::hea602a2467b5109d
core::panicking::panic_fmt::hea602a2467b5109d
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/panicking.rs:64:14
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/panicking.rs:64:14
15: 15: 0x5601e4696c33 - core::result::unwrap_failed::he3f6a4db4030a3f8
0x5601e4696c33 - core::result::unwrap_failed::he3f6a4db4030a3f8
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/result.rs: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/result.rs:1790:5
1790:5
16: 0x5601e47ac882 - core::result:: 16: 0x5601e47ac882 - core::result::Result<T,E>::unwrap::hd1617ae5f4a1be4c
Result<T,E>::unwrap::hd1617ae5f4a1be4c
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/result.rs:1112:23
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/result.rs:1112:23
17: 0x5601e47c8eb8 - pdf_image_extractor::main:: 17: 0x5601e47c8eb8 - pdf_image_extractor::main::h0dbf0546c6f40cd8
at /pathToProjectgit/pdf-image-extractor/src/main.rsh0dbf0546c6f40cd8
at /pathToProjectgit/pdf-image-extractor/src/main.rs:39:13
:39:13
18: 0x5601e47041d2 - 18: 0x5601e47041d2 - core::ops::function::FnOnce::call_once::h1414c805c2cd5bad
core::ops::function::FnOnce::call_once::h1414c805c2cd5bad
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/ops/function.rs:250 at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/ops/function.rs:250:5
:5
19: 0x5601e46e1ef5 - 19: 0x5601e46e1ef5 - std::sys_common::backtrace::__rust_begin_short_backtrace::h04581fca3248f830
std::sys_common::backtrace::__rust_begin_short_backtrace::h04581fca3248f830
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:121:18
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/sys_common/backtrace.rs:121:18
20: 0x5601e47405b6 - 20: 0x5601e47405b6 - std::rt::lang_start::{{closure}}::hb47817ff59d8bd3e
std::rt::lang_start::{{closure}}::hb47817ff59d8bd3e
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/rt.rs at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/rt.rs:166:18
:166:18
21: 0x5601e498651c - 21: 0x5601e498651c - core::ops::function::impls::<impl core::ops::function::FnOncecore::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once::h2dd1a24ae3e0569f
<A> for &F>::call_once::h2dd1a24ae3e0569f
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/ops/function.rs:287:13
22: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/core/src/ops/function.rs:287:13
22: 0x5601e498651c - std::panicking::try::do_call::h71e38d3ed05d0919
0x5601e498651c - std::panicking::try::do_call::h71e38d3ed05d0919
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:483:40
23: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:483:40
23: 0x5601e498651c - std::panicking::try::h9dd8fea17c119511
0x5601e498651c - std::panicking::try::h9dd8fea17c119511
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:447:19
24: 0x5601e498651c - 447:19
24: 0x5601e498651c - std::panic::catch_unwind::h073a10d358958706
std::panic::catch_unwind::h073a10d358958706
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panic.rs:140:14
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panic.rs:140:14
25: 0x5601e498651c - std::rt 25: 0x5601e498651c - std::rt::lang_start_internal::{{closure}}::h0cf5d9b5652f6b98
::lang_start_internal::{{closure}}::h0cf5d9b5652f6b98
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/rt.rs at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/rt.rs:148:48
26: 0x5601e498651c:148:48
26: 0x5601e498651c - std::panicking::try::do_call::hc59ab1d339fa21e7
- std::panicking::try::do_call::hc59ab1d339fa21e7
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:483:40
27: 0x5601e498651c - 483:40
27: 0x5601e498651c - std::panicking::try::h40dd3124b394a6da
std::panicking::try::h40dd3124b394a6da
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:447:19 at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panicking.rs:447:19
28: 0x5601e498651c - std::panic::catch_unwind::hff10c6c48e0fc17d
28: 0x5601e498651c - std::panic::catch_unwind::hff10c6c48e0fc17d
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panic.rs: at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/panic.rs:140:14
29: 0x5601e498651c - 140:14
29: 0x5601e498651c - std::rt::lang_start_internal::h7868f0ffe3ad1ec2
std::rt::lang_start_internal::h7868f0ffe3ad1ec2
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/rt.rs:148:20
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/rt.rs:148:20
30: 0x 30: 0x5601e474058a - std::rt::lang_start::h80b50788ba6911f9
5601e474058a - std::rt::lang_start::h80b50788ba6911f9
at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/rt.rs:165 at /rustc/8460ca823e8367a30dda430efda790588b8c84d3/library/std/src/rt.rs:165:17
:17
31: 0x5601e47ca76e - 31: 0x5601e47ca76e - main
main
32: 0x7f64dccdd790 - 32: 0x7f64dccdd790 - <unknown>
33: 0x7f64dccdd84a - <unknown>
33: 0x7f64dccdd84a - __libc_start_main
__libc_start_main
34: 0x5601e4696e15 - 34: 0x5601e4696e15 - _start
35: _start
35: 0x0 - <unknown>
0x0 - <unknown>