Consider Adding Support for Disguised Fast-Path Cases
Alexhuszagh opened this issue · 0 comments
Issue
fast-float-rust does not handle disguised fast path cases, such as "1.2345e30"
, and therefore falls back to compute_float
when it's unnecessary. The changes are trivial, and lead to ~30% performance improvements for disguised fast-path cases without affecting other benchmarks. See rust-dec2flt for exact specifics on these benchmarks.
Benchmarks
These benchmarks were run on an i7-6560U CPU @ 2.20GHz, on a target of x86_64-unknown-linux-gnu, running kernel version 5.11.16-100, on a Rust version of rustc 1.53.0-nightly (132b4e5d1 2021-04-13). The performance CPU governor was used for all benchmarks, and were run consecutively on A/C power with only tmux and Sublime Text open for all benchmarks. The floats that were parsed are as follows:
// Example fast-path value.
const FAST: &str = "1.2345e22";
// Example disguised fast-path value.
const DISGUISED: &str = "1.2345e30";
// Example moderate path value: clearly not halfway `1 << 53`.
const MODERATE: &str = "9007199254740992.0";
// Example exactly-halfway value `(1<<53) + 1`.
const HALFWAY: &str = "9007199254740993.0";
// Example large, near-halfway value.
const LARGE: &str = "8.988465674311580536566680e307";
// Example denormal, near-halfway value.
const DENORMAL: &str = "8.442911973260991817129021e-309";
disguised
fast-float/fast time: [20.446 ns 20.530 ns 20.625 ns]
fast-float/disguised time: [20.771 ns 20.814 ns 20.870 ns]
fast-float/moderate time: [34.970 ns 35.268 ns 35.638 ns]
fast-float/halfway time: [34.346 ns 34.794 ns 35.337 ns]
fast-float/large time: [14.346 us 14.439 us 14.549 us]
fast-float/denormal time: [72.166 ns 72.652 ns 73.251 ns]
master
Note: I ran these benchmarks numerous times, to try to ensure fast-float/fast
was at least as fast as those for the disguised
case, but they never were. Since this introduced an additional branch when parsing, we should assume that the fast-path case is at least as fast here as on the disguised branch.
fast-float/fast time: [21.513 ns 21.614 ns 21.727 ns]
fast-float/disguised time: [30.283 ns 30.567 ns 30.903 ns]
fast-float/moderate time: [34.232 ns 34.408 ns 34.615 ns]
fast-float/halfway time: [34.598 ns 34.734 ns 34.897 ns]
fast-float/large time: [13.897 us 13.944 us 13.999 us]
fast-float/denormal time: [76.306 ns 76.799 ns 77.338 ns]
DIff
The changes are trivial, and do not affect binary size:
diff --git a/src/float.rs b/src/float.rs
index 39bec41..b31f1f2 100644
--- a/src/float.rs
+++ b/src/float.rs
@@ -5,6 +5,25 @@ mod private {
pub trait Sealed {}
}
+pub const INT_POW10: [u64; 16] = [
+ 1,
+ 10,
+ 100,
+ 1000,
+ 10000,
+ 100000,
+ 1000000,
+ 10000000,
+ 100000000,
+ 1000000000,
+ 10000000000,
+ 100000000000,
+ 1000000000000,
+ 10000000000000,
+ 100000000000000,
+ 1000000000000000,
+];
+
#[doc(hidden)]
pub trait Float:
Sized
@@ -31,6 +50,7 @@ pub trait Float:
const MAX_EXPONENT_ROUND_TO_EVEN: i32;
const MIN_EXPONENT_FAST_PATH: i64;
const MAX_EXPONENT_FAST_PATH: i64;
+ const MAX_DISGUISED_EXPONENT_FAST_PATH: i64;
const MINIMUM_EXPONENT: i32;
const INFINITE_POWER: i32;
const SIGN_INDEX: usize;
@@ -56,6 +76,7 @@ impl Float for f32 {
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
const MAX_EXPONENT_FAST_PATH: i64 = 10;
+ const MAX_DISGUISED_EXPONENT_FAST_PATH: i64 = 17;
const MINIMUM_EXPONENT: i32 = -127;
const INFINITE_POWER: i32 = 0xFF;
const SIGN_INDEX: usize = 31;
@@ -90,6 +111,7 @@ impl Float for f64 {
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
const MAX_EXPONENT_FAST_PATH: i64 = 22;
+ const MAX_DISGUISED_EXPONENT_FAST_PATH: i64 = 37;
const MINIMUM_EXPONENT: i32 = -1023;
const INFINITE_POWER: i32 = 0x7FF;
const SIGN_INDEX: usize = 63;
diff --git a/src/number.rs b/src/number.rs
index ecb76c2..4c78c97 100644
--- a/src/number.rs
+++ b/src/number.rs
@@ -1,5 +1,5 @@
use crate::common::{is_8digits_le, AsciiStr, ByteSlice};
-use crate::float::Float;
+use crate::float::{Float, INT_POW10};
const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000;
@@ -15,14 +15,15 @@ impl Number {
#[inline]
fn is_fast_path<F: Float>(&self) -> bool {
F::MIN_EXPONENT_FAST_PATH <= self.exponent
- && self.exponent <= F::MAX_EXPONENT_FAST_PATH
+ && self.exponent <= F::MAX_DISGUISED_EXPONENT_FAST_PATH
&& self.mantissa <= F::MAX_MANTISSA_FAST_PATH
&& !self.many_digits
}
#[inline]
pub fn try_fast_path<F: Float>(&self) -> Option<F> {
- if self.is_fast_path::<F>() {
+ let is_fast = self.is_fast_path::<F>();
+ if is_fast && self.exponent <= F::MAX_EXPONENT_FAST_PATH {
let mut value = F::from_u64(self.mantissa);
if self.exponent < 0 {
value = value / F::pow10_fast_path((-self.exponent) as _);
@@ -33,6 +34,19 @@ impl Number {
value = -value;
}
Some(value)
+ } else if is_fast {
+ let max_exp = F::MAX_EXPONENT_FAST_PATH;
+ let shift = self.exponent - max_exp;
+ let mant = self.mantissa.checked_mul(INT_POW10[shift as usize])?;
+ if mant > F::MAX_MANTISSA_FAST_PATH {
+ return None;
+ }
+ let mut value = F::from_u64(mant);
+ value = value * F::pow10_fast_path(max_exp as _);
+ if self.negative {
+ value = -value;
+ }
+ Some(value)
} else {
None
}