From ee96066a203eb7653dace45d5d758f9c151ed046 Mon Sep 17 00:00:00 2001 From: "liuqiang.06" Date: Wed, 6 Nov 2024 14:48:15 +0800 Subject: [PATCH] opt: optimize skip number --- Cargo.toml | 2 +- src/parser.rs | 40 +++++++++++++++++----------------------- src/value/node.rs | 15 +++++++++++---- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c477486..46fdc55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ itoa = "1.0" ryu = "1.0" serde = { version = "1.0", features = ["rc", "derive"] } simdutf8 = "0.1" -thiserror = "1.0" +thiserror = "2.0" [dev-dependencies] bytes = { version = "1.4", features = ["serde"] } diff --git a/src/parser.rs b/src/parser.rs index 937c9a2..77a1d8d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1248,9 +1248,7 @@ where // fast path for the single digit let mut is_float: bool = false; match second { - Some(b'0'..=b'9') => { - self.read.eat(1); - } + Some(b'0'..=b'9') => self.read.eat(1), Some(b'.') => { is_float = true; self.read.eat(1); @@ -1268,33 +1266,30 @@ where let v = unsafe { i8x32::from_slice_unaligned_unchecked(chunk) }; let zero = i8x32::splat(b'0' as i8); let nine = i8x32::splat(b'9' as i8); - let nondigits = (zero.gt(&v) | v.gt(&nine)).bitmask(); + let mut nondigits = (zero.gt(&v) | v.gt(&nine)).bitmask(); if nondigits != 0 { - let cnt = nondigits.trailing_zeros() as usize; + let mut cnt = nondigits.trailing_zeros() as usize; let ch = chunk[cnt]; if ch == b'.' && !is_float { self.read.eat(cnt + 1); // check the first digit after the dot self.skip_single_digit()?; - let traversed = cnt + 2; // check the remaining digits - let nondigts = nondigits.wrapping_shr((traversed) as u32); - if nondigts != 0 { - while let Some(ch) = self.read.peek() { - if ch == b'e' || ch == b'E' { - self.read.eat(1); - return self.skip_exponent(); - } else if ch.is_ascii_digit() { - self.read.eat(1); - continue; - } else { - return Ok(()); - } + cnt += 2; + nondigits = nondigits.wrapping_shr(cnt as u32); + if nondigits != 0 { + let offset = nondigits.trailing_zeros() as usize; + let ch = chunk[cnt + offset]; + if ch == b'e' || ch == b'E' { + self.read.eat(offset + 1); + return self.skip_exponent(); + } else { + self.read.eat(offset); + return Ok(()); } } else { - // long digits - self.read.eat(32 - traversed); + self.read.eat(32 - cnt); is_float = true; continue; } @@ -1305,10 +1300,9 @@ where self.read.eat(cnt); return Ok(()); } - } else { - // long digits - self.read.eat(32); } + // long digits + self.read.eat(32); } // has less than 32 bytes diff --git a/src/value/node.rs b/src/value/node.rs index 9ba1f51..58c9571 100644 --- a/src/value/node.rs +++ b/src/value/node.rs @@ -573,6 +573,7 @@ impl Value { } } + #[inline(always)] fn get_enum(&self) -> ValueRefInner<'_> { match self.unpack_ref() { ValueDetail::Null => ValueRefInner::Null, @@ -589,6 +590,7 @@ impl Value { } } + #[inline(always)] fn unpack_ref(&self) -> ValueDetail<'_> { unsafe { match self.meta.get_type() { @@ -2068,16 +2070,21 @@ mod test { #[test] fn test_arbitrary_precision() { - use crate::{Deserialize, Deserializer}; - - let nums = [ "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123", + use crate::Deserializer; + + let nums = [ + "43.420273000", + "1e123", + "0.001","0e+12","0.1e+12", + "0", "0.0", "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345e+1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", + "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123", "1.23456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567e89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123", "-0.000000023456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567e+89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123", ]; for num in nums { let mut de = Deserializer::from_str(num).use_rawnumber(); - let value: Value = Deserialize::deserialize(&mut de).unwrap(); + let value: Value = de.deserialize().unwrap(); assert_eq!(value.as_raw_number().unwrap().as_str(), num); assert_eq!(value.to_string(), num); }