Skip to content

Commit

Permalink
perf(es/fast-lexer): Optimize bound checks (#10157)
Browse files Browse the repository at this point in the history
**Description:**

These bound checks are unnecessary, and removing them has a noticeable performance impact on macOS.


<img width="1840" alt="image"
src="https://github.com/user-attachments/assets/12073f7c-f0bb-4e8c-81d0-9176312a15d5"
/>
  • Loading branch information
kdy1 authored Mar 6, 2025
1 parent fb610b0 commit d74360e
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 44 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ resolver = "2"
ascii = "1.1.0"
assert_cmd = "2.0.12"
assert_fs = "1.0.13"
assume = "0.5.0"
auto_impl = "1.2.0"
backtrace = "0.3.61"
base64 = "0.22.1"
Expand Down
1 change: 1 addition & 0 deletions crates/swc_ecma_fast_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ swc_atoms = { version = "5.0.0", path = "../swc_atoms" }
swc_common = { version = "8.0.0", path = "../swc_common" }
swc_ecma_ast = { version = "8.0.0", path = "../swc_ecma_ast" }

assume = { workspace = true }
num-bigint = { workspace = true }
phf = { workspace = true, features = ["macros"] }
wide = { workspace = true }
Expand Down
11 changes: 6 additions & 5 deletions crates/swc_ecma_fast_parser/src/lexer/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
//!
//! This cursor operates directly on UTF-8 bytes for maximum performance.
use assume::assume;
use swc_common::BytePos;
use wide::u8x16;

use crate::util::{likely, unlikely};
use crate::util::unlikely;

/// High-performance cursor for traversing input bytes
#[repr(C)] // Ensure predictable memory layout for better cache behavior
Expand Down Expand Up @@ -78,15 +79,15 @@ impl<'a> Cursor<'a> {
/// Advance the cursor by one byte
#[inline(always)]
pub fn advance(&mut self) {
if likely(!self.is_eof()) {
self.pos += 1;
}
assume!(unsafe: !self.is_eof());
self.pos += 1;
}

/// Advance the cursor by n bytes
#[inline(always)]
pub fn advance_n(&mut self, n: u32) {
self.pos = (self.pos + n).min(self.len);
assume!(unsafe: self.pos + n <= self.len);
self.pos += n;
}

/// Advance until the predicate returns false or EOF is reached
Expand Down
83 changes: 51 additions & 32 deletions crates/swc_ecma_fast_parser/src/lexer/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,8 @@ impl Lexer<'_> {
match len {
// Direct lookup for 2-letter keywords
2 => {
let word_bytes = [ident_bytes[0], ident_bytes[1]];
let word_bytes =
unsafe { [*ident_bytes.get_unchecked(0), *ident_bytes.get_unchecked(1)] };
let word_value = u16::from_be_bytes(word_bytes);

for &(keyword_value, token_type) in &KEYWORDS_LEN2 {
Expand All @@ -348,7 +349,14 @@ impl Lexer<'_> {

// Direct lookup for 3-letter keywords
3 => {
let word_bytes = [ident_bytes[0], ident_bytes[1], ident_bytes[2], 0];
let word_bytes = unsafe {
[
*ident_bytes.get_unchecked(0),
*ident_bytes.get_unchecked(1),
*ident_bytes.get_unchecked(2),
0,
]
};
let word_value = (u32::from_be_bytes(word_bytes)) >> 8;

for &(keyword_value, token_type) in &KEYWORDS_LEN3 {
Expand All @@ -365,12 +373,14 @@ impl Lexer<'_> {

// Direct lookup for 4-letter keywords
4 => {
let word_bytes = [
ident_bytes[0],
ident_bytes[1],
ident_bytes[2],
ident_bytes[3],
];
let word_bytes = unsafe {
[
*ident_bytes.get_unchecked(0),
*ident_bytes.get_unchecked(1),
*ident_bytes.get_unchecked(2),
*ident_bytes.get_unchecked(3),
]
};
let word_value = u32::from_be_bytes(word_bytes);

for &(keyword_value, token_type) in &KEYWORDS_LEN4 {
Expand All @@ -387,16 +397,18 @@ impl Lexer<'_> {

// Direct lookup for 5-letter keywords
5 => {
let word_bytes = [
ident_bytes[0],
ident_bytes[1],
ident_bytes[2],
ident_bytes[3],
ident_bytes[4],
0,
0,
0,
];
let word_bytes = unsafe {
[
*ident_bytes.get_unchecked(0),
*ident_bytes.get_unchecked(1),
*ident_bytes.get_unchecked(2),
*ident_bytes.get_unchecked(3),
*ident_bytes.get_unchecked(4),
0,
0,
0,
]
};
let word_value = (u64::from_be_bytes(word_bytes)) >> 24;

for &(keyword_value, token_type) in &KEYWORDS_LEN5 {
Expand All @@ -413,16 +425,18 @@ impl Lexer<'_> {

// Direct lookup for 6-letter keywords
6 => {
let word_bytes = [
ident_bytes[0],
ident_bytes[1],
ident_bytes[2],
ident_bytes[3],
ident_bytes[4],
ident_bytes[5],
0,
0,
];
let word_bytes = unsafe {
[
*ident_bytes.get_unchecked(0),
*ident_bytes.get_unchecked(1),
*ident_bytes.get_unchecked(2),
*ident_bytes.get_unchecked(3),
*ident_bytes.get_unchecked(4),
*ident_bytes.get_unchecked(5),
0,
0,
]
};
let word_value = (u64::from_be_bytes(word_bytes)) >> 16;

for &(keyword_value, token_type) in &KEYWORDS_LEN6 {
Expand All @@ -440,11 +454,16 @@ impl Lexer<'_> {
// Fast path for longer keywords using the lookup table
7..=16 => {
// Get index in KEYWORD_LOOKUP using our index table
let lookup_idx = KEYWORD_INDEX[len - 1][(ident_bytes[0] - b'a') as usize];
let first_char_idx = unsafe { (*ident_bytes.get_unchecked(0) - b'a') as usize };
let lookup_idx = unsafe {
*KEYWORD_INDEX
.get_unchecked(len - 1)
.get_unchecked(first_char_idx)
};

if lookup_idx != 255 {
// Check if the word matches the entry
let entry = &KEYWORD_LOOKUP[lookup_idx as usize];
let entry = unsafe { KEYWORD_LOOKUP.get_unchecked(lookup_idx as usize) };
if entry.keyword == ident_str {
if let Some(token_type) = entry.token_type {
return Ok(Token::new(
Expand Down Expand Up @@ -485,12 +504,12 @@ impl Lexer<'_> {
/// Super fast check for ASCII identifier start character
#[inline(always)]
pub(crate) fn is_ascii_id_start(ch: u8) -> bool {
ch < 128 && (IDENT_CHAR[ch as usize] & 1) != 0
ch < 128 && unsafe { (IDENT_CHAR.get_unchecked(ch as usize) & 1) != 0 }
}

/// Super fast check for ASCII identifier continue character
#[inline(always)]
pub(crate) fn is_ascii_id_continue(ch: u8) -> bool {
ch < 128 && (IDENT_CHAR[ch as usize] & 2) != 0
ch < 128 && unsafe { (IDENT_CHAR.get_unchecked(ch as usize) & 2) != 0 }
}
}
14 changes: 7 additions & 7 deletions crates/swc_ecma_fast_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ impl<'a> Lexer<'a> {

// Fast path for ASCII tokens using lookup table
if likely(ch < 128) {
let char_type = ASCII_LOOKUP[ch as usize];
let char_type = unsafe { *ASCII_LOOKUP.get_unchecked(ch as usize) };

// Fast path for single-character tokens (very common)
if char_type & CHAR_SPECIAL != 0 {
Expand Down Expand Up @@ -420,7 +420,7 @@ impl<'a> Lexer<'a> {

// Handle ASCII characters
if likely(ch < 128) {
let char_type = ASCII_LOOKUP[ch as usize];
let char_type = unsafe { *ASCII_LOOKUP.get_unchecked(ch as usize) };

// Fast path for common whitespace
if char_type & CHAR_WHITESPACE != 0 {
Expand Down Expand Up @@ -518,7 +518,7 @@ impl<'a> Lexer<'a> {
// Get current 16 bytes
let input = self.cursor.rest();
let mut data = [0u8; 16];
data.copy_from_slice(&input[0..16]);
data.copy_from_slice(unsafe { input.get_unchecked(0..16) });
let chunk = u8x16::new(data);

// Compare with our whitespace vectors
Expand All @@ -543,20 +543,20 @@ impl<'a> Lexer<'a> {
// This is more efficient than trying to process the entire chunk at once
// when we need to handle special cases like CR+LF and comments

if is_basic_ws_arr[0] != 0 {
if unsafe { *is_basic_ws_arr.get_unchecked(0) } != 0 {
// Regular whitespace - just advance
self.cursor.advance();
return true;
}

if is_newline_arr[0] != 0 {
if unsafe { *is_newline_arr.get_unchecked(0) } != 0 {
// Newline - need to set had_line_break
self.cursor.advance();
self.had_line_break = LineBreak::Present;
return true;
}

if is_cr_arr[0] != 0 {
if unsafe { *is_cr_arr.get_unchecked(0) } != 0 {
// Carriage return - need to check for CRLF sequence
self.cursor.advance();
if let Some(b'\n') = self.cursor.peek() {
Expand All @@ -566,7 +566,7 @@ impl<'a> Lexer<'a> {
return true;
}

if is_slash_arr[0] != 0 {
if unsafe { *is_slash_arr.get_unchecked(0) } != 0 {
// Potential comment - need to check next character
if let Some(b'/') | Some(b'*') = self.cursor.peek_at(1) {
return false; // Let the caller handle comments
Expand Down

0 comments on commit d74360e

Please sign in to comment.