Skip to content

refactor(es/lexer): token eof #10880

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
//! x Expected ':', got '<eof>'
//! ,-[1:1]
//! 1 | a ? b ? c : (d) : e => f // Legal JS
//! : ^
//! : ^^
//! 2 |
//! `----
//// [fileTs.ts]
//! x Expected ':', got '<eof>'
//! ,----
//! 1 | a ? b ? c : (d) : e => f
//! : ^
//! : ^^
//! `----
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
//! x Expected ':', got '<eof>'
//! ,-[1:1]
//! 1 | a ? b ? c : (d) : e => f // Legal JS
//! : ^
//! : ^^
//! 2 |
//! `----
//// [fileTs.ts]
//! x Expected ':', got '<eof>'
//! ,----
//! 1 | a ? b ? c : (d) : e => f
//! : ^
//! : ^^
//! `----
1 change: 1 addition & 0 deletions crates/swc_common/src/syntax_pos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ impl Span {
}

#[inline]
#[track_caller]
pub fn new_with_checked(lo: BytePos, hi: BytePos) -> Self {
debug_assert!(lo <= hi, "lo: {lo:#?}, hi: {hi:#?}");
Span { lo, hi }
Expand Down
23 changes: 11 additions & 12 deletions crates/swc_ecma_lexer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,17 @@ typescript = []
verify = ["swc_ecma_visit"]

[dependencies]
arrayvec = { workspace = true }
bitflags = { workspace = true }
either = { workspace = true }
new_debug_unreachable = { workspace = true }
num-bigint = { workspace = true }
phf = { workspace = true, features = ["macros"] }
rustc-hash = { workspace = true }
seq-macro = { workspace = true }
serde = { workspace = true, features = ["derive"] }
smallvec = { workspace = true }
smartstring = { workspace = true }
tracing = { workspace = true }
arrayvec = { workspace = true }
bitflags = { workspace = true }
either = { workspace = true }
num-bigint = { workspace = true }
phf = { workspace = true, features = ["macros"] }
rustc-hash = { workspace = true }
seq-macro = { workspace = true }
serde = { workspace = true, features = ["derive"] }
smallvec = { workspace = true }
smartstring = { workspace = true }
tracing = { workspace = true }

swc_atoms = { version = "7.0.0", path = "../swc_atoms" }
swc_common = { version = "14.0.1", path = "../swc_common" }
Expand Down
16 changes: 8 additions & 8 deletions crates/swc_ecma_lexer/src/common/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1730,7 +1730,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}

/// `#`
fn read_token_number_sign(&mut self) -> LexResult<Option<Self::Token>> {
fn read_token_number_sign(&mut self) -> LexResult<Self::Token> {
debug_assert!(self.cur().is_some_and(|c| c == '#'));

self.bump(); // '#'
Expand All @@ -1741,7 +1741,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
!self.input().is_at_start() || self.cur() != Some('!'),
"#! should have already been handled by read_shebang()"
);
Ok(Some(Self::Token::HASH))
Ok(Self::Token::HASH)
}

/// Read a token given `.`.
Expand Down Expand Up @@ -1936,14 +1936,14 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}

#[inline(never)]
fn read_slash(&mut self) -> LexResult<Option<Self::Token>> {
fn read_slash(&mut self) -> LexResult<Self::Token> {
debug_assert_eq!(self.cur(), Some('/'));
self.bump(); // '/'
Ok(Some(if self.eat(b'=') {
Ok(if self.eat(b'=') {
Self::Token::DIV_EQ
} else {
Self::Token::DIV
}))
})
}

/// This can be used if there's no keyword starting with the first
Expand Down Expand Up @@ -2084,7 +2084,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
fn read_keyword_with(
&mut self,
convert: &dyn Fn(&str) -> Option<Self::Token>,
) -> LexResult<Option<Self::Token>> {
) -> LexResult<Self::Token> {
debug_assert!(self.cur().is_some());

let start = self.cur_pos();
Expand All @@ -2100,11 +2100,11 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
SyntaxError::EscapeInReservedWord { word: Atom::new(s) },
)
} else {
Ok(Some(word))
Ok(word)
}
} else {
let atom = self.atom(s);
Ok(Some(Self::Token::unknown_ident(atom, self)))
Ok(Self::Token::unknown_ident(atom, self))
}
}

Expand Down
6 changes: 6 additions & 0 deletions crates/swc_ecma_lexer/src/common/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,15 @@ pub trait TokenFactory<'a, TokenAndSpan, I: Tokens<TokenAndSpan>>: Sized + Parti
const CASE: Self;
const DEFAULT: Self;
const DEBUGGER: Self;
const EOF: Self;

fn jsx_name(name: &'a str, lexer: &mut Self::Lexer) -> Self;
fn is_jsx_name(&self) -> bool;
fn take_jsx_name(self, buffer: &mut Self::Buffer) -> Atom;

fn str(value: Atom, raw: Atom, lexer: &mut Self::Lexer) -> Self;
fn is_str(&self) -> bool;
fn is_str_raw_content(&self, content: &str, buffer: &Self::Buffer) -> bool;
fn take_str(self, buffer: &mut Self::Buffer) -> (Atom, Atom);

fn template(cooked: LexResult<Atom>, raw: Atom, lexer: &mut Self::Lexer) -> Self;
Expand Down Expand Up @@ -586,6 +588,10 @@ pub trait TokenFactory<'a, TokenAndSpan, I: Tokens<TokenAndSpan>>: Sized + Parti
fn is_exp(&self) -> bool {
Self::EXP.eq(self)
}
#[inline(always)]
fn is_eof(&self) -> bool {
Self::EOF.eq(self)
}
fn is_no_substitution_template_literal(&self) -> bool;
fn is_template_head(&self) -> bool;
}
87 changes: 32 additions & 55 deletions crates/swc_ecma_lexer/src/common/parser/buffer.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
use debug_unreachable::debug_unreachable;
use swc_atoms::Atom;
use swc_common::{BytePos, Span};
use swc_ecma_ast::EsVersion;

use super::token_and_span::TokenAndSpan as TokenAndSpanTrait;
use crate::common::{
context::Context, input::Tokens, lexer::token::TokenFactory, syntax::SyntaxFlags,
context::Context,
input::Tokens,
lexer::{token::TokenFactory, LexResult},
syntax::SyntaxFlags,
};

pub trait NextTokenAndSpan {
Expand All @@ -30,9 +33,9 @@ pub trait Buffer<'a> {
fn set_next(&mut self, token: Option<Self::Next>);
fn next_mut(&mut self) -> &mut Option<Self::Next>;

fn cur(&mut self) -> Option<&Self::Token>;
fn get_cur(&self) -> Option<&Self::TokenAndSpan>;
fn get_cur_mut(&mut self) -> &mut Option<Self::TokenAndSpan>;
fn cur(&self) -> &Self::Token;
fn get_cur(&self) -> &Self::TokenAndSpan;
fn get_cur_mut(&mut self) -> &mut Self::TokenAndSpan;

fn prev_span(&self) -> Span;
fn set_prev_span(&mut self, span: Span);
Expand All @@ -43,47 +46,34 @@ pub trait Buffer<'a> {

fn store(&mut self, token: Self::Token) {
debug_assert!(self.next().is_none());
debug_assert!(self.get_cur().is_none());
debug_assert!(!self.cur().is_eof());
let span = self.prev_span();
let token = Self::TokenAndSpan::new(token, span, false);
self.set_cur(token);
}

#[allow(dead_code)]
fn cur_debug<'b>(&'b self) -> Option<&'b Self::Token>
where
Self::TokenAndSpan: 'b,
{
self.get_cur().map(|it| it.token())
}

fn dump_cur(&mut self) -> String;

/// Returns current token.
fn bump(&mut self) -> Self::Token {
let prev = match self.get_cur_mut().take() {
Some(t) => t,
None => unsafe {
debug_unreachable!(
"Current token is `None`. Parser should not call bump() without knowing \
current token"
)
},
};
self.set_prev_span(prev.span());
prev.take_token()
}
fn dump_cur(&self) -> String;

/// find next token.
fn bump(&mut self);
fn expect_word_token_and_bump(&mut self) -> Atom;
fn expect_number_token_and_bump(&mut self) -> (f64, Atom);
fn expect_string_token_and_bump(&mut self) -> (Atom, Atom);
fn expect_bigint_token_and_bump(&mut self) -> (Box<num_bigint::BigInt>, Atom);
fn expect_regex_token_and_bump(&mut self) -> (Atom, Atom);
fn expect_template_token_and_bump(&mut self) -> (LexResult<Atom>, Atom);
fn expect_error_token_and_bump(&mut self) -> crate::error::Error;
fn expect_jsx_name_token_and_bump(&mut self) -> Atom;
fn expect_jsx_text_token_and_bump(&mut self) -> (Atom, Atom);
fn expect_shebang_token_and_bump(&mut self) -> Atom;

#[inline]
fn knows_cur(&self) -> bool {
self.get_cur().is_some()
!self.cur().is_eof()
}

fn had_line_break_before_cur(&mut self) -> bool {
self.cur();
self.get_cur()
.map(|it| it.had_line_break())
.unwrap_or_else(|| true)
fn had_line_break_before_cur(&self) -> bool {
self.get_cur().had_line_break()
}

/// This returns true on eof.
Expand Down Expand Up @@ -118,8 +108,8 @@ pub trait Buffer<'a> {
if span.hi != next.span().lo {
return;
}
let cur = self.get_cur_mut().take().unwrap();
let next = self.next_mut().take().unwrap();
let cur = self.get_cur();
let cur_token = cur.token();
let token = if cur_token.is_greater() {
let next_token = next.token();
Expand All @@ -139,7 +129,6 @@ pub trait Buffer<'a> {
// >>>=
Self::Token::ZERO_FILL_RSHIFT_EQ
} else {
self.set_cur(cur);
self.set_next(Some(next));
return;
}
Expand All @@ -155,12 +144,10 @@ pub trait Buffer<'a> {
// <<=
Self::Token::LSHIFT_EQ
} else {
self.set_cur(cur);
self.set_next(Some(next));
return;
}
} else {
self.set_cur(cur);
self.set_next(Some(next));
return;
};
Expand All @@ -170,8 +157,8 @@ pub trait Buffer<'a> {
}

#[inline(always)]
fn is(&mut self, expected: &Self::Token) -> bool {
self.cur().is_some_and(|cur| cur == expected)
fn is(&self, expected: &Self::Token) -> bool {
self.cur() == expected
}

#[inline(always)]
Expand All @@ -185,23 +172,13 @@ pub trait Buffer<'a> {

/// Returns start of current token.
#[inline]
fn cur_pos(&mut self) -> BytePos {
let _ = self.cur();
self.get_cur()
.map(|item| item.span().lo)
.unwrap_or_else(|| {
// eof
self.last_pos()
})
fn cur_pos(&self) -> BytePos {
self.get_cur().span().lo
}

#[inline]
fn cur_span(&self) -> Span {
let data = self
.get_cur()
.map(|item| item.span())
.unwrap_or(self.prev_span());
Span::new_with_checked(data.lo, data.hi)
self.get_cur().span()
}

/// Returns last byte position of previous token.
Expand Down
Loading
Loading