diff --git a/core/parser/src/parser/mod.rs b/core/parser/src/parser/mod.rs index 5c4bb0ed622..d711dd69a7c 100644 --- a/core/parser/src/parser/mod.rs +++ b/core/parser/src/parser/mod.rs @@ -6,6 +6,8 @@ mod statement; pub(crate) mod function; +mod parse_loop; + #[cfg(test)] mod tests; @@ -15,6 +17,7 @@ use crate::{ parser::{ cursor::Cursor, function::{FormalParameters, FunctionStatementList}, + parse_loop::{ControlFlow, ParseLoop, ParsedNode, SavedState, TokenLoopParser}, }, source::ReadChar, Error, Source, @@ -387,15 +390,17 @@ where type Output = StatementList; fn parse(self, cursor: &mut Cursor, interner: &mut Interner) -> ParseResult { - let (body, _end) = statement::StatementList::new( + let entry = statement::StatementList::new( false, false, false, &[], self.directive_prologues, self.strict, - ) - .parse(cursor, interner)?; + ); + let stmt_list_node = ParseLoop::parse_loop(cursor, interner, entry)?; + let (body, _end) = (stmt_list_node.list, stmt_list_node.pos); + // let (body, _end) = entry.parse(cursor, interner)?; if !self.direct_eval { // It is a Syntax Error if StatementList Contains super unless the source text containing super is eval diff --git a/core/parser/src/parser/parse_loop.rs b/core/parser/src/parser/parse_loop.rs new file mode 100644 index 00000000000..e57e803ad0e --- /dev/null +++ b/core/parser/src/parser/parse_loop.rs @@ -0,0 +1,217 @@ +use boa_ast::declaration::LexicalDeclaration; +use boa_ast::Position; +use boa_interner::Interner; + +use boa_ast as ast; +use crate::error::{ParseResult, Error}; +use crate::lexer::Token; +use crate::source::ReadChar; +use crate::parser::Cursor; +use crate::parser::statement::{StatementList, StatementListLocal, StatementListNode}; + +#[macro_export] +macro_rules! 
parse_cmd { + // or move into `pop_local_state!` + [[POP LOCAL]: $state:ident => Empty] => {{ + let Ok($crate::parser::SavedState::Empty) = $state.pop_local_state() else { + return Err($state.general_error(concat!("expect `Empty` saved state"))) + }; + }}; + // or move into `pop_local_state!` + [[POP LOCAL]: $state:ident => $variant:ident] => {{ + let Ok($crate::parser::SavedState::$variant(ret)) = $state.pop_local_state() else { + return Err($state.general_error(concat!("expect `", stringify!($variant) ,"Local` saved state"))) + }; + ret + }}; + + // or move into `pop_node!` + [[POP NODE]: $state:ident => $variant:ident] => {{ + let Ok($crate::parser::ParsedNode::$variant(ret)) = $state.pop_node() else { + return Err($state.general_error(concat!("expect `", stringify!($variant) ,"` node"))) + }; + ret + }}; + + // or move into `peek_node!` + [[PEEK NODE]: $state:ident match $($variants:ident)+] => {{ + if !matches!($state.peek_node(), Some($(| $crate::parser::ParsedNode::$variants(_))+)) { + return Err($state.general_error(concat!("expect `", $(stringify!($variants), "` | `",)+ "` node"))) + } + }}; + + // or move into `sub_parse!` + [[SUB PARSE]: $item:expr; $state:ident <= $local:ident as $variant:ident ($point:literal)] => {{ + $state.push_local($crate::parser::SavedState::$variant($local)); + return ParseResult::Ok($crate::parser::ControlFlow::SubParse { node: Box::new($item), point: $point }); + }}; + // or move into `sub_parse!` + [[SUB PARSE]: $item:expr; $state:ident <= Empty ($point:literal)] => {{ + $state.push_local($crate::parser::SavedState::Empty); + return ParseResult::Ok($crate::parser::ControlFlow::SubParse { node: Box::new($item), point: $point }); + }}; + + // or move into `parse_done!` + [[DONE]: $state:ident <= $variant:ident($node:expr)] => {{ + $state.push_node($crate::parser::ParsedNode::$variant($node)); + return Ok($crate::parser::ControlFlow::Done) + }}; +} + +pub(super) struct ParseLoop; + +impl ParseLoop { + pub(super) fn parse_loop( + 
cursor: &mut Cursor, + interner: &mut Interner, + entry: StatementList + ) -> ParseResult { + let mut state: ParseState<'_, R> = ParseState::new(cursor, interner); + + let mut parse_stack: Vec>> = vec![Box::new(entry)]; + let mut continue_points = vec![0]; + + loop { + debug_assert!(!parse_stack.is_empty()); + debug_assert_eq!(continue_points.len(), parse_stack.len()); + + // SAFETY: + // we push (entry, 0) on first iteration & it will pop only on + // last `ControlFlow::Done` after which this function returns + let continue_point = continue_points.pop().unwrap(); + let parser = parse_stack.last_mut().unwrap(); + + match parser.parse_loop(&mut state, continue_point)? { + ControlFlow::SubParse { node, point } => { + continue_points.push(point); // reinsert current updated `continue_point` + continue_points.push(0); // insert continue point for new sub parsing node + + parse_stack.push(node); + } + ControlFlow::Done => { + // remove parsing node from stack (`continue_point` already removed) + parse_stack.pop(); + + if parse_stack.is_empty() { + let stmt_list_node = parse_cmd![[POP NODE]: state => StatementList]; + assert!(state.nodes.is_empty()); + return Ok(stmt_list_node) + } + } + } + + } + } +} + +/// Trait implemented by parsers. +/// +/// This makes it possible to abstract over the underlying implementation of a parser. +pub(super) trait TokenLoopParser +where + R: ReadChar, +{ + /// Parses the token stream using the current parser. + /// + /// This method needs to be provided by the implementor type. + /// + /// # Errors + /// + /// It will fail if the cursor is not placed at the beginning of the expected non-terminal. 
+ fn parse_loop(&mut self, state: &mut ParseState<'_, R>, continue_point: usize) -> ParseResult>; +} + +pub(super) enum ControlFlow +where R: ReadChar, +{ + SubParse{node: Box>, point: usize}, + Done, +} + +pub(super) struct ParseState<'a, R> { + nodes: Vec, + saved_state: Vec, + pub cursor: &'a mut Cursor, + pub interner: &'a mut Interner, +} +impl<'a, R: ReadChar> ParseState<'a, R> { + pub(super) fn new(cursor: &'a mut Cursor, interner: &'a mut Interner) -> Self { + Self { + nodes: Vec::new(), + saved_state: Vec::new(), + cursor, + interner, + } + } + pub(super) fn mut_inner(&mut self) -> (&mut Cursor, &mut Interner) { + (&mut self.cursor, &mut self.interner) + } + + pub(super) fn cursor(&mut self) -> &Cursor { + &self.cursor + } + pub(super) fn cursor_mut(&mut self) -> &mut Cursor { + &mut self.cursor + } + pub(super) fn interner(&self) -> &Interner { + &self.interner + } + pub(super) fn interner_mut(&mut self) -> &mut Interner { + &mut self.interner + } + + pub(super) fn push_node(&mut self, node: ParsedNode) { + self.nodes.push(node); + } + pub(super) fn push_local(&mut self, local: SavedState) { + self.saved_state.push(local); + } + + pub(super) fn pop_node(&mut self) -> ParseResult { + self.nodes.pop().ok_or_else(||self.general_error("expect parsed node")) + } + pub(super) fn peek_node(&mut self) -> Option<&ParsedNode> { + self.nodes.last() + } + + pub(super) fn pop_local_state(&mut self) -> ParseResult { + self.saved_state.pop().ok_or_else(||self.general_error("expect saved state")) + } + + pub(super) fn continue_point_error(&self, continue_point: usize) -> ParseResult { + Err(self.general_error(format!("unexpected continue point ({continue_point})"))) + } + + pub(super) fn general_error>(&self, msg: S) -> Error { + Error::general( + format!("{}; linear position: {}", msg.as_ref(), self.cursor.linear_pos()), + Position::new(1, 1) // TODO: something to take last position see `self.cursor.linear_pos()` + ) + } + + ///Peeks a future token, without consuming it 
or advancing the cursor. This peeking skips line terminators. + /// + /// You can skip some tokens with the `skip_n` option. + pub(super) fn peek(&mut self, skip_n: usize) -> ParseResult> { + self.cursor.peek(skip_n, &mut self.interner) + } + + /// Check if the peeked token is a line terminator. + pub(super) fn peek_is_line_terminator(&mut self, skip_n: usize) -> ParseResult> { + self.cursor.peek_is_line_terminator(skip_n, &mut self.interner) + } + +} + +pub(super) enum ParsedNode { + Empty, + StatementListItem(ast::StatementListItem), + StatementList(StatementListNode), + Declaration(ast::Declaration), + Statement(ast::Statement), +} + +pub(super) enum SavedState { + Empty, + StatementList(StatementListLocal), +} diff --git a/core/parser/src/parser/statement/declaration/hoistable/mod.rs b/core/parser/src/parser/statement/declaration/hoistable/mod.rs index 33aee09a9c7..3ebe88e56eb 100644 --- a/core/parser/src/parser/statement/declaration/hoistable/mod.rs +++ b/core/parser/src/parser/statement/declaration/hoistable/mod.rs @@ -27,6 +27,7 @@ use crate::{ source::ReadChar, Error, }; +use crate::{parse_cmd, parser::{ControlFlow, TokenLoopParser, parse_loop::ParseState}}; use boa_ast::{ self as ast, expression::Identifier, @@ -125,6 +126,14 @@ where } } +impl TokenLoopParser for HoistableDeclaration { + fn parse_loop(&mut self, state: &mut ParseState<'_, R>, _continue_point: usize) -> ParseResult> { + let (cursor, interner) = state.mut_inner(); + let ok = self.parse(cursor, interner)?; + parse_cmd!([DONE]: state <= Declaration(ok)) + } +} + trait CallableDeclaration { fn error_context(&self) -> &'static str; fn is_default(&self) -> bool; diff --git a/core/parser/src/parser/statement/declaration/lexical.rs b/core/parser/src/parser/statement/declaration/lexical.rs index 35ebfb7d345..eedd61f7de6 100644 --- a/core/parser/src/parser/statement/declaration/lexical.rs +++ b/core/parser/src/parser/statement/declaration/lexical.rs @@ -18,6 +18,7 @@ use crate::{ source::ReadChar, 
Error, }; +use crate::{parse_cmd, parser::{ControlFlow, TokenLoopParser, parse_loop::ParseState}}; use ast::operations::bound_names; use boa_ast::{self as ast, declaration::Variable, Keyword, Punctuator}; use boa_interner::{Interner, Sym}; @@ -121,6 +122,14 @@ where } } +impl TokenLoopParser for LexicalDeclaration { + fn parse_loop(&mut self, state: &mut ParseState<'_, R>, _continue_point: usize) -> ParseResult> { + let (cursor, interner) = state.mut_inner(); + let ok = self.parse(cursor, interner)?; + parse_cmd!([DONE]: state <= Declaration(ok.into())) + } +} + /// Check if the given token is valid after the `let` keyword of a lexical declaration. pub(crate) fn allowed_token_after_let(token: Option<&Token>) -> bool { matches!( diff --git a/core/parser/src/parser/statement/declaration/mod.rs b/core/parser/src/parser/statement/declaration/mod.rs index 15e6c72f8d7..7d9a047e3ca 100644 --- a/core/parser/src/parser/statement/declaration/mod.rs +++ b/core/parser/src/parser/statement/declaration/mod.rs @@ -23,10 +23,7 @@ pub(in crate::parser) use self::{ lexical::{allowed_token_after_let, LexicalDeclaration}, }; use crate::{ - lexer::TokenKind, - parser::{AllowAwait, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser}, - source::ReadChar, - Error, + lexer::TokenKind, parse_cmd, parser::{parse_loop::ParseState, AllowAwait, AllowYield, ControlFlow, Cursor, OrAbrupt, ParseResult, TokenLoopParser, TokenParser}, source::ReadChar, Error }; use boa_ast::{self as ast, Keyword}; use boa_interner::{Interner, Sym}; @@ -93,6 +90,43 @@ where } } +impl TokenLoopParser for Declaration { + fn parse_loop(&mut self, state: &mut ParseState<'_, R>, continue_point: usize) -> ParseResult> { + if continue_point == 1 { + parse_cmd![[POP LOCAL]: state => Empty]; + parse_cmd![[PEEK NODE]: state match Declaration]; // pass value up + return Ok(ControlFlow::Done) + } else if continue_point > 1 { + return state.continue_point_error(continue_point) + } + + let tok = state.cursor.peek(0, 
state.interner).or_abrupt()?; + + match tok.kind() { + TokenKind::Keyword((Keyword::Function | Keyword::Async | Keyword::Class, _)) => { + let node = HoistableDeclaration::new(self.allow_yield, self.allow_await, false); + parse_cmd![[SUB PARSE]: node; state <= Empty (1)]; + } + TokenKind::Keyword((Keyword::Const | Keyword::Let, _)) => { + let node = LexicalDeclaration::new(true, self.allow_yield, self.allow_await, false); + parse_cmd![[SUB PARSE]: node; state <= Empty (1)]; + } + _ => return Err(Error::expected( + [ + Keyword::Function.to_string(), + Keyword::Async.to_string(), + Keyword::Class.to_string(), + Keyword::Const.to_string(), + Keyword::Let.to_string(), + ], + tok.to_string(state.interner), + tok.span(), + "export declaration", + )), + } + } +} + /// Parses a `from` clause. /// /// More information: diff --git a/core/parser/src/parser/statement/mod.rs b/core/parser/src/parser/statement/mod.rs index 8801b5b0ccb..1a1e8a4d913 100644 --- a/core/parser/src/parser/statement/mod.rs +++ b/core/parser/src/parser/statement/mod.rs @@ -39,13 +39,9 @@ use self::{ with::WithStatement, }; use crate::{ - lexer::{token::EscapeSequence, Error as LexError, InputElement, Token, TokenKind}, - parser::{ - expression::{BindingIdentifier, Initializer, PropertyName}, - AllowAwait, AllowReturn, AllowYield, Cursor, OrAbrupt, ParseResult, TokenParser, - }, - source::ReadChar, - Error, + lexer::{token::EscapeSequence, Error as LexError, InputElement, Token, TokenKind}, parse_cmd, parser::{ + expression::{BindingIdentifier, Initializer, PropertyName}, parse_loop::ParseState, AllowAwait, AllowReturn, AllowYield, ControlFlow, Cursor, OrAbrupt, ParseResult, ParsedNode, TokenLoopParser, TokenParser + }, source::ReadChar, Error }; use ast::{ operations::{all_private_identifiers_valid, check_labels, contains_invalid_object_literal}, @@ -226,6 +222,14 @@ where } } +impl TokenLoopParser for Statement { + fn parse_loop(&mut self, state: &mut ParseState<'_, R>, _continue_point: usize) -> 
ParseResult> { + let (cursor, interner) = state.mut_inner(); + let ok = self.parse(cursor, interner)?; + parse_cmd!([DONE]: state <= Statement(ok)) + } +} + /// Reads a list of statements. /// /// More information: @@ -378,6 +382,144 @@ where } } +pub(super) struct StatementListNode { + pub list: ast::StatementList, + pub pos: Option, +} + +pub(super) struct StatementListLocal { + items: Vec, + directives_stack: Vec<(Position, EscapeSequence)>, + linear_pos_end: boa_ast::LinearPosition, + end_position: Option, + global_strict: bool, + directive_prologues: bool, + strict: bool, +} +impl StatementListLocal { + pub(super) fn new(stmt_list: &StatementList, state: &mut ParseState<'_, R>) -> Self { + Self { + items: Vec::new(), + + directives_stack: Vec::new(), + linear_pos_end: state.cursor().linear_pos(), + end_position: None, + + global_strict: state.cursor().strict(), + directive_prologues: stmt_list.directive_prologues, + strict: stmt_list.strict, + } + } +} + +impl TokenLoopParser for StatementList +where + R: ReadChar, +{ + fn parse_loop(&mut self, state: &mut ParseState<'_, R>, continue_point: usize) -> ParseResult> { + let mut local = match continue_point { + 0 => StatementListLocal::new(&self, state), + 1 => { + let mut local = parse_cmd![[POP LOCAL]: state => StatementList]; + Self::continue_point_list_item(&mut local, state)?; + local + } + _ => return state.continue_point_error(continue_point) + }; + + loop { + let peek_token = state.peek(0)?; + if let Some(peek_token) = peek_token { + local.linear_pos_end = peek_token.linear_span().end(); + local.end_position = Some(peek_token.span().end()); + } + + match peek_token { + Some(token) if self.break_nodes.contains(token.kind()) => break, + Some(token) if local.directive_prologues => { + if let TokenKind::StringLiteral((_, escape)) = token.kind() { + local.directives_stack.push((token.span().start(), *escape)); + } + } + None => break, + _ => {} + } + + let item = StatementListItem::new(self.allow_yield, 
self.allow_await, self.allow_return); + parse_cmd![[SUB PARSE]: item; state <= local as StatementList (1)]; + } + + state.cursor_mut().set_strict(local.global_strict); + + let node = StatementListNode { + list: ast::StatementList::new(local.items, local.linear_pos_end, local.strict), + pos: local.end_position, + }; + parse_cmd![[DONE]: state <= StatementList(node)]; + } +} + +impl StatementList { + fn continue_point_list_item(local: &mut StatementListLocal, state: &mut ParseState<'_, R>) -> ParseResult<()> + where R: ReadChar, + { + let item = parse_cmd![[POP NODE]: state => StatementListItem]; + + if local.directive_prologues { + if let ast::StatementListItem::Statement(statement) = &item { + if let ast::Statement::Expression(ast::Expression::Literal(lit)) = + statement.as_ref() + { + if let Some(string) = lit.as_string() { + if local.strict { + // TODO: should store directives in some place + } else if state.interner().resolve_expect(string).join( + |s| s == "use strict", + |g| g == utf16!("use strict"), + true, + ) && local.directives_stack.last().expect("token should exist").1 + == EscapeSequence::empty() + { + state.cursor_mut().set_strict(true); + local.strict = true; + + local.directives_stack.pop(); + + for (position, escape) in std::mem::take(&mut local.directives_stack) { + if escape.contains(EscapeSequence::LEGACY_OCTAL) { + return Err(Error::general( + "legacy octal escape sequences are not allowed in strict mode", + position, + )); + } + + if escape.contains(EscapeSequence::NON_OCTAL_DECIMAL) { + return Err(Error::general( + "decimal escape sequences are not allowed in strict mode", + position, + )); + } + } + } + } else { + local.directive_prologues = false; + local.directives_stack.clear(); + } + } else { + local.directive_prologues = false; + local.directives_stack.clear(); + } + } else { + local.directive_prologues = false; + local.directives_stack.clear(); + } + } + + local.items.push(item); + Ok(()) + } +} + /// Statement list item parsing /// 
/// A statement list item can either be an statement or a declaration. @@ -461,6 +603,60 @@ where } } +impl TokenLoopParser for StatementListItem +where + R: ReadChar, +{ + fn parse_loop(&mut self, state: &mut ParseState<'_, R>, continue_point: usize) -> ParseResult> { + if continue_point == 1 { + parse_cmd![[POP LOCAL]: state => Empty]; + match state.pop_node()? { + ParsedNode::Declaration(decl) => { + parse_cmd![[DONE]: state <= StatementListItem(ast::StatementListItem::from(decl))] + } + ParsedNode::Statement(stmt) => { + parse_cmd![[DONE]: state <= StatementListItem(ast::StatementListItem::from(stmt))] + } + _ => return Err(state.general_error(concat!("expect `Declaration` or `Statement` node"))) + } + } else if continue_point > 1 { + return state.continue_point_error(continue_point) + } + + let tok = state.peek(0).or_abrupt()?; + + let decl = Declaration::new(self.allow_yield, self.allow_await); + let stmt = Statement::new(self.allow_yield, self.allow_await, self.allow_return); + + match tok.kind().clone() { + TokenKind::Keyword((Keyword::Function | Keyword::Class | Keyword::Const, _)) => { + parse_cmd![[SUB PARSE]: decl; state <= Empty (1)] + } + TokenKind::Keyword((Keyword::Let, false)) if allowed_token_after_let(state.peek(1)?) => { + parse_cmd![[SUB PARSE]: decl; state <= Empty (1)] + } + TokenKind::Keyword((Keyword::Async, false)) => { + let skip_n = if state.peek_is_line_terminator(0).or_abrupt()? { + 2 + } else { + 1 + }; + + let is_line_terminator = state.peek_is_line_terminator(skip_n)?.unwrap_or(true); + + match state.peek(1)?.map(Token::kind) { + Some(TokenKind::Keyword((Keyword::Function, _))) if !is_line_terminator => { + parse_cmd![[SUB PARSE]: decl; state <= Empty (1)] + } + _ => parse_cmd![[SUB PARSE]: stmt; state <= Empty (1)] + } + } + _ => parse_cmd![[SUB PARSE]: stmt; state <= Empty (1)] + } + } +} + + /// `ObjectBindingPattern` pattern parsing. /// /// More information: