boa-dev · jasonwilliams · Jun 27, 2025 · Jun 28, 2025 · Jun 28, 2025 · Jul 1, 2025
diff --git a/core/parser/src/lexer/cursor.rs b/core/parser/src/lexer/cursor.rs
@@ -162,16 +162,38 @@ impl<R: ReadChar> Cursor<R> {
     /// It also stops when the next character is not an ascii or there is no next character.
     ///
     /// Note that all characters up until the stop character are added to the buffer, including the character right before.
-    pub(super) fn take_while_ascii_pred<F>(&mut self, buf: &mut Vec<u8>, pred: &F) -> io::Result<()>
+    ///
+    /// The accepted characters are written to the front of `buf`, and the filled prefix is
+    /// returned as a sub-slice of it.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `UnexpectedEof` error if `buf` is already full while the next character still
+    /// satisfies `pred`, and propagates any error raised while peeking or reading characters.
+    #[allow(clippy::cast_possible_truncation)]
+    #[inline]
+    pub(super) fn take_while_ascii_pred<'a, F>(
+        &mut self,
+        buf: &'a mut [u8],
+        pred: &F,
+    ) -> io::Result<&'a [u8]>
     where
         F: Fn(char) -> bool,
     {
+        let mut count = 0;
         loop {
             if !self.next_is_ascii_pred(pred)? {
-                return Ok(());
+                return Ok(&buf[..count]);
+            } else if count >= buf.len() {
+                // The buffer is full but another character matches: fail before consuming it,
+                // instead of panicking on an out-of-bounds write below.
+                return Err(Error::new(
+                    ErrorKind::UnexpectedEof,
+                    "Unexpected end of buffer while taking characters",
+                ));
             } else if let Some(byte) = self.next_char()? {
-                #[allow(clippy::cast_possible_truncation)]
-                buf.push(byte as u8);
+                // `next_is_ascii_pred` only accepts ASCII, so this cast should not truncate.
+                buf[count] = byte as u8;
+                count += 1;
             } else {
                 // next_is_pred will return false if the next value is None so the None case should already be handled.
                 unreachable!();

diff --git a/core/parser/src/lexer/regex.rs b/core/parser/src/lexer/regex.rs
@@ -3,8 +3,8 @@
 use crate::lexer::{Cursor, Error, Token, TokenKind, Tokenizer};
 use crate::source::ReadChar;
 use bitflags::bitflags;
-use boa_ast::{Position, PositionGroup};
-use boa_interner::{Interner, Sym};
+use boa_ast::PositionGroup;
+use boa_interner::Interner;
 use regress::{Flags, Regex};
 use std::fmt::{Display, Write};
 use std::str::{self, FromStr};
@@ -114,13 +114,19 @@ impl<R> Tokenizer<R> for RegexLiteral {
             }
         }
 
-        let mut flags = Vec::new();
+        // Stack buffer that receives the raw flag characters.
+        let mut flags = [0_u8; 8];
         let flags_start = cursor.pos();
-        cursor.take_while_ascii_pred(&mut flags, &char::is_alphabetic)?;
+        let flags_slice = cursor.take_while_ascii_pred(&mut flags, &char::is_alphabetic)?;
 
-        // SAFETY: We have already checked that the bytes are valid UTF-8.
-        let flags_str = unsafe { str::from_utf8_unchecked(flags.as_slice()) };
+        // Validate the flag bytes, reporting any problem as a syntax error located at
+        // `flags_start`, then render the parsed flags back into a string.
+        // TODO: Change this to if err() then convert flags_slice to str
+        let flags_string = RegExpFlags::from_bytes(flags_slice)
+            .map_err(|message| Error::Syntax(message.into(), flags_start))?
+            .to_string();
 
+        let flags_str = flags_string.as_str();
         let mut body_utf16 = Vec::new();
 
         // We convert the body to UTF-16 since it may contain code points that are not valid UTF-8.
@@ -149,7 +153,8 @@ impl<R> Tokenizer<R> for RegexLiteral {
         Ok(Token::new_by_position_group(
             TokenKind::regular_expression_literal(
                 interner.get_or_intern(body_utf16.as_slice()),
-                parse_regex_flags(flags_str, flags_start, interner)?,
+                // `flags_str` is already a `&str`; intern it directly, no extra copy.
+                interner.get_or_intern(flags_str),
             ),
             start_pos,
             cursor.pos_group(),
@@ -189,6 +193,48 @@ bitflags! {
     }
 }
 
+impl RegExpFlags {
+    /// Parses regular expression flags from raw bytes.
+    ///
+    /// NUL (`0x00`) bytes are skipped rather than rejected, presumably to tolerate zero-padded
+    /// buffers.
+    ///
+    /// # Errors
+    ///
+    /// Returns a descriptive message if a byte is not a known flag, if a flag is repeated, or if
+    /// both `u` and `v` are present.
+    fn from_bytes(bytes: &[u8]) -> Result<Self, String> {
+        let mut seen = Self::default();
+        for &byte in bytes {
+            let flag = match byte {
+                0x00 => continue,
+                b'd' => Self::HAS_INDICES,
+                b'g' => Self::GLOBAL,
+                b'i' => Self::IGNORE_CASE,
+                b'm' => Self::MULTILINE,
+                b's' => Self::DOT_ALL,
+                b'u' => Self::UNICODE,
+                b'v' => Self::UNICODE_SETS,
+                b'y' => Self::STICKY,
+                other => {
+                    return Err(format!("invalid regular expression flag {}", char::from(other)));
+                }
+            };
+
+            if seen.contains(flag) {
+                return Err(format!("repeated regular expression flag {}", char::from(byte)));
+            }
+            seen.insert(flag);
+        }
+
+        if seen.contains(Self::UNICODE | Self::UNICODE_SETS) {
+            return Err("cannot use both 'u' and 'v' flags".into());
+        }
+
+        Ok(seen)
+    }
+}
+
 impl FromStr for RegExpFlags {
     type Err = String;
 
@@ -224,13 +267,6 @@ impl FromStr for RegExpFlags {
     }
 }
 
-fn parse_regex_flags(s: &str, start: Position, interner: &mut Interner) -> Result<Sym, Error> {
-    match RegExpFlags::from_str(s) {
-        Err(message) => Err(Error::Syntax(message.into(), start)),
-        Ok(flags) => Ok(interner.get_or_intern(flags.to_string().as_str())),
-    }
-}
-
 impl Display for RegExpFlags {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         if self.contains(Self::HAS_INDICES) {

diff --git a/core/parser/src/lexer/tests.rs b/core/parser/src/lexer/tests.rs
@@ -866,45 +866,50 @@ fn addition_no_spaces_e_number() {
 fn take_while_ascii_pred_simple() {
     let mut cur = Cursor::from(&b"abcdefghijk"[..]);
 
-    let mut buf: Vec<u8> = Vec::new();
+    // Only the leading "abc" is expected to match, so 8 bytes of space is plenty.
+    let mut buf = [0_u8; 8];
 
-    cur.take_while_ascii_pred(&mut buf, &|c| c == 'a' || c == 'b' || c == 'c')
+    let taken = cur
+        .take_while_ascii_pred(&mut buf, &|c| matches!(c, 'a' | 'b' | 'c'))
         .unwrap();
 
-    assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "abc");
+    assert_eq!(str::from_utf8(taken).unwrap(), "abc");
 }
 
 #[test]
 fn take_while_ascii_pred_immediate_stop() {
     let mut cur = Cursor::from(&b"abcdefghijk"[..]);
 
-    let mut buf: Vec<u8> = Vec::new();
+    // The predicate rejects everything, so the returned slice must be empty.
+    let mut buf = [0_u8; 8];
 
-    cur.take_while_ascii_pred(&mut buf, &|_| false).unwrap();
+    let taken = cur.take_while_ascii_pred(&mut buf, &|_| false).unwrap();
 
-    assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "");
+    assert_eq!(str::from_utf8(taken).unwrap(), "");
 }
 
 #[test]
 fn take_while_ascii_pred_entire_str() {
     let mut cur = Cursor::from(&b"abcdefghijk"[..]);
 
-    let mut buf: Vec<u8> = Vec::new();
+    // The buffer is sized to hold all 11 input bytes.
+    let mut buf = [0_u8; 11];
 
-    cur.take_while_ascii_pred(&mut buf, &|_| true).unwrap();
+    let taken = cur.take_while_ascii_pred(&mut buf, &|_| true).unwrap();
 
-    assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "abcdefghijk");
+    assert_eq!(str::from_utf8(taken).unwrap(), "abcdefghijk");
 }
 
 #[test]
 fn take_while_ascii_pred_non_ascii_stop() {
     let mut cur = Cursor::from("abcde😀fghijk".as_bytes());
 
-    let mut buf: Vec<u8> = Vec::new();
+    // Reading is expected to stop at the first non-ASCII character.
+    let mut buf = [0_u8; 12];
 
-    cur.take_while_ascii_pred(&mut buf, &|_| true).unwrap();
+    let taken = cur.take_while_ascii_pred(&mut buf, &|_| true).unwrap();
 
-    assert_eq!(str::from_utf8(buf.as_slice()).unwrap(), "abcde");
+    assert_eq!(str::from_utf8(taken).unwrap(), "abcde");
 }
 
 #[test]