
Commit 9c9b602

Roman Borschel committed

Alternative fix implemented in the parser.
1 parent e32c3c8 commit 9c9b602

File tree: 3 files changed, +95 -52 lines changed

src/parser/mod.rs

+36
@@ -1714,6 +1714,42 @@ impl<'a> Parser<'a> {
                 && self.peek_token_ref().token == Token::LBracket
             {
                 self.parse_multi_dim_subscript(&mut chain)?;
+            } else if self.dialect.supports_numeric_prefix() {
+                // When we get a Word or Number token while parsing a compound expression that
+                // starts with a dot (.), and using a dialect that supports identifiers with numeric
+                // prefixes, these tokens are part of qualified, unquoted identifiers and must be
+                // split up accordingly.
+                match self.peek_token_ref() {
+                    TokenWithSpan {
+                        token: Token::Word(w),
+                        span,
+                    } if w.value.starts_with(".") => {
+                        let ident = w.value[1..].to_string();
+                        let new_span = Span::new(
+                            Location::new(span.start.line, span.start.column + 1),
+                            span.end,
+                        );
+                        let expr = Expr::Identifier(Ident::with_span(new_span, ident));
+                        chain.push(AccessExpr::Dot(expr));
+                        self.advance_token();
+                    }
+                    TokenWithSpan {
+                        token: Token::Number(n, _),
+                        span,
+                    } if n.to_string().starts_with(".") => {
+                        let ident = n.to_string()[1..].to_string();
+                        let new_span = Span::new(
+                            Location::new(span.start.line, span.start.column + 1),
+                            span.end,
+                        );
+                        let expr = Expr::Identifier(Ident::with_span(new_span, ident));
+                        chain.push(AccessExpr::Dot(expr));
+                        self.advance_token();
+                    }
+                    _ => {
+                        break;
+                    }
+                }
             } else {
                 break;
             }
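
For reference, the parser-side splitting can be exercised end to end roughly as follows. This is an illustrative sketch (not part of the commit) that reuses the public Parser API shown in the MySQL test added below; it prints each part of the compound identifier together with its span.

use sqlparser::ast::{Expr, SelectItem, SetExpr, Statement};
use sqlparser::dialect::MySqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // `15to29` starts with digits, so in a dialect with numeric-prefix support
    // `t.15to29` must parse as the qualified identifier `t`.`15to29`, not as
    // `t` followed by the decimal number `.15to29`.
    let stmt = Parser::new(&MySqlDialect {})
        .try_with_sql("SELECT t.15to29 FROM my_table AS t")
        .unwrap()
        .parse_statement()
        .unwrap();

    if let Statement::Query(query) = stmt {
        if let SetExpr::Select(select) = *query.body {
            if let Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) =
                select.projection.last()
            {
                for part in parts {
                    // The second part's span starts one column after the dot because
                    // the parser bumps `span.start.column` by one when splitting.
                    println!("{} -> {:?}", part.value, part.span);
                }
            }
        }
    }
}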

src/tokenizer.rs

+4-50
@@ -895,7 +895,7 @@ impl<'a> Tokenizer<'a> {
         };
 
         let mut location = state.location();
-        while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? {
+        while let Some(token) = self.next_token(&mut state)? {
             let span = location.span_to(state.location());
 
             buf.push(TokenWithSpan { token, span });
@@ -932,11 +932,7 @@ impl<'a> Tokenizer<'a> {
     }
 
     /// Get the next token or return None
-    fn next_token(
-        &self,
-        chars: &mut State,
-        prev_token: Option<&Token>,
-    ) -> Result<Option<Token>, TokenizerError> {
+    fn next_token(&self, chars: &mut State) -> Result<Option<Token>, TokenizerError> {
         match chars.peek() {
             Some(&ch) => match ch {
                 ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
@@ -1215,28 +1211,17 @@ impl<'a> Tokenizer<'a> {
                         chars.next();
                     }
 
-                    // If the dialect supports identifiers that start with a numeric prefix
-                    // and we have now consumed a dot, check if the previous token was a Word.
-                    // If so, what follows is definitely not part of a decimal number and
-                    // we should yield the dot as a dedicated token so compound identifiers
-                    // starting with digits can be parsed correctly.
-                    if s == "." && self.dialect.supports_numeric_prefix() {
-                        if let Some(Token::Word(_)) = prev_token {
-                            return Ok(Some(Token::Period));
-                        }
-                    }
-
                     // Consume fractional digits.
                     s += &peeking_next_take_while(chars, |ch, next_ch| {
                         ch.is_ascii_digit() || is_number_separator(ch, next_ch)
                     });
 
-                    // No fraction -> Token::Period
+                    // No fraction -> Token::Period.
                     if s == "." {
                         return Ok(Some(Token::Period));
                     }
 
-                    // Parse exponent as number
+                    // Parse exponent, if present.
                     let mut exponent_part = String::new();
                     if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') {
                         let mut char_clone = chars.peekable.clone();
@@ -1280,10 +1265,6 @@ impl<'a> Tokenizer<'a> {
                                 s += word.as_str();
                                 return Ok(Some(Token::make_word(s.as_str(), None)));
                             }
-                        } else if prev_token == Some(&Token::Period) {
-                            // If the previous token was a period, thus not belonging to a number,
-                            // the value we have is part of an identifier.
-                            return Ok(Some(Token::make_word(s.as_str(), None)));
                         }
                     }
 
@@ -3985,31 +3966,4 @@ mod tests {
             ],
         );
     }
-
-    #[test]
-    fn test_tokenize_identifiers_numeric_prefix() {
-        all_dialects_where(|dialect| dialect.supports_numeric_prefix())
-            .tokenizes_to("123abc", vec![Token::make_word("123abc", None)]);
-
-        all_dialects_where(|dialect| dialect.supports_numeric_prefix())
-            .tokenizes_to("12e34", vec![Token::Number("12e34".to_string(), false)]);
-
-        all_dialects_where(|dialect| dialect.supports_numeric_prefix()).tokenizes_to(
-            "t.12e34",
-            vec![
-                Token::make_word("t", None),
-                Token::Period,
-                Token::make_word("12e34", None),
-            ],
-        );
-
-        all_dialects_where(|dialect| dialect.supports_numeric_prefix()).tokenizes_to(
-            "t.1two3",
-            vec![
-                Token::make_word("t", None),
-                Token::Period,
-                Token::make_word("1two3", None),
-            ],
-        );
-    }
 }
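
To see what the simplified tokenizer now hands to the parser, the raw token stream can be inspected directly. A minimal sketch, assuming the MySQL dialect; with the prev_token special-casing removed, the dot-prefixed remainder is expected to arrive as a single Word (or Number) token that the parser splits as shown in src/parser/mod.rs above.

use sqlparser::dialect::MySqlDialect;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    let dialect = MySqlDialect {};
    // The tokenizer no longer emits a dedicated Period between `t` and `15to29`;
    // splitting the dot off is now the parser's job.
    let tokens = Tokenizer::new(&dialect, "SELECT t.15to29 FROM my_table AS t")
        .tokenize()
        .unwrap();
    println!("{tokens:?}");
}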

tests/sqlparser_mysql.rs

+55-2
@@ -25,9 +25,9 @@ use matches::assert_matches;
 use sqlparser::ast::MysqlInsertPriority::{Delayed, HighPriority, LowPriority};
 use sqlparser::ast::*;
 use sqlparser::dialect::{GenericDialect, MySqlDialect};
-use sqlparser::parser::{ParserError, ParserOptions};
+use sqlparser::parser::{Parser, ParserError, ParserOptions};
 use sqlparser::tokenizer::Span;
-use sqlparser::tokenizer::Token;
+use sqlparser::tokenizer::{Location, Token};
 use test_utils::*;
 
 #[macro_use]
@@ -1926,6 +1926,59 @@ fn parse_select_with_numeric_prefix_column_name() {
     }
 }
 
+#[test]
+fn test_qualified_identifiers_with_numeric_prefix_span() {
+    match Parser::new(&MySqlDialect {})
+        .try_with_sql("SELECT t.15to29 FROM my_table AS t")
+        .unwrap()
+        .parse_statement()
+        .unwrap()
+    {
+        Statement::Query(q) => match *q.body {
+            SetExpr::Select(s) => match s.projection.last() {
+                Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => {
+                    assert_eq!(
+                        Span::new(Location::new(1, 8), Location::new(1, 9)),
+                        parts[0].span,
+                    );
+                    assert_eq!(
+                        Span::new(Location::new(1, 10), Location::new(1, 16)),
+                        parts[1].span,
+                    );
+                }
+                proj => panic!("Unexpected projection: {:?}", proj),
+            },
+            body => panic!("Unexpected statement body: {:?}", body),
+        },
+        stmt => panic!("Unexpected statement: {:?}", stmt),
+    }
+
+    match Parser::new(&MySqlDialect {})
+        .try_with_sql("SELECT t.15e29 FROM my_table AS t")
+        .unwrap()
+        .parse_statement()
+        .unwrap()
+    {
+        Statement::Query(q) => match *q.body {
+            SetExpr::Select(s) => match s.projection.last() {
+                Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => {
+                    assert_eq!(
+                        Span::new(Location::new(1, 8), Location::new(1, 9)),
+                        parts[0].span,
+                    );
+                    assert_eq!(
+                        Span::new(Location::new(1, 10), Location::new(1, 15)),
+                        parts[1].span,
+                    );
+                }
+                proj => panic!("Unexpected projection: {:?}", proj),
+            },
+            body => panic!("Unexpected statement body: {:?}", body),
+        },
+        stmt => panic!("Unexpected statement: {:?}", stmt),
+    }
+}
+
 #[test]
 fn parse_qualified_identifiers_with_numeric_prefix() {
     // Case 1: Qualified column name that starts with digits.
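
The two test queries exercise both branches added in src/parser/mod.rs: `t.15to29` should reach the parser as a dot-prefixed Word token, while `t.15e29` looks like exponent notation and should arrive as a Number token. A quick illustrative check (not part of the commit) that the exponent-like case still comes back as a column reference:

use sqlparser::dialect::MySqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let stmt = Parser::new(&MySqlDialect {})
        .try_with_sql("SELECT t.15e29 FROM my_table AS t")
        .unwrap()
        .parse_statement()
        .unwrap();
    // Expected to print the statement back with `t.15e29` intact as a
    // qualified column name rather than a numeric literal.
    println!("{stmt}");
}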
