
Commit 9c9b602

Roman Borschel committed

Alternative fix implemented in the parser.
1 parent e32c3c8 commit 9c9b602

File tree: 3 files changed, +95 -52 lines changed

src/parser/mod.rs

+36
@@ -1714,6 +1714,42 @@ impl<'a> Parser<'a> {
                 && self.peek_token_ref().token == Token::LBracket
             {
                 self.parse_multi_dim_subscript(&mut chain)?;
+            } else if self.dialect.supports_numeric_prefix() {
+                // When we get a Word or Number token while parsing a compound expression that
+                // starts with a dot (.), and using a dialect that supports identifiers with numeric
+                // prefixes, these tokens are part of qualified, unquoted identifiers and must be
+                // split up accordingly.
+                match self.peek_token_ref() {
+                    TokenWithSpan {
+                        token: Token::Word(w),
+                        span,
+                    } if w.value.starts_with(".") => {
+                        let ident = w.value[1..].to_string();
+                        let new_span = Span::new(
+                            Location::new(span.start.line, span.start.column + 1),
+                            span.end,
+                        );
+                        let expr = Expr::Identifier(Ident::with_span(new_span, ident));
+                        chain.push(AccessExpr::Dot(expr));
+                        self.advance_token();
+                    }
+                    TokenWithSpan {
+                        token: Token::Number(n, _),
+                        span,
+                    } if n.to_string().starts_with(".") => {
+                        let ident = n.to_string()[1..].to_string();
+                        let new_span = Span::new(
+                            Location::new(span.start.line, span.start.column + 1),
+                            span.end,
+                        );
+                        let expr = Expr::Identifier(Ident::with_span(new_span, ident));
+                        chain.push(AccessExpr::Dot(expr));
+                        self.advance_token();
+                    }
+                    _ => {
+                        break;
+                    }
+                }
             } else {
                 break;
             }
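
For reference, the parser-side splitting can be exercised end to end roughly as follows. This is an illustrative sketch (not part of the commit) that reuses the public Parser API shown in the MySQL test added below; it prints each part of the compound identifier together with its span.

use sqlparser::ast::{Expr, SelectItem, SetExpr, Statement};
use sqlparser::dialect::MySqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // `15to29` starts with digits, so in a dialect with numeric-prefix support
    // `t.15to29` must parse as the qualified identifier `t`.`15to29`, not as
    // `t` followed by the decimal number `.15to29`.
    let stmt = Parser::new(&MySqlDialect {})
        .try_with_sql("SELECT t.15to29 FROM my_table AS t")
        .unwrap()
        .parse_statement()
        .unwrap();

    if let Statement::Query(query) = stmt {
        if let SetExpr::Select(select) = *query.body {
            if let Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) =
                select.projection.last()
            {
                for part in parts {
                    // The second part's span starts one column after the dot because
                    // the parser bumps `span.start.column` by one when splitting.
                    println!("{} -> {:?}", part.value, part.span);
                }
            }
        }
    }
}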

src/tokenizer.rs

+4-50
@@ -895,7 +895,7 @@ impl<'a> Tokenizer<'a> {
         };
 
         let mut location = state.location();
-        while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? {
+        while let Some(token) = self.next_token(&mut state)? {
             let span = location.span_to(state.location());
 
             buf.push(TokenWithSpan { token, span });
@@ -932,11 +932,7 @@ impl<'a> Tokenizer<'a> {
     }
 
     /// Get the next token or return None
-    fn next_token(
-        &self,
-        chars: &mut State,
-        prev_token: Option<&Token>,
-    ) -> Result<Option<Token>, TokenizerError> {
+    fn next_token(&self, chars: &mut State) -> Result<Option<Token>, TokenizerError> {
         match chars.peek() {
             Some(&ch) => match ch {
                 ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
@@ -1215,28 +1211,17 @@ impl<'a> Tokenizer<'a> {
                         chars.next();
                     }
 
-                    // If the dialect supports identifiers that start with a numeric prefix
-                    // and we have now consumed a dot, check if the previous token was a Word.
-                    // If so, what follows is definitely not part of a decimal number and
-                    // we should yield the dot as a dedicated token so compound identifiers
-                    // starting with digits can be parsed correctly.
-                    if s == "." && self.dialect.supports_numeric_prefix() {
-                        if let Some(Token::Word(_)) = prev_token {
-                            return Ok(Some(Token::Period));
-                        }
-                    }
-
                     // Consume fractional digits.
                     s += &peeking_next_take_while(chars, |ch, next_ch| {
                         ch.is_ascii_digit() || is_number_separator(ch, next_ch)
                     });
 
-                    // No fraction -> Token::Period
+                    // No fraction -> Token::Period.
                     if s == "." {
                         return Ok(Some(Token::Period));
                     }
 
-                    // Parse exponent as number
+                    // Parse exponent, if present.
                     let mut exponent_part = String::new();
                     if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') {
                         let mut char_clone = chars.peekable.clone();
@@ -1280,10 +1265,6 @@ impl<'a> Tokenizer<'a> {
                                 s += word.as_str();
                                 return Ok(Some(Token::make_word(s.as_str(), None)));
                             }
-                        } else if prev_token == Some(&Token::Period) {
-                            // If the previous token was a period, thus not belonging to a number,
-                            // the value we have is part of an identifier.
-                            return Ok(Some(Token::make_word(s.as_str(), None)));
                         }
                     }
 
@@ -3985,31 +3966,4 @@ mod tests {
             ],
         );
     }
-
-    #[test]
-    fn test_tokenize_identifiers_numeric_prefix() {
-        all_dialects_where(|dialect| dialect.supports_numeric_prefix())
-            .tokenizes_to("123abc", vec![Token::make_word("123abc", None)]);
-
-        all_dialects_where(|dialect| dialect.supports_numeric_prefix())
-            .tokenizes_to("12e34", vec![Token::Number("12e34".to_string(), false)]);
-
-        all_dialects_where(|dialect| dialect.supports_numeric_prefix()).tokenizes_to(
-            "t.12e34",
-            vec![
-                Token::make_word("t", None),
-                Token::Period,
-                Token::make_word("12e34", None),
-            ],
-        );
-
-        all_dialects_where(|dialect| dialect.supports_numeric_prefix()).tokenizes_to(
-            "t.1two3",
-            vec![
-                Token::make_word("t", None),
-                Token::Period,
-                Token::make_word("1two3", None),
-            ],
-        );
-    }
 }
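
To see what the simplified tokenizer now hands to the parser, the raw token stream can be inspected directly. A minimal sketch, assuming the MySQL dialect; with the prev_token special-casing removed, the dot-prefixed remainder is expected to arrive as a single Word (or Number) token that the parser splits as shown in src/parser/mod.rs above.

use sqlparser::dialect::MySqlDialect;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    let dialect = MySqlDialect {};
    // The tokenizer no longer emits a dedicated Period between `t` and `15to29`;
    // splitting the dot off is now the parser's job.
    let tokens = Tokenizer::new(&dialect, "SELECT t.15to29 FROM my_table AS t")
        .tokenize()
        .unwrap();
    println!("{tokens:?}");
}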

tests/sqlparser_mysql.rs

+55-2
@@ -25,9 +25,9 @@ use matches::assert_matches;
 use sqlparser::ast::MysqlInsertPriority::{Delayed, HighPriority, LowPriority};
 use sqlparser::ast::*;
 use sqlparser::dialect::{GenericDialect, MySqlDialect};
-use sqlparser::parser::{ParserError, ParserOptions};
+use sqlparser::parser::{Parser, ParserError, ParserOptions};
 use sqlparser::tokenizer::Span;
-use sqlparser::tokenizer::Token;
+use sqlparser::tokenizer::{Location, Token};
 use test_utils::*;
 
 #[macro_use]
@@ -1926,6 +1926,59 @@ fn parse_select_with_numeric_prefix_column_name() {
     }
 }
 
+#[test]
+fn test_qualified_identifiers_with_numeric_prefix_span() {
+    match Parser::new(&MySqlDialect {})
+        .try_with_sql("SELECT t.15to29 FROM my_table AS t")
+        .unwrap()
+        .parse_statement()
+        .unwrap()
+    {
+        Statement::Query(q) => match *q.body {
+            SetExpr::Select(s) => match s.projection.last() {
+                Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => {
+                    assert_eq!(
+                        Span::new(Location::new(1, 8), Location::new(1, 9)),
+                        parts[0].span,
+                    );
+                    assert_eq!(
+                        Span::new(Location::new(1, 10), Location::new(1, 16)),
+                        parts[1].span,
+                    );
+                }
+                proj => panic!("Unexpected projection: {:?}", proj),
+            },
+            body => panic!("Unexpected statement body: {:?}", body),
+        },
+        stmt => panic!("Unexpected statement: {:?}", stmt),
+    }
+
+    match Parser::new(&MySqlDialect {})
+        .try_with_sql("SELECT t.15e29 FROM my_table AS t")
+        .unwrap()
+        .parse_statement()
+        .unwrap()
+    {
+        Statement::Query(q) => match *q.body {
+            SetExpr::Select(s) => match s.projection.last() {
+                Some(SelectItem::UnnamedExpr(Expr::CompoundIdentifier(parts))) => {
+                    assert_eq!(
+                        Span::new(Location::new(1, 8), Location::new(1, 9)),
+                        parts[0].span,
+                    );
+                    assert_eq!(
+                        Span::new(Location::new(1, 10), Location::new(1, 15)),
+                        parts[1].span,
+                    );
+                }
+                proj => panic!("Unexpected projection: {:?}", proj),
+            },
+            body => panic!("Unexpected statement body: {:?}", body),
+        },
+        stmt => panic!("Unexpected statement: {:?}", stmt),
+    }
+}
+
 #[test]
 fn parse_qualified_identifiers_with_numeric_prefix() {
     // Case 1: Qualified column name that starts with digits.
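
The two test queries exercise both branches added in src/parser/mod.rs: `t.15to29` should reach the parser as a dot-prefixed Word token, while `t.15e29` looks like exponent notation and should arrive as a Number token. A quick illustrative check (not part of the commit) that the exponent-like case still comes back as a column reference:

use sqlparser::dialect::MySqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let stmt = Parser::new(&MySqlDialect {})
        .try_with_sql("SELECT t.15e29 FROM my_table AS t")
        .unwrap()
        .parse_statement()
        .unwrap();
    // Expected to print the statement back with `t.15e29` intact as a
    // qualified column name rather than a numeric literal.
    println!("{stmt}");
}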
