Skip to content

Commit 195878d

Browse files
authored
feat: migrate to antlr4ng (#267)
* feat: replace antlr4ts with antlr4ng
* feat: switch caseInsensitive option on
* feat: recompile all g4 file
* feat: update parser to fit antlr4ng
* test: update test to fit antlr4ng
1 parent 5ce89cb commit 195878d

File tree

112 files changed

+652048
-662682
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+652048
-662682
lines changed

jest.config.js

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@ module.exports = {
1717
clearMocks: true,
1818

1919
// Indicates whether the coverage information should be collected while executing the test
20-
collectCoverage: true,
20+
collectCoverage: false,
2121

2222
// An array of glob patterns indicating a set of files for which coverage information should be collected
2323
// collectCoverageFrom: undefined,
2424

2525
// The directory where Jest should output its coverage files
26-
coverageDirectory: 'coverage',
26+
// coverageDirectory: 'coverage',
2727

2828
// An array of regexp pattern strings used to skip coverage collection
2929
// coveragePathIgnorePatterns: [
@@ -76,7 +76,7 @@ module.exports = {
7676
// ],
7777

7878
// An array of file extensions your modules use
79-
moduleFileExtensions: ['js', 'mjs', 'cjs', 'jsx', 'ts', 'tsx', 'json', 'node'],
79+
moduleFileExtensions: ['js', 'mjs', 'ts'],
8080

8181
// A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module
8282
moduleNameMapper: {
@@ -166,10 +166,11 @@ module.exports = {
166166
// A map from regular expressions to paths to transformers
167167
transform: {
168168
'\\.[jt]sx?$': ['@swc/jest'],
169+
'\\.mjs$': ['@swc/jest'],
169170
},
170171

171172
// An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
172-
extensionsToTreatAsEsm: ['.ts', '.tsx'],
173+
extensionsToTreatAsEsm: ['.ts'],
173174
// An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
174175
// unmockedModulePathPatterns: undefined,
175176

@@ -181,4 +182,6 @@ module.exports = {
181182

182183
// Whether to use watchman for file crawling
183184
// watchman: true
185+
186+
transformIgnorePatterns: ['dist/', '<rootDir>/node_modules/.pnpm/(?!(antlr4ng|antlr4-c3)@)'],
184187
};

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
"@types/jest": "^29.5.1",
4242
"@types/node": "^18.15.11",
4343
"antlr-format-cli": "^1.2.1",
44-
"antlr4ts-cli": "^0.5.0-alpha.4",
44+
"antlr4ng-cli": "^1.0.7",
4545
"chalk": "4.1.2",
4646
"commitizen": "^4.3.0",
4747
"glob": "^10.3.10",
@@ -60,8 +60,8 @@
6060
"registry": "https://registry.npmjs.org/"
6161
},
6262
"dependencies": {
63-
"antlr4-c3": "3.1.1",
64-
"antlr4ts": "0.5.0-alpha.4"
63+
"antlr4-c3": "3.3.7",
64+
"antlr4ng": "2.0.11"
6565
},
6666
"sideEffects": false
6767
}

pnpm-lock.yaml

Lines changed: 29 additions & 22 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scripts/antlr4.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ const outputPath = path.resolve(__dirname, '../src/lib');
1111

1212
const languageEntries = fs.readdirSync(grammarsPath);
1313

14-
const baseCmd = 'antlr4ts -visitor -listener -Xexact-output-dir -o';
14+
const baseCmd = 'antlr4ng -Dlanguage=TypeScript -visitor -listener -Xexact-output-dir -o';
1515

1616
function compile(language) {
1717
const cmd = `${baseCmd} ${outputPath}/${language} ${grammarsPath}/${language}/*.g4`;
@@ -31,7 +31,7 @@ function compile(language) {
3131
);
3232
} else {
3333
cleanComment(language);
34-
console.log(chalk.greenBright(`Compile ${language} succeeded!`));
34+
console.info(chalk.greenBright(`Compile ${language} succeeded!`));
3535
}
3636
});
3737
}

scripts/release.js

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,13 @@ function execStandardVersion(res) {
5353
cmd += ` --tag-prefix ${tagPrefix} `;
5454
cmd += ' --infile CHANGELOG.md ';
5555

56-
console.log(`Executing: ${cmd} \n`);
56+
console.info(`Executing: ${cmd} \n`);
5757

5858
runCommand(cmd)
5959
.then(({ message }) => {
60-
console.log('Please checkout recent commit, and then');
61-
console.log(
62-
'Push branch and new tag to github, publish package to npm'
63-
);
64-
// message && console.log(message)
60+
console.info('Please checkout recent commit, and then');
61+
console.info('Push branch and new tag to github, publish package to npm');
62+
// message && console.info(message)
6563
})
6664
.catch(({ error, code }) => {
6765
code && console.error('Error: process exit code' + code);

src/grammar/flinksql/FlinkSqlLexer.g4

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55

66
lexer grammar FlinkSqlLexer;
77

8+
options {
9+
caseInsensitive= true;
10+
}
11+
812
// SKIP
913

1014
SPACE : [ \t\r\n]+ -> channel(HIDDEN);
@@ -585,9 +589,9 @@ ID_LITERAL : ID_LITERAL_FRAG;
585589

586590
fragment JAR_FILE_PARTTARN : '`' ( '\\' . | '``' | ~('`' | '\\'))* '`';
587591
fragment EXPONENT_NUM_PART : 'E' [-+]? DEC_DIGIT+;
588-
fragment ID_LITERAL_FRAG : [A-Z_0-9a-z]*? [A-Z_a-z]+? [A-Z_0-9a-z]*;
592+
fragment ID_LITERAL_FRAG : [A-Z_0-9]*? [A-Z_]+? [A-Z_0-9]*;
589593
fragment DEC_DIGIT : [0-9];
590-
fragment DEC_LETTER : [A-Za-z];
594+
fragment DEC_LETTER : [A-Z];
591595
fragment DQUOTA_STRING : '"' ( '\\' . | '""' | ~('"' | '\\'))* '"';
592596
fragment SQUOTA_STRING : '\'' ('\\' . | '\'\'' | ~('\'' | '\\'))* '\'';
593597
fragment BIT_STRING_L : 'B' '\'' [01]+ '\'';

src/grammar/flinksql/FlinkSqlParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ parser grammar FlinkSqlParser;
66

77
options {
88
tokenVocab=FlinkSqlLexer;
9+
caseInsensitive= true;
910
}
1011

1112
program

src/grammar/hive/HiveSqlLexer.g4

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@
2727

2828
lexer grammar HiveSqlLexer;
2929

30-
// unsupported option caseInsensitive in [package@version string lost to e-mail obfuscation in this capture]
31-
// options { caseInsensitive = true; }
30+
options {
31+
caseInsensitive= true;
32+
}
3233

3334
// Keywords
3435
KW_ABORT : 'ABORT';
@@ -502,7 +503,7 @@ Identifier: (Letter | Digit) (Letter | Digit | '_')* | QuotedIdentifier | '`' Re
502503

503504
fragment QuotedIdentifier: '`' ('``' | ~'`')* '`';
504505

505-
fragment Letter: 'A' ..'Z' | 'a' ..'z';
506+
fragment Letter: 'A' ..'Z';
506507

507508
fragment HexDigit: 'A' ..'F';
508509

src/grammar/hive/HiveSqlParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ parser grammar HiveSqlParser;
2929
options
3030
{
3131
tokenVocab=HiveSqlLexer;
32+
caseInsensitive= true;
3233
}
3334

3435
program

src/grammar/impala/ImpalaSqlLexer.g4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ THE SOFTWARE.
2727
lexer grammar ImpalaSqlLexer;
2828

2929
options {
30-
caseInsensitive=true;
30+
caseInsensitive= true;
3131
}
3232

3333
KW_ADD : 'ADD';

src/grammar/impala/ImpalaSqlParser.g4

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ parser grammar ImpalaSqlParser;
2121
options
2222
{
2323
tokenVocab=ImpalaSqlLexer;
24+
caseInsensitive= true;
2425
}
2526

2627
program
@@ -873,7 +874,7 @@ booleanExpression
873874
| left=booleanExpression operator=KW_OR right=booleanExpression # logicalBinary
874875
;
875876

876-
predicate[ParserRuleContext value]
877+
predicate[antlr.ParserRuleContext value]
877878
: comparisonOperator right=valueExpression # comparison
878879
| comparisonOperator comparisonQuantifier subQueryRelation # quantifiedComparison
879880
| KW_NOT? KW_BETWEEN lower=valueExpression KW_AND upper=valueExpression # between

src/grammar/mysql/MySqlParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ parser grammar MySqlParser;
3535

3636
options {
3737
tokenVocab= MySqlLexer;
38+
caseInsensitive= true;
3839
}
3940

4041
// Top Level Description

src/grammar/pgsql/PostgreSQLLexer.g4

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636

3737
lexer grammar PostgreSQLLexer;
3838

39+
options {
40+
caseInsensitive= true;
41+
}
42+
3943
/**
4044
* Reference Doc: https://www.postgresql.org/docs/16.1/sql-commands.html
4145
*/
@@ -673,9 +677,9 @@ KW_BUFFER_USAGE_LIMIT : 'BUFFER_USAGE_LIMIT';
673677
Identifier: IdentifierStartChar IdentifierChar*;
674678

675679
fragment IdentifierStartChar: // these are the valid identifier start characters below 0x7F
676-
[a-zA-Z_]
680+
[A-Z_]
677681
| // these are the valid characters from 0x80 to 0xFF
678-
[\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]
682+
[\u00AA\u00B5\u00BA\u00C0-\u00D6\u00F8-\u00FF]
679683
| // these are the letters above 0xFF which only need a single UTF-16 code unit
680684
[\u0100-\uD7FF\uE000-\uFFFF]
681685
| // letters which require multiple UTF-16 code units
@@ -771,7 +775,7 @@ InvalidUnterminatedBinaryStringConstant: 'B' UnterminatedStringConstant;
771775

772776
HexadecimalStringConstant: UnterminatedHexadecimalStringConstant '\'';
773777

774-
UnterminatedHexadecimalStringConstant: 'X' '\'' [0-9a-fA-F]*;
778+
UnterminatedHexadecimalStringConstant: 'X' '\'' [0-9A-F]*;
775779

776780
InvalidHexadecimalStringConstant: InvalidUnterminatedHexadecimalStringConstant '\'';
777781

@@ -791,7 +795,7 @@ Numeric:
791795

792796
fragment Digits: [0-9]+;
793797

794-
PLSQLVARIABLENAME: ':' [a-zA-Z_] [a-zA-Z_0-9$]*;
798+
PLSQLVARIABLENAME: ':' [A-Z_] [A-Z_0-9$]*;
795799

796800
PLSQLIDENTIFIER: ':"' ('\\' . | '""' | ~ ('"' | '\\'))* '"';
797801
//
@@ -861,13 +865,13 @@ fragment EscapeStringText: (
861865
'\'\''
862866
| '\\' (
863867
// two-digit hex escapes are still valid when treated as single-digit escapes
864-
'x' [0-9a-fA-F]
865-
| 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
866-
| 'U' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
868+
'x' [0-9A-F]
869+
| 'u' [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F]
870+
| 'U' [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F] [0-9A-F]
867871
|
868872
// Any character other than the Unicode escapes can follow a backslash. Some have
869873
// special meaning, but that doesn't affect the syntax.
870-
~ [xuU]
874+
~ [xu]
871875
)
872876
| ~ ['\\]
873877
)*;

src/grammar/pgsql/PostgreSQLParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ parser grammar PostgreSQLParser;
4141

4242
options {
4343
tokenVocab= PostgreSQLLexer;
44+
caseInsensitive= true;
4445
}
4546

4647
program

src/grammar/plsql/PlSqlLexer.g4

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2366,7 +2366,7 @@ BIT_STRING_LIT: 'B' ('\'' [01]* '\'')+;
23662366
// Rule #284 <HEX_STRING_LIT> - subtoken typecast in <REGULAR_ID>
23672367
// Lowercase 'x' is a usual addition to the standard
23682368

2369-
HEX_STRING_LIT : 'X' ('\'' [A-Fa-f0-9]* '\'')+;
2369+
HEX_STRING_LIT : 'X' ('\'' [A-F0-9]* '\'')+;
23702370
DOUBLE_PERIOD : '..';
23712371
PERIOD : '.';
23722372

@@ -2468,10 +2468,8 @@ REMARK_COMMENT:
24682468
PROMPT_MESSAGE: 'PRO' {this.IsNewlineAtPos(-4)}? 'MPT'? (' ' ~('\r' | '\n')*)? NEWLINE_EOF;
24692469

24702470
// TODO: should start with a newline
2471-
START_CMD
2472-
//: 'STA' 'RT'? SPACE ~('\r' | '\n')* NEWLINE_EOF
2473-
: // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm
2474-
'@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF; // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm
2471+
START_CMD: // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12002.htm
2472+
'@' {this.IsNewlineAtPos(-2)}? '@'? ~('\r' | '\n')* NEWLINE_EOF; // https://docs.oracle.com/cd/B19306_01/server.102/b14357/ch12003.htm
24752473

24762474
REGULAR_ID: SIMPLE_LETTER (SIMPLE_LETTER | '$' | '_' | '#' | [0-9])*;
24772475

@@ -2481,7 +2479,7 @@ SPACES: [ \t\r\n]+ -> channel(HIDDEN);
24812479

24822480
fragment NEWLINE_EOF : NEWLINE | EOF;
24832481
fragment QUESTION_MARK : '?';
2484-
fragment SIMPLE_LETTER : [a-zA-Z];
2482+
fragment SIMPLE_LETTER : [A-Z];
24852483
fragment FLOAT_FRAGMENT : UNSIGNED_INTEGER* '.'? UNSIGNED_INTEGER+;
24862484
fragment NEWLINE : '\r'? '\n';
24872485
fragment SPACE : [ \t];

0 commit comments

Comments
 (0)