Skip to content

Commit d50b6ca

Browse files
authored
Merge pull request #340 from tree-sitter/scanner-and-generate
Tidy up scanner
2 parents 3a67773 + dffe55e commit d50b6ca

File tree

3 files changed

Lines changed: 67 additions & 133 deletions

3 files changed

Lines changed: 67 additions & 133 deletions

src/scanner.c

Lines changed: 49 additions & 102 deletions
Original file line number | Diff line number | Diff line change
@@ -25,24 +25,21 @@ void *tree_sitter_scala_external_scanner_create() {
2525
return createStack();
2626
}
2727

28-
void tree_sitter_scala_external_scanner_destroy(void *p) {
29-
free(p);
28+
void tree_sitter_scala_external_scanner_destroy(void *payload) {
29+
free(payload);
3030
}
3131

32-
void tree_sitter_scala_external_scanner_reset(void *p) {
33-
resetStack(p);
32+
unsigned tree_sitter_scala_external_scanner_serialize(void *payload, char *buffer) {
33+
return serialiseStack(payload, buffer);
3434
}
3535

36-
unsigned tree_sitter_scala_external_scanner_serialize(void *p, char *buffer) {
37-
return serialiseStack(p, buffer);
38-
}
39-
40-
void tree_sitter_scala_external_scanner_deserialize(void *p, const char *b,
41-
unsigned n) {
42-
deserialiseStack(p, b, n);
36+
void tree_sitter_scala_external_scanner_deserialize(void *payload, const char *buffer,
37+
unsigned length) {
38+
deserialiseStack(payload, buffer, length);
4339
}
4440

4541
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
42+
4643
static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
4744

4845
static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_interpolation) {
@@ -63,24 +60,26 @@ static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_inte
6360
if (is_multiline && has_interpolation) {
6461
lexer->result_symbol = INTERPOLATED_MULTILINE_STRING_MIDDLE;
6562
return true;
66-
} else if (has_interpolation){
63+
}
64+
if (has_interpolation) {
6765
lexer->result_symbol = INTERPOLATED_STRING_MIDDLE;
6866
return true;
69-
} else {
70-
advance(lexer);
7167
}
68+
advance(lexer);
7269
} else {
7370
closing_quote_count = 0;
7471
if (lexer->lookahead == '\\') {
7572
advance(lexer);
76-
if (lexer->lookahead != 0) advance(lexer);
73+
if (!lexer->eof(lexer)) {
74+
advance(lexer);
75+
}
7776
} else if (lexer->lookahead == '\n') {
7877
if (is_multiline) {
7978
advance(lexer);
8079
} else {
8180
return false;
8281
}
83-
} else if (lexer->lookahead == 0) {
82+
} else if (lexer->eof(lexer)) {
8483
return false;
8584
} else {
8685
advance(lexer);
@@ -101,6 +100,16 @@ static bool detect_comment_start(TSLexer *lexer) {
101100
return false;
102101
}
103102

103+
static bool scan_word(TSLexer *lexer, const char* const word) {
104+
for (int i = 0; word[i] != '\0'; i++) {
105+
if (lexer->lookahead != word[i]) {
106+
return false;
107+
}
108+
advance(lexer);
109+
}
110+
return !iswalnum(lexer->lookahead);
111+
}
112+
104113
bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
105114
const bool *valid_symbols) {
106115
ScannerStack *stack = (ScannerStack *)payload;
@@ -114,8 +123,9 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
114123
newline_count++;
115124
indentation_size = 0;
116125
}
117-
else
126+
else {
118127
indentation_size++;
128+
}
119129
skip(lexer);
120130
}
121131

@@ -174,7 +184,7 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
174184
if (lexer->eof(lexer)) {
175185
stack->last_column = -1;
176186
} else {
177-
stack->last_column = lexer->get_column(lexer);
187+
stack->last_column = (int)lexer->get_column(lexer);
178188
}
179189
return true;
180190
}
@@ -203,14 +213,17 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
203213
// a
204214
// .b
205215
// .c
206-
if (lexer->lookahead == '.') return false;
216+
if (lexer->lookahead == '.') {
217+
return false;
218+
}
207219

208220
// Single-line and multi-line comments
209221
if (lexer->lookahead == '/') {
210222
advance(lexer);
211223
if (lexer->lookahead == '/') {
212224
return false;
213-
} else if (lexer->lookahead == '*') {
225+
}
226+
if (lexer->lookahead == '*') {
214227
advance(lexer);
215228
while (!lexer->eof(lexer)) {
216229
if (lexer->lookahead == '*') {
@@ -238,108 +251,42 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
238251
}
239252

240253
if (valid_symbols[ELSE]) {
241-
if (lexer->lookahead != 'e') return true;
242-
advance(lexer);
243-
if (lexer->lookahead != 'l') return true;
244-
advance(lexer);
245-
if (lexer->lookahead != 's') return true;
246-
advance(lexer);
247-
if (lexer->lookahead != 'e') return true;
248-
advance(lexer);
249-
if (iswalpha(lexer->lookahead)) return true;
250-
return false;
254+
return !scan_word(lexer, "else");
251255
}
252256

253257
if (valid_symbols[CATCH]) {
254-
if (lexer->lookahead != 'c' && lexer->lookahead != 'f') return true;
255-
advance(lexer);
256-
if (lexer->lookahead == 'a') {
257-
advance(lexer);
258-
if (lexer->lookahead != 't') return true;
259-
advance(lexer);
260-
if (lexer->lookahead != 'c') return true;
261-
advance(lexer);
262-
if (lexer->lookahead != 'h') return true;
263-
advance(lexer);
264-
if (iswalpha(lexer->lookahead)) return true;
265-
return false;
266-
} else if (lexer->lookahead == 'i') {
267-
advance(lexer);
268-
if (lexer->lookahead != 'n') return true;
269-
advance(lexer);
270-
if (lexer->lookahead != 'a') return true;
271-
advance(lexer);
272-
if (lexer->lookahead != 'l') return true;
273-
advance(lexer);
274-
if (lexer->lookahead != 'l') return true;
275-
advance(lexer);
276-
if (lexer->lookahead != 'y') return true;
277-
advance(lexer);
278-
if (iswalpha(lexer->lookahead)) return true;
279-
return false;
280-
} else {
281-
return true;
258+
if (lexer->lookahead == 'c') {
259+
return !scan_word(lexer, "catch");
282260
}
261+
if (lexer->lookahead == 'f') {
262+
return !scan_word(lexer, "finally");
263+
}
264+
return true;
283265
}
284266

285267
if (valid_symbols[FINALLY]) {
286-
if (lexer->lookahead != 'f') return true;
287-
advance(lexer);
288-
if (lexer->lookahead != 'i') return true;
289-
advance(lexer);
290-
if (lexer->lookahead != 'n') return true;
291-
advance(lexer);
292-
if (lexer->lookahead != 'a') return true;
293-
advance(lexer);
294-
if (lexer->lookahead != 'l') return true;
295-
advance(lexer);
296-
if (lexer->lookahead != 'l') return true;
297-
advance(lexer);
298-
if (lexer->lookahead != 'y') return true;
299-
advance(lexer);
300-
if (iswalpha(lexer->lookahead)) return true;
301-
return false;
268+
return !scan_word(lexer, "finally");
302269
}
303270

304271
if (valid_symbols[EXTENDS]) {
305-
if (lexer->lookahead != 'e') return true;
306-
advance(lexer);
307-
if (lexer->lookahead != 'x') return true;
308-
advance(lexer);
309-
if (lexer->lookahead != 't') return true;
310-
advance(lexer);
311-
if (lexer->lookahead != 'e') return true;
312-
advance(lexer);
313-
if (lexer->lookahead != 'n') return true;
314-
advance(lexer);
315-
if (lexer->lookahead != 'd') return true;
316-
advance(lexer);
317-
if (lexer->lookahead != 's') return true;
318-
advance(lexer);
319-
if (iswalpha(lexer->lookahead)) return true;
320-
return false;
272+
return !scan_word(lexer, "extends");
321273
}
322274

323275
if (valid_symbols[WITH]) {
324-
if (lexer->lookahead != 'w') return true;
325-
advance(lexer);
326-
if (lexer->lookahead != 'i') return true;
327-
advance(lexer);
328-
if (lexer->lookahead != 't') return true;
329-
advance(lexer);
330-
if (lexer->lookahead != 'h') return true;
331-
advance(lexer);
332-
if (iswalpha(lexer->lookahead)) return true;
333-
return false;
276+
return !scan_word(lexer, "with");
334277
}
335278

336-
if (newline_count > 1) return true;
279+
if (newline_count > 1) {
280+
return true;
281+
}
337282

338283
return true;
339284
}
340285

341286
while (iswspace(lexer->lookahead)) {
342-
if (lexer->lookahead == '\n') newline_count++;
287+
if (lexer->lookahead == '\n') {
288+
newline_count++;
289+
}
343290
skip(lexer);
344291
}
345292

src/stack.h

Lines changed: 18 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
1-
2-
#include <stdio.h>
31
#include <stdbool.h>
2+
#include <stdio.h>
43
#include <stdlib.h>
54
#include <string.h>
65

@@ -21,7 +20,7 @@ typedef struct ScannerStack {
2120
int last_column;
2221
} ScannerStack;
2322

24-
ScannerStack* createStack() {
23+
static ScannerStack* createStack() {
2524
ScannerStack* ptr = (ScannerStack*) malloc(sizeof(ScannerStack));
2625

2726
ptr -> top = 0;
@@ -33,37 +32,36 @@ ScannerStack* createStack() {
3332
return ptr;
3433
}
3534

36-
bool isEmptyStack(ScannerStack *stack) { return stack->top == 0; }
35+
static bool isEmptyStack(ScannerStack *stack) { return stack->top == 0; }
3736

38-
int peekStack(ScannerStack *stack) {
37+
static int peekStack(ScannerStack *stack) {
3938
return isEmptyStack(stack) ? -1 : stack->stack[stack->top - 1];
4039
}
4140

42-
void pushStack(ScannerStack *stack, unsigned int value) {
41+
static void pushStack(ScannerStack *stack, unsigned int value) {
4342
stack->top++;
44-
stack->stack[stack->top - 1] = value;
43+
stack->stack[stack->top - 1] = (int)value;
4544
}
4645

47-
int popStack(ScannerStack *stack) {
48-
if (isEmptyStack(stack))
46+
static int popStack(ScannerStack *stack) {
47+
if (isEmptyStack(stack)) {
4948
return -1;
50-
else {
51-
int result = peekStack(stack);
52-
stack->top--;
53-
54-
return result;
5549
}
50+
int result = peekStack(stack);
51+
stack->top--;
52+
53+
return result;
5654
}
5755

58-
void printStack(ScannerStack *stack, char *msg) {
56+
static void printStack(ScannerStack *stack, char *msg) {
5957
LOG("%s Stack[top = %d; ", msg, stack->top);
6058
for (int i = 0; i < stack->top; i++) {
6159
LOG("%d | ", stack->stack[i]);
6260
}
6361
LOG("]\n");
6462
}
6563

66-
unsigned serialiseStack(ScannerStack *stack, char *buf) {
64+
static unsigned serialiseStack(ScannerStack *stack, char *buf) {
6765
int elements = isEmptyStack(stack) ? 0 : stack->top;
6866
if (elements < 0) {
6967
elements = 0;
@@ -78,22 +76,15 @@ unsigned serialiseStack(ScannerStack *stack, char *buf) {
7876
return result_length;
7977
}
8078

81-
void deserialiseStack(ScannerStack* stack, const char* buf, unsigned n) {
82-
if (n != 0) {
79+
static void deserialiseStack(ScannerStack* stack, const char* buf, unsigned length) {
80+
if (length != 0) {
8381
int *intBuf = (int *)buf;
8482

85-
unsigned elements = n / sizeof(int) - 3;
86-
stack->top = elements;
83+
unsigned elements = length / sizeof(int) - 3;
84+
stack->top = (int)elements;
8785
memcpy(stack->stack, intBuf, elements * sizeof(int));
8886
stack->last_indentation_size = intBuf[elements];
8987
stack->last_newline_count = intBuf[elements + 1];
9088
stack->last_column = intBuf[elements + 2];
9189
}
9290
}
93-
94-
void resetStack(ScannerStack *p) {
95-
p->top = 0;
96-
p->last_indentation_size = -1;
97-
p->last_newline_count = 0;
98-
p->last_column = -1;
99-
}

test/test-stack.c

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,6 @@ int main() {
4040
assert(newStack -> top == 100);
4141
assert(popStack(newStack) == 99);
4242

43-
resetStack(newStack);
44-
45-
assert(isEmptyStack(newStack));
46-
4743
printStack(stack, "hello");
4844
printStack(newStack, "hello");
4945
return 0;

0 commit comments

Comments (0)