1
- #include "tag.h"
2
-
3
1
#include <wctype.h>
2
+ #include "tree_sitter/array.h"
3
+ #include "tag.h"
4
4
5
5
enum TokenType {
6
6
START_TAG_NAME ,
@@ -18,115 +18,27 @@ enum TokenType {
18
18
};
19
19
20
20
typedef struct {
21
- uint32_t len ;
22
- uint32_t cap ;
23
- Tag * data ;
24
- } tags_vec ;
25
-
26
- typedef struct {
27
- tags_vec tags ;
21
+ Array (Tag ) tags ;
28
22
} Scanner ;
29
23
30
24
/*
 * Larger of two values.
 *
 * The opening parenthesis must follow the macro name directly; with
 * whitespace in between the preprocessor defines an object-like macro and
 * every `MAX(x, y)` call fails to compile.
 *
 * Each argument may be evaluated twice, so do not pass expressions with
 * side effects (e.g. `MAX(i++, j)`).
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
31
25
32
- #define VEC_RESIZE (vec , _cap ) \
33
- if ((_cap) > (vec).cap && (_cap) > 0) { \
34
- void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \
35
- assert(tmp != NULL); \
36
- (vec).data = tmp; \
37
- (vec).cap = (_cap); \
38
- }
39
-
40
- #define VEC_GROW (vec , _cap ) \
41
- if ((vec).cap < (_cap)) { \
42
- VEC_RESIZE((vec), (_cap)); \
43
- }
44
-
45
- #define VEC_PUSH (vec , el ) \
46
- if ((vec).cap == (vec).len) { \
47
- VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \
48
- } \
49
- (vec).data[(vec).len++] = (el);
50
-
51
- #define VEC_POP (vec ) \
52
- { \
53
- if (VEC_BACK(vec).type == CUSTOM) { \
54
- tag_free(&VEC_BACK(vec)); \
55
- } \
56
- (vec).len--; \
57
- }
58
-
59
- #define VEC_BACK (vec ) ((vec).data[(vec).len - 1])
60
-
61
- #define VEC_FREE (vec ) \
62
- { \
63
- if ((vec).data != NULL) \
64
- free((vec).data); \
65
- (vec).data = NULL; \
66
- }
67
-
68
- #define VEC_CLEAR (vec ) \
69
- { \
70
- for (int i = 0; i < (vec).len; i++) { \
71
- tag_free(&(vec).data[i]); \
72
- } \
73
- (vec).len = 0; \
74
- }
75
-
76
- #define STRING_RESIZE (vec , _cap ) \
77
- void *tmp = realloc((vec).data, ((_cap) + 1) * sizeof((vec).data[0])); \
78
- assert(tmp != NULL); \
79
- (vec).data = tmp; \
80
- memset((vec).data + (vec).len, 0, (((_cap) + 1) - (vec).len) * sizeof((vec).data[0])); \
81
- (vec).cap = (_cap);
82
-
83
- #define STRING_GROW (vec , _cap ) \
84
- if ((vec).cap < (_cap)) { \
85
- STRING_RESIZE((vec), (_cap)); \
86
- }
87
-
88
- #define STRING_PUSH (vec , el ) \
89
- if ((vec).cap == (vec).len) { \
90
- STRING_RESIZE((vec), MAX(16, (vec).len * 2)); \
91
- } \
92
- (vec).data[(vec).len++] = (el);
93
-
94
- #define STRING_INIT (vec ) \
95
- { \
96
- (vec).data = calloc(1, sizeof(char) * 17); \
97
- (vec).len = 0; \
98
- (vec).cap = 16; \
99
- }
100
-
101
- #define STRING_FREE (vec ) \
102
- { \
103
- if ((vec).data != NULL) \
104
- free((vec).data); \
105
- (vec).data = NULL; \
106
- }
107
-
108
- #define STRING_CLEAR (vec ) \
109
- { \
110
- (vec).len = 0; \
111
- memset((vec).data, 0, (vec).cap * sizeof(char)); \
112
- }
113
-
114
26
/**
 * Move the lexer forward by one character via its `advance` callback,
 * passing `false` as the second argument.
 * NOTE(review): presumably `false` means the character is kept as part of
 * the current token — confirm against the tree-sitter lexer API.
 */
static inline void advance(TSLexer *lexer) {
    const bool as_skipped = false;
    lexer->advance(lexer, as_skipped);
}
115
27
116
28
/**
 * Move the lexer forward by one character via its `advance` callback,
 * passing `true` as the second argument.
 * NOTE(review): presumably `true` means the character is excluded from the
 * current token — confirm against the tree-sitter lexer API.
 */
static inline void skip(TSLexer *lexer) {
    const bool as_skipped = true;
    lexer->advance(lexer, as_skipped);
}
117
29
118
30
static unsigned serialize (Scanner * scanner , char * buffer ) {
119
- uint16_t tag_count = scanner -> tags .len > UINT16_MAX ? UINT16_MAX : scanner -> tags .len ;
31
+ uint16_t tag_count = scanner -> tags .size > UINT16_MAX ? UINT16_MAX : scanner -> tags .size ;
120
32
uint16_t serialized_tag_count = 0 ;
121
33
122
34
unsigned size = sizeof (tag_count );
123
35
memcpy (& buffer [size ], & tag_count , sizeof (tag_count ));
124
36
size += sizeof (tag_count );
125
37
126
38
for (; serialized_tag_count < tag_count ; serialized_tag_count ++ ) {
127
- Tag tag = scanner -> tags .data [serialized_tag_count ];
39
+ Tag tag = scanner -> tags .contents [serialized_tag_count ];
128
40
if (tag .type == CUSTOM ) {
129
- unsigned name_length = tag .custom_tag_name .len ;
41
+ unsigned name_length = tag .custom_tag_name .size ;
130
42
if (name_length > UINT8_MAX ) {
131
43
name_length = UINT8_MAX ;
132
44
}
@@ -135,7 +47,7 @@ static unsigned serialize(Scanner *scanner, char *buffer) {
135
47
}
136
48
buffer [size ++ ] = (char )tag .type ;
137
49
buffer [size ++ ] = (char )name_length ;
138
- strncpy (& buffer [size ], tag .custom_tag_name .data , name_length );
50
+ strncpy (& buffer [size ], tag .custom_tag_name .contents , name_length );
139
51
size += name_length ;
140
52
} else {
141
53
if (size + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE ) {
@@ -150,7 +62,11 @@ static unsigned serialize(Scanner *scanner, char *buffer) {
150
62
}
151
63
152
64
static void deserialize (Scanner * scanner , const char * buffer , unsigned length ) {
153
- VEC_CLEAR (scanner -> tags );
65
+ for (unsigned i = 0 ; i < scanner -> tags .size ; i ++ ) {
66
+ tag_free (& scanner -> tags .contents [i ]);
67
+ }
68
+ array_clear (& scanner -> tags );
69
+
154
70
if (length > 0 ) {
155
71
unsigned size = 0 ;
156
72
uint16_t tag_count = 0 ;
@@ -162,37 +78,34 @@ static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
162
78
memcpy (& tag_count , & buffer [size ], sizeof (tag_count ));
163
79
size += sizeof (tag_count );
164
80
165
- VEC_RESIZE ( scanner -> tags , tag_count );
81
+ array_reserve ( & scanner -> tags , tag_count );
166
82
if (tag_count > 0 ) {
167
83
unsigned iter = 0 ;
168
84
for (iter = 0 ; iter < serialized_tag_count ; iter ++ ) {
169
- Tag tag = scanner -> tags . data [ iter ] ;
85
+ Tag tag = tag_new () ;
170
86
tag .type = (TagType )buffer [size ++ ];
171
87
if (tag .type == CUSTOM ) {
172
88
uint16_t name_length = (uint8_t )buffer [size ++ ];
173
- tag .custom_tag_name .len = name_length ;
174
- tag .custom_tag_name .cap = name_length ;
175
- tag .custom_tag_name .data = (char * )calloc (1 , sizeof (char ) * (name_length + 1 ));
176
- strncpy (tag .custom_tag_name .data , & buffer [size ], name_length );
89
+ array_reserve (& tag .custom_tag_name , name_length );
90
+ tag .custom_tag_name .size = name_length ;
91
+ memcpy (tag .custom_tag_name .contents , & buffer [size ], name_length );
177
92
size += name_length ;
178
93
}
179
- VEC_PUSH ( scanner -> tags , tag );
94
+ array_push ( & scanner -> tags , tag );
180
95
}
181
96
// add zero tags if we didn't read enough, this is because the
182
97
// buffer had no more room but we held more tags.
183
98
for (; iter < tag_count ; iter ++ ) {
184
- Tag tag = new_tag ();
185
- VEC_PUSH (scanner -> tags , tag );
99
+ array_push (& scanner -> tags , tag_new ());
186
100
}
187
101
}
188
102
}
189
103
}
190
104
191
105
static String scan_tag_name (TSLexer * lexer ) {
192
- String tag_name ;
193
- STRING_INIT (tag_name );
106
+ String tag_name = array_new ();
194
107
while (iswalnum (lexer -> lookahead ) || lexer -> lookahead == '-' || lexer -> lookahead == ':' ) {
195
- STRING_PUSH ( tag_name , towupper (lexer -> lookahead ));
108
+ array_push ( & tag_name , towupper (lexer -> lookahead ));
196
109
advance (lexer );
197
110
}
198
111
return tag_name ;
@@ -230,13 +143,13 @@ static bool scan_comment(TSLexer *lexer) {
230
143
}
231
144
232
145
static bool scan_raw_text (Scanner * scanner , TSLexer * lexer ) {
233
- if (scanner -> tags .len == 0 ) {
146
+ if (scanner -> tags .size == 0 ) {
234
147
return false;
235
148
}
236
149
237
150
lexer -> mark_end (lexer );
238
151
239
- const char * end_delimiter = VEC_BACK ( scanner -> tags ). type == SCRIPT ? "</SCRIPT" : "</STYLE" ;
152
+ const char * end_delimiter = array_back ( & scanner -> tags )-> type == SCRIPT ? "</SCRIPT" : "</STYLE" ;
240
153
241
154
unsigned delimiter_index = 0 ;
242
155
while (lexer -> lookahead ) {
@@ -258,70 +171,73 @@ static bool scan_raw_text(Scanner *scanner, TSLexer *lexer) {
258
171
}
259
172
260
173
/**
 * Try to emit an IMPLICIT_END_TAG for the tag on top of the tag stack.
 *
 * Reads the name of the upcoming tag and pops the top stack entry when that
 * entry cannot stay open:
 *   - the top entry is a void tag and the upcoming tag is not a closing tag;
 *   - the upcoming tag is a closing tag that does not match the top entry
 *     but has the same type as some entry in the stack (malformed HTML);
 *   - the upcoming tag is an opening tag the top entry cannot contain;
 *   - the input ends while an HTML, HEAD or BODY tag is on top.
 *
 * At most one stack entry is popped per call.
 *
 * @param scanner  Scanner state holding the stack of open tags.
 * @param lexer    Lexer used to read the tag name; when an implicit end tag
 *                 is produced its result_symbol is set to IMPLICIT_END_TAG.
 * @return true if an implicit end tag was produced, false otherwise.
 */
static bool scan_implicit_end_tag(Scanner *scanner, TSLexer *lexer) {
    Tag *parent = scanner->tags.size == 0 ? NULL : array_back(&scanner->tags);

    bool is_closing_tag = false;
    if (lexer->lookahead == '/') {
        is_closing_tag = true;
        advance(lexer);
    } else if (parent && tag_is_void(parent)) {
        // A void parent is closed as soon as anything but a closing tag
        // follows. Release the popped tag like every other pop below.
        Tag popped_tag = array_pop(&scanner->tags);
        tag_free(&popped_tag);
        lexer->result_symbol = IMPLICIT_END_TAG;
        return true;
    }

    // An empty name is only tolerated at end of input, so that the
    // end-of-input rule for HTML/HEAD/BODY below can still apply.
    String tag_name = scan_tag_name(lexer);
    if (tag_name.size == 0 && !lexer->eof(lexer)) {
        array_delete(&tag_name);
        return false;
    }

    // tag_name is not released separately after this point.
    // NOTE(review): tag_for_name appears to take ownership of the string;
    // confirm in tag.h.
    Tag next_tag = tag_for_name(tag_name);

    if (is_closing_tag) {
        // The tag correctly closes the topmost element on the stack
        if (scanner->tags.size > 0 && tag_eq(array_back(&scanner->tags), &next_tag)) {
            tag_free(&next_tag);
            return false;
        }

        // Otherwise, dig deeper and queue implicit end tags (to be nice in
        // the case of malformed HTML)
        for (unsigned i = scanner->tags.size; i > 0; i--) {
            if (scanner->tags.contents[i - 1].type == next_tag.type) {
                Tag popped_tag = array_pop(&scanner->tags);
                tag_free(&popped_tag);
                lexer->result_symbol = IMPLICIT_END_TAG;
                tag_free(&next_tag);
                return true;
            }
        }
    } else if (
        parent &&
        (
            !tag_can_contain(parent, &next_tag) ||
            ((parent->type == HTML || parent->type == HEAD || parent->type == BODY) && lexer->eof(lexer))
        )
    ) {
        Tag popped_tag = array_pop(&scanner->tags);
        tag_free(&popped_tag);
        lexer->result_symbol = IMPLICIT_END_TAG;
        tag_free(&next_tag);
        return true;
    }

    tag_free(&next_tag);
    return false;
}
316
231
317
232
static bool scan_start_tag_name (Scanner * scanner , TSLexer * lexer ) {
318
233
String tag_name = scan_tag_name (lexer );
319
- if (tag_name .len == 0 ) {
320
- STRING_FREE ( tag_name );
234
+ if (tag_name .size == 0 ) {
235
+ array_delete ( & tag_name );
321
236
return false;
322
237
}
323
- Tag tag = for_name (tag_name .data );
324
- VEC_PUSH (scanner -> tags , tag );
238
+
239
+ Tag tag = tag_for_name (tag_name );
240
+ array_push (& scanner -> tags , tag );
325
241
switch (tag .type ) {
326
242
case SCRIPT :
327
243
lexer -> result_symbol = SCRIPT_START_TAG_NAME ;
@@ -333,34 +249,37 @@ static bool scan_start_tag_name(Scanner *scanner, TSLexer *lexer) {
333
249
lexer -> result_symbol = START_TAG_NAME ;
334
250
break ;
335
251
}
336
- STRING_FREE (tag_name );
337
252
return true;
338
253
}
339
254
340
255
/**
 * Scan the name of an end tag and classify it.
 *
 * If the name compares equal (tag_eq) to the tag on top of the stack, that
 * entry is popped and released and the result symbol is END_TAG_NAME;
 * otherwise the stack is left alone and the result symbol is
 * ERRONEOUS_END_TAG_NAME.
 *
 * @param scanner  Scanner state holding the stack of open tags.
 * @param lexer    Lexer positioned at the start of the tag name.
 * @return false if no tag name could be read, true otherwise.
 */
static bool scan_end_tag_name(Scanner *scanner, TSLexer *lexer) {
    String name = scan_tag_name(lexer);
    if (name.size == 0) {
        array_delete(&name);
        return false;
    }

    Tag closing = tag_for_name(name);
    bool closes_top = scanner->tags.size > 0 && tag_eq(array_back(&scanner->tags), &closing);

    if (closes_top) {
        Tag top = array_pop(&scanner->tags);
        tag_free(&top);
    }
    lexer->result_symbol = closes_top ? END_TAG_NAME : ERRONEOUS_END_TAG_NAME;

    tag_free(&closing);
    return true;
}
357
275
358
276
static bool scan_self_closing_tag_delimiter (Scanner * scanner , TSLexer * lexer ) {
359
277
advance (lexer );
360
278
if (lexer -> lookahead == '>' ) {
361
279
advance (lexer );
362
- if (scanner -> tags .len > 0 ) {
363
- VEC_POP (scanner -> tags );
280
+ if (scanner -> tags .size > 0 ) {
281
+ Tag popped_tag = array_pop (& scanner -> tags );
282
+ tag_free (& popped_tag );
364
283
lexer -> result_symbol = SELF_CLOSING_TAG_DELIMITER ;
365
284
}
366
285
return true;
@@ -369,9 +288,6 @@ static bool scan_self_closing_tag_delimiter(Scanner *scanner, TSLexer *lexer) {
369
288
}
370
289
371
290
static bool scan (Scanner * scanner , TSLexer * lexer , const bool * valid_symbols ) {
372
- if (scanner -> tags .len > 0 ) {
373
- Tag * parent = & VEC_BACK (scanner -> tags );
374
- }
375
291
if (valid_symbols [RAW_TEXT ] && !valid_symbols [START_TAG_NAME ] && !valid_symbols [END_TAG_NAME ]) {
376
292
return scan_raw_text (scanner , lexer );
377
293
}
@@ -439,9 +355,9 @@ void tree_sitter_html_external_scanner_deserialize(void *payload, const char *bu
439
355
440
356
void tree_sitter_html_external_scanner_destroy (void * payload ) {
441
357
Scanner * scanner = (Scanner * )payload ;
442
- for (unsigned i = 0 ; i < scanner -> tags .len ; i ++ ) {
443
- STRING_FREE ( scanner -> tags .data [i ]. custom_tag_name );
358
+ for (unsigned i = 0 ; i < scanner -> tags .size ; i ++ ) {
359
+ tag_free ( & scanner -> tags .contents [i ]);
444
360
}
445
- VEC_FREE ( scanner -> tags );
361
+ array_delete ( & scanner -> tags );
446
362
free (scanner );
447
363
}
0 commit comments