Skip to content

feat: Add PEP 750 template string support #305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ typedef enum {
Format = 1 << 4,
Triple = 1 << 5,
Bytes = 1 << 6,
Template = 1 << 7,
} Flags;

typedef struct {
Expand All @@ -45,6 +46,8 @@ static inline bool is_triple(Delimiter *delimiter) { return delimiter->flags & T

// Reports whether the Bytes flag bit is set on the given delimiter.
static inline bool is_bytes(Delimiter *delimiter) {
    return (delimiter->flags & Bytes) != 0;
}

// Reports whether the Template flag bit is set on the given delimiter.
// NOTE(review): Template is 1 << 7; if `flags` is a signed 8-bit field this
// is its sign bit. The mask test below still works after integer promotion,
// but confirm the field type.
static inline bool is_template(Delimiter *delimiter) {
    return (delimiter->flags & Template) != 0;
}

static inline int32_t end_character(Delimiter *delimiter) {
if (delimiter->flags & SingleQuote) {
return '\'';
Expand All @@ -66,6 +69,8 @@ static inline void set_triple(Delimiter *delimiter) { delimiter->flags |= Triple

// Turns on the Bytes flag bit of the delimiter; other bits are left untouched.
static inline void set_bytes(Delimiter *delimiter) {
    delimiter->flags |= Bytes;
}

// Marks the delimiter as a template string. The Format bit is switched on as
// well, so every is_format() check also holds for a template delimiter.
static inline void set_template(Delimiter *delimiter) {
    delimiter->flags |= Template;
    delimiter->flags |= Format;
}

static inline void set_end_character(Delimiter *delimiter, int32_t character) {
switch (character) {
case '\'':
Expand All @@ -85,7 +90,7 @@ static inline void set_end_character(Delimiter *delimiter, int32_t character) {
typedef struct {
Array(uint16_t) indents;
Array(Delimiter) delimiters;
bool inside_f_string;
bool inside_interpolated_string;
} Scanner;

// Convenience wrapper: invokes the lexer's own `advance` callback on itself,
// always passing `false` as the second argument.
static inline void advance(TSLexer *lexer) {
    lexer->advance(lexer, false);
}
Expand All @@ -102,7 +107,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.size > 0 &&
(lexer->lookahead == '{' || lexer->lookahead == '}') && !error_recovery_mode) {
Delimiter *delimiter = array_back(&scanner->delimiters);
if (is_format(delimiter)) {
if (is_format(delimiter) || is_template(delimiter)) {
lexer->mark_end(lexer);
bool is_left_brace = lexer->lookahead == '{';
advance(lexer);
Expand All @@ -122,7 +127,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
int32_t end_char = end_character(delimiter);
bool has_content = advanced_once;
while (lexer->lookahead) {
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && is_format(delimiter)) {
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && (is_format(delimiter) || is_template(delimiter))) {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return has_content;
Expand Down Expand Up @@ -177,7 +182,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
lexer->mark_end(lexer);
array_pop(&scanner->delimiters);
lexer->result_symbol = STRING_END;
scanner->inside_f_string = false;
scanner->inside_interpolated_string = false;
}
return true;
}
Expand All @@ -195,7 +200,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
advance(lexer);
array_pop(&scanner->delimiters);
lexer->result_symbol = STRING_END;
scanner->inside_f_string = false;
scanner->inside_interpolated_string = false;
}
lexer->mark_end(lexer);
return true;
Expand Down Expand Up @@ -280,7 +285,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
if ((valid_symbols[DEDENT] ||
(!valid_symbols[NEWLINE] && !(valid_symbols[STRING_START] && next_tok_is_string_start) &&
!within_brackets)) &&
indent_length < current_indent_length && !scanner->inside_f_string &&
indent_length < current_indent_length && !scanner->inside_interpolated_string &&

// Wait to create a dedent token until we've consumed any
// comments
Expand Down Expand Up @@ -309,6 +314,8 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
set_raw(&delimiter);
} else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
set_bytes(&delimiter);
} else if (lexer->lookahead == 't' || lexer->lookahead == 'T') {
set_template(&delimiter);
} else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') {
break;
}
Expand Down Expand Up @@ -349,7 +356,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
if (end_character(&delimiter)) {
array_push(&scanner->delimiters, delimiter);
lexer->result_symbol = STRING_START;
scanner->inside_f_string = is_format(&delimiter);
scanner->inside_interpolated_string = is_format(&delimiter) || is_template(&delimiter);
return true;
}
if (has_flags) {
Expand All @@ -365,7 +372,7 @@ unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buff

size_t size = 0;

buffer[size++] = (char)scanner->inside_f_string;
buffer[size++] = (char)scanner->inside_interpolated_string;

size_t delimiter_count = scanner->delimiters.size;
if (delimiter_count > UINT8_MAX) {
Expand Down Expand Up @@ -398,7 +405,7 @@ void tree_sitter_python_external_scanner_deserialize(void *payload, const char *
if (length > 0) {
size_t size = 0;

scanner->inside_f_string = (bool)buffer[size++];
scanner->inside_interpolated_string = (bool)buffer[size++];

size_t delimiter_count = (uint8_t)buffer[size++];
if (delimiter_count > 0) {
Expand Down
152 changes: 152 additions & 0 deletions test/corpus/template_strings.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
================================================================================
Simple template string
================================================================================

t"Hello, {name}!"

--------------------------------------------------------------------------------

(module
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier))
(string_content)
(string_end))))

================================================================================
Template string with format spec
================================================================================

t"Price: {price:.2f}"

--------------------------------------------------------------------------------

(module
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier)
(format_specifier))
(string_end))))

================================================================================
Raw template string
================================================================================

tr"Path: {path}\n"

--------------------------------------------------------------------------------

(module
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier))
(string_content)
(string_end))))

================================================================================
Triple quoted template string
================================================================================

t"""
Multi-line template
with {variable}
"""

--------------------------------------------------------------------------------

(module
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier))
(string_content)
(string_end))))

================================================================================
Template string with multiple interpolations
================================================================================

t"Hello {first_name} {last_name}!"

--------------------------------------------------------------------------------

(module
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier))
(string_content)
(interpolation
(identifier))
(string_content)
(string_end))))

================================================================================
Template string with expression
================================================================================

t"Result: {a + b}"

--------------------------------------------------------------------------------

(module
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(binary_operator
(identifier)
(identifier)))
(string_end))))

================================================================================
Template string with escaped braces
================================================================================

t"Use {{braces}} to escape"

--------------------------------------------------------------------------------

(module
(expression_statement
(string
(string_start)
(string_content
(escape_interpolation)
(escape_interpolation))
(string_end))))

================================================================================
Template string with format spec (copy from f-string)
================================================================================

t"a {b:2} {c:34.5}"

--------------------------------------------------------------------------------

(module
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier)
(format_specifier))
(string_content)
(interpolation
(identifier)
(format_specifier))
(string_end))))