Skip to content

Commit b3ae442

Browse files
committed
improve line parsing, using regex to extract labels
Using a regex to parse each line makes the code easier to read and reason about. It uses capture groups to extract the exact substrings we need, removing the need for extra whitespace trimming and string splitting. Benchmarking showed that it even performs slightly better on the ESP32, while increasing committed memory by only 64 bytes compared with the previous algorithm (measured after garbage collection).
1 parent 6fa631a commit b3ae442

File tree

2 files changed

+17
-23
lines changed

2 files changed

+17
-23
lines changed

esp32_ulp/assemble.py

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
ESP32 ULP Co-Processor Assembler
33
"""
44

5+
import re
56
from . import opcodes
67
from .nocomment import remove_comments as do_remove_comments
78
from .util import garbage_collect
@@ -91,6 +92,12 @@ def __init__(self, symbols=None, bases=None, globals=None):
9192
self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {})
9293
opcodes.symbols = self.symbols # XXX dirty hack
9394

95+
# regex for parsing assembly lines
96+
# format: [[whitespace]label:][whitespace][opcode[whitespace arg[,arg...]]]
97+
# where [] means optional
98+
# initialised here once, instead of compiling once per line
99+
self.line_regex = re.compile(r'^(\s*([a-zA-Z0-9_$.]+):)?\s*((\S*)\s*(.*))$')
100+
94101
def init(self, a_pass):
95102
self.a_pass = a_pass
96103
self.sections = dict(text=[], data=[])
@@ -108,29 +115,14 @@ def parse_line(self, line):
108115
"""
109116
if not line:
110117
return
111-
has_label = ':' in line
112-
if has_label:
113-
orig_line = line.strip()
114-
label_line = orig_line.split(':', 1)
115-
if len(label_line) == 2:
116-
label, line = label_line
117-
else: # 1
118-
label, line = label_line[0], None
119-
120-
if label.strip('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_$.'): # if any chars remain
121-
# if label contains other chars than allowed, it's not a label
122-
label, line = None, orig_line
123-
else:
124-
label, line = None, line.lstrip()
125-
if not line:
126-
opcode, args = None, ()
127-
else:
128-
opcode_args = line.split(None, 1)
129-
if len(opcode_args) == 2:
130-
opcode, args = opcode_args
131-
args = tuple(arg.strip() for arg in args.split(','))
132-
else: # 1
133-
opcode, args = opcode_args[0], ()
118+
119+
matches = self.line_regex.match(line)
120+
label, opcode, args = matches.group(2), matches.group(4), matches.group(5)
121+
122+
label = label if label else None # force empty strings to None
123+
opcode = opcode if opcode else None # force empty strings to None
124+
args = tuple(arg.strip() for arg in args.split(',')) if args else ()
125+
134126
return label, opcode, args
135127

136128
def split_statements(self, lines):

tests/assemble.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def test_parse_labels_correctly():
6060
https://sourceware.org/binutils/docs/as/Labels.html
6161
"""
6262
a = Assembler()
63+
assert a.parse_line('') is None
6364
assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',))
6465
assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',))
6566
assert a.parse_line('label:') == ('label', None, ())
@@ -75,6 +76,7 @@ def test_parse_labels_correctly():
7576
assert a.parse_line('a_label:') == ('a_label', None, ())
7677
assert a.parse_line('$label:') == ('$label', None, ())
7778
assert a.parse_line('.label:') == ('.label', None, ())
79+
assert a.parse_line('&label:') == (None, '&label:', ()) # & not a valid char in a label
7880

7981

8082
def test_parse():

0 commit comments

Comments
 (0)