-
Notifications
You must be signed in to change notification settings - Fork 5
/
phase1.cpp
207 lines (165 loc) · 3.38 KB
/
phase1.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#include "phase1.h"
#include <cassert>
// States of the phase1::process() state machine.
//
// Layout invariant: every `*_esc` state has the value of the state it belongs
// to plus one.  process() relies on this when it leaves an escape state with
// `st - 1`, so do not reorder or insert enumerators without keeping each
// pair adjacent.
enum {
    // Plain text, outside any comment or quoted construct.
    st_text = 0,
    st_text_esc = st_text + 1,

    // '#' comment, up to the end of the line.
    st_comment = 2,
    st_comment_esc = st_comment + 1,

    // '{' ... '}'
    st_vstring = 4,
    st_vstring_esc = st_vstring + 1,

    // '"' ... '"'
    st_dstring = 6,
    st_dstring_esc = st_dstring + 1,

    // '\'' ... '\''
    st_sstring = 8,
    st_sstring_esc = st_sstring + 1,

    // '`' ... '`' or '``' ... '``'
    st_estring = 10,                      // just saw the opening '`'
    st_estring1 = 11,                     // inside the single-backquote form
    st_estring1_esc = st_estring1 + 1,
    st_estring2 = 13,                     // inside the double-backquote form
    st_estring2_esc = st_estring2 + 1,
    st_estring3 = 15,                     // saw one '`' of a closing '``'

    // '/' ... '/'
    st_fstring = 16,
    st_fstring_esc = st_fstring + 1,

    // '\\' ... '\\'
    st_bstring = 18,
    st_bstring_esc = st_bstring + 1,
};
int phase1::process(unsigned char c, int st) {
const unsigned char esc = 0xb6;
if (c == '\r' || c == '\n') {
switch (st) {
case st_text:
case st_comment:
default: // will error later.
flush();
multiline = false;
line++;
return st_text;
case st_comment_esc:
multiline = true;
line++;
return st_text;
case st_text_esc:
case st_vstring_esc:
case st_dstring_esc:
case st_sstring_esc:
case st_estring1_esc:
case st_estring2_esc:
case st_fstring_esc:
case st_bstring_esc:
multiline = true;
scratch.pop_back();
line++;
return st - 1;
}
}
if (st != st_comment) scratch.push_back(c);
switch(st) {
case st_text:
text:
switch(c) {
case '#':
scratch.pop_back();
return st_comment;
case esc:
return st_text_esc;
case '{':
return st_vstring;
case '"':
return st_dstring;
case '\'':
return st_sstring;
case '`':
return st_estring;
case '/':
return st_fstring;
case '\\':
return st_bstring;
default:
return st_text;
}
break;
case st_comment:
if (c == esc) return st_comment_esc;
return st_comment;
break;
case st_comment_esc:
case st_text_esc:
case st_dstring_esc:
case st_estring1_esc:
case st_estring2_esc:
return st-1;
break;
case st_sstring_esc:
// fall through
case st_sstring:
if (c == '\'') return st_text;
if (c == esc) return st_sstring_esc;
return st_sstring;
break;
case st_fstring_esc:
// fall through
case st_fstring:
if (c == '/') return st_text;
if (c == esc) return st_fstring_esc;
return st_fstring;
break;
case st_bstring_esc:
// fall through
case st_bstring:
if (c == '\\') return st_text;
if (c == esc) return st_bstring_esc;
return st_bstring;
break;
case st_dstring:
if (c == '\"') return st_text;
if (c == esc) return st_dstring_esc;
return st_dstring;
break;
case st_vstring_esc:
// fall through
case st_vstring:
// '{' var '}' or '{{' var '}}'
// don't care if {{ or { at this point. A single } terminates.
if (c == '}') return st_text;
if (c == esc) return st_vstring_esc;
return st_vstring;
case st_estring:
// ``...`` or `...`
if (c == '`') return st_estring2;
// fall through.
case st_estring1:
if (c == '`') return st_text;
if (c == esc) return st_estring1_esc;
return st_estring1;
case st_estring2:
if (c == '`') return st_estring3;
if (c == esc) return st_estring2_esc;
return st_estring2;
case st_estring3:
if (c == '`') return st_text;
// error! handled later.
goto text;
break;
}
assert(!"unknown state");
}
/*
 * Feed the bytes in [begin, end) through process(), one at a time and in
 * order, carrying the resulting state in `cs` between bytes and across
 * calls.
 */
void phase1::parse(const unsigned char *begin, const unsigned char *end) {
    for (const unsigned char *cursor = begin; cursor != end; ++cursor) {
        cs = process(*cursor, cs);
    }
}
/*
 * Signal end of input: run a final '\n' through process() and then flush
 * whatever is still held in scratch.
 */
void phase1::finish() {
    const unsigned char end_of_line = '\n';
    cs = process(end_of_line, cs);
    // process() does not flush when the line break was escaped, so flush
    // explicitly here.
    flush();
}
/*
 * Return to the initial condition: no pending text, line counter at 1,
 * not in a continued line, state st_text.  Pending text is discarded,
 * not flushed.
 */
void phase1::reset() {
    scratch.clear();
    line = 1;
    multiline = false;
    cs = st_text;
}
/*
 * Hand the accumulated text to the `_then` callback (if one is set) and
 * empty scratch.  Always clears `multiline`; does nothing else when scratch
 * is already empty.
 */
void phase1::flush() {
    multiline = false;
    if (!scratch.empty()) {
        if (_then) _then(std::move(scratch));
        // Leave scratch in a known empty state whether or not it was moved
        // from.
        scratch.clear();
    }
}