@@ -1195,18 +1195,17 @@ fn prefixModifier(patt: []RegOp, j: usize, op: RegOp) !usize {
1195
1195
// Try to detect multi-byte characters
1196
1196
switch (patt [find_j ]) {
1197
1197
.char = > | c | {
1198
- if (0x80 <= c and c <= 0x9f ) {
1199
- // Group a multi- byte.
1198
+ if (0x80 <= c and c <= 0xbf ) {
1199
+ // Go back to lead byte:
1200
1200
while (find_j > 0 and
1201
1201
patt [find_j ] == .char and
1202
1202
0x80 <= patt [find_j ].char and
1203
- patt [find_j ].char <= 0x9f ) : (find_j -= 1 )
1204
- {} // Move forward by two
1205
- if (find_j > 0 ) find_j -= 1 ;
1203
+ patt [find_j ].char <= 0xbf ) : (find_j -= 1 )
1204
+ {}
1206
1205
std .mem .copyBackwards (RegOp , patt [find_j + 2 .. ], patt [find_j .. j + 1 ]);
1207
1206
patt [find_j ] = op ;
1208
1207
patt [find_j + 1 ] = .left ;
1209
- patt [j + 1 ] = .right ;
1208
+ patt [j + 2 ] = .right ;
1210
1209
return 2 ;
1211
1210
}
1212
1211
},
@@ -1461,6 +1460,7 @@ fn compileRegex(RegexT: type, in: []const u8) ?RegexT {
1461
1460
}
1462
1461
const d1 , const c1 = parseByte (in [i .. ]) catch {
1463
1462
// This is fine, literal `}`
1463
+ // TODO: is it?
1464
1464
patt [j ] = RegOp { .char = '}' };
1465
1465
continue :dispatch ;
1466
1466
};
@@ -2020,7 +2020,17 @@ fn printPatternInternal(patt: []const RegOp) ?u8 {
2020
2020
switch (patt [j ]) {
2021
2021
.char ,
2022
2022
= > | op | {
2023
- std .debug .print ("{s} {u}" , .{ @tagName (patt [j ]), op });
2023
+ switch (op ) {
2024
+ 0... 0x3f = > {
2025
+ std .debug .print ("char 0x{x:0>2}" , .{op });
2026
+ },
2027
+ 0x40... 0x7e = > {
2028
+ std .debug .print ("char '{u}'" , .{op });
2029
+ },
2030
+ 0x7f... 0xff = > {
2031
+ std .debug .print ("char 0x{x:0>2}" , .{op });
2032
+ },
2033
+ }
2024
2034
},
2025
2035
.some ,
2026
2036
.up_to ,
@@ -2411,3 +2421,7 @@ test "Multibyte continues" {
2411
2421
// Byte-level pattern for the uppercase Greek letters: each of Α..Ω (U+0391..U+03A9)
// is encoded in UTF-8 as the lead byte 0xCE followed by one byte in 0x91..0xA9,
// so the parenthesised group covers one letter and `+` repeats that group.
// NOTE(review): presumably testMatchAll requires the pattern to match the whole
// input; its definition is not visible here — confirm.
test "Uppercase Greek" {
2412
2422
try testMatchAll ("(\\ xce[\\ x91-\\ xa9])+" , "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ" );
2413
2423
}
2424
+
2425
// Bounded repetition after a multi-byte character: λ (U+03BB) is the two-byte
// UTF-8 sequence 0xCE 0xBB, and the pattern places `{3,5}` directly after it.
// The input contains four λ, which lies inside the 3..5 range.
// NOTE(review): the test name suggests the quantifier is expected to apply to the
// whole two-byte sequence rather than only the trailing byte, and testMatchEnd
// presumably checks that the match runs to the end of the input — neither helper
// nor expectation is defined in this view; confirm.
+ test "M of N multibyte" {
2426
+ try testMatchEnd ("abλ{3,5}" , "abλλλλ" );
2427
+ }
0 commit comments