Skip to content

Commit daced5a

Browse files
committed
regexec.c: Use ANYOF bitmap lookup in more cases
ANYOFish nodes have a bitmap. If we know the value is in the bitmap range, then flags that apply only to out-of-range values are irrelevant. Any other flag being set indicates that the desired answer is more complicated than a simple bitmap lookup. So, when we know the value is in the bitmap range, exclude the irrelevant flag (ANYOF_MATCHES_ALL_ABOVE_BITMAP) from that calculation. There are other flags that could also be excluded, but not without further conditionals or unsharing code, and they are either rarely set or apply to node types for which we don't worry so much about optimal performance, like /l and /d. The changes introduced by this commit are determined at .c compile time, except for a runtime mask, and hence don't introduce new branches that could disrupt the instruction pipeline.
1 parent 5b86415 commit daced5a

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

regexec.c

+6-3
Original file line numberDiff line numberDiff line change
@@ -2221,7 +2221,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
22212221
REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
22222222
reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
22232223
}
2224-
else if (ANYOF_FLAGS(c)) {
2224+
else if (ANYOF_FLAGS(c) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
2225+
/* We know that s is in the bitmap range since the target isn't
2226+
* UTF-8, so what happens for out-of-range values is not relevant,
2227+
* so exclude that from the flags */
22252228
REXEC_FBC_CLASS_SCAN(0, reginclass(prog,c, (U8*)s, (U8*)s+1, 0));
22262229
}
22272230
else {
@@ -6701,7 +6704,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
67016704
if (NEXTCHR_IS_EOS)
67026705
sayNO;
67036706
if ( (! utf8_target || UTF8_IS_INVARIANT(*locinput))
6704-
&& ! ANYOF_FLAGS(scan))
6707+
&& ! (ANYOF_FLAGS(scan) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP))
67056708
{
67066709
if (! ANYOF_BITMAP_TEST(scan, * (U8 *) (locinput))) {
67076710
sayNO;
@@ -9363,7 +9366,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
93639366
hardcount++;
93649367
}
93659368
}
9366-
else if (ANYOF_FLAGS(p)) {
9369+
else if (ANYOF_FLAGS(p) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
93679370
while (scan < loceol
93689371
&& reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
93699372
scan++;

0 commit comments

Comments
 (0)