Skip to content

Commit 3f11a28

Browse files
committed
regexec engine - wrap and replace RX_OFFS() with better abstractions
RX_OFFS() exposes a bit too much about how capture buffers are represented. This adds RX_OFFS_START(), RX_OFFS_END() and RX_OFFS_VALID() to replace most of the uses of the RX_OFFS() macro or direct access to the rx->offs[] array. (We add RX_OFFSp() for those rare cases that should have direct access to the array.) This allows us to replace this logic with more complicated macros in the future. Pretty much anything using RX_OFFS() is going to be broken by future changes, so changing the define allows us to track it down easily. Not all uses of the rx->offs[] array are converted; some uses are required for the regex engine internals, but anything outside of the regex engine should be using the replacement macros, and most things in the regex internals should use them also.
1 parent 67244d9 commit 3f11a28

File tree

8 files changed

+88
-87
lines changed

8 files changed

+88
-87
lines changed

mg.c

+3-5
Original file line numberDiff line numberDiff line change
@@ -643,9 +643,7 @@ Perl_magic_regdata_cnt(pTHX_ SV *sv, MAGIC *mg)
643643
I32 paren = RX_LASTPAREN(rx);
644644

645645
/* return the last filled */
646-
while ( paren >= 0
647-
&& (RX_OFFS(rx)[paren].start == -1
648-
|| RX_OFFS(rx)[paren].end == -1) )
646+
while ( paren >= 0 && !RX_OFFS_VALID(rx,paren) )
649647
paren--;
650648
if (n == '-') {
651649
/* @- */
@@ -680,8 +678,8 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg)
680678
if (paren < 0)
681679
return 0;
682680
if (paren <= (I32)RX_NPARENS(rx) &&
683-
(s = RX_OFFS(rx)[paren].start) != -1 &&
684-
(t = RX_OFFS(rx)[paren].end) != -1)
681+
((s = RX_OFFS_START(rx,paren)) != -1) &&
682+
((t = RX_OFFS_END(rx,paren)) != -1))
685683
{
686684
SSize_t i;
687685

pp.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -6503,7 +6503,7 @@ PP(pp_split)
65036503
/* we never pass the REXEC_COPY_STR flag, so it should
65046504
* never get copied */
65056505
assert(!RX_MATCH_COPIED(rx));
6506-
m = RX_OFFS(rx)[0].start + orig;
6506+
m = RX_OFFS_START(rx,0) + orig;
65076507

65086508
if (gimme_scalar) {
65096509
iters++;
@@ -6518,8 +6518,8 @@ PP(pp_split)
65186518
if (RX_NPARENS(rx)) {
65196519
I32 i;
65206520
for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
6521-
s = RX_OFFS(rx)[i].start + orig;
6522-
m = RX_OFFS(rx)[i].end + orig;
6521+
s = orig + RX_OFFS_START(rx,i);
6522+
m = orig + RX_OFFS_END(rx,i);
65236523

65246524
/* japhy (07/27/01) -- the (m && s) test doesn't catch
65256525
parens that didn't match -- they should be set to
@@ -6541,7 +6541,7 @@ PP(pp_split)
65416541

65426542
}
65436543
}
6544-
s = RX_OFFS(rx)[0].end + orig;
6544+
s = RX_OFFS_END(rx,0) + orig;
65456545
}
65466546
}
65476547

pp_ctl.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -320,14 +320,14 @@ PP(pp_substcont)
320320
s = orig + (m - s);
321321
cx->sb_strend = s + (cx->sb_strend - m);
322322
}
323-
cx->sb_m = m = RX_OFFS(rx)[0].start + orig;
323+
cx->sb_m = m = RX_OFFS_START(rx,0) + orig;
324324
if (m > s) {
325325
if (DO_UTF8(dstr) && !SvUTF8(cx->sb_targ))
326326
sv_catpvn_nomg_utf8_upgrade(dstr, s, m - s, nsv);
327327
else
328328
sv_catpvn_nomg(dstr, s, m-s);
329329
}
330-
cx->sb_s = RX_OFFS(rx)[0].end + orig;
330+
cx->sb_s = RX_OFFS_END(rx,0) + orig;
331331
{ /* Update the pos() information. */
332332
SV * const sv
333333
= (pm->op_pmflags & PMf_NONDESTRUCT) ? cx->sb_dstr : cx->sb_targ;
@@ -407,8 +407,8 @@ Perl_rxres_save(pTHX_ void **rsp, REGEXP *rx)
407407
*p++ = (UV)RX_SUBOFFSET(rx);
408408
*p++ = (UV)RX_SUBCOFFSET(rx);
409409
for (i = 0; i <= RX_NPARENS(rx); ++i) {
410-
*p++ = (UV)RX_OFFS(rx)[i].start;
411-
*p++ = (UV)RX_OFFS(rx)[i].end;
410+
*p++ = (UV)RX_OFFSp(rx)[i].start;
411+
*p++ = (UV)RX_OFFSp(rx)[i].end;
412412
}
413413
}
414414

@@ -438,8 +438,8 @@ S_rxres_restore(pTHX_ void **rsp, REGEXP *rx)
438438
RX_SUBOFFSET(rx) = (I32)*p++;
439439
RX_SUBCOFFSET(rx) = (I32)*p++;
440440
for (i = 0; i <= RX_NPARENS(rx); ++i) {
441-
RX_OFFS(rx)[i].start = (I32)(*p++);
442-
RX_OFFS(rx)[i].end = (I32)(*p++);
441+
RX_OFFSp(rx)[i].start = (I32)(*p++);
442+
RX_OFFSp(rx)[i].end = (I32)(*p++);
443443
}
444444
}
445445

pp_hot.c

+16-20
Original file line numberDiff line numberDiff line change
@@ -3189,7 +3189,7 @@ PP(pp_match)
31893189
if (global && (gimme != G_LIST || (dynpm->op_pmflags & PMf_CONTINUE))) {
31903190
if (!mg)
31913191
mg = sv_magicext_mglob(TARG);
3192-
MgBYTEPOS_set(mg, TARG, truebase, RXp_OFFS(prog)[0].end);
3192+
MgBYTEPOS_set(mg, TARG, truebase, RXp_OFFS_END(prog,0));
31933193
if (RXp_ZERO_LEN(prog))
31943194
mg->mg_flags |= MGf_MINMATCH;
31953195
else
@@ -3211,20 +3211,16 @@ PP(pp_match)
32113211
EXTEND(SP, nparens + i);
32123212
EXTEND_MORTAL(nparens + i);
32133213
for (i = !i; i <= nparens; i++) {
3214-
if (LIKELY((RXp_OFFS(prog)[i].start != -1)
3215-
&& RXp_OFFS(prog)[i].end != -1 ))
3214+
if (LIKELY(RXp_OFFS_VALID(prog,i)))
32163215
{
3217-
const I32 len = RXp_OFFS(prog)[i].end - RXp_OFFS(prog)[i].start;
3218-
const char * const s = RXp_OFFS(prog)[i].start + truebase;
3219-
if (UNLIKELY( RXp_OFFS(prog)[i].end < 0
3220-
|| RXp_OFFS(prog)[i].start < 0
3221-
|| len < 0
3222-
|| len > strend - s)
3216+
const I32 len = RXp_OFFS_END(prog,i) - RXp_OFFS_START(prog,i);
3217+
const char * const s = RXp_OFFS_START(prog,i) + truebase;
3218+
if ( UNLIKELY( len < 0 || len > strend - s)
32233219
)
32243220
DIE(aTHX_ "panic: pp_match start/end pointers, i=%ld, "
32253221
"start=%ld, end=%ld, s=%p, strend=%p, len=%" UVuf,
3226-
(long) i, (long) RXp_OFFS(prog)[i].start,
3227-
(long)RXp_OFFS(prog)[i].end, s, strend, (UV) len);
3222+
(long) i, (long) RXp_OFFS_START(prog,i),
3223+
(long)RXp_OFFS_END(prog,i), s, strend, (IV) len);
32283224
PUSHs(newSVpvn_flags(s, len,
32293225
(DO_UTF8(TARG))
32303226
? SVf_UTF8|SVs_TEMP
@@ -3235,7 +3231,7 @@ PP(pp_match)
32353231
}
32363232
}
32373233
if (global) {
3238-
curpos = (UV)RXp_OFFS(prog)[0].end;
3234+
curpos = (UV)RXp_OFFS_END(prog,0);
32393235
had_zerolen = RXp_ZERO_LEN(prog);
32403236
PUTBACK; /* EVAL blocks may use stack */
32413237
r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
@@ -4519,8 +4515,8 @@ PP(pp_subst)
45194515
char *d, *m;
45204516
if (RXp_MATCH_TAINTED(prog)) /* run time pattern taint, eg locale */
45214517
rxtainted |= SUBST_TAINT_PAT;
4522-
m = orig + RXp_OFFS(prog)[0].start;
4523-
d = orig + RXp_OFFS(prog)[0].end;
4518+
m = orig + RXp_OFFS_START(prog,0);
4519+
d = orig + RXp_OFFS_END(prog,0);
45244520
s = orig;
45254521
if (m - s > strend - d) { /* faster to shorten from end */
45264522
I32 i;
@@ -4550,15 +4546,15 @@ PP(pp_subst)
45504546
}
45514547
else {
45524548
char *d, *m;
4553-
d = s = RXp_OFFS(prog)[0].start + orig;
4549+
d = s = RXp_OFFS_START(prog,0) + orig;
45544550
do {
45554551
I32 i;
45564552
if (UNLIKELY(iters++ > maxiters))
45574553
DIE(aTHX_ "Substitution loop");
45584554
/* run time pattern taint, eg locale */
45594555
if (UNLIKELY(RXp_MATCH_TAINTED(prog)))
45604556
rxtainted |= SUBST_TAINT_PAT;
4561-
m = RXp_OFFS(prog)[0].start + orig;
4557+
m = RXp_OFFS_START(prog,0) + orig;
45624558
if ((i = m - s)) {
45634559
if (s != d)
45644560
Move(s, d, i, char);
@@ -4568,7 +4564,7 @@ PP(pp_subst)
45684564
Copy(c, d, clen, char);
45694565
d += clen;
45704566
}
4571-
s = RXp_OFFS(prog)[0].end + orig;
4567+
s = RXp_OFFS_END(prog,0) + orig;
45724568
} while (CALLREGEXEC(rx, s, strend, orig,
45734569
s == m, /* don't match same null twice */
45744570
TARG, NULL,
@@ -4611,7 +4607,7 @@ PP(pp_subst)
46114607
if (RXp_MATCH_TAINTED(prog)) /* run time pattern taint, eg locale */
46124608
rxtainted |= SUBST_TAINT_PAT;
46134609
repl = dstr;
4614-
s = RXp_OFFS(prog)[0].start + orig;
4610+
s = RXp_OFFS_START(prog,0) + orig;
46154611
dstr = newSVpvn_flags(orig, s-orig,
46164612
SVs_TEMP | (DO_UTF8(TARG) ? SVf_UTF8 : 0));
46174613
if (!c) {
@@ -4641,9 +4637,9 @@ PP(pp_subst)
46414637
s = orig + (old_s - old_orig);
46424638
strend = s + (strend - old_s);
46434639
}
4644-
m = RXp_OFFS(prog)[0].start + orig;
4640+
m = RXp_OFFS_START(prog,0) + orig;
46454641
sv_catpvn_nomg_maybeutf8(dstr, s, m - s, DO_UTF8(TARG));
4646-
s = RXp_OFFS(prog)[0].end + orig;
4642+
s = RXp_OFFS_END(prog,0) + orig;
46474643
if (first) {
46484644
/* replacement already stringified */
46494645
if (clen)

regcomp.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -15718,9 +15718,9 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
1571815718
* so we could match anywhere in that string. We have to rule out
1571915719
* matching a code point line */
1572015720
char * this_name_start = all_names_start
15721-
+ RX_OFFS(subpattern_re)->start;
15721+
+ RX_OFFS_START(subpattern_re,0);
1572215722
char * this_name_end = all_names_start
15723-
+ RX_OFFS(subpattern_re)->end;
15723+
+ RX_OFFS_END(subpattern_re,0);
1572415724
char * cp_start;
1572515725
char * cp_end;
1572615726
UV cp = 0; /* Silences some compilers */

regcomp_debug.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -514,15 +514,15 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
514514
}
515515
if ( k == REF && reginfo) {
516516
U32 n = ARG(o); /* which paren pair */
517-
I32 ln = prog->offs[n].start;
518-
if (prog->lastparen < n || ln == -1 || prog->offs[n].end == -1)
517+
I32 ln = RXp_OFFS_START(prog,n);
518+
if (prog->lastparen < n || ln == -1 || RXp_OFFS_END(prog,n) == -1)
519519
Perl_sv_catpvf(aTHX_ sv, ": FAIL");
520-
else if (ln == prog->offs[n].end)
520+
else if (ln == RXp_OFFS_END(prog,n))
521521
Perl_sv_catpvf(aTHX_ sv, ": ACCEPT - EMPTY STRING");
522522
else {
523523
const char *s = reginfo->strbeg + ln;
524524
Perl_sv_catpvf(aTHX_ sv, ": ");
525-
Perl_pv_pretty( aTHX_ sv, s, prog->offs[n].end - prog->offs[n].start, 32, 0, 0,
525+
Perl_pv_pretty( aTHX_ sv, s, RXp_OFFS_END(prog,n) - RXp_OFFS_START(prog,n), 32, 0, 0,
526526
PERL_PV_ESCAPE_UNI_DETECT|PERL_PV_PRETTY_NOCLEAR|PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE );
527527
}
528528
}

0 commit comments

Comments
 (0)