Skip to content

Commit 2b46b21

Browse files
gh-67877: Fix memory leaks in terminated RE matching
If the SRE(match) function terminates abruptly, either because of a signal or because memory allocation fails, the allocated SRE_REPEAT blocks might never be released.
1 parent 7577307 commit 2b46b21

File tree

6 files changed

+198
-28
lines changed

6 files changed

+198
-28
lines changed

Lib/test/test_re.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2681,6 +2681,27 @@ def test_character_set_none(self):
26812681
self.assertIsNone(re.search(p, s))
26822682
self.assertIsNone(re.search('(?s:.)' + p, s))
26832683

2684+
def check_interrupt(self, pattern, string, maxcount):
2685+
class Interrupt(Exception):
2686+
pass
2687+
p = re.compile(pattern)
2688+
for n in range(maxcount):
2689+
p._fail_after(n, Interrupt)
2690+
try:
2691+
p.match(string)
2692+
return n
2693+
except Interrupt:
2694+
pass
2695+
2696+
@unittest.skipUnless(hasattr(re.Pattern, '_fail_after'), 'requires debug build')
2697+
def test_memory_leaks(self):
2698+
self.check_interrupt(r'(.)*:', 'abc:', 100)
2699+
self.check_interrupt(r'([^:])*?:', 'abc:', 100)
2700+
self.check_interrupt(r'([^:])*+:', 'abc:', 100)
2701+
self.check_interrupt(r'(.){2,4}:', 'abc:', 100)
2702+
self.check_interrupt(r'([^:]){2,4}?:', 'abc:', 100)
2703+
self.check_interrupt(r'([^:]){2,4}+:', 'abc:', 100)
2704+
26842705

26852706
def get_debug_out(pat):
26862707
with captured_stdout() as out:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix memory leaks when :mod:`regular expression <re>` matching terminates
2+
abruptly, either because of a signal or because memory allocation fails.

Modules/_sre/clinic/sre.c.h

Lines changed: 43 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Modules/_sre/sre.c

Lines changed: 86 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,25 @@ _sre_unicode_tolower_impl(PyObject *module, int character)
397397
return sre_lower_unicode(character);
398398
}
399399

400+
LOCAL(void)
401+
state_clean_repeat_data(SRE_STATE* state)
402+
{
403+
SRE_REPEAT *rep = state->repeat;
404+
state->repeat = NULL;
405+
while (rep) {
406+
SRE_REPEAT *prev = rep->prev;
407+
PyMem_Free(rep);
408+
rep = prev;
409+
}
410+
rep = state->repstack;
411+
state->repstack = NULL;
412+
while (rep) {
413+
SRE_REPEAT *next = rep->next;
414+
PyMem_Free(rep);
415+
rep = next;
416+
}
417+
}
418+
400419
LOCAL(void)
401420
state_reset(SRE_STATE* state)
402421
{
@@ -406,8 +425,7 @@ state_reset(SRE_STATE* state)
406425
state->lastmark = -1;
407426
state->lastindex = -1;
408427

409-
state->repeat = NULL;
410-
428+
state_clean_repeat_data(state);
411429
data_stack_dealloc(state);
412430
}
413431

@@ -511,6 +529,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
511529
state->pos = start;
512530
state->endpos = end;
513531

532+
#ifdef Py_DEBUG
533+
state->fail_after_count = pattern->fail_after_count;
534+
state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
535+
#endif
536+
514537
return string;
515538
err:
516539
/* We add an explicit cast here because MSVC has a bug when
@@ -524,15 +547,21 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
524547
}
525548

526549
LOCAL(void)
527-
state_fini(SRE_STATE* state)
550+
state_fini(SRE_STATE* state, PatternObject *pattern)
528551
{
529552
if (state->buffer.buf)
530553
PyBuffer_Release(&state->buffer);
531554
Py_XDECREF(state->string);
555+
state_clean_repeat_data(state);
532556
data_stack_dealloc(state);
533557
/* See above PyMem_Free() for why we explicitly cast here. */
534558
PyMem_Free((void*) state->mark);
535559
state->mark = NULL;
560+
#ifdef Py_DEBUG
561+
if (pattern) {
562+
pattern->fail_after_count = -1;
563+
}
564+
#endif
536565
}
537566

538567
/* calculate offset from start of string */
@@ -619,6 +648,9 @@ pattern_traverse(PatternObject *self, visitproc visit, void *arg)
619648
Py_VISIT(self->groupindex);
620649
Py_VISIT(self->indexgroup);
621650
Py_VISIT(self->pattern);
651+
#ifdef Py_DEBUG
652+
Py_VISIT(self->fail_after_exc);
653+
#endif
622654
return 0;
623655
}
624656

@@ -628,6 +660,9 @@ pattern_clear(PatternObject *self)
628660
Py_CLEAR(self->groupindex);
629661
Py_CLEAR(self->indexgroup);
630662
Py_CLEAR(self->pattern);
663+
#ifdef Py_DEBUG
664+
Py_CLEAR(self->fail_after_exc);
665+
#endif
631666
return 0;
632667
}
633668

@@ -690,7 +725,7 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
690725
Py_ssize_t status;
691726
PyObject *match;
692727

693-
if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
728+
if (!state_init(&state, self, string, pos, endpos))
694729
return NULL;
695730

696731
INIT_TRACE(&state);
@@ -702,12 +737,12 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
702737

703738
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
704739
if (PyErr_Occurred()) {
705-
state_fini(&state);
740+
state_fini(&state, self);
706741
return NULL;
707742
}
708743

709744
match = pattern_new_match(module_state, self, &state, status);
710-
state_fini(&state);
745+
state_fini(&state, self);
711746
return match;
712747
}
713748

@@ -747,12 +782,12 @@ _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
747782

748783
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
749784
if (PyErr_Occurred()) {
750-
state_fini(&state);
785+
state_fini(&state, self);
751786
return NULL;
752787
}
753788

754789
match = pattern_new_match(module_state, self, &state, status);
755-
state_fini(&state);
790+
state_fini(&state, self);
756791
return match;
757792
}
758793

@@ -792,12 +827,12 @@ _sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
792827
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
793828

794829
if (PyErr_Occurred()) {
795-
state_fini(&state);
830+
state_fini(&state, self);
796831
return NULL;
797832
}
798833

799834
match = pattern_new_match(module_state, self, &state, status);
800-
state_fini(&state);
835+
state_fini(&state, self);
801836
return match;
802837
}
803838

@@ -826,7 +861,7 @@ _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
826861

827862
list = PyList_New(0);
828863
if (!list) {
829-
state_fini(&state);
864+
state_fini(&state, self);
830865
return NULL;
831866
}
832867

@@ -888,12 +923,12 @@ _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
888923
state.start = state.ptr;
889924
}
890925

891-
state_fini(&state);
926+
state_fini(&state, self);
892927
return list;
893928

894929
error:
895930
Py_DECREF(list);
896-
state_fini(&state);
931+
state_fini(&state, self);
897932
return NULL;
898933

899934
}
@@ -989,7 +1024,7 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
9891024

9901025
list = PyList_New(0);
9911026
if (!list) {
992-
state_fini(&state);
1027+
state_fini(&state, self);
9931028
return NULL;
9941029
}
9951030

@@ -1053,12 +1088,12 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
10531088
if (status < 0)
10541089
goto error;
10551090

1056-
state_fini(&state);
1091+
state_fini(&state, self);
10571092
return list;
10581093

10591094
error:
10601095
Py_DECREF(list);
1061-
state_fini(&state);
1096+
state_fini(&state, self);
10621097
return NULL;
10631098

10641099
}
@@ -1185,7 +1220,7 @@ pattern_subx(_sremodulestate* module_state,
11851220
list = PyList_New(0);
11861221
if (!list) {
11871222
Py_DECREF(filter);
1188-
state_fini(&state);
1223+
state_fini(&state, self);
11891224
return NULL;
11901225
}
11911226

@@ -1271,7 +1306,7 @@ pattern_subx(_sremodulestate* module_state,
12711306
goto error;
12721307
}
12731308

1274-
state_fini(&state);
1309+
state_fini(&state, self);
12751310

12761311
Py_DECREF(filter);
12771312

@@ -1303,7 +1338,7 @@ pattern_subx(_sremodulestate* module_state,
13031338

13041339
error:
13051340
Py_DECREF(list);
1306-
state_fini(&state);
1341+
state_fini(&state, self);
13071342
Py_DECREF(filter);
13081343
return NULL;
13091344

@@ -1381,6 +1416,29 @@ _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
13811416
return Py_NewRef(self);
13821417
}
13831418

1419+
#ifdef Py_DEBUG
1420+
/*[clinic input]
1421+
_sre.SRE_Pattern._fail_after
1422+
1423+
count: int
1424+
exception: object
1425+
/
1426+
1427+
For debugging.
1428+
[clinic start generated code]*/
1429+
1430+
static PyObject *
1431+
_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
1432+
PyObject *exception)
1433+
/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
1434+
{
1435+
self->fail_after_count = count;
1436+
Py_INCREF(exception);
1437+
Py_XSETREF(self->fail_after_exc, exception);
1438+
Py_RETURN_NONE;
1439+
}
1440+
#endif /* Py_DEBUG */
1441+
13841442
static PyObject *
13851443
pattern_repr(PatternObject *obj)
13861444
{
@@ -1506,6 +1564,11 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
15061564
self->pattern = NULL;
15071565
self->groupindex = NULL;
15081566
self->indexgroup = NULL;
1567+
#ifdef Py_DEBUG
1568+
self->fail_after_count = -1;
1569+
self->fail_after_exc = NULL;
1570+
self->fail_after_exc = Py_NewRef(PyExc_RuntimeError);
1571+
#endif
15091572

15101573
self->codesize = n;
15111574

@@ -2680,7 +2743,7 @@ scanner_dealloc(ScannerObject* self)
26802743
PyTypeObject *tp = Py_TYPE(self);
26812744

26822745
PyObject_GC_UnTrack(self);
2683-
state_fini(&self->state);
2746+
state_fini(&self->state, self->pattern);
26842747
(void)scanner_clear(self);
26852748
tp->tp_free(self);
26862749
Py_DECREF(tp);
@@ -2826,7 +2889,8 @@ pattern_scanner(_sremodulestate *module_state,
28262889
return NULL;
28272890
}
28282891

2829-
scanner->pattern = Py_NewRef(self);
2892+
Py_INCREF(self);
2893+
scanner->pattern = self;
28302894

28312895
PyObject_GC_Track(scanner);
28322896
return (PyObject*) scanner;
@@ -3020,6 +3084,7 @@ static PyMethodDef pattern_methods[] = {
30203084
_SRE_SRE_PATTERN_SCANNER_METHODDEF
30213085
_SRE_SRE_PATTERN___COPY___METHODDEF
30223086
_SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
3087+
_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
30233088
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
30243089
PyDoc_STR("See PEP 585")},
30253090
{NULL, NULL}

0 commit comments

Comments
 (0)