Skip to content

Commit 66e5b91

Browse files
committed
fix a bug when matching with empty strings
related to kokke/tiny-regex-c/issues/56
1 parent 61a0d57 commit 66e5b91

File tree

2 files changed

+22
-7
lines changed

2 files changed

+22
-7
lines changed

Diff for: src/re.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,9 @@ int re_matchp(re_t pattern, const char* text, int* matchlength) {
8484

8585
do {
8686
if (matchpattern(pattern, text, rune_size, matchlength)) {
87-
if (text[0] == '\0')
88-
return -1;
87+
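// Disabled: this check returned -1 for a successful match that starts at the end of the text, which rejected valid matches on empty input (e.g. "" or "a*" against "").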
88+
// if (text[0] == '\0')
89+
// return -1;
8990

9091
return (int)(text - prepoint);
9192
}

Diff for: tests/re_test.h

+19-5
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ const RegexCase regex_cases_utf[] = {
118118
{ u8"[\\\U0001F600]+", u8"\U0001F600", TSM_OK },
119119
{ u8"[\\\U0001F600]+", u8"\U0001F600\U0001F600\U0001F600", TSM_OK },
120120
{ u8"[\\\U0001F600]+", "a", TSM_FAIL },
121-
122121
};
123122

124123
INSTANTIATE_TEST_SUITE_P(RegexTestInstantiation_UTF,
@@ -140,7 +139,7 @@ INSTANTIATE_TEST_SUITE_P(RegexTestInstantiation_Escaped,
140139
// https://github.com/python/cpython/blob/main/Lib/test/re_tests.py
141140
const RegexCase regex_cases_python[] = {
142141
// { ")", "", TSM_SYNTAX_ERROR }, // the () operator is not supported yet.
143-
// { "", "", TSM_OK }, // this fails somehow
142+
{ "", "", TSM_OK },
144143
{ "abc", "abc", TSM_OK },
145144
{ "abc", "xbc", TSM_FAIL },
146145
{ "abc", "axc", TSM_FAIL },
@@ -165,7 +164,7 @@ const RegexCase regex_cases_python[] = {
165164
{ "^abc$", "aabc", TSM_FAIL },
166165
{ "abc$", "aabc", TSM_OK },
167166
{ "^", "abc", TSM_OK },
168-
// { "$", "abc", TSM_OK }, // This fails somehow
167+
// { "$", "abc", TSM_OK }, // fail
169168
{ "a.c", "abc", TSM_OK },
170169
{ "a.c", "axc", TSM_OK },
171170
{ "a.*c", "axyzc", TSM_OK },
@@ -197,7 +196,7 @@ const RegexCase regex_cases_python[] = {
197196
{ "a+b+c", "aabbabc", TSM_OK },
198197
{ "[^ab]*", "cde", TSM_OK },
199198
{ "abc", "", TSM_FAIL },
200-
// { "a*", "", TSM_OK }, // This fails somehow
199+
{ "a*", "", TSM_OK },
201200
{ "abcd*efg", "abcdefg", TSM_OK },
202201
{ "ab*", "xabyabbbz", TSM_OK },
203202
{ "ab*", "xayabbbz", TSM_OK },
@@ -214,14 +213,29 @@ const RegexCase regex_cases_python[] = {
214213
{ "\\D+", "1234abc5678", TSM_OK },
215214
{ "[\\D+]", "1234abc5678", TSM_OK },
216215
{ "[\\da-fA-F]+", "123abc", TSM_OK },
217-
// TODO: add more tese cases
216+
// TODO: add more test cases
218217
// { "", "", TSM_OK },
219218
};
220219

221220
INSTANTIATE_TEST_SUITE_P(RegexTestInstantiation_Python,
222221
RegexTest,
223222
::testing::ValuesIn(regex_cases_python));
224223

224+
// Original test cases.
225+
const RegexCase regex_cases_tsm[] = {
226+
{ "^[a-zA-Z_][z-zA-Z0-9_]*", "a", TSM_OK },
227+
{ "^[a-zA-Z_][z-zA-Z0-9_]*", "abcd1234_", TSM_OK },
228+
{ "^[a-zA-Z_][z-zA-Z0-9_]*", "1", TSM_FAIL },
229+
{ "^[a-zA-Z_][z-zA-Z0-9_]*", "1bcd1234_", TSM_FAIL },
230+
{ ".", "", TSM_FAIL },
231+
{ "^.$", "", TSM_FAIL },
232+
{ ".*", "", TSM_OK },
233+
};
234+
235+
INSTANTIATE_TEST_SUITE_P(RegexTestInstantiation_Tsm,
236+
RegexTest,
237+
::testing::ValuesIn(regex_cases_tsm));
238+
225239
TEST_P(RegexTest, tsm_regex_match) {
226240
const RegexCase test_case = GetParam();
227241
int actual = tsm_regex_match(test_case.pattern, test_case.str);

0 commit comments

Comments
 (0)