diff --git a/README.md b/README.md index 305c567..40fda2e 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,21 @@ regexp = expression.compile() result_re = regexp.sub('duck', replace_me) print result_re ``` +### Using named groups +```python +name = "Linus Torvalds" +expression = (VerEx() + .start_of_line() + .word(name='first_name') + .then(' ') + .word(name='last_name') + .end_of_line() + .regex()) +match = expression.match(name) + +print(match.group('first_name')) # Linus +print(match.group('last_name')) # Torvalds +``` ### Shorthand for string replace ```python result = VerEx().find('red').replace('We have a red house', 'blue') diff --git a/tests/verbal_expressions_test.py b/tests/verbal_expressions_test.py index 5ed52d8..2d322d9 100644 --- a/tests/verbal_expressions_test.py +++ b/tests/verbal_expressions_test.py @@ -135,3 +135,21 @@ def test_should_match_email_address(self): def test_should_match_url(self): self.exp = self.v.start_of_line().then('http').maybe('s').then('://').maybe('www.').word().then('.').word().maybe('/').end_of_line().regex() self.assertRegexpMatches('https://www.google.com/', self.exp, 'Not a valid email') + + def test_should_find_number(self): + self.exp = self.v.start_of_line().number().end_of_line().regex() + self.assertRegexpMatches('123', self.exp, 'Number not found') + + def test_word_should_find_named_groups(self): + name = "Linus Torvalds" + self.exp = self.v.start_of_line().word(name='first_name').then(' ').word(name='last_name').end_of_line().regex() + match = self.exp.match(name) + self.assertIsNotNone(match) + self.assertEquals(match.group('first_name'), 'Linus') + self.assertEquals(match.group('last_name'), 'Torvalds') + + def test_number_should_find_named_groups(self): + self.exp = self.v.start_of_line().number('number').end_of_line().regex() + match = self.exp.match('123') + self.assertIsNotNone(match, self.exp.pattern) + self.assertEquals(match.group('number'), '123') diff --git a/verbalexpressions/verbal_expressions.py 
b/verbalexpressions/verbal_expressions.py index 1450186..d983105 100644 --- a/verbalexpressions/verbal_expressions.py +++ b/verbalexpressions/verbal_expressions.py @@ -2,11 +2,16 @@ def re_escape(fn): - def arg_escaped(this, *args): + def arg_escaped(this, *args, **kwargs): t = [isinstance(a, VerEx) and a.s or re.escape(str(a)) for a in args] - return fn(this, *t) + return fn(this, *t, **kwargs) return arg_escaped + +def group(val, name=None): + prefix = '?P<{0}>'.format(name) if name else '' + return '(' + prefix + val + ')' + class VerEx(object): ''' @@ -50,49 +55,52 @@ def source(self): # --------------------------------------------- - def anything(self): - return self.add('(.*)') + def anything(self, name=None): + return self.add(group('.*', name)) @re_escape def anything_but(self, value): - return self.add('([^' + value + ']*)') + return self.add(group('[^' + value + ']*')) def end_of_line(self): return self.add('$') @re_escape def maybe(self, value): - return self.add("(" + value + ")?") + return self.add(group(value) + "?") def start_of_line(self): return self.add('^') @re_escape def find(self, value): - return self.add('(' + value + ')') + return self.add(group(value)) then = find # special characters and groups @re_escape def any(self, value): - return self.add("([" + value + "])") + return self.add(group("[" + value + "]")) any_of = any def line_break(self): - return self.add(r"(\n|(\r\n))") + return self.add(group(r"\n|(\r\n)")) br = line_break @re_escape def range(self, *args): from_tos = [args[i:i+2] for i in range(0, len(args), 2)] - return self.add("([" + ''.join(['-'.join(i) for i in from_tos]) + "])") + return self.add(group("[" + ''.join(['-'.join(i) for i in from_tos]) + "]")) def tab(self): return self.add(r'\t') - def word(self): - return self.add(r"(\w+)") + def word(self, name=None): + return self.add(group(r"\w+", name)) + + def number(self, name=None): + return self.add(group(r"\d+", name)) def OR(self, value=None): ''' `or` is a python 
keyword so we use `OR` instead. '''