summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul McGuire <ptmcg@austin.rr.com> 2018-09-29 11:16:48 -0500
committer Paul McGuire <ptmcg@austin.rr.com> 2018-09-29 11:16:48 -0500
commit 389a31fe25e96ccde571e889d9a1fb56c1fd65be (patch)
tree f348c620fd1bd5032554bac03f58d9dc03a1151c
parent 3497ee60fcb0c07ad51458ad3962b8548f0ee838 (diff)
download pyparsing-git-389a31fe25e96ccde571e889d9a1fb56c1fd65be.tar.gz
Add Regex.sub() method, and asMatch and asGroupList parameters
-rw-r--r-- CHANGES 18
-rw-r--r-- pyparsing.py 25
-rw-r--r-- unitTests.py 45
3 files changed, 82 insertions, 6 deletions
diff --git a/CHANGES b/CHANGES
index 3a73302..5610cf2 100644
--- a/CHANGES
+++ b/CHANGES
@@ -15,6 +15,24 @@ Version 2.x.x - TBD
Primary intent is more to be instructional than actually rigorous
testing. Complex tests can still be added in the unitTests.py file.
+- New features added to the Regex class:
+ - optional asGroupList parameter, returns all the capture groups as
+ a single tuple (the result of the re match's groups() method)
+ - optional asMatch parameter, returns the raw re.match result
+ - new sub(repl) method, which adds a parse action calling
+ re.sub(pattern, repl, parsed_result). Simplifies creating
+ Regex expressions to be used with transformString. Like re.sub,
+ repl may be an ordinary string (similar to using pyparsing's
+ replaceWith), or may contain references to capture groups by group
+ number, or may be a callable that takes an re match object and
+ returns a string.
+
+ For instance:
+ expr = pp.Regex(r"([Hh]\d):\s*(.*)").sub(r"<\1>\2</\1>")
+ expr.transformString("h1: This is the title")
+
+ will return
+ <h1>This is the title</h1>
Version 2.2.1 - September, 2018
-------------------------------
diff --git a/pyparsing.py b/pyparsing.py
index 5b9b1e7..cdbf398 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -75,7 +75,7 @@ classes inherit from. Use the docstrings for examples of how to:
"""
__version__ = "2.2.2"
-__versionTime__ = "25 Sep 2018 04:18 UTC"
+__versionTime__ = "29 Sep 2018 15:58 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -2776,7 +2776,7 @@ class Regex(Token):
roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
"""
compiledREtype = type(re.compile("[A-Z]"))
- def __init__( self, pattern, flags=0):
+ def __init__( self, pattern, flags=0, asGroupList=False, asMatch=False):
"""The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
super(Regex,self).__init__()
@@ -2809,6 +2809,8 @@ class Regex(Token):
self.errmsg = "Expected " + self.name
self.mayIndexError = False
self.mayReturnEmpty = True
+ self.asGroupList = asGroupList
+ self.asMatch = asMatch
def parseImpl( self, instring, loc, doActions=True ):
result = self.re.match(instring,loc)
@@ -2817,10 +2819,15 @@ class Regex(Token):
loc = result.end()
d = result.groupdict()
- ret = ParseResults(result.group())
- if d:
- for k in d:
- ret[k] = d[k]
+ if self.asMatch:
+ ret = result
+ elif self.asGroupList:
+ ret = result.groups()
+ else:
+ ret = ParseResults(result.group())
+ if d:
+ for k in d:
+ ret[k] = d[k]
return loc,ret
def __str__( self ):
@@ -2834,6 +2841,12 @@ class Regex(Token):
return self.strRepr
+ def sub(self, repl):
+ """
+ Return Regex with an attached parse action to transform the parsed
+ result as if called using C{re.sub(expr, repl, string)}.
+ """
+ return self.addParseAction(lambda s, l, t: self.re.sub(repl, t[0]))
class QuotedString(Token):
r"""
diff --git a/unitTests.py b/unitTests.py
index b7a5171..56a6cf2 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -1701,6 +1701,51 @@ class ParseUsingRegex(ParseTestCase):
invRe = pyparsing.Regex('')
+class RegexAsTypeTest(ParseTestCase):
+ def runTest(self):
+ import pyparsing as pp
+
+ test_str = "sldkjfj 123 456 lsdfkj"
+
+ print_("return as list of match groups")
+ expr = pp.Regex(r"\w+ (\d+) (\d+) (\w+)", asGroupList=True)
+ expected_group_list = [tuple(test_str.split()[1:])]
+ result = expr.parseString(test_str)
+ print_(result.dump())
+ print_(expected_group_list)
+ assert result.asList() == expected_group_list, "incorrect group list returned by Regex"
+
+ print_("return as re.match instance")
+ expr = pp.Regex(r"\w+ (?P<num1>\d+) (?P<num2>\d+) (?P<last_word>\w+)", asMatch=True)
+ result = expr.parseString(test_str)
+ print_(result.dump())
+ print_(result[0].groups())
+ print_(expected_group_list)
+ assert result[0].groupdict() == {'num1': '123', 'num2': '456', 'last_word': 'lsdfkj'}, 'invalid group dict from Regex(asMatch=True)'
+ assert result[0].groups() == expected_group_list[0], "incorrect group list returned by Regex(asMatch)"
+
+class RegexSubTest(ParseTestCase):
+ def runTest(self):
+ import pyparsing as pp
+
+ print_("test sub with string")
+ expr = pp.Regex(r"<title>").sub("'Richard III'")
+ result = expr.transformString("This is the title: <title>")
+ print_(result)
+ assert result == "This is the title: 'Richard III'", "incorrect Regex.sub result with simple string"
+
+ print_("test sub with re string")
+ expr = pp.Regex(r"([Hh]\d):\s*(.*)").sub(r"<\1>\2</\1>")
+ result = expr.transformString("h1: This is the main heading\nh2: This is the sub-heading")
+ print_(result)
+ assert result == '<h1>This is the main heading</h1>\n<h2>This is the sub-heading</h2>', "incorrect Regex.sub result with re string"
+
+ print_("test sub with callable that return str")
+ expr = pp.Regex(r"<(.*?)>").sub(lambda m: m.group(1).upper())
+ result = expr.transformString("I want this in upcase: <what? what?>")
+ print_(result)
+ assert result == 'I want this in upcase: WHAT? WHAT?', "incorrect Regex.sub result with callable"
+
class CountedArrayTest(ParseTestCase):
def runTest(self):
from pyparsing import Word,nums,OneOrMore,countedArray