Add Regex.sub() method, and asMatch and asGroupList parameters

author: Paul McGuire <ptmcg@austin.rr.com> 2018-09-29 11:16:48 -0500
committer: Paul McGuire <ptmcg@austin.rr.com> 2018-09-29 11:16:48 -0500
commit: 389a31fe25e96ccde571e889d9a1fb56c1fd65be (patch)
tree: f348c620fd1bd5032554bac03f58d9dc03a1151c
parent: 3497ee60fcb0c07ad51458ad3962b8548f0ee838 (diff)
download: pyparsing-git-389a31fe25e96ccde571e889d9a1fb56c1fd65be.tar.gz
3 files changed, 82 insertions, 6 deletions
diff --git a/CHANGES b/CHANGES
index 3a73302..5610cf2 100644
--- a/CHANGES
+++ b/CHANGES
@@ -15,6 +15,24 @@ Version 2.x.x - TBD
   Primary intent is more to be instructional than actually rigorous 
   testing. Complex tests can still be added in the unitTests.py file.
 
+- New features added to the Regex class:
+  - optional asGroupList parameter, returns all the capture groups as
+    a list
+  - optional asMatch parameter, returns the raw re.match result
+  - new sub(repl) method, which adds a parse action calling
+    re.sub(pattern, repl, parsed_result). Simplifies creating 
+    Regex expressions to be used with transformString. Like re.sub,
+    repl may be an ordinary string (similar to using pyparsing's 
+    replaceWith), or may contain references to capture groups by group 
+    number, or may be a callable that takes an re match object and 
+    returns a string.
+    
+    For instance:
+        expr = pp.Regex(r"([Hh]\d):\s*(.*)").sub(r"<\1>\2</\1>")
+        expr.transformString("h1: This is the title")
+
+    will return
+        <h1>This is the title</h1>
 
 Version 2.2.1 - September, 2018
 -------------------------------
diff --git a/pyparsing.py b/pyparsing.py
index 5b9b1e7..cdbf398 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -75,7 +75,7 @@ classes inherit from. Use the docstrings for examples of how to:
 """
 
 __version__ = "2.2.2"
-__versionTime__ = "25 Sep 2018 04:18 UTC"
+__versionTime__ = "29 Sep 2018 15:58 UTC"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
@@ -2776,7 +2776,7 @@ class Regex(Token):
         roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
     """
     compiledREtype = type(re.compile("[A-Z]"))
-    def __init__( self, pattern, flags=0):
+    def __init__( self, pattern, flags=0, asGroupList=False, asMatch=False):
         """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
         super(Regex,self).__init__()
 
@@ -2809,6 +2809,8 @@ class Regex(Token):
         self.errmsg = "Expected " + self.name
         self.mayIndexError = False
         self.mayReturnEmpty = True
+        self.asGroupList = asGroupList
+        self.asMatch = asMatch
 
     def parseImpl( self, instring, loc, doActions=True ):
         result = self.re.match(instring,loc)
@@ -2817,10 +2819,15 @@ class Regex(Token):
 
         loc = result.end()
         d = result.groupdict()
-        ret = ParseResults(result.group())
-        if d:
-            for k in d:
-                ret[k] = d[k]
+        if self.asMatch:
+            ret = result
+        elif self.asGroupList:
+            ret = result.groups()
+        else:
+            ret = ParseResults(result.group())
+            if d:
+                for k in d:
+                    ret[k] = d[k]
         return loc,ret
 
     def __str__( self ):
@@ -2834,6 +2841,12 @@ class Regex(Token):
 
         return self.strRepr
 
+    def sub(self, repl):
+        """
+        Return Regex with an attached parse action to transform the parsed
+        result as if called using C{re.sub(expr, repl, string)}.
+        """
+        return self.addParseAction(lambda s, l, t: self.re.sub(repl, t[0]))
 
 class QuotedString(Token):
     r"""
diff --git a/unitTests.py b/unitTests.py
index b7a5171..56a6cf2 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -1701,6 +1701,51 @@ class ParseUsingRegex(ParseTestCase):
             
         invRe = pyparsing.Regex('')
 
+class RegexAsTypeTest(ParseTestCase):
+    def runTest(self):
+        import pyparsing as pp
+
+        test_str = "sldkjfj 123 456 lsdfkj"
+
+        print_("return as list of match groups")
+        expr = pp.Regex(r"\w+ (\d+) (\d+) (\w+)", asGroupList=True)
+        expected_group_list = [tuple(test_str.split()[1:])]
+        result = expr.parseString(test_str)
+        print_(result.dump())
+        print_(expected_group_list)
+        assert result.asList() == expected_group_list, "incorrect group list returned by Regex"
+
+        print_("return as re.match instance")
+        expr = pp.Regex(r"\w+ (?P<num1>\d+) (?P<num2>\d+) (?P<last_word>\w+)", asMatch=True)
+        result = expr.parseString(test_str)
+        print_(result.dump())
+        print_(result[0].groups())
+        print_(expected_group_list)
+        assert result[0].groupdict() == {'num1': '123',  'num2': '456',  'last_word': 'lsdfkj'}, 'invalid group dict from Regex(asMatch=True)'
+        assert result[0].groups() == expected_group_list[0], "incorrect group list returned by Regex(asMatch)"
+
+class RegexSubTest(ParseTestCase):
+    def runTest(self):
+        import pyparsing as pp
+
+        print_("test sub with string")
+        expr = pp.Regex(r"<title>").sub("'Richard III'")
+        result = expr.transformString("This is the title: <title>")
+        print_(result)
+        assert result == "This is the title: 'Richard III'", "incorrect Regex.sub result with simple string"
+        
+        print_("test sub with re string")
+        expr = pp.Regex(r"([Hh]\d):\s*(.*)").sub(r"<\1>\2</\1>")
+        result = expr.transformString("h1: This is the main heading\nh2: This is the sub-heading")
+        print_(result)
+        assert result == '<h1>This is the main heading</h1>\n<h2>This is the sub-heading</h2>', "incorrect Regex.sub result with re string"
+        
+        print_("test sub with callable that return str")
+        expr = pp.Regex(r"<(.*?)>").sub(lambda m: m.group(1).upper())
+        result = expr.transformString("I want this in upcase: <what? what?>")
+        print_(result)
+        assert result == 'I want this in upcase: WHAT? WHAT?', "incorrect Regex.sub result with callable"
+
 class CountedArrayTest(ParseTestCase):
     def runTest(self):
         from pyparsing import Word,nums,OneOrMore,countedArray
author	Paul McGuire <ptmcg@austin.rr.com>	2018-09-29 11:16:48 -0500
committer	Paul McGuire <ptmcg@austin.rr.com>	2018-09-29 11:16:48 -0500
commit	389a31fe25e96ccde571e889d9a1fb56c1fd65be (patch)
tree	f348c620fd1bd5032554bac03f58d9dc03a1151c
parent	3497ee60fcb0c07ad51458ad3962b8548f0ee838 (diff)
download	pyparsing-git-389a31fe25e96ccde571e889d9a1fb56c1fd65be.tar.gz