Deprecate `locatedExpr` in favor of new `Located` class

author: ptmcg <ptmcg@austin.rr.com> 2020-12-24 01:21:57 -0600
committer: ptmcg <ptmcg@austin.rr.com> 2020-12-24 01:21:57 -0600
commit: 2896fad83357bde252c0304aa4c0c55661486a5e (patch)
tree: 466fff0292422bb9c6717bf6bddb7963622ebdad
parent: c3be8acb355559cc1fdcb673f8df904b6947b0af (diff)
download: pyparsing-git-2896fad83357bde252c0304aa4c0c55661486a5e.tar.gz
5 files changed, 141 insertions, 8 deletions
diff --git a/CHANGES b/CHANGES
index 9824554..ad020a4 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,17 @@
 Change Log
 ==========
 
+Version 3.0.0b2
+---------------
+- API CHANGE
+  `locatedExpr` is being replaced by the class `Located`. `Located` has the same
+  constructor interface as `locatedExpr`, but fixes bugs in the returned
+  `ParseResults` when the searched expression contains multiple tokens, or
+  has internal results names.
+
+  `locatedExpr` is deprecated, and will be removed in a future release.
+
+
 Version 3.0.0b1
 ---------------
 - API CHANGE
diff --git a/docs/whats_new_in_3_0_0.rst b/docs/whats_new_in_3_0_0.rst
index 4ddd051..24e9423 100644
--- a/docs/whats_new_in_3_0_0.rst
+++ b/docs/whats_new_in_3_0_0.rst
@@ -4,7 +4,7 @@ What's New in Pyparsing 3.0.0
 
 :author: Paul McGuire
 
-:date: November, 2020
+:date: December, 2020
 
 :abstract: This document summarizes the changes made
     in the 3.0.0 release of pyparsing.
@@ -89,6 +89,44 @@ just namespaces, to add some helpful behavior:
   mistake when using Forwards)
   (**currently not working on PyPy**)
 
+New Located class to replace locatedExpr helper method
+------------------------------------------------------
+The new ``Located`` class will replace the current ``locatedExpr`` method for
+marking parsed results with the start and end locations of the parsed data in
+the input string.  ``locatedExpr`` had several bugs, and returned its results
+in a hard-to-use format (location data and results names were mixed in with
+the located expression's parsed results, and wrapped in an unnecessary extra
+nesting level).
+
+For this code::
+
+        wd = Word(alphas)
+        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+            print(match)
+
+the docs for ``locatedExpr`` show this output::
+
+        [[0, 'ljsdf', 5]]
+        [[8, 'lksdjjf', 15]]
+        [[18, 'lkkjj', 23]]
+
+The parsed values and the start and end locations are merged into a single
+nested ParseResults (and any results names in the parsed values are also
+merged in with the start and end location names).
+
+Using ``Located``, the output is::
+
+        [0, ['ljsdf'], 5]
+        [8, ['lksdjjf'], 15]
+        [18, ['lkkjj'], 23]
+
+With ``Located``, the parsed expression values and results names are kept
+separate in the second parsed value, and there is no extra grouping level
+on the whole result.
+
+The existing ``locatedExpr`` is retained for backward-compatibility, but is
+deprecated and will be removed in a future release.
+
 New IndentedBlock class to replace indentedBlock helper method
 --------------------------------------------------------------
 The new ``IndentedBlock`` class will replace the current ``indentedBlock`` method
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 0649a3c..f79c86a 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -3903,6 +3903,45 @@ class PrecededBy(ParseElementEnhance):
         return loc, ret
 
 
+class Located(ParseElementEnhance):
+    """
+    Decorates a returned token with its starting and ending
+    locations in the input string.
+
+    This helper adds the following results names:
+
+     - ``locn_start`` - location where matched expression begins
+     - ``locn_end`` - location where matched expression ends
+     - ``value`` - the actual parsed results
+
+    Be careful if the input text contains ``<TAB>`` characters, you
+    may want to call :class:`ParserElement.parseWithTabs`
+
+    Example::
+
+        wd = Word(alphas)
+        for match in Located(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+            print(match)
+
+    prints::
+
+        [0, ['ljsdf'], 5]
+        [8, ['lksdjjf'], 15]
+        [18, ['lkkjj'], 23]
+
+    """
+    def parseImpl(self, instring, loc, doActions=True):
+        start = loc
+        loc, tokens = self.expr._parse(
+            instring, start, doActions, callPreParse=False
+        )
+        ret_tokens = ParseResults([start, tokens, loc])
+        ret_tokens['locn_start'] = start
+        ret_tokens['value'] = tokens
+        ret_tokens['locn_end'] = loc
+        return loc, ret_tokens
+
+
 class NotAny(ParseElementEnhance):
     """Lookahead to disallow matching with the given parse expression.
     ``NotAny`` does *not* advance the parsing position within the
diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py
index cf59107..a5bc2cc 100644
--- a/pyparsing/helpers.py
+++ b/pyparsing/helpers.py
@@ -337,7 +337,9 @@ def ungroup(expr):
 
 
 def locatedExpr(expr):
-    """Helper to decorate a returned token with its starting and ending
+    """
+    (DEPRECATED - future code should use the Located class)
+    Helper to decorate a returned token with its starting and ending
     locations in the input string.
 
     This helper adds the following results names:
diff --git a/tests/test_unit.py b/tests/test_unit.py
index d84d08d..9b47f28 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -9,6 +9,7 @@
 
 import contextlib
 import datetime
+import re
 import sys
 from io import StringIO
 from textwrap import dedent
@@ -56,6 +57,21 @@ class resetting:
             setattr(self.ob, attr, value)
 
 
+def find_all_re_matches(patt, s):
+    ret = []
+    start = 0
+    if isinstance(patt, str):
+        patt = re.compile(patt)
+    while True:
+        found = patt.search(s, pos=start)
+        if found:
+            ret.append(found)
+            start = found.end()
+        else:
+            break
+    return ret
+
+
 class Test1_PyparsingTestInit(TestCase):
     def runTest(self):
         from pyparsing import (
@@ -1438,8 +1454,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             {"_skipped": ["red ", "456 "]},
         )
 
-    def testEllipsisRepetion(self):
-        import re
+    def testEllipsisRepetition(self):
 
         word = pp.Word(pp.alphas).setName("word")
         num = pp.Word(pp.nums).setName("num")
@@ -2938,7 +2953,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             )
 
     def testParseUsingRegex(self):
-        import re
 
         signedInt = pp.Regex(r"[-+][0-9]+")
         unsignedInt = pp.Regex(r"[0-9]+")
@@ -3471,8 +3485,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             )
             print()
 
-        import re
-
         k = pp.Regex(r"a+", flags=re.S + re.M)
         k = k.parseWithTabs()
         k = k.leaveWhitespace()
@@ -4607,6 +4619,38 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             "incorrect location calculation",
         )
 
+    def testLocatedExprUsingLocated(self):
+        #             012345678901234567890123456789012345678901234567890
+        samplestr1 = "DOB 10-10-2010;more garbage;ID PARI12345678  ;more garbage"
+
+        id_ref = pp.Located("ID" + pp.Word(pp.alphanums, exact=12)("id"))
+
+        res = id_ref.searchString(samplestr1)[0]
+        print(res.dump())
+        self.assertEqual(
+            "ID PARI12345678",
+            samplestr1[res.locn_start:res.locn_end],
+            "incorrect location calculation",
+        )
+        self.assertParseResultsEquals(res,
+                                      [28, ['ID', 'PARI12345678'], 43],
+                                      {'locn_end': 43,
+                                       'locn_start': 28,
+                                       'value': {'id': 'PARI12345678'}}
+        )
+
+        wd = pp.Word(pp.alphas)
+        test_string = "ljsdf123lksdjjf123lkkjj1222"
+        pp_matches = pp.Located(wd).searchString(test_string)
+        re_matches = find_all_re_matches("[a-z]+", test_string)
+        for pp_match, re_match in zip(pp_matches, re_matches):
+            self.assertParseResultsEquals(pp_match, [re_match.start(),
+                                                     [re_match.group(0)],
+                                                     re_match.end()])
+            print(pp_match)
+            print(re_match)
+            print(pp_match.value)
+
     def testPop(self):
         source = "AAA 123 456 789 234"
         patt = pp.Word(pp.alphas)("name") + pp.Word(pp.nums) * (1,)
@@ -7142,7 +7186,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
 
     def testWordInternalReRanges(self):
         import random
-        import re
 
         self.assertEqual(
             "[!-~]+",
author	ptmcg <ptmcg@austin.rr.com>	2020-12-24 01:21:57 -0600
committer	ptmcg <ptmcg@austin.rr.com>	2020-12-24 01:21:57 -0600
commit	2896fad83357bde252c0304aa4c0c55661486a5e (patch)
tree	466fff0292422bb9c6717bf6bddb7963622ebdad
parent	c3be8acb355559cc1fdcb673f8df904b6947b0af (diff)
download	pyparsing-git-2896fad83357bde252c0304aa4c0c55661486a5e.tar.gz