summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorptmcg <ptmcg@austin.rr.com>2020-12-24 01:21:57 -0600
committerptmcg <ptmcg@austin.rr.com>2020-12-24 01:21:57 -0600
commit2896fad83357bde252c0304aa4c0c55661486a5e (patch)
tree466fff0292422bb9c6717bf6bddb7963622ebdad
parentc3be8acb355559cc1fdcb673f8df904b6947b0af (diff)
downloadpyparsing-git-2896fad83357bde252c0304aa4c0c55661486a5e.tar.gz
Deprecate `locatedExpr` in favor of new `Located` class
-rw-r--r--CHANGES11
-rw-r--r--docs/whats_new_in_3_0_0.rst40
-rw-r--r--pyparsing/core.py39
-rw-r--r--pyparsing/helpers.py4
-rw-r--r--tests/test_unit.py55
5 files changed, 141 insertions, 8 deletions
diff --git a/CHANGES b/CHANGES
index 9824554..ad020a4 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,17 @@
Change Log
==========
+Version 3.0.0b2
+---------------
+- API CHANGE
+ `locatedExpr` is being replaced by the class `Located`. `Located` has the same
+ constructor interface as `locatedExpr`, but fixes bugs in the returned
+ `ParseResults` when the searched expression contains multiple tokens, or
+ has internal results names.
+
+ `locatedExpr` is deprecated, and will be removed in a future release.
+
+
Version 3.0.0b1
---------------
- API CHANGE
diff --git a/docs/whats_new_in_3_0_0.rst b/docs/whats_new_in_3_0_0.rst
index 4ddd051..24e9423 100644
--- a/docs/whats_new_in_3_0_0.rst
+++ b/docs/whats_new_in_3_0_0.rst
@@ -4,7 +4,7 @@ What's New in Pyparsing 3.0.0
:author: Paul McGuire
-:date: November, 2020
+:date: December, 2020
:abstract: This document summarizes the changes made
in the 3.0.0 release of pyparsing.
@@ -89,6 +89,44 @@ just namespaces, to add some helpful behavior:
mistake when using Forwards)
(**currently not working on PyPy**)
+New Located class to replace locatedExpr helper method
+------------------------------------------------------
+The new ``Located`` class will replace the current ``locatedExpr`` method for
+marking parsed results with the start and end locations of the parsed data in
+the input string. ``locatedExpr`` had several bugs, and returned its results
+in a hard-to-use format (location data and results names were mixed in with
+the located expression's parsed results, and wrapped in an unnecessary extra
+nesting level).
+
+For this code::
+
+ wd = Word(alphas)
+ for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+ print(match)
+
+the docs for ``locatedExpr`` show this output::
+
+ [[0, 'ljsdf', 5]]
+ [[8, 'lksdjjf', 15]]
+ [[18, 'lkkjj', 23]]
+
+The parsed values and the start and end locations are merged into a single
+nested ParseResults (and any results names in the parsed values are also
+merged in with the start and end location names).
+
+Using ``Located``, the output is::
+
+ [0, ['ljsdf'], 5]
+ [8, ['lksdjjf'], 15]
+ [18, ['lkkjj'], 23]
+
+With ``Located``, the parsed expression values and results names are kept
+separate in the second parsed value, and there is no extra grouping level
+on the whole result.
+
+The existing ``locatedExpr`` is retained for backward-compatibility, but is
+now deprecated and will be removed in a future release.
+
New IndentedBlock class to replace indentedBlock helper method
--------------------------------------------------------------
The new ``IndentedBlock`` class will replace the current ``indentedBlock`` method
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 0649a3c..f79c86a 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -3903,6 +3903,45 @@ class PrecededBy(ParseElementEnhance):
return loc, ret
+class Located(ParseElementEnhance):
+ """
+ Decorates a returned token with its starting and ending
+ locations in the input string.
+
+ This class adds the following results names:
+
+ - ``locn_start`` - location where matched expression begins
+ - ``locn_end`` - location where matched expression ends
+ - ``value`` - the actual parsed results
+
+ Be careful if the input text contains ``<TAB>`` characters, you
+ may want to call :class:`ParserElement.parseWithTabs`
+
+ Example::
+
+ wd = Word(alphas)
+ for match in Located(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+ print(match)
+
+ prints::
+
+ [0, ['ljsdf'], 5]
+ [8, ['lksdjjf'], 15]
+ [18, ['lkkjj'], 23]
+
+ """
+ def parseImpl(self, instring, loc, doActions=True):
+ start = loc
+ loc, tokens = self.expr._parse(
+ instring, start, doActions, callPreParse=False
+ )
+ ret_tokens = ParseResults([start, tokens, loc])
+ ret_tokens['locn_start'] = start
+ ret_tokens['value'] = tokens
+ ret_tokens['locn_end'] = loc
+ return loc, ret_tokens
+
+
class NotAny(ParseElementEnhance):
"""Lookahead to disallow matching with the given parse expression.
``NotAny`` does *not* advance the parsing position within the
diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py
index cf59107..a5bc2cc 100644
--- a/pyparsing/helpers.py
+++ b/pyparsing/helpers.py
@@ -337,7 +337,9 @@ def ungroup(expr):
def locatedExpr(expr):
- """Helper to decorate a returned token with its starting and ending
+ """
+ (DEPRECATED - future code should use the Located class)
+ Helper to decorate a returned token with its starting and ending
locations in the input string.
This helper adds the following results names:
diff --git a/tests/test_unit.py b/tests/test_unit.py
index d84d08d..9b47f28 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -9,6 +9,7 @@
import contextlib
import datetime
+import re
import sys
from io import StringIO
from textwrap import dedent
@@ -56,6 +57,21 @@ class resetting:
setattr(self.ob, attr, value)
+def find_all_re_matches(patt, s):
+ ret = []
+ start = 0
+ if isinstance(patt, str):
+ patt = re.compile(patt)
+ while True:
+ found = patt.search(s, pos=start)
+ if found:
+ ret.append(found)
+ start = found.end()
+ else:
+ break
+ return ret
+
+
class Test1_PyparsingTestInit(TestCase):
def runTest(self):
from pyparsing import (
@@ -1438,8 +1454,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
{"_skipped": ["red ", "456 "]},
)
- def testEllipsisRepetion(self):
- import re
+ def testEllipsisRepetition(self):
word = pp.Word(pp.alphas).setName("word")
num = pp.Word(pp.nums).setName("num")
@@ -2938,7 +2953,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
)
def testParseUsingRegex(self):
- import re
signedInt = pp.Regex(r"[-+][0-9]+")
unsignedInt = pp.Regex(r"[0-9]+")
@@ -3471,8 +3485,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
)
print()
- import re
-
k = pp.Regex(r"a+", flags=re.S + re.M)
k = k.parseWithTabs()
k = k.leaveWhitespace()
@@ -4607,6 +4619,38 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
"incorrect location calculation",
)
+ def testLocatedExprUsingLocated(self):
+ # 012345678901234567890123456789012345678901234567890
+ samplestr1 = "DOB 10-10-2010;more garbage;ID PARI12345678 ;more garbage"
+
+ id_ref = pp.Located("ID" + pp.Word(pp.alphanums, exact=12)("id"))
+
+ res = id_ref.searchString(samplestr1)[0]
+ print(res.dump())
+ self.assertEqual(
+ "ID PARI12345678",
+ samplestr1[res.locn_start:res.locn_end],
+ "incorrect location calculation",
+ )
+ self.assertParseResultsEquals(res,
+ [28, ['ID', 'PARI12345678'], 43],
+ {'locn_end': 43,
+ 'locn_start': 28,
+ 'value': {'id': 'PARI12345678'}}
+ )
+
+ wd = pp.Word(pp.alphas)
+ test_string = "ljsdf123lksdjjf123lkkjj1222"
+ pp_matches = pp.Located(wd).searchString(test_string)
+ re_matches = find_all_re_matches("[a-z]+", test_string)
+ for pp_match, re_match in zip(pp_matches, re_matches):
+ self.assertParseResultsEquals(pp_match, [re_match.start(),
+ [re_match.group(0)],
+ re_match.end()])
+ print(pp_match)
+ print(re_match)
+ print(pp_match.value)
+
def testPop(self):
source = "AAA 123 456 789 234"
patt = pp.Word(pp.alphas)("name") + pp.Word(pp.nums) * (1,)
@@ -7142,7 +7186,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
def testWordInternalReRanges(self):
import random
- import re
self.assertEqual(
"[!-~]+",