summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul McGuire <ptmcg@users.noreply.github.com> 2020-10-11 23:24:20 -0500
committer: Paul McGuire <ptmcg@users.noreply.github.com> 2020-10-11 23:24:20 -0500
commit: 989c506bacf68a1451dbdae7b6975e0004d79e77 (patch)
tree: b3805f25baa7532f85416980110af6c9cfd156d5
parent: a5c77176ffa0275e1ce8768ccabd105f873e406c (diff)
download: pyparsing-git-989c506bacf68a1451dbdae7b6975e0004d79e77.tar.gz
Issue #244, fixed debug output to indicate correct parse location; updated setDebug output to include current text line and parse location
-rw-r--r-- CHANGES 47
-rw-r--r-- pyparsing/core.py 29
-rw-r--r-- pyparsing/util.py 5
-rw-r--r-- tests/test_unit.py 80
4 files changed, 129 insertions, 32 deletions
diff --git a/CHANGES b/CHANGES
index 45d98f7..f384eed 100644
--- a/CHANGES
+++ b/CHANGES
@@ -5,17 +5,17 @@ Change Log
Version 3.0.0b1
---------------
- API CHANGE
- Diagnostic flags have been moved to an enum, pyparsing.Diagnostics, and
+ Diagnostic flags have been moved to an enum, `pyparsing.Diagnostics`, and
they are enabled through module-level methods:
- - enable_diag()
- - disable_diag()
- - enable_all_warnings()
+ - `pyparsing.enable_diag()`
+ - `pyparsing.disable_diag()`
+ - `pyparsing.enable_all_warnings()`
- API CHANGE
- Most previous SyntaxWarnings that were warned when using pyparsing
- classes incorrectly have been converted to TypeError and ValueError exceptions,
+ Most previous `SyntaxWarnings` that were warned when using pyparsing
+ classes incorrectly have been converted to `TypeError` and `ValueError` exceptions,
consistent with Python calling conventions. All warnings warned by diagnostic
- flags have been converted from SyntaxWarnings to UserWarnings.
+ flags have been converted from `SyntaxWarnings` to `UserWarnings`.
- API CHANGE
Added `cache_hit` keyword argument to debug actions. Previously, if packrat
@@ -29,23 +29,36 @@ Version 3.0.0b1
to add this keyword argument, the debug methods will fail silently,
behaving as they did previously.
-- When using setDebug with packrat parsing enabled, packrat cache hits will
+- When using `setDebug` with packrat parsing enabled, packrat cache hits will
now be included in the output, shown with a leading '*'. (Previously, cache
hits and responses were not included in debug output.) For those using custom
- debug actions, see the following bullet regarding an optional API change
+ debug actions, see the previous item regarding an optional API change
for those methods.
+- `setDebug` output will also show more details about what expression
+ is about to be parsed (the current line of text being parsed, and
+ the current parse position):
+
+ Match integer at loc 0(1,1)
+ 1 2 3
+ ^
+ Matched integer -> ['1']
+
+ The current debug location will also be indicated after whitespace
+ has been skipped (was previously inconsistent, reported in Issue #244,
+ by Frank Goyens, thanks!).
+
- Fixed bugs in Each when passed OneOrMore or ZeroOrMore expressions:
. first expression match could be enclosed in an extra nesting level
. out-of-order expressions now handled correctly if mixed with required
expressions
. results names are maintained correctly for these expressions
-- Fixed traceback trimming, and added ParserElement.verbose_traceback
- save/restore to reset_pyparsing_context().
+- Fixed traceback trimming, and added `ParserElement.verbose_traceback`
+ save/restore to `reset_pyparsing_context()`.
-- Default string for Word expressions now also include indications of
- min and max length specification, if applicable, similar to regex length
+- Default string for `Word` expressions now also include indications of
+ `min` and `max` length specification, if applicable, similar to regex length
specifications:
Word(alphas) -> "W:(A-Za-z)"
@@ -55,18 +68,18 @@ Version 3.0.0b1
Word(nums, max=3) -> "W:(0-9){1,3}"
Word(nums, min=2, max=3) -> "W:(0-9){2,3}"
- For expressions of the Char class (similar to Word(..., exact=1), the expression
+ For expressions of the `Char` class (similar to `Word(..., exact=1)`, the expression
is simply the character range in parentheses:
Char(nums) -> "(0-9)"
Char(alphas) -> "(A-Za-z)"
-- Removed copy() override in Keyword class which did not preserve definition
+- Removed `copy()` override in `Keyword` class which did not preserve definition
of ident chars from the original expression. PR #233 submitted by jgrey4296,
thanks!
-- In addition to pyparsing.__version__, there is now also a pyparsing.__version_info__,
- following the same structure and field names as in sys.version_info.
+- In addition to `pyparsing.__version__`, there is now also a `pyparsing.__version_info__`,
+ following the same structure and field names as in `sys.version_info`.
Version 3.0.0a2 - June, 2020
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 00daf61..bf57c30 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -263,8 +263,14 @@ def _defaultStartDebugAction(instring, loc, expr, cache_hit=False):
cache_hit_str = "*" if cache_hit else ""
print(
(
- "{}Match {} at loc {}({},{})".format(
- cache_hit_str, expr, loc, lineno(loc, instring), col(loc, instring)
+ "{}Match {} at loc {}({},{})\n {}\n {}^".format(
+ cache_hit_str,
+ expr,
+ loc,
+ lineno(loc, instring),
+ col(loc, instring),
+ line(loc, instring),
+ " " * (col(loc, instring) - 1),
)
)
)
@@ -592,14 +598,14 @@ class ParserElement(ABC):
if debugging or self.failAction:
# print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
- if self.debugActions[TRY]:
- self.debugActions[TRY](instring, loc, self)
try:
if callPreParse and self.callPreparse:
preloc = self.preParse(instring, loc)
else:
preloc = loc
tokensStart = preloc
+ if self.debugActions[TRY]:
+ self.debugActions[TRY](instring, tokensStart, self)
if self.mayIndexError or preloc >= len(instring):
try:
loc, tokens = self.parseImpl(instring, preloc, doActions)
@@ -3300,6 +3306,8 @@ class Or(ParseExpression):
maxException = None
matches = []
fatals = []
+ if all(e.callPreparse for e in self.exprs):
+ loc = self.preParse(instring, loc)
for e in self.exprs:
try:
loc2 = e.tryParse(instring, loc, raise_fatal=True)
@@ -3422,21 +3430,30 @@ class MatchFirst(ParseExpression):
super().__init__(exprs, savelist)
if self.exprs:
self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+ self.callPreparse = all(e.callPreparse for e in self.exprs)
else:
self.mayReturnEmpty = True
def streamline(self):
super().streamline()
self.saveAsList = any(e.saveAsList for e in self.exprs)
+ if self.exprs:
+ self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+ self.callPreparse = all(e.callPreparse for e in self.exprs)
+ else:
+ self.mayReturnEmpty = True
return self
def parseImpl(self, instring, loc, doActions=True):
maxExcLoc = -1
maxException = None
fatals = []
+
for e in self.exprs:
try:
- ret = e._parse(instring, loc, doActions)
+ ret = e._parse(
+ instring, loc, doActions, callPreParse=not self.callPreparse
+ )
return ret
except ParseFatalException as pfe:
pfe.__traceback__ = None
@@ -3934,7 +3951,7 @@ class _MultipleMatch(ParseElementEnhance):
# if so, fail)
if check_ender:
try_not_ender(instring, loc)
- loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False)
+ loc, tokens = self_expr_parse(instring, loc, doActions)
try:
hasIgnoreExprs = not not self.ignoreExprs
while 1:
diff --git a/pyparsing/util.py b/pyparsing/util.py
index 1a700ec..152316c 100644
--- a/pyparsing/util.py
+++ b/pyparsing/util.py
@@ -3,7 +3,7 @@ import warnings
import types
import collections
import itertools
-
+from functools import lru_cache
_bslash = chr(92)
@@ -36,6 +36,7 @@ class __config_flags:
disable = classmethod(lambda cls, name: cls._set(name, False))
+@lru_cache(maxsize=128)
def col(loc, strg):
"""Returns current column within a string, counting newlines as line separators.
The first column is number 1.
@@ -51,6 +52,7 @@ def col(loc, strg):
return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc)
+@lru_cache(maxsize=128)
def lineno(loc, strg):
"""Returns current line number within a string, counting newlines as line separators.
The first line is number 1.
@@ -64,6 +66,7 @@ def lineno(loc, strg):
return strg.count("\n", 0, loc) + 1
+@lru_cache(maxsize=128)
def line(loc, strg):
"""Returns the line of text containing loc within a string, counting newlines as line separators.
"""
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 1d6e28b..4897d97 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -6941,12 +6941,20 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
expected_debug_output = textwrap.dedent(
"""\
Match integer at loc 0(1,1)
+ 1 2 3
+ ^
Matched integer -> ['1']
- Match integer at loc 1(1,2)
+ Match integer at loc 2(1,3)
+ 1 2 3
+ ^
Matched integer -> ['2']
- Match integer at loc 3(1,4)
+ Match integer at loc 4(1,5)
+ 1 2 3
+ ^
Matched integer -> ['3']
Match integer at loc 5(1,6)
+ 1 2 3
+ ^
ParseException raised: Expected integer, found end of text (at char 5), (line:1, col:6)
"""
)
@@ -6979,27 +6987,49 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
expected_debug_output = textwrap.dedent(
"""\
Match [{integer | W:(0-9A-Za-z)}]... at loc 0(1,1)
+ 123 A100
+ ^
Match integer at loc 0(1,1)
+ 123 A100
+ ^
Matched integer -> [123]
- Match integer at loc 3(1,4)
+ Match integer at loc 4(1,5)
+ 123 A100
+ ^
ParseException raised: Expected integer, found 'A' (at char 4), (line:1, col:5)
- Match W:(0-9A-Za-z) at loc 3(1,4)
+ Match W:(0-9A-Za-z) at loc 4(1,5)
+ 123 A100
+ ^
Matched W:(0-9A-Za-z) -> ['A100']
Match integer at loc 8(1,9)
+ 123 A100
+ ^
ParseException raised: Expected integer, found end of text (at char 8), (line:1, col:9)
Match W:(0-9A-Za-z) at loc 8(1,9)
+ 123 A100
+ ^
ParseException raised: Expected W:(0-9A-Za-z), found end of text (at char 8), (line:1, col:9)
Matched [{integer | W:(0-9A-Za-z)}]... -> [123, 'A100']
Match integer at loc 0(1,1)
+ 123 A100
+ ^
Matched integer -> [123]
- Match integer at loc 3(1,4)
+ Match integer at loc 4(1,5)
+ 123 A100
+ ^
ParseException raised: Expected integer, found 'A' (at char 4), (line:1, col:5)
- Match W:(0-9A-Za-z) at loc 3(1,4)
+ Match W:(0-9A-Za-z) at loc 4(1,5)
+ 123 A100
+ ^
Matched W:(0-9A-Za-z) -> ['A100']
Match integer at loc 8(1,9)
+ 123 A100
+ ^
ParseException raised: Expected integer, found end of text (at char 8), (line:1, col:9)
Match W:(0-9A-Za-z) at loc 8(1,9)
+ 123 A100
+ ^
ParseException raised: Expected W:(0-9A-Za-z), found end of text (at char 8), (line:1, col:9)
"""
)
@@ -7031,38 +7061,72 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
expected_debug_output = textwrap.dedent(
"""\
Match Z at loc 0(1,1)
+ aba
+ ^
ParseException raised: Expected Z, found 'a' (at char 0), (line:1, col:1)
Match leading_a at loc 0(1,1)
+ aba
+ ^
Match A at loc 0(1,1)
+ aba
+ ^
Matched A -> ['a']
Match Z at loc 1(1,2)
+ aba
+ ^
ParseException raised: Expected Z, found 'b' (at char 1), (line:1, col:2)
Match A at loc 1(1,2)
+ aba
+ ^
ParseException raised: Expected A, found 'b' (at char 1), (line:1, col:2)
Match B at loc 1(1,2)
+ aba
+ ^
Matched B -> ['b']
Matched leading_a -> ['a']
*Match Z at loc 1(1,2)
+ aba
+ ^
*ParseException raised: Expected Z, found 'b' (at char 1), (line:1, col:2)
Match leading_a at loc 1(1,2)
- Match A at loc 1(1,2)
- ParseException raised: Expected A, found 'b' (at char 1), (line:1, col:2)
+ aba
+ ^
+ *Match A at loc 1(1,2)
+ aba
+ ^
+ *ParseException raised: Expected A, found 'b' (at char 1), (line:1, col:2)
ParseException raised: Expected A, found 'b' (at char 1), (line:1, col:2)
*Match B at loc 1(1,2)
+ aba
+ ^
*Matched B -> ['b']
Match Z at loc 2(1,3)
+ aba
+ ^
ParseException raised: Expected Z, found 'a' (at char 2), (line:1, col:3)
Match leading_a at loc 2(1,3)
+ aba
+ ^
Match A at loc 2(1,3)
+ aba
+ ^
Matched A -> ['a']
Match Z at loc 3(1,4)
+ aba
+ ^
ParseException raised: Expected Z, found end of text (at char 3), (line:1, col:4)
Match A at loc 3(1,4)
+ aba
+ ^
ParseException raised: Expected A, found end of text (at char 3), (line:1, col:4)
Match B at loc 3(1,4)
+ aba
+ ^
ParseException raised: Expected B, found end of text (at char 3), (line:1, col:4)
ParseException raised: Expected {Z | A | B}, found end of text (at char 3), (line:1, col:4)
Match B at loc 2(1,3)
+ aba
+ ^
ParseException raised: Expected B, found 'a' (at char 2), (line:1, col:3)
"""
)