diff options
author | Paul McGuire <ptmcg@users.noreply.github.com> | 2020-10-11 23:24:20 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@users.noreply.github.com> | 2020-10-11 23:24:20 -0500 |
commit | 989c506bacf68a1451dbdae7b6975e0004d79e77 (patch) | |
tree | b3805f25baa7532f85416980110af6c9cfd156d5 | |
parent | a5c77176ffa0275e1ce8768ccabd105f873e406c (diff) | |
download | pyparsing-git-989c506bacf68a1451dbdae7b6975e0004d79e77.tar.gz |
Issue #244, fixed debug output to indicate correct parse location; updated setDebug output to include current text line and parse location
-rw-r--r-- | CHANGES | 47 | ||||
-rw-r--r-- | pyparsing/core.py | 29 | ||||
-rw-r--r-- | pyparsing/util.py | 5 | ||||
-rw-r--r-- | tests/test_unit.py | 80 |
4 files changed, 129 insertions, 32 deletions
@@ -5,17 +5,17 @@ Change Log Version 3.0.0b1 --------------- - API CHANGE - Diagnostic flags have been moved to an enum, pyparsing.Diagnostics, and + Diagnostic flags have been moved to an enum, `pyparsing.Diagnostics`, and they are enabled through module-level methods: - - enable_diag() - - disable_diag() - - enable_all_warnings() + - `pyparsing.enable_diag()` + - `pyparsing.disable_diag()` + - `pyparsing.enable_all_warnings()` - API CHANGE - Most previous SyntaxWarnings that were warned when using pyparsing - classes incorrectly have been converted to TypeError and ValueError exceptions, + Most previous `SyntaxWarnings` that were warned when using pyparsing + classes incorrectly have been converted to `TypeError` and `ValueError` exceptions, consistent with Python calling conventions. All warnings warned by diagnostic - flags have been converted from SyntaxWarnings to UserWarnings. + flags have been converted from `SyntaxWarnings` to `UserWarnings`. - API CHANGE Added `cache_hit` keyword argument to debug actions. Previously, if packrat @@ -29,23 +29,36 @@ Version 3.0.0b1 to add this keyword argument, the debug methods will fail silently, behaving as they did previously. -- When using setDebug with packrat parsing enabled, packrat cache hits will +- When using `setDebug` with packrat parsing enabled, packrat cache hits will now be included in the output, shown with a leading '*'. (Previously, cache hits and responses were not included in debug output.) For those using custom - debug actions, see the following bullet regarding an optional API change + debug actions, see the previous item regarding an optional API change for those methods. +- `setDebug` output will also show more details about what expression + is about to be parsed (the current line of text being parsed, and + the current parse position): + + Match integer at loc 0(1,1) + 1 2 3 + ^ + Matched integer -> ['1'] + + The current debug location will also be indicated after whitespace + has been skipped (was previously inconsistent, reported in Issue #244, + by Frank Goyens, thanks!). + - Fixed bugs in Each when passed OneOrMore or ZeroOrMore expressions: . first expression match could be enclosed in an extra nesting level . out-of-order expressions now handled correctly if mixed with required expressions . results names are maintained correctly for these expressions -- Fixed traceback trimming, and added ParserElement.verbose_traceback - save/restore to reset_pyparsing_context(). +- Fixed traceback trimming, and added `ParserElement.verbose_traceback` + save/restore to `reset_pyparsing_context()`. -- Default string for Word expressions now also include indications of - min and max length specification, if applicable, similar to regex length +- Default string for `Word` expressions now also include indications of + `min` and `max` length specification, if applicable, similar to regex length specifications: Word(alphas) -> "W:(A-Za-z)" @@ -55,18 +68,18 @@ Version 3.0.0b1 Word(nums, max=3) -> "W:(0-9){1,3}" Word(nums, min=2, max=3) -> "W:(0-9){2,3}" - For expressions of the Char class (similar to Word(..., exact=1), the expression + For expressions of the `Char` class (similar to `Word(..., exact=1)`, the expression is simply the character range in parentheses: Char(nums) -> "(0-9)" Char(alphas) -> "(A-Za-z)" -- Removed copy() override in Keyword class which did not preserve definition +- Removed `copy()` override in `Keyword` class which did not preserve definition of ident chars from the original expression. PR #233 submitted by jgrey4296, thanks! -- In addition to pyparsing.__version__, there is now also a pyparsing.__version_info__, - following the same structure and field names as in sys.version_info. +- In addition to `pyparsing.__version__`, there is now also a `pyparsing.__version_info__`, + following the same structure and field names as in `sys.version_info`. Version 3.0.0a2 - June, 2020 diff --git a/pyparsing/core.py b/pyparsing/core.py index 00daf61..bf57c30 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -263,8 +263,14 @@ def _defaultStartDebugAction(instring, loc, expr, cache_hit=False): cache_hit_str = "*" if cache_hit else "" print( ( - "{}Match {} at loc {}({},{})".format( - cache_hit_str, expr, loc, lineno(loc, instring), col(loc, instring) + "{}Match {} at loc {}({},{})\n {}\n {}^".format( + cache_hit_str, + expr, + loc, + lineno(loc, instring), + col(loc, instring), + line(loc, instring), + " " * (col(loc, instring) - 1), ) ) ) @@ -592,14 +598,14 @@ class ParserElement(ABC): if debugging or self.failAction: # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring))) - if self.debugActions[TRY]: - self.debugActions[TRY](instring, loc, self) try: if callPreParse and self.callPreparse: preloc = self.preParse(instring, loc) else: preloc = loc tokensStart = preloc + if self.debugActions[TRY]: + self.debugActions[TRY](instring, tokensStart, self) if self.mayIndexError or preloc >= len(instring): try: loc, tokens = self.parseImpl(instring, preloc, doActions) @@ -3300,6 +3306,8 @@ class Or(ParseExpression): maxException = None matches = [] fatals = [] + if all(e.callPreparse for e in self.exprs): + loc = self.preParse(instring, loc) for e in self.exprs: try: loc2 = e.tryParse(instring, loc, raise_fatal=True) @@ -3422,21 +3430,30 @@ class MatchFirst(ParseExpression): super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.callPreparse = all(e.callPreparse for e in self.exprs) else: self.mayReturnEmpty = True def streamline(self): super().streamline() self.saveAsList = any(e.saveAsList for e in self.exprs) + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.callPreparse = all(e.callPreparse for e in self.exprs) + else: + self.mayReturnEmpty = True return self def parseImpl(self, instring, loc, doActions=True): maxExcLoc = -1 maxException = None fatals = [] + for e in self.exprs: try: - ret = e._parse(instring, loc, doActions) + ret = e._parse( + instring, loc, doActions, callPreParse=not self.callPreparse + ) return ret except ParseFatalException as pfe: pfe.__traceback__ = None @@ -3934,7 +3951,7 @@ class _MultipleMatch(ParseElementEnhance): # if so, fail) if check_ender: try_not_ender(instring, loc) - loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False) + loc, tokens = self_expr_parse(instring, loc, doActions) try: hasIgnoreExprs = not not self.ignoreExprs while 1: diff --git a/pyparsing/util.py b/pyparsing/util.py index 1a700ec..152316c 100644 --- a/pyparsing/util.py +++ b/pyparsing/util.py @@ -3,7 +3,7 @@ import warnings import types import collections import itertools - +from functools import lru_cache _bslash = chr(92) @@ -36,6 +36,7 @@ class __config_flags: disable = classmethod(lambda cls, name: cls._set(name, False)) +@lru_cache(maxsize=128) def col(loc, strg): """Returns current column within a string, counting newlines as line separators. The first column is number 1. @@ -51,6 +52,7 @@ def col(loc, strg): return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc) +@lru_cache(maxsize=128) def lineno(loc, strg): """Returns current line number within a string, counting newlines as line separators. The first line is number 1. @@ -64,6 +66,7 @@ def lineno(loc, strg): return strg.count("\n", 0, loc) + 1 +@lru_cache(maxsize=128) def line(loc, strg): """Returns the line of text containing loc within a string, counting newlines as line separators. """ diff --git a/tests/test_unit.py b/tests/test_unit.py index 1d6e28b..4897d97 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -6941,12 +6941,20 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): expected_debug_output = textwrap.dedent( """\ Match integer at loc 0(1,1) + 1 2 3 + ^ Matched integer -> ['1'] - Match integer at loc 1(1,2) + Match integer at loc 2(1,3) + 1 2 3 + ^ Matched integer -> ['2'] - Match integer at loc 3(1,4) + Match integer at loc 4(1,5) + 1 2 3 + ^ Matched integer -> ['3'] Match integer at loc 5(1,6) + 1 2 3 + ^ ParseException raised: Expected integer, found end of text (at char 5), (line:1, col:6) """ ) @@ -6979,27 +6987,49 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): expected_debug_output = textwrap.dedent( """\ Match [{integer | W:(0-9A-Za-z)}]... at loc 0(1,1) + 123 A100 + ^ Match integer at loc 0(1,1) + 123 A100 + ^ Matched integer -> [123] - Match integer at loc 3(1,4) + Match integer at loc 4(1,5) + 123 A100 + ^ ParseException raised: Expected integer, found 'A' (at char 4), (line:1, col:5) - Match W:(0-9A-Za-z) at loc 3(1,4) + Match W:(0-9A-Za-z) at loc 4(1,5) + 123 A100 + ^ Matched W:(0-9A-Za-z) -> ['A100'] Match integer at loc 8(1,9) + 123 A100 + ^ ParseException raised: Expected integer, found end of text (at char 8), (line:1, col:9) Match W:(0-9A-Za-z) at loc 8(1,9) + 123 A100 + ^ ParseException raised: Expected W:(0-9A-Za-z), found end of text (at char 8), (line:1, col:9) Matched [{integer | W:(0-9A-Za-z)}]... -> [123, 'A100'] Match integer at loc 0(1,1) + 123 A100 + ^ Matched integer -> [123] - Match integer at loc 3(1,4) + Match integer at loc 4(1,5) + 123 A100 + ^ ParseException raised: Expected integer, found 'A' (at char 4), (line:1, col:5) - Match W:(0-9A-Za-z) at loc 3(1,4) + Match W:(0-9A-Za-z) at loc 4(1,5) + 123 A100 + ^ Matched W:(0-9A-Za-z) -> ['A100'] Match integer at loc 8(1,9) + 123 A100 + ^ ParseException raised: Expected integer, found end of text (at char 8), (line:1, col:9) Match W:(0-9A-Za-z) at loc 8(1,9) + 123 A100 + ^ ParseException raised: Expected W:(0-9A-Za-z), found end of text (at char 8), (line:1, col:9) """ ) @@ -7031,38 +7061,72 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): expected_debug_output = textwrap.dedent( """\ Match Z at loc 0(1,1) + aba + ^ ParseException raised: Expected Z, found 'a' (at char 0), (line:1, col:1) Match leading_a at loc 0(1,1) + aba + ^ Match A at loc 0(1,1) + aba + ^ Matched A -> ['a'] Match Z at loc 1(1,2) + aba + ^ ParseException raised: Expected Z, found 'b' (at char 1), (line:1, col:2) Match A at loc 1(1,2) + aba + ^ ParseException raised: Expected A, found 'b' (at char 1), (line:1, col:2) Match B at loc 1(1,2) + aba + ^ Matched B -> ['b'] Matched leading_a -> ['a'] *Match Z at loc 1(1,2) + aba + ^ *ParseException raised: Expected Z, found 'b' (at char 1), (line:1, col:2) Match leading_a at loc 1(1,2) - Match A at loc 1(1,2) - ParseException raised: Expected A, found 'b' (at char 1), (line:1, col:2) + aba + ^ + *Match A at loc 1(1,2) + aba + ^ + *ParseException raised: Expected A, found 'b' (at char 1), (line:1, col:2) ParseException raised: Expected A, found 'b' (at char 1), (line:1, col:2) *Match B at loc 1(1,2) + aba + ^ *Matched B -> ['b'] Match Z at loc 2(1,3) + aba + ^ ParseException raised: Expected Z, found 'a' (at char 2), (line:1, col:3) Match leading_a at loc 2(1,3) + aba + ^ Match A at loc 2(1,3) + aba + ^ Matched A -> ['a'] Match Z at loc 3(1,4) + aba + ^ ParseException raised: Expected Z, found end of text (at char 3), (line:1, col:4) Match A at loc 3(1,4) + aba + ^ ParseException raised: Expected A, found end of text (at char 3), (line:1, col:4) Match B at loc 3(1,4) + aba + ^ ParseException raised: Expected B, found end of text (at char 3), (line:1, col:4) ParseException raised: Expected {Z | A | B}, found end of text (at char 3), (line:1, col:4) Match B at loc 2(1,3) + aba + ^ ParseException raised: Expected B, found 'a' (at char 2), (line:1, col:3) """ ) |