Issue #244, fixed debug output to indicate correct parse location; updated setDebug output to include current text line and parse location

author: Paul McGuire <ptmcg@users.noreply.github.com> 2020-10-11 23:24:20 -0500
committer: Paul McGuire <ptmcg@users.noreply.github.com> 2020-10-11 23:24:20 -0500
commit: 989c506bacf68a1451dbdae7b6975e0004d79e77 (patch)
tree: b3805f25baa7532f85416980110af6c9cfd156d5
parent: a5c77176ffa0275e1ce8768ccabd105f873e406c (diff)
download: pyparsing-git-989c506bacf68a1451dbdae7b6975e0004d79e77.tar.gz
4 files changed, 129 insertions, 32 deletions
diff --git a/CHANGES b/CHANGES
index 45d98f7..f384eed 100644
--- a/CHANGES
+++ b/CHANGES
@@ -5,17 +5,17 @@ Change Log
 Version 3.0.0b1
 ---------------
 - API CHANGE
-  Diagnostic flags have been moved to an enum, pyparsing.Diagnostics, and
+  Diagnostic flags have been moved to an enum, `pyparsing.Diagnostics`, and
   they are enabled through module-level methods:
-  - enable_diag()
-  - disable_diag()
-  - enable_all_warnings()
+  - `pyparsing.enable_diag()`
+  - `pyparsing.disable_diag()`
+  - `pyparsing.enable_all_warnings()`
 
 - API CHANGE
-  Most previous SyntaxWarnings that were warned when using pyparsing
-  classes incorrectly have been converted to TypeError and ValueError exceptions,
+  Most previous `SyntaxWarnings` that were warned when using pyparsing
+  classes incorrectly have been converted to `TypeError` and `ValueError` exceptions,
   consistent with Python calling conventions. All warnings warned by diagnostic
-  flags have been converted from SyntaxWarnings to UserWarnings.
+  flags have been converted from `SyntaxWarnings` to `UserWarnings`.
 
 - API CHANGE
   Added `cache_hit` keyword argument to debug actions. Previously, if packrat
@@ -29,23 +29,36 @@ Version 3.0.0b1
   to add this keyword argument, the debug methods will fail silently,
   behaving as they did previously.
 
-- When using setDebug with packrat parsing enabled, packrat cache hits will
+- When using `setDebug` with packrat parsing enabled, packrat cache hits will
   now be included in the output, shown with a leading '*'. (Previously, cache
   hits and responses were not included in debug output.) For those using custom
-  debug actions, see the following bullet regarding an optional API change
+  debug actions, see the previous item regarding an optional API change
   for those methods.
 
+- `setDebug` output will also show more details about what expression
+  is about to be parsed (the current line of text being parsed, and
+  the current parse position):
+
+        Match integer at loc 0(1,1)
+          1 2 3
+          ^
+        Matched integer -> ['1']
+
+  The current debug location will also be indicated after whitespace
+  has been skipped (was previously inconsistent, reported in Issue #244,
+  by Frank Goyens, thanks!).
+
 - Fixed bugs in Each when passed OneOrMore or ZeroOrMore expressions:
   . first expression match could be enclosed in an extra nesting level
   . out-of-order expressions now handled correctly if mixed with required
     expressions
   . results names are maintained correctly for these expressions
 
-- Fixed traceback trimming, and added ParserElement.verbose_traceback
-  save/restore to reset_pyparsing_context().
+- Fixed traceback trimming, and added `ParserElement.verbose_traceback`
+  save/restore to `reset_pyparsing_context()`.
 
-- Default string for Word expressions now also include indications of
-  min and max length specification, if applicable, similar to regex length
+- Default string for `Word` expressions now also includes indications of
+  `min` and `max` length specification, if applicable, similar to regex length
   specifications:
 
         Word(alphas)             -> "W:(A-Za-z)"
@@ -55,18 +68,18 @@ Version 3.0.0b1
         Word(nums, max=3)        -> "W:(0-9){1,3}"
         Word(nums, min=2, max=3) -> "W:(0-9){2,3}"
 
-  For expressions of the Char class (similar to Word(..., exact=1), the expression
+  For expressions of the `Char` class (similar to `Word(..., exact=1)`), the expression
   is simply the character range in parentheses:
 
         Char(nums)               -> "(0-9)"
         Char(alphas)             -> "(A-Za-z)"
 
-- Removed copy() override in Keyword class which did not preserve definition
+- Removed `copy()` override in `Keyword` class which did not preserve definition
   of ident chars from the original expression. PR #233 submitted by jgrey4296,
   thanks!
 
-- In addition to pyparsing.__version__, there is now also a pyparsing.__version_info__,
-  following the same structure and field names as in sys.version_info.
+- In addition to `pyparsing.__version__`, there is now also a `pyparsing.__version_info__`,
+  following the same structure and field names as in `sys.version_info`.
 
 
 Version 3.0.0a2 - June, 2020
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 00daf61..bf57c30 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -263,8 +263,14 @@ def _defaultStartDebugAction(instring, loc, expr, cache_hit=False):
     cache_hit_str = "*" if cache_hit else ""
     print(
         (
-            "{}Match {} at loc {}({},{})".format(
-                cache_hit_str, expr, loc, lineno(loc, instring), col(loc, instring)
+            "{}Match {} at loc {}({},{})\n  {}\n  {}^".format(
+                cache_hit_str,
+                expr,
+                loc,
+                lineno(loc, instring),
+                col(loc, instring),
+                line(loc, instring),
+                " " * (col(loc, instring) - 1),
             )
         )
     )
@@ -592,14 +598,14 @@ class ParserElement(ABC):
 
         if debugging or self.failAction:
             # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
-            if self.debugActions[TRY]:
-                self.debugActions[TRY](instring, loc, self)
             try:
                 if callPreParse and self.callPreparse:
                     preloc = self.preParse(instring, loc)
                 else:
                     preloc = loc
                 tokensStart = preloc
+                if self.debugActions[TRY]:
+                    self.debugActions[TRY](instring, tokensStart, self)
                 if self.mayIndexError or preloc >= len(instring):
                     try:
                         loc, tokens = self.parseImpl(instring, preloc, doActions)
@@ -3300,6 +3306,8 @@ class Or(ParseExpression):
         maxException = None
         matches = []
         fatals = []
+        if all(e.callPreparse for e in self.exprs):
+            loc = self.preParse(instring, loc)
         for e in self.exprs:
             try:
                 loc2 = e.tryParse(instring, loc, raise_fatal=True)
@@ -3422,21 +3430,30 @@ class MatchFirst(ParseExpression):
         super().__init__(exprs, savelist)
         if self.exprs:
             self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+            self.callPreparse = all(e.callPreparse for e in self.exprs)
         else:
             self.mayReturnEmpty = True
 
     def streamline(self):
         super().streamline()
         self.saveAsList = any(e.saveAsList for e in self.exprs)
+        if self.exprs:
+            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+            self.callPreparse = all(e.callPreparse for e in self.exprs)
+        else:
+            self.mayReturnEmpty = True
         return self
 
     def parseImpl(self, instring, loc, doActions=True):
         maxExcLoc = -1
         maxException = None
         fatals = []
+
         for e in self.exprs:
             try:
-                ret = e._parse(instring, loc, doActions)
+                ret = e._parse(
+                    instring, loc, doActions, callPreParse=not self.callPreparse
+                )
                 return ret
             except ParseFatalException as pfe:
                 pfe.__traceback__ = None
@@ -3934,7 +3951,7 @@ class _MultipleMatch(ParseElementEnhance):
         # if so, fail)
         if check_ender:
             try_not_ender(instring, loc)
-        loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False)
+        loc, tokens = self_expr_parse(instring, loc, doActions)
         try:
             hasIgnoreExprs = not not self.ignoreExprs
             while 1:
diff --git a/pyparsing/util.py b/pyparsing/util.py
index 1a700ec..152316c 100644
--- a/pyparsing/util.py
+++ b/pyparsing/util.py
@@ -3,7 +3,7 @@ import warnings
 import types
 import collections
 import itertools
-
+from functools import lru_cache
 
 _bslash = chr(92)
 
@@ -36,6 +36,7 @@ class __config_flags:
     disable = classmethod(lambda cls, name: cls._set(name, False))
 
 
+@lru_cache(maxsize=128)
 def col(loc, strg):
     """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.
@@ -51,6 +52,7 @@ def col(loc, strg):
     return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc)
 
 
+@lru_cache(maxsize=128)
 def lineno(loc, strg):
     """Returns current line number within a string, counting newlines as line separators.
     The first line is number 1.
@@ -64,6 +66,7 @@ def lineno(loc, strg):
     return strg.count("\n", 0, loc) + 1
 
 
+@lru_cache(maxsize=128)
 def line(loc, strg):
     """Returns the line of text containing loc within a string, counting newlines as line separators.
        """
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 1d6e28b..4897d97 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -6941,12 +6941,20 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             expected_debug_output = textwrap.dedent(
                 """\
                 Match integer at loc 0(1,1)
+                  1 2 3
+                  ^
                 Matched integer -> ['1']
-                Match integer at loc 1(1,2)
+                Match integer at loc 2(1,3)
+                  1 2 3
+                    ^
                 Matched integer -> ['2']
-                Match integer at loc 3(1,4)
+                Match integer at loc 4(1,5)
+                  1 2 3
+                      ^
                 Matched integer -> ['3']
                 Match integer at loc 5(1,6)
+                  1 2 3
+                       ^
                 ParseException raised: Expected integer, found end of text  (at char 5), (line:1, col:6)
                 """
             )
@@ -6979,27 +6987,49 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         expected_debug_output = textwrap.dedent(
             """\
             Match [{integer | W:(0-9A-Za-z)}]... at loc 0(1,1)
+              123 A100
+              ^
             Match integer at loc 0(1,1)
+              123 A100
+              ^
             Matched integer -> [123]
-            Match integer at loc 3(1,4)
+            Match integer at loc 4(1,5)
+              123 A100
+                  ^
             ParseException raised: Expected integer, found 'A'  (at char 4), (line:1, col:5)
-            Match W:(0-9A-Za-z) at loc 3(1,4)
+            Match W:(0-9A-Za-z) at loc 4(1,5)
+              123 A100
+                  ^
             Matched W:(0-9A-Za-z) -> ['A100']
             Match integer at loc 8(1,9)
+              123 A100
+                      ^
             ParseException raised: Expected integer, found end of text  (at char 8), (line:1, col:9)
             Match W:(0-9A-Za-z) at loc 8(1,9)
+              123 A100
+                      ^
             ParseException raised: Expected W:(0-9A-Za-z), found end of text  (at char 8), (line:1, col:9)
             Matched [{integer | W:(0-9A-Za-z)}]... -> [123, 'A100']
             
             Match integer at loc 0(1,1)
+              123 A100
+              ^
             Matched integer -> [123]
-            Match integer at loc 3(1,4)
+            Match integer at loc 4(1,5)
+              123 A100
+                  ^
             ParseException raised: Expected integer, found 'A'  (at char 4), (line:1, col:5)
-            Match W:(0-9A-Za-z) at loc 3(1,4)
+            Match W:(0-9A-Za-z) at loc 4(1,5)
+              123 A100
+                  ^
             Matched W:(0-9A-Za-z) -> ['A100']
             Match integer at loc 8(1,9)
+              123 A100
+                      ^
             ParseException raised: Expected integer, found end of text  (at char 8), (line:1, col:9)
             Match W:(0-9A-Za-z) at loc 8(1,9)
+              123 A100
+                      ^
             ParseException raised: Expected W:(0-9A-Za-z), found end of text  (at char 8), (line:1, col:9)
             """
         )
@@ -7031,38 +7061,72 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         expected_debug_output = textwrap.dedent(
             """\
             Match Z at loc 0(1,1)
+              aba
+              ^
             ParseException raised: Expected Z, found 'a'  (at char 0), (line:1, col:1)
             Match leading_a at loc 0(1,1)
+              aba
+              ^
             Match A at loc 0(1,1)
+              aba
+              ^
             Matched A -> ['a']
             Match Z at loc 1(1,2)
+              aba
+               ^
             ParseException raised: Expected Z, found 'b'  (at char 1), (line:1, col:2)
             Match A at loc 1(1,2)
+              aba
+               ^
             ParseException raised: Expected A, found 'b'  (at char 1), (line:1, col:2)
             Match B at loc 1(1,2)
+              aba
+               ^
             Matched B -> ['b']
             Matched leading_a -> ['a']
             *Match Z at loc 1(1,2)
+              aba
+               ^
             *ParseException raised: Expected Z, found 'b'  (at char 1), (line:1, col:2)
             Match leading_a at loc 1(1,2)
-            Match A at loc 1(1,2)
-            ParseException raised: Expected A, found 'b'  (at char 1), (line:1, col:2)
+              aba
+               ^
+            *Match A at loc 1(1,2)
+              aba
+               ^
+            *ParseException raised: Expected A, found 'b'  (at char 1), (line:1, col:2)
             ParseException raised: Expected A, found 'b'  (at char 1), (line:1, col:2)
             *Match B at loc 1(1,2)
+              aba
+               ^
             *Matched B -> ['b']
             Match Z at loc 2(1,3)
+              aba
+                ^
             ParseException raised: Expected Z, found 'a'  (at char 2), (line:1, col:3)
             Match leading_a at loc 2(1,3)
+              aba
+                ^
             Match A at loc 2(1,3)
+              aba
+                ^
             Matched A -> ['a']
             Match Z at loc 3(1,4)
+              aba
+                 ^
             ParseException raised: Expected Z, found end of text  (at char 3), (line:1, col:4)
             Match A at loc 3(1,4)
+              aba
+                 ^
             ParseException raised: Expected A, found end of text  (at char 3), (line:1, col:4)
             Match B at loc 3(1,4)
+              aba
+                 ^
             ParseException raised: Expected B, found end of text  (at char 3), (line:1, col:4)
             ParseException raised: Expected {Z | A | B}, found end of text  (at char 3), (line:1, col:4)
             Match B at loc 2(1,3)
+              aba
+                ^
             ParseException raised: Expected B, found 'a'  (at char 2), (line:1, col:3)
             """
         )
author	Paul McGuire <ptmcg@users.noreply.github.com>	2020-10-11 23:24:20 -0500
committer	Paul McGuire <ptmcg@users.noreply.github.com>	2020-10-11 23:24:20 -0500
commit	989c506bacf68a1451dbdae7b6975e0004d79e77 (patch)
tree	b3805f25baa7532f85416980110af6c9cfd156d5
parent	a5c77176ffa0275e1ce8768ccabd105f873e406c (diff)
download	pyparsing-git-989c506bacf68a1451dbdae7b6975e0004d79e77.tar.gz