Add support for LR parsingleft_recursion_support

* first draft of LR parsing * removed debug output * cache is owned and cleared by ParserElement * bounded recursion must be enabled explicitly * packrat rejects recursion * basic LR unit test * tests for associativity and nesting * added math example * fixed test typo * unittest for empty and non-peg clauses * LR-Forward can match Empty * fixed test typos * added base case to unittest * memo cache only provides copies * flattened Forward parse method * added high-level description of algorithm * expanded docstring * added tests for repetition rules * renamed bounded to left recursion * naive test for existing suite * explicitly testing tests for LR compatibility * LR memo no longer mixes action/no-action results * simplified replacement logic * adjusted example with ambiguous failure case * LR memo content is always returned as copy * draft for peeking recursion * memo update consistent for all actions * fixed a bug for non-string token identifiers * action wins against no-action * cleanup * properly setting names in tests * memoization can be turned off * testing memo switches * typos * flattened recursion memo * left recursion memo size may be limited * adjusted docs for recursion cache
author: Max Fischer <maxfischer2781@gmail.com> 2021-07-30 14:51:33 +0200
committer: GitHub <noreply@github.com> 2021-07-30 07:51:33 -0500
commit: 5eafa07470a3e761944ff5af00dbcd794dfa09da (patch)
tree: 832e780435bee81bbe5f1c619b8be544a6426656 /pyparsing
parent: d108a29db062c9250f50c978e3a86381d1b0746b (diff)
download: pyparsing-git-left_recursion_support.tar.gz
4 files changed, 194 insertions, 3 deletions
diff --git a/pyparsing/core.py b/pyparsing/core.py
index c4adb5d..d5ee05f 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -1,6 +1,7 @@
 #
 # core.py
 #
+from typing import Optional
 from abc import ABC, abstractmethod
 from enum import Enum
 import string
@@ -23,6 +24,8 @@ from .util import (
     _escapeRegexRangeChars,
     _bslash,
     _flatten,
+    LRUMemo as _LRUMemo,
+    UnboundedMemo as _UnboundedMemo,
 )
 from .exceptions import *
 from .actions import *
@@ -703,6 +706,10 @@ class ParserElement(ABC):
         else:
             return True
 
+    # cache for left-recursion in Forward references
+    recursion_lock = RLock()
+    recursion_memos = {}  # type: dict[tuple[int, Forward, bool], tuple[int, ParseResults | Exception]]
+
     # argument cache for optimizing repeated calls when backtracking through recursive expressions
     packrat_cache = (
         {}
@@ -766,11 +773,73 @@ class ParserElement(ABC):
         ParserElement.packrat_cache_stats[:] = [0] * len(
             ParserElement.packrat_cache_stats
         )
+        ParserElement.recursion_memos.clear()
 
     _packratEnabled = False
+    _left_recursion_enabled = False
+
+    @staticmethod
+    def disable_memoization():
+        """
+        Disables active Packrat or Left Recursion parsing and their memoization
+
+        This method also works if neither Packrat nor Left Recursion are enabled.
+        This makes it safe to call before activating Packrat nor Left Recursion
+        to clear any previous settings.
+        """
+        ParserElement.resetCache()
+        ParserElement._left_recursion_enabled = False
+        ParserElement._packratEnabled = False
+        ParserElement._parse = ParserElement._parseNoCache
+
+    @staticmethod
+    def enable_left_recursion(cache_size_limit: Optional[int] = None, *, force=False):
+        """
+        Enables "bounded recursion" parsing, which allows for both direct and indirect
+        left-recursion. During parsing, left-recursive :class:`Forward` elements are
+        repeatedly matched with a fixed recursion depth that is gradually increased
+        until finding the longest match.
+
+        Example::
+
+            import pyparsing as pp
+            pp.ParserElement.enable_left_recursion()
+
+            E = pp.Forward("E")
+            num = pp.Word(pp.nums)
+            # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
+            E <<= E + '+' - num | num
+
+            print(E.parseString("1+2+3"))
+
+        Recursion search naturally memoizes matches of ``Forward`` elements and may
+        thus skip reevaluation of parse actions during backtracking. This may break
+        programs with parse actions which rely on strict ordering of side-effects.
+
+        Parameters:
+
+        - cache_size_limit - (default=``None``) - memoize at most this many
+          ``Forward`` elements during matching; if ``None`` (the default),
+          memoize all ``Forward`` elements.
+
+        Bounded Recursion parsing works similar but not identical to Packrat parsing,
+        thus the two cannot be used together. Use ``force=True`` to disable any
+        previous, conflicting settings.
+        """
+        if force:
+            ParserElement.disable_memoization()
+        elif ParserElement._packratEnabled:
+            raise RuntimeError("Packrat and Bounded Recursion are not compatible")
+        if cache_size_limit is None:
+            ParserElement.recursion_memos = _UnboundedMemo()
+        elif cache_size_limit > 0:
+            ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit)
+        else:
+            raise NotImplementedError("Memo size of %s" % cache_size_limit)
+        ParserElement._left_recursion_enabled = True
 
     @staticmethod
-    def enablePackrat(cache_size_limit=128):
+    def enablePackrat(cache_size_limit=128, *, force=False):
         """Enables "packrat" parsing, which adds memoizing to the parsing logic.
            Repeated parse attempts at the same string location (which happens
            often in many complex grammars) can immediately return a cached value,
@@ -795,7 +864,15 @@ class ParserElement(ABC):
 
                import pyparsing
                pyparsing.ParserElement.enablePackrat()
+
+           Packrat parsing works similar but not identical to Bounded Recursion parsing,
+           thus the two cannot be used together. Use ``force=True`` to disable any
+           previous, conflicting settings.
         """
+        if force:
+            ParserElement.disable_memoization()
+        elif ParserElement._left_recursion_enabled:
+            raise RuntimeError("Packrat and Bounded Recursion are not compatible")
         if not ParserElement._packratEnabled:
             ParserElement._packratEnabled = True
             if cache_size_limit is None:
@@ -4390,7 +4467,72 @@ class Forward(ParseElementEnhance):
                 "Forward expression was never assigned a value, will not parse any input",
                 stacklevel=stacklevel,
             )
-        return super().parseImpl(instring, loc, doActions)
+        if not ParserElement._left_recursion_enabled:
+            return super().parseImpl(instring, loc, doActions)
+        # ## Bounded Recursion algorithm ##
+        # Recursion only needs to be processed at ``Forward`` elements, since they are
+        # the only ones that can actually refer to themselves. The general idea is
+        # to handle recursion stepwise: We start at no recursion, then recurse once,
+        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
+        #
+        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
+        # - to *match* a specific recursion level, and
+        # - to *search* the bounded recursion level
+        # and the two run concurrently. The *search* must *match* each recursion level
+        # to find the best possible match. This is handled by a memo table, which
+        # provides the previous match to the next level match attempt.
+        #
+        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
+        #
+        # There is a complication since we not only *parse* but also *transform* via
+        # actions: We do not want to run the actions too often while expanding. Thus,
+        # we expand using `doActions=False` and only run `doActions=True` if the next
+        # recursion level is acceptable.
+        with ParserElement.recursion_lock:
+            memo = ParserElement.recursion_memos
+            try:
+                # we are parsing at a specific recursion expansion – use it as-is
+                prev_loc, prev_result = memo[loc, self, doActions]
+                if isinstance(prev_result, Exception):
+                    raise prev_result
+                return prev_loc, prev_result.copy()
+            except KeyError:
+                act_key = (loc, self, True)
+                peek_key = (loc, self, False)
+                # we are searching for the best recursion expansion – keep on improving
+                # both `doActions` cases must be tracked separately here!
+                prev_loc, prev_peek = memo[peek_key] = loc - 1, ParseException(
+                    instring, loc, "Forward recursion without base case", self
+                )
+                if doActions:
+                    memo[act_key] = memo[peek_key]
+                while True:
+                    try:
+                        new_loc, new_peek = super().parseImpl(instring, loc, False)
+                    except ParseException:
+                        # we failed before getting any match – do not hide the error
+                        if isinstance(prev_peek, Exception):
+                            raise
+                        new_loc, new_peek = prev_loc, prev_peek
+                    # the match did not get better: we are done
+                    if new_loc <= prev_loc:
+                        if doActions:
+                            # replace the match for doActions=False as well,
+                            # in case the action did backtrack
+                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
+                            del memo[peek_key], memo[act_key]
+                            return prev_loc, prev_result.copy()
+                        del memo[peek_key]
+                        return prev_loc, prev_peek.copy()
+                    # the match did get better: see if we can improve further
+                    else:
+                        if doActions:
+                            try:
+                                memo[act_key] = super().parseImpl(instring, loc, True)
+                            except ParseException as e:
+                                memo[peek_key] = memo[act_key] = (new_loc, e)
+                                raise
+                        prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek
 
     def leaveWhitespace(self, recursive=True):
         self.skipWhitespace = False
diff --git a/pyparsing/results.py b/pyparsing/results.py
index fb03ecd..fa94a00 100644
--- a/pyparsing/results.py
+++ b/pyparsing/results.py
@@ -522,7 +522,7 @@ class ParseResults:
         Returns a new copy of a :class:`ParseResults` object.
         """
         ret = ParseResults(self._toklist)
-        ret._tokdict = dict(**self._tokdict)
+        ret._tokdict = self._tokdict.copy()
         ret._parent = self._parent
         ret._all_names |= self._all_names
         ret._name = self._name
diff --git a/pyparsing/testing.py b/pyparsing/testing.py
index 393f37b..43f23ab 100644
--- a/pyparsing/testing.py
+++ b/pyparsing/testing.py
@@ -20,6 +20,7 @@ class pyparsing_test:
         """
         Context manager to be used when writing unit tests that modify pyparsing config values:
          - packrat parsing
+         - bounded recursion parsing
          - default whitespace characters.
          - default keyword characters
          - literal string auto-conversion class
@@ -61,6 +62,7 @@ class pyparsing_test:
             else:
                 self._save_context["packrat_cache_size"] = None
             self._save_context["packrat_parse"] = ParserElement._parse
+            self._save_context["recursion_enabled"] = ParserElement._left_recursion_enabled
 
             self._save_context["__diag__"] = {
                 name: getattr(__diag__, name) for name in __diag__._all_names
@@ -97,6 +99,7 @@ class pyparsing_test:
                 ParserElement.enablePackrat(self._save_context["packrat_cache_size"])
             else:
                 ParserElement._parse = self._save_context["packrat_parse"]
+            ParserElement._left_recursion_enabled = self._save_context["recursion_enabled"]
 
             __compat__.collect_all_And_tokens = self._save_context["__compat__"]
 
diff --git a/pyparsing/util.py b/pyparsing/util.py
index 3cb69d2..875799d 100644
--- a/pyparsing/util.py
+++ b/pyparsing/util.py
@@ -119,6 +119,52 @@ class _FifoCache:
         self.clear = types.MethodType(clear, self)
 
 
+class LRUMemo:
+    """
+    A memoizing mapping that retains `capacity` deleted items
+
+    The memo tracks retained items by their access order; once `capacity` items
+    are retained, the least recently used item is discarded.
+    """
+    def __init__(self, capacity):
+        self._capacity = capacity
+        self._active = {}
+        self._memory = collections.OrderedDict()
+
+    def __getitem__(self, key):
+        try:
+            return self._active[key]
+        except KeyError:
+            self._memory.move_to_end(key)
+            return self._memory[key]
+
+    def __setitem__(self, key, value):
+        self._memory.pop(key, None)
+        self._active[key] = value
+
+    def __delitem__(self, key):
+        try:
+            value = self._active.pop(key)
+        except KeyError:
+            pass
+        else:
+            while len(self._memory) >= self._capacity:
+                self._memory.popitem(last=False)
+            self._memory[key] = value
+
+    def clear(self):
+        self._active.clear()
+        self._memory.clear()
+
+
+class UnboundedMemo(dict):
+    """
+    A memoizing mapping that retains all deleted items
+    """
+    def __delitem__(self, key):
+        pass
+
+
 def _escapeRegexRangeChars(s):
     # escape these chars: ^-[]
     for c in r"\^-[]":
author	Max Fischer <maxfischer2781@gmail.com>	2021-07-30 14:51:33 +0200
committer	GitHub <noreply@github.com>	2021-07-30 07:51:33 -0500
commit	5eafa07470a3e761944ff5af00dbcd794dfa09da (patch)
tree	832e780435bee81bbe5f1c619b8be544a6426656 /pyparsing
parent	d108a29db062c9250f50c978e3a86381d1b0746b (diff)
download	pyparsing-git-left_recursion_support.tar.gz