Add support for LR parsingleft_recursion_support

* first draft of LR parsing * removed debug output * cache is owned and cleared by ParserElement * bounded recursion must be enabled explicitly * packrat rejects recursion * basic LR unit test * tests for associativity and nesting * added math example * fixed test typo * unittest for empty and non-peg clauses * LR-Forward can match Empty * fixed test typos * added base case to unittest * memo cache only provides copies * flattened Forward parse method * added high-level description of algorithm * expanded docstring * added tests for repetition rules * renamed bounded to left recursion * naive test for existing suite * explicitly testing tests for LR compatibility * LR memo no longer mixes action/no-action results * simplified replacement logic * adjusted example with ambiguous failure case * LR memo content is always returned as copy * draft for peeking recursion * memo update consistent for all actions * fixed a bug for non-string token identifiers * action wins against no-action * cleanup * properly setting names in tests * memoization can be turned off * testing memo switches * typos * flattened recursion memo * left recursion memo size may be limited * adjusted docs for recursion cache
author: Max Fischer <maxfischer2781@gmail.com> 2021-07-30 14:51:33 +0200
committer: GitHub <noreply@github.com> 2021-07-30 07:51:33 -0500
commit: 5eafa07470a3e761944ff5af00dbcd794dfa09da (patch)
tree: 832e780435bee81bbe5f1c619b8be544a6426656 /tests
parent: d108a29db062c9250f50c978e3a86381d1b0746b (diff)
download: pyparsing-git-left_recursion_support.tar.gz
1 files changed, 215 insertions, 20 deletions
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 09ad6a4..aed2db3 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -287,7 +287,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         test("-9", -9)
         test("--9", 9)
         test("-E", -math.e)
-        test("9 + 3 + 6", 9 + 3 + 6)
+        test("9 + 3 + 5", 9 + 3 + 5)
         test("9 + 3 / 11", 9 + 3.0 / 11)
         test("(9 + 3)", (9 + 3))
         test("(9+3) / 11", (9 + 3.0) / 11)
@@ -1609,8 +1609,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             pp.QuotedString("", "\\")
 
     def testRepeater(self):
-        if ParserElement._packratEnabled:
-            print("skipping this test, not compatible with packratting")
+        if ParserElement._packratEnabled or ParserElement._left_recursion_enabled:
+            print("skipping this test, not compatible with memoization")
             return
 
         first = pp.Word("abcdef").setName("word1")
@@ -1715,8 +1715,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
     def testRepeater2(self):
         """test matchPreviousLiteral with empty repeater"""
 
-        if ParserElement._packratEnabled:
-            print("skipping this test, not compatible with packratting")
+        if ParserElement._packratEnabled or ParserElement._left_recursion_enabled:
+            print("skipping this test, not compatible with memoization")
             return
 
         first = pp.Optional(pp.Word("abcdef").setName("words1"))
@@ -1735,8 +1735,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
     def testRepeater3(self):
         """test matchPreviousLiteral with multiple repeater tokens"""
 
-        if ParserElement._packratEnabled:
-            print("skipping this test, not compatible with packratting")
+        if ParserElement._packratEnabled or ParserElement._left_recursion_enabled:
+            print("skipping this test, not compatible with memoization")
             return
 
         first = pp.Word("a") + pp.Word("d")
@@ -1755,8 +1755,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
     def testRepeater4(self):
         """test matchPreviousExpr with multiple repeater tokens"""
 
-        if ParserElement._packratEnabled:
-            print("skipping this test, not compatible with packratting")
+        if ParserElement._packratEnabled or ParserElement._left_recursion_enabled:
+            print("skipping this test, not compatible with memoization")
             return
 
         first = pp.Group(pp.Word(pp.alphas) + pp.Word(pp.alphas))
@@ -1782,8 +1782,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
     def testRepeater5(self):
         """a simplified testRepeater4 to examine matchPreviousExpr with a single repeater token"""
 
-        if ParserElement._packratEnabled:
-            print("skipping this test, not compatible with packratting")
+        if ParserElement._packratEnabled or ParserElement._left_recursion_enabled:
+            print("skipping this test, not compatible with memoization")
             return
 
         first = pp.Word(pp.alphas)
@@ -6712,6 +6712,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             self.fail("failed to raise exception when matching empty string")
 
     def testExplainException(self):
+        pp.ParserElement.disable_memoization()
         expr = pp.Word(pp.nums).setName("int") + pp.Word(pp.alphas).setName("word")
         try:
             expr.parseString("123 355")
@@ -6732,16 +6733,22 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             return t[0] / t[1]
 
         expr.addParseAction(divide_args)
-        pp.ParserElement.enablePackrat()
-        print()
+        for memo_kind, enable_memo in [
+            ('Packrat', pp.ParserElement.enablePackrat),
+            ('Left Recursion', pp.ParserElement.enable_left_recursion),
+        ]:
+            enable_memo(force=True)
+            print("Explain for", memo_kind)
 
-        try:
-            expr.parseString("123 0")
-        except pp.ParseException as pe:
-            print(pe.explain())
-        except Exception as exc:
-            print(pp.ParseBaseException.explain_exception(exc))
-            raise
+            try:
+                expr.parseString("123 0")
+            except pp.ParseException as pe:
+                print(pe.explain())
+            except Exception as exc:
+                print(pp.ParseBaseException.explain_exception(exc))
+                raise
+        # make sure we leave the state compatible with everything
+        pp.ParserElement.disable_memoization()
 
     def testCaselessKeywordVsKeywordCaseless(self):
         frule = pp.Keyword("t", caseless=True) + pp.Keyword("yes", caseless=True)
@@ -8067,9 +8074,197 @@ class Test8_WithUnboundedPackrat(Test2_WithoutPackrat):
         )
 
 
+class Test9_WithLeftRecursionParsing(Test2_WithoutPackrat):
+    """
+    rerun Test2 tests, now with unbounded left recursion cache
+    """
+
+    def setUp(self):
+        ParserElement.enable_left_recursion(force=True)
+
+    def tearDown(self):
+        default_suite_context.restore()
+
+    def test000_assert_packrat_status(self):
+        print("Left-Recursion enabled:", ParserElement._left_recursion_enabled)
+        self.assertTrue(ParserElement._left_recursion_enabled, "left recursion not enabled")
+        self.assertIsInstance(ParserElement.recursion_memos, pp.util.UnboundedMemo)
+
+
+class Test10_WithLeftRecursionParsingBoundedMemo(Test2_WithoutPackrat):
+    """
+    rerun Test2 tests, now with bounded left recursion cache
+    """
+
+    def setUp(self):
+        ParserElement.enable_left_recursion(cache_size_limit=4, force=True)
+
+    def tearDown(self):
+        default_suite_context.restore()
+
+    def test000_assert_packrat_status(self):
+        print("Left-Recursion enabled:", ParserElement._left_recursion_enabled)
+        self.assertTrue(ParserElement._left_recursion_enabled, "left recursion not enabled")
+        self.assertIsInstance(ParserElement.recursion_memos, pp.util.LRUMemo)
+        # check that the cache matches roughly what we expect
+        # – it may be larger due to action handling
+        self.assertLessEqual(ParserElement.recursion_memos._capacity, 4)
+        self.assertGreater(ParserElement.recursion_memos._capacity * 3, 4)
+
+
+class TestLR1_Recursion(ppt.TestParseResultsAsserts, TestCase):
+    """
+    Tests for recursive parsing
+    """
+    suite_context = None
+    save_suite_context = None
+
+    def setUp(self):
+        recursion_suite_context.restore()
+
+    def tearDown(self):
+        default_suite_context.restore()
+
+    def test_repeat_as_recurse(self):
+        """repetition rules formulated with recursion"""
+        one_or_more = pp.Forward().setName("one_or_more")
+        one_or_more <<= one_or_more + "a" | "a"
+        self.assertParseResultsEquals(
+            one_or_more.parseString("a"),
+            expected_list=["a"],
+        )
+        self.assertParseResultsEquals(
+            one_or_more.parseString("aaa aa"),
+            expected_list=["a", "a", "a", "a", "a"],
+        )
+        delimited_list = pp.Forward().setName("delimited_list")
+        delimited_list <<= delimited_list + pp.Suppress(',') + "b" | "b"
+        self.assertParseResultsEquals(
+            delimited_list.parseString("b"),
+            expected_list=["b"],
+        )
+        self.assertParseResultsEquals(
+            delimited_list.parseString("b,b"),
+            expected_list=["b", "b"],
+        )
+        self.assertParseResultsEquals(
+            delimited_list.parseString("b,b , b, b,b"),
+            expected_list=["b", "b", "b", "b", "b"],
+        )
+
+    def test_binary_recursive(self):
+        """parsing of single left-recursive binary operator"""
+        expr = pp.Forward().setName("expr")
+        num = pp.Word(pp.nums)
+        expr <<= expr + '+' - num | num
+        self.assertParseResultsEquals(
+            expr.parseString("1+2"),
+            expected_list=['1', '+', '2']
+        )
+        self.assertParseResultsEquals(
+            expr.parseString("1+2+3+4"),
+            expected_list=['1', '+', '2', '+', '3', '+', '4']
+        )
+
+    def test_binary_associative(self):
+        """associative is preserved for single left-recursive binary operator"""
+        expr = pp.Forward().setName("expr")
+        num = pp.Word(pp.nums)
+        expr <<= pp.Group(expr) + '+' - num | num
+        self.assertParseResultsEquals(
+            expr.parseString("1+2"),
+            expected_list=[['1'], '+', '2'],
+        )
+        self.assertParseResultsEquals(
+            expr.parseString("1+2+3+4"),
+            expected_list=[[[['1'], '+', '2'], '+', '3'], '+', '4'],
+        )
+
+    def test_add_sub(self):
+        """indirectly left-recursive/associative add/sub calculator"""
+        expr = pp.Forward().setName("expr")
+        num = pp.Word(pp.nums).setParseAction(lambda t: int(t[0]))
+        expr <<= (
+            (expr + '+' - num).setParseAction(lambda t: t[0] + t[2])
+            | (expr + '-' - num).setParseAction(lambda t: t[0] - t[2])
+            | num
+        )
+        self.assertEqual(expr.parseString("1+2")[0], 3)
+        self.assertEqual(expr.parseString("1+2+3")[0], 6)
+        self.assertEqual(expr.parseString("1+2-3")[0], 0)
+        self.assertEqual(expr.parseString("1-2+3")[0], 2)
+        self.assertEqual(expr.parseString("1-2-3")[0], -4)
+
+    def test_math(self):
+        """precedence climbing parser for math"""
+        # named references
+        expr = pp.Forward().setName("expr")
+        add_sub = pp.Forward().setName("add_sub")
+        mul_div = pp.Forward().setName("mul_div")
+        power = pp.Forward().setName("power")
+        terminal = pp.Forward().setName("terminal")
+        # concrete rules
+        number = pp.Word(pp.nums).setParseAction(lambda t: int(t[0]))
+        signed = ('+' - expr) | ('-' - expr).setParseAction(lambda t: -t[1])
+        group = pp.Suppress('(') - expr - pp.Suppress(')')
+        add_sub <<= (
+            (add_sub + '+' - mul_div).setParseAction(lambda t: t[0] + t[2])
+            | (add_sub + '-' - mul_div).setParseAction(lambda t: t[0] - t[2])
+            | mul_div
+        )
+        mul_div <<= (
+            (mul_div + '*' - power).setParseAction(lambda t: t[0] * t[2])
+            | (mul_div + '/' - power).setParseAction(lambda t: t[0] / t[2])
+            | power
+        )
+        power <<= (
+            (terminal + '^' - power).setParseAction(lambda t: t[0] ** t[2])
+            | terminal
+        )
+        terminal <<= number | signed | group
+        expr <<= add_sub
+        # simple add_sub expressions
+        self.assertEqual(expr.parseString("1+2")[0], 3)
+        self.assertEqual(expr.parseString("1+2+3")[0], 6)
+        self.assertEqual(expr.parseString("1+2-3")[0], 0)
+        self.assertEqual(expr.parseString("1-2+3")[0], 2)
+        self.assertEqual(expr.parseString("1-2-3")[0], -4)
+        # precedence overwriting via parentheses
+        self.assertEqual(expr.parseString("1+(2+3)")[0], 6)
+        self.assertEqual(expr.parseString("1+(2-3)")[0], 0)
+        self.assertEqual(expr.parseString("1-(2+3)")[0], -4)
+        self.assertEqual(expr.parseString("1-(2-3)")[0], 2)
+        # complicated math expressions – same as Python expressions
+        self.assertEqual(expr.parseString("1----3")[0], 1----3)
+        self.assertEqual(expr.parseString("1+2*3")[0], 1+2*3)
+        self.assertEqual(expr.parseString("1*2+3")[0], 1*2+3)
+        self.assertEqual(expr.parseString("1*2^3")[0], 1*2**3)
+        self.assertEqual(expr.parseString("4^3^2^1")[0], 4**3**2**1)
+
+    def test_terminate_empty(self):
+        """Recursion with ``Empty`` terminates"""
+        empty = pp.Forward().setName('e')
+        empty <<= empty + pp.Empty() | pp.Empty()
+        self.assertParseResultsEquals(empty.parseString(""), expected_list=[])
+
+    def test_non_peg(self):
+        """Recursion works for non-PEG operators"""
+        expr = pp.Forward().setName('expr')
+        expr <<= expr + "a" ^ expr + "ab" ^ expr + "abc" ^ "."
+        self.assertParseResultsEquals(
+            expr.parseString(".abcabaabc"),
+            expected_list=[".", "abc", "ab", "a", "abc"]
+        )
+
+
 # force clear of packrat parsing flags before saving contexts
 pp.ParserElement._packratEnabled = False
 pp.ParserElement._parse = pp.ParserElement._parseNoCache
 
 Test2_WithoutPackrat.suite_context = ppt.reset_pyparsing_context().save()
 Test2_WithoutPackrat.save_suite_context = ppt.reset_pyparsing_context().save()
+
+default_suite_context = ppt.reset_pyparsing_context().save()
+pp.ParserElement.enable_left_recursion()
+recursion_suite_context = ppt.reset_pyparsing_context().save()
+default_suite_context.restore()
author	Max Fischer <maxfischer2781@gmail.com>	2021-07-30 14:51:33 +0200
committer	GitHub <noreply@github.com>	2021-07-30 07:51:33 -0500
commit	5eafa07470a3e761944ff5af00dbcd794dfa09da (patch)
tree	832e780435bee81bbe5f1c619b8be544a6426656 /tests
parent	d108a29db062c9250f50c978e3a86381d1b0746b (diff)
download	pyparsing-git-left_recursion_support.tar.gz