diff options
author | ptmcg <ptmcg@austin.rr.com> | 2020-11-02 18:14:56 -0600 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2020-11-02 18:14:56 -0600 |
commit | 2dd2e2bb70407eea91f18de2caea5ba4527eb7dc (patch) | |
tree | c0a8824908983d6b4b81a6081af629e6ba8064b0 | |
parent | 96e0fab07788fca87e1473b0ae755335d6988895 (diff) | |
download | pyparsing-git-2dd2e2bb70407eea91f18de2caea5ba4527eb7dc.tar.gz |
Add IndentedBlock class; made vertical keyword arg more visible when creating railroad diags; changed create_diagram from monkeypatch to included method on ParserElement; better debug exception if Dict is constructed with non-Group expression (tag: pyparsing_3.0.0b1)
-rw-r--r-- | CHANGES | 5 | ||||
-rw-r--r-- | docs/whats_new_in_3_0_0.rst | 74 | ||||
-rw-r--r-- | examples/indentedGrammarExample.py | 4 | ||||
-rw-r--r-- | examples/indented_block_example.py | 54 | ||||
-rw-r--r-- | examples/number_words.py | 31 | ||||
-rw-r--r-- | examples/verilogParse.py | 13 | ||||
-rw-r--r-- | pyparsing/__init__.py | 3 | ||||
-rw-r--r-- | pyparsing/core.py | 41 | ||||
-rw-r--r-- | pyparsing/diagram/__init__.py | 29 | ||||
-rw-r--r-- | pyparsing/helpers.py | 37 | ||||
-rw-r--r-- | tests/test_unit.py | 79 |
11 files changed, 302 insertions, 68 deletions
@@ -41,6 +41,11 @@ Version 3.0.0b1 This is the mechanism used internally by the `Group` class when defined using `aslist=True`. +- A new `IndentedBlock` class is introduced, to eventually replace the + current `indentedBlock` helper method. The interface is largely the same, + however, the new class manages its own internal indentation stack, so + it is no longer necessary to maintain an external `indentStack` variable. + - API CHANGE Added `cache_hit` keyword argument to debug actions. Previously, if packrat parsing was enabled, the debug methods were not called in the event of cache diff --git a/docs/whats_new_in_3_0_0.rst b/docs/whats_new_in_3_0_0.rst index 8135a1d..4ddd051 100644 --- a/docs/whats_new_in_3_0_0.rst +++ b/docs/whats_new_in_3_0_0.rst @@ -4,7 +4,7 @@ What's New in Pyparsing 3.0.0 :author: Paul McGuire -:date: August, 2020 +:date: November, 2020 :abstract: This document summarizes the changes made in the 3.0.0 release of pyparsing. @@ -23,8 +23,6 @@ An excellent new enhancement is the new railroad diagram generator for documenting pyparsing parsers:: import pyparsing as pp - from pyparsing.diagram import to_railroad, railroad_to_html - from pathlib import Path # define a simple grammar for parsing street addresses such # as "123 Main Street" @@ -37,8 +35,7 @@ generator for documenting pyparsing parsers:: # construct railroad track diagram for this parser and # save as HTML - rr = to_railroad(parser) - Path('parser_rr_diag.html').write_text(railroad_to_html(rr)) + parser.create_diagram('parser_rr_diag.html') (Contributed by Michael Milton) @@ -92,6 +89,38 @@ just namespaces, to add some helpful behavior: mistake when using Forwards) (**currently not working on PyPy**) +New IndentedBlock class to replace indentedBlock helper method +-------------------------------------------------------------- +The new ``IndentedBlock`` class will replace the current ``indentedBlock`` method +for defining indented blocks of text, similar to Python source code. 
Using +``IndentedBlock``, the expression instance itself keeps track of the indent stack, +so a separate external ``indentStack`` variable is no longer required. + +Here is a simple example of an expression containing an alphabetic key, followed +by an indented list of integers:: + + integer = pp.Word(pp.nums) + group = pp.Group(pp.Char(pp.alphas) + pp.Group(pp.IndentedBlock(integer))) + +parses:: + + A + 100 + 101 + B + 200 + 201 + +as:: + + [['A', [100, 101]], ['B', [200, 201]]] + +``IndentedBlock`` may also be used to define a recursive indented block (containing nested +indented blocks). + +The existing ``indentedBlock`` is retained for backward-compatibility, but will be +deprecated in a future release. + Shortened tracebacks -------------------- Cleaned up default tracebacks when getting a ``ParseException`` when calling @@ -99,8 +128,23 @@ Cleaned up default tracebacks when getting a ``ParseException`` when calling and not include the internal pyparsing traceback frames. (If the full traceback is desired, then set ``ParserElement.verbose_traceback`` to ``True``.) +Improved debug logging +---------------------- +Debug logging has been improved by: + +- Including try/match/fail logging when getting results from the + packrat cache (previously cache hits did not show debug logging). + Values returned from the packrat cache are marked with an '*'. + +- Improved fail logging, showing the failed text line and marker where + the failure occurred. + New / improved examples ----------------------- +- ``number_words.py`` includes a parser/evaluator to parse "forty-two" + and return 42. Also includes example code to generate a railroad + diagram for this parser. + - ``BigQueryViewParser.py`` added to examples directory, submitted by Michael Smedberg. @@ -136,6 +180,14 @@ Other new features grammar utilities to navigate through the tree of expressions in a pyparsing grammar. 
+- The ``repr()`` string for ``ParseResults`` is now of the form:: + + ParseResults([tokens], {named_results}) + + The previous form omitted the leading ``ParseResults`` class name, + and was easily misinterpreted as a ``tuple`` containing a ``list`` and + a ``dict``. + - Minor reformatting of output from ``runTests`` to make embedded comments more visible. @@ -208,6 +260,12 @@ API Changes To run explain against other exceptions, use ``ParseException.explain_exception()``. +- Debug actions now take an added keyword argument ``cache_hit``. + Now that debug actions are called for expressions matched in the + packrat parsing cache, debug actions are now called with this extra + flag, set to True. For custom debug actions, it is necessary to add + support for this new argument. + - ``ZeroOrMore`` expressions that have results names will now include empty lists for their name if no matches are found. Previously, no named result would be present. Code that tested @@ -260,7 +318,7 @@ Other discontinued features - Removed support for running ``python setup.py test``. The setuptools maintainers consider the ``test`` command deprecated (see - <https://github.com/pypa/setuptools/issues/1684>). To run the Pyparsing test, + <https://github.com/pypa/setuptools/issues/1684>). To run the Pyparsing tests, use the command ``tox``. @@ -285,9 +343,11 @@ Fixed Bugs - Fixed bug in ``Each`` when using ``Regex``, when ``Regex`` expression would get parsed twice. -- Fixed ``FutureWarning`` that sometimes are raised when ``'['`` passed as a +- Fixed ``FutureWarning`` that sometimes is raised when ``'['`` passed as a character to ``Word``. +- Fixed debug logging to show failure location after whitespace skipping. + Acknowledgments =============== diff --git a/examples/indentedGrammarExample.py b/examples/indentedGrammarExample.py index c761393..706a0d7 100644 --- a/examples/indentedGrammarExample.py +++ b/examples/indentedGrammarExample.py @@ -10,6 +10,7 @@ from pyparsing import *
+
data = """\
def A(z):
A1
@@ -32,9 +33,8 @@ def spam(x,y): """
-indentStack = [1]
stmt = Forward()
-suite = indentedBlock(stmt, indentStack)
+suite = IndentedBlock(stmt)
identifier = Word(alphas, alphanums)
funcDecl = (
diff --git a/examples/indented_block_example.py b/examples/indented_block_example.py new file mode 100644 index 0000000..4f5feb1 --- /dev/null +++ b/examples/indented_block_example.py @@ -0,0 +1,54 @@ +# +# indented_block_example.py +# + +import pyparsing as pp + +ppc = pp.pyparsing_common + +data = """\ + + A + 100 + 101 + + 102 + B + 200 + 201 + + C + 300 + +""" + +integer = ppc.integer +group = pp.Group(pp.Char(pp.alphas) + pp.Group(pp.IndentedBlock(integer))) + +print(group[...].parseString(data).dump()) + +# example of a recursive IndentedBlock + +data = """\ + + A + 100 + 101 + + 102 + B + 200 + b + 210 + 211 + 202 + C + 300 + +""" + +group = pp.Forward() +group <<= pp.Group(pp.Char(pp.alphas) + pp.Group(pp.IndentedBlock(integer | group))) + +print("using searchString") +print(sum(group.searchString(data)).dump()) diff --git a/examples/number_words.py b/examples/number_words.py index 724059f..b1217cf 100644 --- a/examples/number_words.py +++ b/examples/number_words.py @@ -29,13 +29,18 @@ import pyparsing as pp from operator import mul import pyparsing.diagram + def define_numeric_word(s, value): return pp.CaselessKeyword(s).addParseAction(lambda: value) + def define_numeric_word_range(s, vals): if isinstance(s, str): s = s.split() - return pp.MatchFirst(define_numeric_word(nm, nm_value) for nm, nm_value in zip(s, vals)) + return pp.MatchFirst( + define_numeric_word(nm, nm_value) for nm, nm_value in zip(s, vals) + ) + opt_dash = pp.Optional(pp.Suppress("-")).setName("optional '-'") opt_and = pp.Optional((pp.CaselessKeyword("and") | "-").suppress()).setName("optional 'and'") @@ -54,23 +59,24 @@ thousands = one_to_999 + define_numeric_word("thousand", 1000) hundreds.setName("100s") thousands.setName("1000s") + def multiply(t): return mul(*t) + hundreds.addParseAction(multiply) thousands.addParseAction(multiply) -numeric_expression = (pp.Optional(thousands + opt_and) - + pp.Optional(hundreds + opt_and) - + one_to_99 - | pp.Optional(thousands + opt_and) - + 
hundreds - | thousands - ).setName("numeric_words") +numeric_expression = ( + pp.Optional(thousands + opt_and) + pp.Optional(hundreds + opt_and) + one_to_99 + | pp.Optional(thousands + opt_and) + hundreds + | thousands +).setName("numeric_words") numeric_expression.addParseAction(sum) -if __name__ == '__main__': - numeric_expression.runTests(""" +if __name__ == "__main__": + numeric_expression.runTests( + """ one seven twelve @@ -83,7 +89,8 @@ if __name__ == '__main__': nine hundred thousand nine hundred and ninety nine nine hundred and ninety nine thousand nine hundred and ninety nine nineteen hundred thousand nineteen hundred and ninety nine - """) + """ + ) # create railroad diagram - numeric_expression.create_diagram("numeric_words_diagram.html") + numeric_expression.create_diagram("numeric_words_diagram.html", vertical=5) diff --git a/examples/verilogParse.py b/examples/verilogParse.py index 8a809d1..cba0ac0 100644 --- a/examples/verilogParse.py +++ b/examples/verilogParse.py @@ -914,6 +914,9 @@ if 0: else: def main(): + import sys + + sys.setrecursionlimit(5000) print("Verilog parser test (V %s)" % __version__) print(" - using pyparsing version", pyparsing.__version__) print(" - using Python version", sys.version) @@ -927,8 +930,8 @@ else: failCount = 0 Verilog_BNF() numlines = 0 - startTime = time.clock() - fileDir = "verilog" + startTime = time.time() + fileDir = "../scratch/verilog" # ~ fileDir = "verilog/new" # ~ fileDir = "verilog/new2" # ~ fileDir = "verilog/new3" @@ -950,9 +953,9 @@ else: print(fnam, len(filelines), end=" ") numlines += len(filelines) teststr = "".join(filelines) - time1 = time.clock() + time1 = time.time() tokens = test(teststr) - time2 = time.clock() + time2 = time.time() elapsed = time2 - time1 totalTime += elapsed if len(tokens): @@ -974,7 +977,7 @@ else: failCount += 1 for i, line in enumerate(filelines, 1): print("%4d: %s" % (i, line.rstrip())) - endTime = time.clock() + endTime = time.time() print("Total parse time:", totalTime) 
print("Total source lines:", numlines) print("Average lines/sec:", ("%.1f" % (float(numlines) / (totalTime + 0.05)))) diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 1e5c625..816b0e5 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -103,7 +103,7 @@ __version__ = ( __version_info__.releaseLevel == "final" ] ) -__versionTime__ = "25 October 2020 05:09 UTC" +__versionTime__ = "2 November 2020 17:20 UTC" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" from .util import * @@ -149,6 +149,7 @@ __all__ = [ "Forward", "GoToColumn", "Group", + "IndentedBlock", "Keyword", "LineEnd", "LineStart", diff --git a/pyparsing/core.py b/pyparsing/core.py index c242d40..fed71a9 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -1793,6 +1793,26 @@ class ParserElement(ABC): return success, allResults + def create_diagram(expr: "ParserElement", output_html, vertical=3, **kwargs): + """ + + """ + + try: + from .diagram import to_railroad, railroad_to_html + except ImportError as ie: + raise Exception( + "must install 'railroad' to generate parser railroad diagrams" + ) from ie + + railroad = to_railroad(expr, vertical=vertical, diagram_kwargs=kwargs) + if isinstance(output_html, str): + with open(output_html, "w", encoding="utf-8") as diag_file: + diag_file.write(railroad_to_html(railroad)) + else: + # we were passed a file-like object, just write to it + output_html.write(railroad_to_html(railroad)) + class _PendingSkip(ParserElement): # internal placeholder class to hold a place were '...' 
is added to a parser element, @@ -3233,7 +3253,8 @@ class And(ParseExpression): ) errorStop = False for e in self.exprs[1:]: - if isinstance(e, And._ErrorStop): + # if isinstance(e, And._ErrorStop): + if type(e) is And._ErrorStop: errorStop = True continue if errorStop: @@ -4541,16 +4562,30 @@ class Dict(TokenConverter): for i, tok in enumerate(tokenlist): if len(tok) == 0: continue + ikey = tok[0] if isinstance(ikey, int): - ikey = str(tok[0]).strip() + ikey = str(ikey).strip() + if len(tok) == 1: tokenlist[ikey] = _ParseResultsWithOffset("", i) + elif len(tok) == 2 and not isinstance(tok[1], ParseResults): tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) + else: - dictvalue = tok.copy() # ParseResults(i) + try: + dictvalue = tok.copy() # ParseResults(i) + except Exception: + exc = TypeError( + "could not extract dict values from parsed results" + " - Dict expression must contain Grouped expressions" + ) + exc.__cause__ = None + raise exc + del dictvalue[0] + if len(dictvalue) != 1 or ( isinstance(dictvalue, ParseResults) and dictvalue.haskeys() ): diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 9678368..e10a50b 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -2,7 +2,6 @@ import railroad import pyparsing from pkg_resources import resource_filename from typing import ( - Union, List, Optional, NamedTuple, @@ -104,12 +103,13 @@ def resolve_partial(partial: "EditablePartial[T]") -> T: def to_railroad( element: pyparsing.ParserElement, diagram_kwargs: dict = {}, - vertical: Union[int, bool] = 5, + vertical: int = None, ) -> List[NamedDiagram]: """ Convert a pyparsing element tree into a list of diagrams. 
This is the recommended entrypoint to diagram creation if you want to access the Railroad tree before it is converted to HTML :param diagram_kwargs: kwargs to pass to the Diagram() constructor + :param vertical: (optional) """ # Convert the whole tree underneath the root lookup = ConverterState(diagram_kwargs=diagram_kwargs) @@ -125,16 +125,14 @@ def to_railroad( return sorted(resolved, key=lambda diag: diag.index) -def _should_vertical(specification: Union[int, bool], count: int) -> bool: +def _should_vertical(specification: int, count: int) -> bool: """ Returns true if we should return a vertical list of elements """ - if isinstance(specification, bool): - return specification - elif isinstance(specification, int): - return count >= specification + if specification is None: + return False else: - raise Exception() + return count >= specification class ElementState: @@ -275,7 +273,7 @@ def _to_diagram_element( element: pyparsing.ParserElement, parent: Optional[EditablePartial], lookup: ConverterState = None, - vertical: Union[int, bool] = 3, + vertical: int = None, index: int = 0, name_hint: str = None, ) -> Optional[EditablePartial]: @@ -424,16 +422,3 @@ def _to_diagram_element( ) else: return ret - -# monkeypatch .create_diagram method onto ParserElement -def _create_diagram(expr: pyparsing.ParserElement, output_html): - railroad = to_railroad(expr) - if isinstance(output_html, str): - with open(output_html, "w", encoding="utf-8") as diag_file: - diag_file.write(railroad_to_html(railroad)) - else: - # we were passed a file-like object, just write to it - output_html.write(railroad_to_html(railroad)) - - -pyparsing.ParserElement.create_diagram = _create_diagram diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index d2325c9..cf59107 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -699,7 +699,9 @@ def infixNotation(baseExpr, opList, lpar=Suppress("("), rpar=Suppress(")")): thisExpr = Forward().setName(termName) if rightLeftAssoc is 
opAssoc.LEFT: if arity == 1: - matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr + opExpr[...]) + matchExpr = _FB(lastExpr + opExpr) + Group( + lastExpr + opExpr + opExpr[...] + ) elif arity == 2: if opExpr is not None: matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( @@ -760,6 +762,9 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[] A valid block must contain at least one ``blockStatement``. + (Note that indentedBlock uses internal parse actions which make it + incompatible with packrat parsing.) + Example:: data = ''' @@ -881,6 +886,36 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[] return smExpr.setName("indented block") +class IndentedBlock(ParseElementEnhance): + """ + Expression to match one or more expressions at a given indentation level. + Useful for parsing text where structure is implied by indentation (like Python source code). + """ + + def __init__(self, expr, recursive=True): + super().__init__(expr, savelist=True) + self._recursive = recursive + + def parseImpl(self, instring, loc, doActions=True): + # see if self.expr matches at the current location - if not it will raise an exception + # and no further work is necessary + self.expr.parseImpl(instring, loc, doActions) + + indent_col = col(loc, instring) + peer_parse_action = matchOnlyAtCol(indent_col) + peer_expr = FollowedBy(self.expr).addParseAction(peer_parse_action) + inner_expr = Empty() + peer_expr.suppress() + self.expr + + if self._recursive: + indent_parse_action = conditionAsParseAction( + lambda s, l, t, relative_to_col=indent_col: col(l, s) > relative_to_col + ) + indent_expr = FollowedBy(self.expr).addParseAction(indent_parse_action) + inner_expr += Optional(indent_expr + self) + + return OneOrMore(inner_expr).parseImpl(instring, loc, doActions) + + # it's easy to get these comment structures wrong - they're very common, so may as well make them available cStyleComment = 
Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").setName( "C style comment" diff --git a/tests/test_unit.py b/tests/test_unit.py index ffec86a..d84d08d 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -11,6 +11,7 @@ import contextlib import datetime import sys from io import StringIO +from textwrap import dedent from unittest import TestCase import pyparsing as pp @@ -3401,8 +3402,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): """ - from textwrap import dedent - test = dedent(test) print(test) @@ -6148,7 +6147,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): # Make sure example in indentedBlock docstring actually works! def testIndentedBlockExample(self): - from textwrap import dedent data = dedent( """ @@ -6242,7 +6240,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): def testIndentedBlock(self): # parse pseudo-yaml indented text - import textwrap EQ = pp.Suppress("=") stack = [1] @@ -6263,7 +6260,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): c3 = 'A horse, a horse, my kingdom for a horse' d = 505 """ - text = textwrap.dedent(text) + text = dedent(text) print(text) result = parser.parseString(text) @@ -6274,7 +6271,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): # exercise indentedBlock with example posted in issue #87 def testIndentedBlockTest2(self): - from textwrap import dedent indent_stack = [1] @@ -6378,8 +6374,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): block <<= pp.Literal("block:") + body return block - from textwrap import dedent - # This input string is a perfect match for the parser, so a single match is found p1 = get_parser() r1 = list( @@ -6476,6 +6470,65 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): ) self.assertEqual(1, len(r6)) + def testIndentedBlockClass(self): + data = """\ + + A + 100 + 101 + + 102 + B + 200 + 201 + + C + 300 + + """ + + integer = ppc.integer + 
group = pp.Group(pp.Char(pp.alphas) + pp.Group(pp.IndentedBlock(integer))) + + group[...].parseString(data).pprint() + + self.assertParseAndCheckList( + group[...], data, [["A", [100, 101, 102]], ["B", [200, 201]], ["C", [300]]] + ) + + def testIndentedBlockClassWithRecursion(self): + data = """\ + + A + 100 + 101 + + 102 + B + b + 200 + 201 + + C + 300 + + """ + + integer = ppc.integer + group = pp.Forward() + group <<= pp.Group( + pp.Char(pp.alphas) + pp.Group(pp.IndentedBlock(integer | group)) + ) + + print("using searchString") + print(sum(group.searchString(data)).dump()) + + self.assertParseAndCheckList( + group[...], + data, + [["A", [100, 101, 102]], ["B", [["b", [200, 201]]]], ["C", [300]]], + ) + def testInvalidDiagSetting(self): with self.assertRaises( ValueError, @@ -6791,8 +6844,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent calls to ParserElement.setName() (default=False) """ - import textwrap - with ppt.reset_pyparsing_context(): test_stdout = StringIO() @@ -6805,7 +6856,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): integer[...].parseString("1 2 3") - expected_debug_output = textwrap.dedent( + expected_debug_output = dedent( """\ Match integer at loc 0(1,1) 1 2 3 @@ -6835,7 +6886,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): ) def testEnableDebugOnExpressionWithParseAction(self): - import textwrap test_stdout = StringIO() with resetting(sys, "stdout", "stderr"): @@ -6851,7 +6901,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): parser.setDebug(False) parser.parseString("123 A100") - expected_debug_output = textwrap.dedent( + expected_debug_output = dedent( """\ Match [{integer | W:(0-9A-Za-z)}]... 
at loc 0(1,1) 123 A100 @@ -6909,7 +6959,6 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): ) def testEnableDebugWithCachedExpressionsMarkedWithAsterisk(self): - import textwrap test_stdout = StringIO() with resetting(sys, "stdout", "stderr"): @@ -6925,7 +6974,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): grammar = (z | leading_a | b)[...] + "a" grammar.parseString("aba") - expected_debug_output = textwrap.dedent( + expected_debug_output = dedent( """\ Match Z at loc 0(1,1) aba |