diff options
-rw-r--r-- | CHANGES | 20 | ||||
-rw-r--r-- | docs/HowToUsePyparsing.rst | 6 | ||||
-rw-r--r-- | docs/whats_new_in_3_0_0.rst | 10 | ||||
-rw-r--r-- | examples/chemicalFormulas.py | 10 | ||||
-rw-r--r-- | examples/delta_time.py | 10 | ||||
-rw-r--r-- | examples/jsonParser.py | 8 | ||||
-rw-r--r-- | examples/make_diagram.py | 25 | ||||
-rw-r--r-- | examples/mozillaCalendarParser.py | 31 | ||||
-rw-r--r-- | examples/railroad_diagram_demo.py | 31 | ||||
-rw-r--r-- | examples/simpleBool.py | 4 | ||||
-rw-r--r-- | examples/simpleSQL.py | 18 | ||||
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | pyparsing/core.py | 20 | ||||
-rw-r--r-- | pyparsing/diagram/__init__.py | 311 | ||||
-rw-r--r-- | pyparsing/helpers.py | 14 | ||||
-rw-r--r-- | tests/test_diagram.py | 98 |
16 files changed, 433 insertions, 185 deletions
@@ -50,6 +50,25 @@ Version 3.0.0c1 - cjk_identifier = pp.Word(ppu.CJK.identchars, ppu.CJK.identbodychars) greek_identifier = pp.Word(ppu.Greek.identchars, ppu.Greek.identbodychars) +- Railroad diagrams have been reformatted: + . creating diagrams is easier - call + + expr.create_diagram("diagram_output.html") + + create_diagram() takes 3 arguments: + . the filename to write the diagram HTML + . optional 'vertical' argument, to specify the minimum number of items in a path + to be shown vertically; default=3 + . optional 'show_results_names' argument, to specify whether results name + annotations should be shown; default=False + . every expression that gets a name using setName() gets separated out as + a separate subdiagram + . results names can be shown as annotations to diagram items + . Each, FollowedBy, and PrecededBy elements get [ALL], [LOOKAHEAD], and [LOOKBEHIND] + annotations + . removed annotations for Suppress elements + . some diagram cleanup when a grammar contains Forward elements + . check out the examples make_diagram.py and railroad_diagram_demo.py - Added a caseless parameter to the `CloseMatch` class to allow for casing to be ignored when checking for close matches. (Issue #281) (PR by Adrian Edwards, thanks!) @@ -60,7 +79,6 @@ Version 3.0.0c1 - repeated character. (Issue #263) - Version 3.0.0b3 - August, 2021 ------------------------------ - PEP-8 compatible names are being introduced in pyparsing version 3.0! diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst index 4fe8cf1..ffdec07 100644 --- a/docs/HowToUsePyparsing.rst +++ b/docs/HowToUsePyparsing.rst @@ -31,7 +31,9 @@ using the Python interpreter's built-in ``help()`` function). You will also find many example scripts in the `examples <https://github.com/pyparsing/pyparsing/tree/master/examples>`_ directory of the pyparsing GitHub repo. 
-*Note: In pyparsing 3.0, many method and function names which were +----------- + +**Note**: *In pyparsing 3.0, many method and function names which were originally written using camelCase have been converted to PEP8-compatible snake_case. So ``parseString()`` is being renamed to ``parse_string()``, ``delimitedList`` to ``delimited_list``, and so on. You may see the old @@ -44,6 +46,8 @@ names to the legacy camelCase names. In pyparsing 3.0.x, both forms are supported, but the legacy forms are deprecated; they will be dropped in a future release.* +----------- + Steps to follow =============== diff --git a/docs/whats_new_in_3_0_0.rst b/docs/whats_new_in_3_0_0.rst index 5d4bfcd..82845c1 100644 --- a/docs/whats_new_in_3_0_0.rst +++ b/docs/whats_new_in_3_0_0.rst @@ -38,7 +38,7 @@ can now be written as:: Pyparsing 3.0 will run both versions of this example. New code should be written using the PEP-8 compatible names. The compatibility -synonyms will be removed in a future version. +synonyms will be removed in a future version of pyparsing. Railroad diagramming @@ -62,7 +62,9 @@ generator for documenting pyparsing parsers. You need to install # save as HTML parser.create_diagram('parser_rr_diag.html') -(Contributed by Michael Milton) +See more in the examples directory: ``make_diagram.py`` and ``railroad_diagram_demo.py``. + +(Railroad diagram enhancement contributed by Michael Milton) Support for left-recursive parsers ---------------------------------- @@ -94,7 +96,7 @@ Prints:: See more examples in ``left_recursion.py`` in the pyparsing examples directory. -(Contributed by Max Fischer) +(LR parsing support contributed by Max Fischer) Packrat/memoization enable and disable methods ---------------------------------------------- @@ -393,7 +395,7 @@ Other new features character ranges (converting ``"0123456789"`` to ``"0-9"`` for instance). - Added a caseless parameter to the `CloseMatch` class to allow for casing to be - ignored when checking for close matches. 
+ ignored when checking for close matches. Contributed by Adrian Edwards. API Changes diff --git a/examples/chemicalFormulas.py b/examples/chemicalFormulas.py index f7c7d14..d4c87cd 100644 --- a/examples/chemicalFormulas.py +++ b/examples/chemicalFormulas.py @@ -17,7 +17,7 @@ atomicWeight = { digits = "0123456789"
# Version 1
-element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2)
+element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2).set_name("element")
# for stricter matching, use this Regex instead
# element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|"
# "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|"
@@ -69,9 +69,9 @@ formula.runTests( print()
# Version 3 - convert integers during parsing process
-integer = pp.Word(digits).setParseAction(lambda t: int(t[0]))
+integer = pp.Word(digits).setParseAction(lambda t: int(t[0])).setName("integer")
elementRef = pp.Group(element("symbol") + pp.Optional(integer, default=1)("qty"))
-formula = elementRef[...]
+formula = elementRef[...].setName("chemical_formula")
def sum_atomic_weights_by_results_name_with_converted_ints(element_list):
@@ -103,10 +103,10 @@ def cvt_subscript_int(s): return ret
-subscript_int = pp.Word(subscript_digits).addParseAction(cvt_subscript_int)
+subscript_int = pp.Word(subscript_digits).addParseAction(cvt_subscript_int).set_name("subscript")
elementRef = pp.Group(element("symbol") + pp.Optional(subscript_int, default=1)("qty"))
-formula = elementRef[...]
+formula = elementRef[1, ...].setName("chemical_formula")
formula.runTests(
"""\
H₂O
diff --git a/examples/delta_time.py b/examples/delta_time.py index dfe7a65..2f9466c 100644 --- a/examples/delta_time.py +++ b/examples/delta_time.py @@ -36,6 +36,7 @@ import calendar __all__ = ["time_expression"] + # basic grammar definitions def make_integer_word_expr(int_name, int_value): return pp.CaselessKeyword(int_name).addParseAction(pp.replaceWith(int_value)) @@ -49,7 +50,8 @@ integer_word = pp.MatchFirst( " seventeen eighteen nineteen twenty".split(), start=1, ) -) +).setName("integer_word") + integer = pp.pyparsing_common.integer | integer_word integer.setName("numeric") @@ -66,7 +68,7 @@ def plural(s): week, day, hour, minute, second = map(plural, "week day hour minute second".split()) time_units = hour | minute | second -any_time_units = week | day | time_units +any_time_units = (week | day | time_units).setName("time_units") am = CL("am") pm = CL("pm") @@ -110,9 +112,9 @@ def fill_default_time_fields(t): weekday_name_list = list(calendar.day_name) -weekday_name = pp.oneOf(weekday_name_list) +weekday_name = pp.oneOf(weekday_name_list).setName("weekday_name") -_24hour_time = ~(integer + any_time_units) + pp.Word(pp.nums, exact=4).addParseAction( +_24hour_time = ~(integer + any_time_units).setName("numbered_time_units") + pp.Word(pp.nums, exact=4).setName("HHMM").addParseAction( lambda t: [int(t[0][:2]), int(t[0][2:])], fill_24hr_time_fields ) _24hour_time.setName("0000 time") diff --git a/examples/jsonParser.py b/examples/jsonParser.py index 6d6b1c2..0ea4aa1 100644 --- a/examples/jsonParser.py +++ b/examples/jsonParser.py @@ -51,12 +51,12 @@ NULL = make_keyword("null", None) LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(pp.Suppress, "[]{}:")
jsonString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
-jsonNumber = ppc.number()
+jsonNumber = ppc.number().setName("jsonNumber")
jsonObject = pp.Forward().setName("jsonObject")
jsonValue = pp.Forward().setName("jsonValue")
-jsonElements = pp.delimitedList(jsonValue)
+jsonElements = pp.delimitedList(jsonValue).setName(None)
# jsonArray = pp.Group(LBRACK + pp.Optional(jsonElements, []) + RBRACK)
# jsonValue << (
# jsonString | jsonNumber | pp.Group(jsonObject) | jsonArray | TRUE | FALSE | NULL
@@ -65,7 +65,7 @@ jsonElements = pp.delimitedList(jsonValue) jsonArray = pp.Group(
LBRACK + pp.Optional(jsonElements) + RBRACK, aslist=RETURN_PYTHON_COLLECTIONS
-)
+).setName("jsonArray")
jsonValue << (jsonString | jsonNumber | jsonObject | jsonArray | TRUE | FALSE | NULL)
@@ -73,7 +73,7 @@ memberDef = pp.Group( jsonString + COLON + jsonValue, aslist=RETURN_PYTHON_COLLECTIONS
).setName("jsonMember")
-jsonMembers = pp.delimitedList(memberDef)
+jsonMembers = pp.delimitedList(memberDef).setName(None)
# jsonObject << pp.Dict(LBRACE + pp.Optional(jsonMembers) + RBRACE)
jsonObject << pp.Dict(
LBRACE + pp.Optional(jsonMembers) + RBRACE, asdict=RETURN_PYTHON_COLLECTIONS
diff --git a/examples/make_diagram.py b/examples/make_diagram.py index 5508f4e..23e435b 100644 --- a/examples/make_diagram.py +++ b/examples/make_diagram.py @@ -3,16 +3,7 @@ # # Sample railroad diagrams of selected pyparsing examples. # -# Copyright 2020, Paul McGuire - -from pyparsing.diagram import to_railroad, railroad_to_html - - -def make_diagram(expr, output_html="output.html"): - with open(output_html, "w", encoding="utf-8") as fp: - railroad = to_railroad(expr) - fp.write(railroad_to_html(railroad)) - +# Copyright 2021, Paul McGuire # Uncomment the related import statement and rerun to construct railroad diagram @@ -22,14 +13,22 @@ from examples.delta_time import time_expression as imported_expr # from examples.ebnftest import ebnf_parser as imported_expr # from examples.jsonParser import jsonObject as imported_expr # from examples.lucene_grammar import expression as imported_expr -# from examples.invRegex import parser as imported_expr +# from examples.invRegex import parser; imported_expr = parser() # from examples.oc import program as imported_expr # from examples.mozillaCalendarParser import calendars as imported_expr # from examples.pgn import pgnGrammar as imported_expr -# from examples.idlParse import CORBA_IDL_BNF as imported_expr +# from examples.idlParse import CORBA_IDL_BNF; imported_expr = CORBA_IDL_BNF() # from examples.chemicalFormulas import formula as imported_expr # from examples.romanNumerals import romanNumeral as imported_expr # from examples.protobuf_parser import parser as imported_expr # from examples.parsePythonValue import listItem as imported_expr +# from examples.one_to_ninety_nine import one_to_99 as imported_expr +# from examples.simpleSQL import simpleSQL as imported_expr +# from examples.simpleBool import boolExpr as imported_expr +grammar = imported_expr + +# or define a custom grammar here +# import pyparsing as pp +# grammar = pp.Or(["foo", "bar"]) + pp.Word(pp.nums) + pp.pyparsing_common.uuid -make_diagram(imported_expr) 
+grammar.create_diagram(output_html="output.html", show_results_names=True) diff --git a/examples/mozillaCalendarParser.py b/examples/mozillaCalendarParser.py index 5000cfe..562ec48 100644 --- a/examples/mozillaCalendarParser.py +++ b/examples/mozillaCalendarParser.py @@ -37,19 +37,20 @@ ALARM = Literal("VALARM").suppress() # TOKENS -CALPROP = oneOf("VERSION PRODID METHOD") -ALMPROP = oneOf("TRIGGER") +CALPROP = oneOf("VERSION PRODID METHOD", asKeyword=True) +ALMPROP = oneOf("TRIGGER", asKeyword=True) EVTPROP = oneOf( - "X-MOZILLA-RECUR-DEFAULT-INTERVAL \ - X-MOZILLA-RECUR-DEFAULT-UNITS \ - UID DTSTAMP LAST-MODIFIED X RRULE EXDATE" + """X-MOZILLA-RECUR-DEFAULT-INTERVAL + X-MOZILLA-RECUR-DEFAULT-UNITS + UID DTSTAMP LAST-MODIFIED X RRULE EXDATE""", asKeyword=True ) -propval = Word(valstr) -typeval = Word(valstr) -typename = oneOf("VALUE MEMBER FREQ UNTIL INTERVAL") +valuestr = Word(valstr).setName("valuestr") +propval = valuestr +typeval = valuestr +typename = oneOf("VALUE MEMBER FREQ UNTIL INTERVAL", asKeyword=True) -proptype = Group(SEMI + typename + EQ + typeval).suppress() +proptype = Group(SEMI + typename + EQ + typeval).setName("proptype").suppress() calprop = Group(CALPROP + ZeroOrMore(proptype) + COLON + propval) almprop = Group(ALMPROP + ZeroOrMore(proptype) + COLON + propval) @@ -65,15 +66,15 @@ evtprop = ( | "STATUS" + COLON + propval.setResultsName("status") | "SUMMARY" + COLON + propval.setResultsName("summary") | "URL" + COLON + propval.setResultsName("url") -) -calprops = Group(OneOrMore(calprop)).suppress() +).setName("evtprop") +calprops = Group(OneOrMore(calprop)).setName("calprops").suppress() evtprops = Group(OneOrMore(evtprop)) -almprops = Group(OneOrMore(almprop)).suppress() +almprops = Group(OneOrMore(almprop)).setName("almprops").suppress() -alarm = BEGIN + ALARM + almprops + END + ALARM -event = BEGIN + EVENT + evtprops + Optional(alarm) + END + EVENT +alarm = (BEGIN + ALARM + almprops + END + ALARM).setName("alarm") +event = (BEGIN + EVENT + 
evtprops + Optional(alarm) + END + EVENT).setName("event") events = Group(OneOrMore(event)) -calendar = BEGIN + CALENDAR + calprops + ZeroOrMore(event) + END + CALENDAR +calendar = (BEGIN + CALENDAR + calprops + ZeroOrMore(event) + END + CALENDAR).setName("calendar") calendars = OneOrMore(calendar) diff --git a/examples/railroad_diagram_demo.py b/examples/railroad_diagram_demo.py new file mode 100644 index 0000000..8995bdc --- /dev/null +++ b/examples/railroad_diagram_demo.py @@ -0,0 +1,31 @@ +import pyparsing as pp +ppc = pp.pyparsing_common + +word = pp.Word(pp.alphas).setName("word") +integer = pp.Word(pp.nums).setName("integer") +plus_minus = pp.Char("+-") +mult_div = pp.Char("*/") +street_address = pp.Group(integer("house_number") + word[1, ...]("street_name")).setName("street_address") +time = pp.Regex(r"\d\d:\d\d") + +grammar = (pp.Group(integer[1, ...]) + + (ppc.ipv4_address & word("header_word") & pp.Optional(time)).setName("header with various elements")("header") + + street_address("address") + + pp.Group(pp.counted_array(word)) + + pp.Group(integer * 8)("data") + + pp.Group(pp.Word("abc") + pp.Word("def")*3) + + pp.infix_notation(integer, + [ + (plus_minus().setName("leading sign"), 1, pp.opAssoc.RIGHT), + (mult_div, 2, pp.opAssoc.LEFT), + (plus_minus, 2, pp.opAssoc.LEFT), + ]).setName("simple_arithmetic") + + ... + + pp.Group(ppc.ipv4_address)("ip_address") + ).setName("grammar") + + +grammar.create_diagram("railroad_diagram_demo.html", vertical=6, show_results_names=True) + +test = """1 2 3 ABC 1.2.3.4 12:45 123 Main St 4 abc def ghi jkl 5 5 5 5 5 5 5 5 a d d d 2+2 bob 5.6.7.8""" +result = grammar.runTests([test]) diff --git a/examples/simpleBool.py b/examples/simpleBool.py index 5ff1728..ac75187 100644 --- a/examples/simpleBool.py +++ b/examples/simpleBool.py @@ -73,7 +73,7 @@ class BoolNot: TRUE = Keyword("True")
FALSE = Keyword("False")
boolOperand = TRUE | FALSE | Word(alphas, max=1)
-boolOperand.setParseAction(BoolOperand)
+boolOperand.setParseAction(BoolOperand).setName("bool_operand")
# define expression, based on expression operand and
# list of operations in precedence order
@@ -84,7 +84,7 @@ boolExpr = infixNotation( ("and", 2, opAssoc.LEFT, BoolAnd),
("or", 2, opAssoc.LEFT, BoolOr),
],
-)
+).setName("boolean_expression")
if __name__ == "__main__":
diff --git a/examples/simpleSQL.py b/examples/simpleSQL.py index 5c93191..39b8b4b 100644 --- a/examples/simpleSQL.py +++ b/examples/simpleSQL.py @@ -35,24 +35,24 @@ NOT_NULL = NOT + NULL ident = Word(alphas, alphanums + "_$").setName("identifier") columnName = delimitedList(ident, ".", combine=True).setName("column name") columnName.addParseAction(ppc.upcaseTokens) -columnNameList = Group(delimitedList(columnName)) +columnNameList = Group(delimitedList(columnName).setName("column_list")) tableName = delimitedList(ident, ".", combine=True).setName("table name") tableName.addParseAction(ppc.upcaseTokens) -tableNameList = Group(delimitedList(tableName)) +tableNameList = Group(delimitedList(tableName).setName("table_list")) -binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) -realNum = ppc.real() +binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True).setName("binop") +realNum = ppc.real().setName("real number") intNum = ppc.signed_integer() columnRval = ( realNum | intNum | quotedString | columnName -) # need to add support for alg expressions +).setName("column_rvalue") # need to add support for alg expressions whereCondition = Group( (columnName + binop + columnRval) - | (columnName + IN + Group("(" + delimitedList(columnRval) + ")")) + | (columnName + IN + Group("(" + delimitedList(columnRval).setName("in_values_list") + ")")) | (columnName + IN + Group("(" + selectStmt + ")")) | (columnName + IS + (NULL | NOT_NULL)) -) +).setName("where_condition") whereExpression = infixNotation( whereCondition, @@ -61,7 +61,7 @@ whereExpression = infixNotation( (AND, 2, opAssoc.LEFT), (OR, 2, opAssoc.LEFT), ], -) +).setName("where_expression") # define the grammar selectStmt <<= ( @@ -70,7 +70,7 @@ selectStmt <<= ( + FROM + tableNameList("tables") + Optional(Group(WHERE + whereExpression), "")("where") -) +).setName("select_statement") simpleSQL = selectStmt diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 12b772a..90467c1 100644 --- 
a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -103,7 +103,7 @@ __version__ = ( __version_info__.release_level == "final" ] ) -__version_time__ = "2 September 2021 21:25 UTC" +__version_time__ = "6 September 2021 18:51 UTC" __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" diff --git a/pyparsing/core.py b/pyparsing/core.py index 2f45758..c08c431 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -2013,7 +2013,11 @@ class ParserElement(ABC): return success, allResults def create_diagram( - self, output_html: Union[TextIO, str], vertical: int = 3, **kwargs + self, + output_html: Union[TextIO, str], + vertical: int = 3, + show_results_names: bool = False, + **kwargs, ) -> NoReturn: """ Create a railroad diagram for the parser. @@ -2023,6 +2027,8 @@ class ParserElement(ABC): diagram HTML - vertical (int) - threshold for formatting multiple alternatives vertically instead of horizontally (default=3) + - show_results_names - bool flag whether diagram should show annotations for + defined results names Additional diagram-formatting keyword arguments can also be included; see railroad.Diagram class. 
@@ -2032,11 +2038,17 @@ class ParserElement(ABC): from .diagram import to_railroad, railroad_to_html except ImportError as ie: raise Exception( - "must install 'Railroad-Diagram Generator' from https://pypi.org/project/railroad-diagrams" - "and jinja2 from https://pypi.org/project/jinja2 to generate parser railroad diagrams" + "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams" ) from ie - railroad = to_railroad(self, vertical=vertical, diagram_kwargs=kwargs) + self.streamline() + + railroad = to_railroad( + self, + vertical=vertical, + show_results_names=show_results_names, + diagram_kwargs=kwargs, + ) if isinstance(output_html, str): with open(output_html, "w", encoding="utf-8") as diag_file: diag_file.write(railroad_to_html(railroad)) diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index e96d49c..ce84e8e 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -9,6 +9,7 @@ from typing import ( TypeVar, Dict, Callable, + Set, ) from jinja2 import Template from io import StringIO @@ -29,6 +30,32 @@ A simple structure for associating a name with a railroad diagram T = TypeVar("T") +class EachItem(railroad.Group): + """ + Custom railroad item to compose a: + - Group containing a + - OneOrMore containing a + - Choice of the elements in the Each + with the group label indicating that all must be matched + """ + + all_label = "[ALL]" + + def __init__(self, *items): + choice_item = railroad.Choice(len(items) - 1, *items) + one_or_more_item = railroad.OneOrMore(item=choice_item) + super().__init__(one_or_more_item, label=self.all_label) + + +class AnnotatedItem(railroad.Group): + """ + Simple subclass of Group that creates an annotation label + """ + + def __init__(self, label: str, item): + super().__init__(item=item, label="[{}]".format(label)) + + class EditablePartial(Generic[T]): """ Acts like a functools.partial, but can be edited. 
In other words, it represents a type that hasn't yet been @@ -51,6 +78,10 @@ class EditablePartial(Generic[T]): """ return EditablePartial(func=func, args=list(args), kwargs=kwargs) + @property + def name(self): + return self.kwargs["name"] + def __call__(self) -> T: """ Evaluate the partial and return the result @@ -102,26 +133,55 @@ def resolve_partial(partial: "EditablePartial[T]") -> T: def to_railroad( element: pyparsing.ParserElement, - diagram_kwargs: dict = {}, - vertical: int = None, + diagram_kwargs: Optional[dict] = None, + vertical: int = 3, + show_results_names: bool = False, ) -> List[NamedDiagram]: """ Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram creation if you want to access the Railroad tree before it is converted to HTML + :param element: base element of the parser being diagrammed :param diagram_kwargs: kwargs to pass to the Diagram() constructor - :param vertical: (optional) + :param vertical: (optional) - int - limit at which number of alternatives should be + shown vertically instead of horizontally + :param show_results_names: bool to indicate whether results name annotations should be + included in the diagram """ # Convert the whole tree underneath the root - lookup = ConverterState(diagram_kwargs=diagram_kwargs) - _to_diagram_element(element, lookup=lookup, parent=None, vertical=vertical) + lookup = ConverterState(diagram_kwargs=diagram_kwargs or {}) + _to_diagram_element( + element, + lookup=lookup, + parent=None, + vertical=vertical, + show_results_names=show_results_names, + ) root_id = id(element) # Convert the root if it hasn't been already - if root_id in lookup.first: - lookup.first[root_id].mark_for_extraction(root_id, lookup, force=True) + if root_id in lookup: + if not element.customName: + lookup[root_id].name = "" + lookup[root_id].mark_for_extraction(root_id, lookup, force=True) # Now that we're finished, we can convert from intermediate structures into Railroad 
elements - resolved = [resolve_partial(partial) for partial in lookup.diagrams.values()] + diags = list(lookup.diagrams.values()) + if len(diags) > 1: + # collapse out duplicate diags with the same name + seen = set() + deduped_diags = [] + for d in diags: + # don't extract SkipTo elements, they are uninformative as subdiagrams + if d.name == "...": + continue + if d.name is not None and d.name not in seen: + seen.add(d.name) + deduped_diags.append(d) + resolved = [resolve_partial(partial) for partial in deduped_diags] + else: + # special case - if just one diagram, always display it, even if + # it has no name + resolved = [resolve_partial(partial) for partial in diags] return sorted(resolved, key=lambda diag: diag.index) @@ -148,30 +208,33 @@ class ElementState: parent: EditablePartial, number: int, name: str = None, - index: Optional[int] = None, + parent_index: Optional[int] = None, ): #: The pyparsing element that this represents - self.element = element # type: pyparsing.ParserElement + self.element: pyparsing.ParserElement = element #: The name of the element - self.name = name # type: str + self.name: str = name #: The output Railroad element in an unconverted state - self.converted = converted # type: EditablePartial + self.converted: EditablePartial = converted #: The parent Railroad element, which we store so that we can extract this if it's duplicated - self.parent = parent # type: EditablePartial + self.parent: EditablePartial = parent #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram - self.number = number # type: int + self.number: int = number #: The index of this inside its parent - self.parent_index = index # type: Optional[int] + self.parent_index: Optional[int] = parent_index #: If true, we should extract this out into a subdiagram - self.extract = False # type: bool + self.extract: bool = False #: If true, all of this element's children have been filled out - self.complete = False # type: 
bool + self.complete: bool = False def mark_for_extraction( self, el_id: int, state: "ConverterState", name: str = None, force: bool = False ): """ Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram + :param el_id: id of the element + :param state: element/diagram state tracker + :param name: name to use for this element's text :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the root element when we know we're finished """ @@ -185,8 +248,7 @@ class ElementState: elif self.element.customName: self.name = self.element.customName else: - unnamed_number = 1 if self.parent is None else state.generate_unnamed() - self.name = "Unnamed {}".format(unnamed_number) + self.name = "" # Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children # to be added @@ -200,17 +262,30 @@ class ConverterState: Stores some state that persists between recursions into the element tree """ - def __init__(self, diagram_kwargs: dict = {}): - #: A dictionary mapping ParserElement IDs to state relating to them - self.first = {} # type: Dict[int, ElementState] + def __init__(self, diagram_kwargs: Optional[dict] = None): + #: A dictionary mapping ParserElements to state relating to them + self._element_diagram_states: Dict[int, ElementState] = {} #: A dictionary mapping ParserElement IDs to subdiagrams generated from them - self.diagrams = {} # type: Dict[int, EditablePartial[NamedDiagram]] + self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {} #: The index of the next unnamed element - self.unnamed_index = 1 # type: int + self.unnamed_index: int = 1 #: The index of the next element. 
This is used for sorting - self.index = 0 # type: int + self.index: int = 0 #: Shared kwargs that are used to customize the construction of diagrams - self.diagram_kwargs = diagram_kwargs # type: dict + self.diagram_kwargs: dict = diagram_kwargs or {} + self.extracted_diagram_names: Set[str] = set() + + def __setitem__(self, key: int, value: ElementState): + self._element_diagram_states[key] = value + + def __getitem__(self, key: int) -> ElementState: + return self._element_diagram_states[key] + + def __delitem__(self, key: int): + del self._element_diagram_states[key] + + def __contains__(self, key: int): + return key in self._element_diagram_states def generate_unnamed(self) -> int: """ @@ -228,17 +303,18 @@ class ConverterState: def extract_into_diagram(self, el_id: int): """ - Used when we encounter the same token twice in the same tree. When this happens, we replace all instances of that - token with a terminal, and create a new subdiagram for the token + Used when we encounter the same token twice in the same tree. 
When this + happens, we replace all instances of that token with a terminal, and + create a new subdiagram for the token """ - position = self.first[el_id] + position = self[el_id] # Replace the original definition of this element with a regular block if position.parent: ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name) if "item" in position.parent.kwargs: position.parent.kwargs["item"] = ret - else: + elif "items" in position.parent.kwargs: position.parent.kwargs["items"][position.parent_index] = ret # If the element we're extracting is a group, skip to its content but keep the title @@ -255,7 +331,8 @@ class ConverterState: ), index=position.number, ) - del self.first[el_id] + + del self[el_id] def _worth_extracting(element: pyparsing.ParserElement) -> bool: @@ -264,11 +341,50 @@ def _worth_extracting(element: pyparsing.ParserElement) -> bool: themselves have children, then its complex enough to extract """ children = element.recurse() - return any( - [hasattr(child, "expr") or hasattr(child, "exprs") for child in children] - ) + return any(child.recurse() for child in children) + + +def _apply_diagram_item_enhancements(fn): + """ + decorator to ensure enhancements to a diagram item (such as results name annotations) + get applied on return from _to_diagram_element (we do this since there are several + returns in _to_diagram_element) + """ + + def _inner( + element: pyparsing.ParserElement, + parent: Optional[EditablePartial], + lookup: ConverterState = None, + vertical: int = None, + index: int = 0, + name_hint: str = None, + show_results_names: bool = False, + ) -> Optional[EditablePartial]: + + ret = fn( + element, + parent, + lookup, + vertical, + index, + name_hint, + show_results_names, + ) + + # apply annotation for results name, if present + if show_results_names and ret is not None: + element_results_name = element.resultsName + if element_results_name: + ret = EditablePartial.from_call( + railroad.Group, item=ret, 
label=element_results_name + ) + return ret + + return _inner + +@_apply_diagram_item_enhancements def _to_diagram_element( element: pyparsing.ParserElement, parent: Optional[EditablePartial], @@ -276,6 +392,7 @@ def _to_diagram_element( vertical: int = None, index: int = 0, name_hint: str = None, + show_results_names: bool = False, ) -> Optional[EditablePartial]: """ Recursively converts a PyParsing Element to a railroad Element @@ -286,6 +403,7 @@ def _to_diagram_element( it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never do so :param name_hint: If provided, this will override the generated name + :param show_results_names: bool flag indicating whether to add annotations for results names :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed """ exprs = element.recurse() @@ -294,46 +412,62 @@ def _to_diagram_element( # Python's id() is used to provide a unique identifier for elements el_id = id(element) - # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram - if isinstance(element, (pyparsing.Group, pyparsing.Forward)) and ( - not element.customName or not exprs[0].customName - ): - # However, if this element has a useful custom name, we can pass it on to the child - if not exprs[0].customName: - propagated_name = name - else: - propagated_name = None + element_results_name = element.resultsName + ret = None - return _to_diagram_element( - element.expr, - parent=parent, - lookup=lookup, - vertical=vertical, - index=index, - name_hint=propagated_name, - ) + # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram + if not element.customName: + if isinstance( + element, + ( + pyparsing.TokenConverter, + # pyparsing.Forward, + pyparsing.Located, + ), + ): + # However, if this element has a useful custom name, and its child does not, we can pass it on to the 
child + if not exprs[0].customName: + propagated_name = name + else: + propagated_name = None + + return _to_diagram_element( + element.expr, + parent=parent, + lookup=lookup, + vertical=vertical, + index=index, + name_hint=propagated_name, + show_results_names=show_results_names, + ) # If the element isn't worth extracting, we always treat it as the first time we say it if _worth_extracting(element): - if el_id in lookup.first: + if el_id in lookup: # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate, # so we have to extract it into a new diagram. - looked_up = lookup.first[el_id] + looked_up = lookup[el_id] looked_up.mark_for_extraction(el_id, lookup, name=name_hint) - return EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name) + ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name) + return ret elif el_id in lookup.diagrams: # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we # just put in a marker element that refers to the sub-diagram - return EditablePartial.from_call( + ret = EditablePartial.from_call( railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] ) + return ret # Recursively convert child elements # Here we find the most relevant Railroad element for matching pyparsing Element # We use ``items=[]`` here to hold the place for where the child elements will go once created if isinstance(element, pyparsing.And): - if _should_vertical(vertical, len(exprs)): + # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat + # (all will have the same name, and resultsName) + if len(set((e.name, e.resultsName) for e in exprs)) == 1: + ret = EditablePartial.from_call(railroad.OneOrMore, item="", repeat=str(len(exprs))) + elif _should_vertical(vertical, len(exprs)): ret = EditablePartial.from_call(railroad.Stack, items=[]) else: ret = EditablePartial.from_call(railroad.Sequence, 
items=[]) @@ -342,6 +476,14 @@ def _to_diagram_element( ret = EditablePartial.from_call(railroad.Choice, 0, items=[]) else: ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[]) + elif isinstance(element, pyparsing.Each): + ret = EditablePartial.from_call(EachItem, items=[]) + elif isinstance(element, pyparsing.NotAny): + ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="") + elif isinstance(element, pyparsing.FollowedBy): + ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="") + elif isinstance(element, pyparsing.PrecededBy): + ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="") elif isinstance(element, pyparsing.Opt): ret = EditablePartial.from_call(railroad.Optional, item="") elif isinstance(element, pyparsing.OneOrMore): @@ -349,33 +491,33 @@ def _to_diagram_element( elif isinstance(element, pyparsing.ZeroOrMore): ret = EditablePartial.from_call(railroad.ZeroOrMore, item="") elif isinstance(element, pyparsing.Group): - ret = EditablePartial.from_call(railroad.Group, item=None, label=name) + ret = EditablePartial.from_call( + railroad.Group, item=None, label=element_results_name + ) elif isinstance(element, pyparsing.Empty) and not element.customName: # Skip unnamed "Empty" elements ret = None elif len(exprs) > 1: ret = EditablePartial.from_call(railroad.Sequence, items=[]) - elif len(exprs) > 0: + elif len(exprs) > 0 and not element_results_name: ret = EditablePartial.from_call(railroad.Group, item="", label=name) else: - # If the terminal has a custom name, we annotate the terminal with it, but still show the defaultName, because - # it describes the pattern that it matches, which is useful to have present in the diagram terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName) - if element.customName is not None: - ret = EditablePartial.from_call( - railroad.Group, item=terminal, label=element.customName - ) - else: - ret = terminal + ret = terminal + + if ret is 
None: + return # Indicate this element's position in the tree so we can extract it if necessary - lookup.first[el_id] = ElementState( + lookup[el_id] = ElementState( element=element, converted=ret, parent=parent, - index=index, + parent_index=index, number=lookup.generate_index(), ) + if element.customName: + lookup[el_id].mark_for_extraction(el_id, lookup, element.customName) i = 0 for expr in exprs: @@ -384,7 +526,12 @@ def _to_diagram_element( ret.kwargs["items"].insert(i, None) item = _to_diagram_element( - expr, parent=ret, lookup=lookup, vertical=vertical, index=i + expr, + parent=ret, + lookup=lookup, + vertical=vertical, + index=i, + show_results_names=show_results_names, ) # Some elements don't need to be shown in the diagram @@ -393,8 +540,7 @@ def _to_diagram_element( ret.kwargs["item"] = item elif "items" in ret.kwargs: # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal - if ret.kwargs["items"][i] is None: - ret.kwargs["items"][i] = item + ret.kwargs["items"][i] = item i += 1 elif "items" in ret.kwargs: # If we're supposed to skip this element, remove it from the parent @@ -405,20 +551,17 @@ def _to_diagram_element( ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0) or ("item" in ret.kwargs and ret.kwargs["item"] is None) ): - return EditablePartial.from_call(railroad.Terminal, name) + ret = EditablePartial.from_call(railroad.Terminal, name) # Mark this element as "complete", ie it has all of its children - if el_id in lookup.first: - lookup.first[el_id].complete = True + if el_id in lookup: + lookup[el_id].complete = True - if ( - el_id in lookup.first - and lookup.first[el_id].extract - and lookup.first[el_id].complete - ): + if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete: lookup.extract_into_diagram(el_id) - return EditablePartial.from_call( - railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] - ) - else: - return ret + if ret is not None: + ret = 
EditablePartial.from_call( + railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + ) + + return ret diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index dc183a0..1328a79 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -783,17 +783,15 @@ def infix_notation( thisExpr = Forward().set_name(term_name) if rightLeftAssoc is OpAssoc.LEFT: if arity == 1: - matchExpr = _FB(lastExpr + opExpr) + Group( - lastExpr + opExpr + opExpr[...] - ) + matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...]) elif arity == 2: if opExpr is not None: matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( - lastExpr + opExpr + lastExpr + (opExpr + lastExpr)[...] + lastExpr + (opExpr + lastExpr)[1, ...] ) else: matchExpr = _FB(lastExpr + lastExpr) + Group( - lastExpr + lastExpr + lastExpr[...] + lastExpr + lastExpr[1, ...] ) elif arity == 3: matchExpr = _FB( @@ -808,11 +806,11 @@ def infix_notation( elif arity == 2: if opExpr is not None: matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( - lastExpr + opExpr + thisExpr + (opExpr + thisExpr)[...] + lastExpr + (opExpr + thisExpr)[1, ...] ) else: matchExpr = _FB(lastExpr + thisExpr) + Group( - lastExpr + thisExpr + thisExpr[...] + lastExpr + thisExpr[1, ...] 
) elif arity == 3: matchExpr = _FB( @@ -823,7 +821,7 @@ def infix_notation( matchExpr.set_parse_action(*pa) else: matchExpr.set_parse_action(pa) - thisExpr <<= matchExpr.set_name(term_name) | lastExpr + thisExpr <<= (matchExpr | lastExpr).setName(term_name) lastExpr = thisExpr ret <<= lastExpr return ret diff --git a/tests/test_diagram.py b/tests/test_diagram.py index eeb1ee8..96c13cd 100644 --- a/tests/test_diagram.py +++ b/tests/test_diagram.py @@ -1,66 +1,104 @@ import unittest +from typing import List + from examples.jsonParser import jsonObject from examples.simpleBool import boolExpr from examples.simpleSQL import simpleSQL from examples.mozillaCalendarParser import calendars -from pyparsing.diagram import to_railroad, railroad_to_html -from pyparsing import Or +from pyparsing.diagram import to_railroad, railroad_to_html, NamedDiagram +import pyparsing as pp import tempfile import os +import sys class TestRailroadDiagrams(unittest.TestCase): def railroad_debug(self) -> bool: """ - Returns True if we're in debug mode + Returns True if we're in debug mode (determined by either setting + environment var, or running in a debugger which sets sys.settrace) """ - return os.environ.get("RAILROAD_DEBUG", False) + return os.environ.get("RAILROAD_DEBUG", False) or sys.gettrace() def get_temp(self): """ Returns an appropriate temporary file for writing a railroad diagram """ return tempfile.NamedTemporaryFile( - delete=not self.railroad_debug(), mode="w", encoding="utf-8", suffix=".html" + dir=".", + delete=not self.railroad_debug(), + mode="w", + encoding="utf-8", + suffix=".html", ) - def test_bool_expr(self): + def generate_railroad( + self, expr: pp.ParserElement, label: str, show_results_names: bool = False + ) -> List[NamedDiagram]: + """ + Generate an intermediate list of NamedDiagrams from a pyparsing expression. 
+ """ with self.get_temp() as temp: - railroad = to_railroad(boolExpr) - assert len(railroad) == 3 + railroad = to_railroad(expr, show_results_names=show_results_names) temp.write(railroad_to_html(railroad)) - if self.railroad_debug(): - print("bool expr:" + temp.name) + if self.railroad_debug(): + print(f"{label}: {temp.name}") - def test_json(self): - with self.get_temp() as temp: - railroad = to_railroad(jsonObject) - assert len(railroad) == 4 - temp.write(railroad_to_html(railroad)) + return railroad - if self.railroad_debug(): - print("json: " + temp.name) + def test_bool_expr(self): + railroad = self.generate_railroad(boolExpr, "boolExpr") + assert len(railroad) == 5 - def test_sql(self): - with self.get_temp() as temp: - railroad = to_railroad(simpleSQL) - assert len(railroad) == 7 - temp.write(railroad_to_html(railroad)) + def test_json(self): + railroad = self.generate_railroad(jsonObject, "jsonObject") + assert len(railroad) == 9 - if self.railroad_debug(): - print("sql: " + temp.name) + def test_sql(self): + railroad = self.generate_railroad(simpleSQL, "simpleSQL") + assert len(railroad) == 18 def test_calendars(self): - with self.get_temp() as temp: - railroad = to_railroad(calendars) - temp.write(railroad_to_html(railroad)) + railroad = self.generate_railroad(calendars, "calendars") + assert len(railroad) == 13 + + def test_nested_forward_with_inner_and_outer_names(self): + outer = pp.Forward().setName("outer") + inner = pp.Word(pp.alphas)[...].setName("inner") + outer <<= inner + + railroad = self.generate_railroad(outer, "inner_outer_names") + assert len(railroad) == 2 + + def test_nested_forward_with_inner_name_only(self): + outer = pp.Forward() + inner = pp.Word(pp.alphas)[...].setName("inner") + outer <<= inner - if self.railroad_debug(): - print("calendar: " + temp.name) + railroad = self.generate_railroad(outer, "inner_only") + assert len(railroad) == 2 + + def test_each_grammar(self): + + grammar = pp.Each( + [ + pp.Word(pp.nums), + 
pp.Word(pp.alphas), + pp.pyparsing_common.uuid, + ] + ).setName("int-word-uuid in any order") + railroad = self.generate_railroad(grammar, "each_expression") + assert len(railroad) == 2 def test_none_name(self): - grammar = Or(["foo", "bar"]) + grammar = pp.Or(["foo", "bar"]) railroad = to_railroad(grammar) assert len(railroad) == 1 assert railroad[0].name is not None + + def test_none_name2(self): + grammar = pp.Or(["foo", "bar"]) + pp.Word(pp.nums).setName("integer") + railroad = to_railroad(grammar) + assert len(railroad) == 2 + assert railroad[0].name is not None |