diff options
author | Paul McGuire <ptmcg@users.noreply.github.com> | 2021-09-08 09:03:40 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-08 09:03:40 -0500 |
commit | dfc7d7524ed9bb74a04865a68a06982bb54fcc5c (patch) | |
tree | d7655f624998dd910caf5c833dda2cb4a68aad85 /examples | |
parent | 11fda2880df71ce6661807b3b5921bc09bd6e003 (diff) | |
download | pyparsing-git-dfc7d7524ed9bb74a04865a68a06982bb54fcc5c.tar.gz |
226 railroad updates (#298)
* Add line separators to HowToUsePyparsing.rst to call attention to PEP-8 naming in this document
* Update railroad diagram generation code, to show results names as group annotations, and break out all expressions with a name set using setName.
* Revert dataclasses back to NamedTuples for 3.6-7 compat; add setName calls in simpleBool.py; add simpleBool to make_diagram.py
* Remove default setName calls on delimitedList
* Add setName calls to simpleSQL for better diagram
* Remove hard-coded debug mode
* Move setName on delimitedList into test code
* Restore default setName() calls for delimitedList; set default vertical=3; update jsonParser.py and simpleSQL.py with better setName() calls (and update test_diagram.py accordingly); update test_diagram.py to move asserts after tempfiles are written, moved tempfiles to local dir instead of hard-to-find temp dir
* Get proper railroad diagrams for infixNotation
* Undo forced railroad_debug
* Code cleanup from PR comments
* Remove hard-coded base_expr name from infix_notation
* Add special EachItem to compose DiagramItem for Group-OneOrMore-Choice; refactored tests to move duplicated code to function; added names to mozillaCalendarParser.py for better diagram
* Make sure root element gets in the diagram, even if it has no custom name
* Update tests to reflect diagram structure changes
* Add LOOKAHEAD and LOOKBEHIND annotations for FollowedBy and PrecededBy elements, and changed the annotation on Each to [ALL]; renamed _first to _element_diagram_states; add expr.streamline() in create_diagram() to collapse nested exprs; added railroad_diagram_demo.py example; general blackening; update CHANGES with latest enhancements; bump version date
* Fix pip command
* Update CHANGES and whats_new_in_3_0_0.rst with some features and acknowledgements
* Updates from PR review: change user instructions to use pyparsing[diagrams]; consistent annotations for NotAny along with FollowedBy and PrecededBy; fixed up comments and type annotations
* Remove unneeded pip installs for tox (already handled in tox.ini)
* Refactor duplicate code into decorator; drop unused group_results_name argument
* Add diagram handling for SkipTo, and for And's constructed using `expr*N` notation (use a OneOrMore diagram with a repeat count instead of a sequence of N exprs)
* Fix parsing ambiguity in railroad_diagram_demo.py so that parser can actually parse a valid input string
Diffstat (limited to 'examples')
-rw-r--r-- | examples/chemicalFormulas.py | 10 | ||||
-rw-r--r-- | examples/delta_time.py | 10 | ||||
-rw-r--r-- | examples/jsonParser.py | 8 | ||||
-rw-r--r-- | examples/make_diagram.py | 25 | ||||
-rw-r--r-- | examples/mozillaCalendarParser.py | 31 | ||||
-rw-r--r-- | examples/railroad_diagram_demo.py | 31 | ||||
-rw-r--r-- | examples/simpleBool.py | 4 | ||||
-rw-r--r-- | examples/simpleSQL.py | 18 |
8 files changed, 85 insertions, 52 deletions
diff --git a/examples/chemicalFormulas.py b/examples/chemicalFormulas.py index f7c7d14..d4c87cd 100644 --- a/examples/chemicalFormulas.py +++ b/examples/chemicalFormulas.py @@ -17,7 +17,7 @@ atomicWeight = { digits = "0123456789"
# Version 1
-element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2)
+element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2).set_name("element")
# for stricter matching, use this Regex instead
# element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|"
# "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|"
@@ -69,9 +69,9 @@ formula.runTests( print()
# Version 3 - convert integers during parsing process
-integer = pp.Word(digits).setParseAction(lambda t: int(t[0]))
+integer = pp.Word(digits).setParseAction(lambda t: int(t[0])).setName("integer")
elementRef = pp.Group(element("symbol") + pp.Optional(integer, default=1)("qty"))
-formula = elementRef[...]
+formula = elementRef[...].setName("chemical_formula")
def sum_atomic_weights_by_results_name_with_converted_ints(element_list):
@@ -103,10 +103,10 @@ def cvt_subscript_int(s): return ret
-subscript_int = pp.Word(subscript_digits).addParseAction(cvt_subscript_int)
+subscript_int = pp.Word(subscript_digits).addParseAction(cvt_subscript_int).set_name("subscript")
elementRef = pp.Group(element("symbol") + pp.Optional(subscript_int, default=1)("qty"))
-formula = elementRef[...]
+formula = elementRef[1, ...].setName("chemical_formula")
formula.runTests(
"""\
H₂O
diff --git a/examples/delta_time.py b/examples/delta_time.py index dfe7a65..2f9466c 100644 --- a/examples/delta_time.py +++ b/examples/delta_time.py @@ -36,6 +36,7 @@ import calendar __all__ = ["time_expression"] + # basic grammar definitions def make_integer_word_expr(int_name, int_value): return pp.CaselessKeyword(int_name).addParseAction(pp.replaceWith(int_value)) @@ -49,7 +50,8 @@ integer_word = pp.MatchFirst( " seventeen eighteen nineteen twenty".split(), start=1, ) -) +).setName("integer_word") + integer = pp.pyparsing_common.integer | integer_word integer.setName("numeric") @@ -66,7 +68,7 @@ def plural(s): week, day, hour, minute, second = map(plural, "week day hour minute second".split()) time_units = hour | minute | second -any_time_units = week | day | time_units +any_time_units = (week | day | time_units).setName("time_units") am = CL("am") pm = CL("pm") @@ -110,9 +112,9 @@ def fill_default_time_fields(t): weekday_name_list = list(calendar.day_name) -weekday_name = pp.oneOf(weekday_name_list) +weekday_name = pp.oneOf(weekday_name_list).setName("weekday_name") -_24hour_time = ~(integer + any_time_units) + pp.Word(pp.nums, exact=4).addParseAction( +_24hour_time = ~(integer + any_time_units).setName("numbered_time_units") + pp.Word(pp.nums, exact=4).setName("HHMM").addParseAction( lambda t: [int(t[0][:2]), int(t[0][2:])], fill_24hr_time_fields ) _24hour_time.setName("0000 time") diff --git a/examples/jsonParser.py b/examples/jsonParser.py index 6d6b1c2..0ea4aa1 100644 --- a/examples/jsonParser.py +++ b/examples/jsonParser.py @@ -51,12 +51,12 @@ NULL = make_keyword("null", None) LBRACK, RBRACK, LBRACE, RBRACE, COLON = map(pp.Suppress, "[]{}:")
jsonString = pp.dblQuotedString().setParseAction(pp.removeQuotes)
-jsonNumber = ppc.number()
+jsonNumber = ppc.number().setName("jsonNumber")
jsonObject = pp.Forward().setName("jsonObject")
jsonValue = pp.Forward().setName("jsonValue")
-jsonElements = pp.delimitedList(jsonValue)
+jsonElements = pp.delimitedList(jsonValue).setName(None)
# jsonArray = pp.Group(LBRACK + pp.Optional(jsonElements, []) + RBRACK)
# jsonValue << (
# jsonString | jsonNumber | pp.Group(jsonObject) | jsonArray | TRUE | FALSE | NULL
@@ -65,7 +65,7 @@ jsonElements = pp.delimitedList(jsonValue) jsonArray = pp.Group(
LBRACK + pp.Optional(jsonElements) + RBRACK, aslist=RETURN_PYTHON_COLLECTIONS
-)
+).setName("jsonArray")
jsonValue << (jsonString | jsonNumber | jsonObject | jsonArray | TRUE | FALSE | NULL)
@@ -73,7 +73,7 @@ memberDef = pp.Group( jsonString + COLON + jsonValue, aslist=RETURN_PYTHON_COLLECTIONS
).setName("jsonMember")
-jsonMembers = pp.delimitedList(memberDef)
+jsonMembers = pp.delimitedList(memberDef).setName(None)
# jsonObject << pp.Dict(LBRACE + pp.Optional(jsonMembers) + RBRACE)
jsonObject << pp.Dict(
LBRACE + pp.Optional(jsonMembers) + RBRACE, asdict=RETURN_PYTHON_COLLECTIONS
diff --git a/examples/make_diagram.py b/examples/make_diagram.py index 5508f4e..23e435b 100644 --- a/examples/make_diagram.py +++ b/examples/make_diagram.py @@ -3,16 +3,7 @@ # # Sample railroad diagrams of selected pyparsing examples. # -# Copyright 2020, Paul McGuire - -from pyparsing.diagram import to_railroad, railroad_to_html - - -def make_diagram(expr, output_html="output.html"): - with open(output_html, "w", encoding="utf-8") as fp: - railroad = to_railroad(expr) - fp.write(railroad_to_html(railroad)) - +# Copyright 2021, Paul McGuire # Uncomment the related import statement and rerun to construct railroad diagram @@ -22,14 +13,22 @@ from examples.delta_time import time_expression as imported_expr # from examples.ebnftest import ebnf_parser as imported_expr # from examples.jsonParser import jsonObject as imported_expr # from examples.lucene_grammar import expression as imported_expr -# from examples.invRegex import parser as imported_expr +# from examples.invRegex import parser; imported_expr = parser() # from examples.oc import program as imported_expr # from examples.mozillaCalendarParser import calendars as imported_expr # from examples.pgn import pgnGrammar as imported_expr -# from examples.idlParse import CORBA_IDL_BNF as imported_expr +# from examples.idlParse import CORBA_IDL_BNF; imported_expr = CORBA_IDL_BNF() # from examples.chemicalFormulas import formula as imported_expr # from examples.romanNumerals import romanNumeral as imported_expr # from examples.protobuf_parser import parser as imported_expr # from examples.parsePythonValue import listItem as imported_expr +# from examples.one_to_ninety_nine import one_to_99 as imported_expr +# from examples.simpleSQL import simpleSQL as imported_expr +# from examples.simpleBool import boolExpr as imported_expr +grammar = imported_expr + +# or define a custom grammar here +# import pyparsing as pp +# grammar = pp.Or(["foo", "bar"]) + pp.Word(pp.nums) + pp.pyparsing_common.uuid -make_diagram(imported_expr) 
+grammar.create_diagram(output_html="output.html", show_results_names=True) diff --git a/examples/mozillaCalendarParser.py b/examples/mozillaCalendarParser.py index 5000cfe..562ec48 100644 --- a/examples/mozillaCalendarParser.py +++ b/examples/mozillaCalendarParser.py @@ -37,19 +37,20 @@ ALARM = Literal("VALARM").suppress() # TOKENS -CALPROP = oneOf("VERSION PRODID METHOD") -ALMPROP = oneOf("TRIGGER") +CALPROP = oneOf("VERSION PRODID METHOD", asKeyword=True) +ALMPROP = oneOf("TRIGGER", asKeyword=True) EVTPROP = oneOf( - "X-MOZILLA-RECUR-DEFAULT-INTERVAL \ - X-MOZILLA-RECUR-DEFAULT-UNITS \ - UID DTSTAMP LAST-MODIFIED X RRULE EXDATE" + """X-MOZILLA-RECUR-DEFAULT-INTERVAL + X-MOZILLA-RECUR-DEFAULT-UNITS + UID DTSTAMP LAST-MODIFIED X RRULE EXDATE""", asKeyword=True ) -propval = Word(valstr) -typeval = Word(valstr) -typename = oneOf("VALUE MEMBER FREQ UNTIL INTERVAL") +valuestr = Word(valstr).setName("valuestr") +propval = valuestr +typeval = valuestr +typename = oneOf("VALUE MEMBER FREQ UNTIL INTERVAL", asKeyword=True) -proptype = Group(SEMI + typename + EQ + typeval).suppress() +proptype = Group(SEMI + typename + EQ + typeval).setName("proptype").suppress() calprop = Group(CALPROP + ZeroOrMore(proptype) + COLON + propval) almprop = Group(ALMPROP + ZeroOrMore(proptype) + COLON + propval) @@ -65,15 +66,15 @@ evtprop = ( | "STATUS" + COLON + propval.setResultsName("status") | "SUMMARY" + COLON + propval.setResultsName("summary") | "URL" + COLON + propval.setResultsName("url") -) -calprops = Group(OneOrMore(calprop)).suppress() +).setName("evtprop") +calprops = Group(OneOrMore(calprop)).setName("calprops").suppress() evtprops = Group(OneOrMore(evtprop)) -almprops = Group(OneOrMore(almprop)).suppress() +almprops = Group(OneOrMore(almprop)).setName("almprops").suppress() -alarm = BEGIN + ALARM + almprops + END + ALARM -event = BEGIN + EVENT + evtprops + Optional(alarm) + END + EVENT +alarm = (BEGIN + ALARM + almprops + END + ALARM).setName("alarm") +event = (BEGIN + EVENT + 
evtprops + Optional(alarm) + END + EVENT).setName("event") events = Group(OneOrMore(event)) -calendar = BEGIN + CALENDAR + calprops + ZeroOrMore(event) + END + CALENDAR +calendar = (BEGIN + CALENDAR + calprops + ZeroOrMore(event) + END + CALENDAR).setName("calendar") calendars = OneOrMore(calendar) diff --git a/examples/railroad_diagram_demo.py b/examples/railroad_diagram_demo.py new file mode 100644 index 0000000..8995bdc --- /dev/null +++ b/examples/railroad_diagram_demo.py @@ -0,0 +1,31 @@ +import pyparsing as pp +ppc = pp.pyparsing_common + +word = pp.Word(pp.alphas).setName("word") +integer = pp.Word(pp.nums).setName("integer") +plus_minus = pp.Char("+-") +mult_div = pp.Char("*/") +street_address = pp.Group(integer("house_number") + word[1, ...]("street_name")).setName("street_address") +time = pp.Regex(r"\d\d:\d\d") + +grammar = (pp.Group(integer[1, ...]) + + (ppc.ipv4_address & word("header_word") & pp.Optional(time)).setName("header with various elements")("header") + + street_address("address") + + pp.Group(pp.counted_array(word)) + + pp.Group(integer * 8)("data") + + pp.Group(pp.Word("abc") + pp.Word("def")*3) + + pp.infix_notation(integer, + [ + (plus_minus().setName("leading sign"), 1, pp.opAssoc.RIGHT), + (mult_div, 2, pp.opAssoc.LEFT), + (plus_minus, 2, pp.opAssoc.LEFT), + ]).setName("simple_arithmetic") + + ... + + pp.Group(ppc.ipv4_address)("ip_address") + ).setName("grammar") + + +grammar.create_diagram("railroad_diagram_demo.html", vertical=6, show_results_names=True) + +test = """1 2 3 ABC 1.2.3.4 12:45 123 Main St 4 abc def ghi jkl 5 5 5 5 5 5 5 5 a d d d 2+2 bob 5.6.7.8""" +result = grammar.runTests([test]) diff --git a/examples/simpleBool.py b/examples/simpleBool.py index 5ff1728..ac75187 100644 --- a/examples/simpleBool.py +++ b/examples/simpleBool.py @@ -73,7 +73,7 @@ class BoolNot: TRUE = Keyword("True")
FALSE = Keyword("False")
boolOperand = TRUE | FALSE | Word(alphas, max=1)
-boolOperand.setParseAction(BoolOperand)
+boolOperand.setParseAction(BoolOperand).setName("bool_operand")
# define expression, based on expression operand and
# list of operations in precedence order
@@ -84,7 +84,7 @@ boolExpr = infixNotation( ("and", 2, opAssoc.LEFT, BoolAnd),
("or", 2, opAssoc.LEFT, BoolOr),
],
-)
+).setName("boolean_expression")
if __name__ == "__main__":
diff --git a/examples/simpleSQL.py b/examples/simpleSQL.py index 5c93191..39b8b4b 100644 --- a/examples/simpleSQL.py +++ b/examples/simpleSQL.py @@ -35,24 +35,24 @@ NOT_NULL = NOT + NULL ident = Word(alphas, alphanums + "_$").setName("identifier") columnName = delimitedList(ident, ".", combine=True).setName("column name") columnName.addParseAction(ppc.upcaseTokens) -columnNameList = Group(delimitedList(columnName)) +columnNameList = Group(delimitedList(columnName).setName("column_list")) tableName = delimitedList(ident, ".", combine=True).setName("table name") tableName.addParseAction(ppc.upcaseTokens) -tableNameList = Group(delimitedList(tableName)) +tableNameList = Group(delimitedList(tableName).setName("table_list")) -binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) -realNum = ppc.real() +binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True).setName("binop") +realNum = ppc.real().setName("real number") intNum = ppc.signed_integer() columnRval = ( realNum | intNum | quotedString | columnName -) # need to add support for alg expressions +).setName("column_rvalue") # need to add support for alg expressions whereCondition = Group( (columnName + binop + columnRval) - | (columnName + IN + Group("(" + delimitedList(columnRval) + ")")) + | (columnName + IN + Group("(" + delimitedList(columnRval).setName("in_values_list") + ")")) | (columnName + IN + Group("(" + selectStmt + ")")) | (columnName + IS + (NULL | NOT_NULL)) -) +).setName("where_condition") whereExpression = infixNotation( whereCondition, @@ -61,7 +61,7 @@ whereExpression = infixNotation( (AND, 2, opAssoc.LEFT), (OR, 2, opAssoc.LEFT), ], -) +).setName("where_expression") # define the grammar selectStmt <<= ( @@ -70,7 +70,7 @@ selectStmt <<= ( + FROM + tableNameList("tables") + Optional(Group(WHERE + whereExpression), "")("where") -) +).setName("select_statement") simpleSQL = selectStmt |