diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-04-07 13:45:07 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-04-07 13:45:07 -0500 |
commit | adf8dd00b736ba1934914e1db78528af02662b65 (patch) | |
tree | f673c526701163c1ce94608a2af3181a78bdf40f /examples | |
parent | 2a81eb83ff5450e85bd22bd2ed35e43938c18208 (diff) | |
download | pyparsing-git-adf8dd00b736ba1934914e1db78528af02662b65.tar.gz |
Code updates to current idiomspyparsing_2.4.0
Diffstat (limited to 'examples')
-rw-r--r-- | examples/antlr_grammar.py | 165 | ||||
-rw-r--r-- | examples/antlr_grammar_tests.py | 6 |
2 files changed, 117 insertions, 54 deletions
diff --git a/examples/antlr_grammar.py b/examples/antlr_grammar.py index 09ef264..c131cfb 100644 --- a/examples/antlr_grammar.py +++ b/examples/antlr_grammar.py @@ -7,49 +7,71 @@ Created on 4 sept. 2010 Submitted by Luca DallOlio, September, 2010 (Minor updates by Paul McGuire, June, 2012) +(Code idiom updates by Paul McGuire, April, 2019) ''' -from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \ - LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \ - cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums, \ - delimitedList, ungroup +from pyparsing import (Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, + LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, + cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums, + delimitedList, Char) # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g +QUOTE,APOS,EQ,LBRACK,RBRACK,LBRACE,RBRACE,LPAR,RPAR,ROOT,BANG,AT,TIL,SEMI,COLON,VERT = map(Suppress, + '"\'=[]{}()^!@~;:|') +BSLASH = Literal('\\') +keywords = (SRC_, SCOPE_, OPTIONS_, TOKENS_, FRAGMENT, ID, LEXER, PARSER, GRAMMAR, TREE, CATCH, FINALLY, + THROWS, PROTECTED, PUBLIC, PRIVATE, ) = map(Keyword, + """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected + public private """.split()) +KEYWORD = MatchFirst(keywords) + # Tokens EOL = Suppress(LineEnd()) # $ +SGL_PRINTABLE = Char(printables) singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace() XDIGIT = hexnums INT = Word(nums) -ESC = Literal('\\') + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1)) -LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1) -CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'") -STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'") +ESC = BSLASH + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | SGL_PRINTABLE) +LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE +CHAR_LITERAL = APOS + LITERAL_CHAR + APOS +STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"' -DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>' +DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(SGL_PRINTABLE) + '>>' TOKEN_REF = Word(alphas.upper(), alphanums+'_') RULE_REF = Word(alphas.lower(), alphanums+'_') -ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress('\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1)) -ACTION_CHAR_LITERAL = Suppress("'") + (ACTION_ESC | ~(Literal('\\') | Literal("'")) + Word(printables, exact=1)) + Suppress("'") -ACTION_STRING_LITERAL = Suppress('"') + ZeroOrMore(ACTION_ESC | ~(Literal('\\') | Literal('"')) + Word(printables, exact=1)) + Suppress('"') -SRC = Suppress('src') + ACTION_STRING_LITERAL("file") + INT("line") +ACTION_ESC = (BSLASH.suppress() + APOS + | BSLASH.suppress() + | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE) + ) +ACTION_CHAR_LITERAL = (APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS) +ACTION_STRING_LITERAL = (QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE) + +SRC = SRC_.suppress() + ACTION_STRING_LITERAL("file") + INT("line") id = TOKEN_REF | RULE_REF SL_COMMENT = Suppress('//') + Suppress('$ANTLR') + SRC | ZeroOrMore(~EOL + Word(printables)) + EOL ML_COMMENT = cStyleComment WS = OneOrMore(Suppress(' ') | Suppress('\t') | (Optional(Suppress('\r')) + Literal('\n'))) WS_LOOP = ZeroOrMore(SL_COMMENT | ML_COMMENT) NESTED_ARG_ACTION = Forward() -NESTED_ARG_ACTION << Suppress('[') + ZeroOrMore(NESTED_ARG_ACTION | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL) + Suppress(']') +NESTED_ARG_ACTION << (LBRACK + + ZeroOrMore(NESTED_ARG_ACTION + | ACTION_STRING_LITERAL + | ACTION_CHAR_LITERAL) + + RBRACK) ARG_ACTION = NESTED_ARG_ACTION NESTED_ACTION = Forward() -NESTED_ACTION << Suppress('{') + ZeroOrMore(NESTED_ACTION | SL_COMMENT | ML_COMMENT | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL) + Suppress('}') +NESTED_ACTION << (LBRACE + + ZeroOrMore(NESTED_ACTION + | SL_COMMENT + | ML_COMMENT + | ACTION_STRING_LITERAL + | ACTION_CHAR_LITERAL) + + RBRACE) ACTION = NESTED_ACTION + Optional('?') -SCOPE = Suppress('scope') -OPTIONS = Suppress('options') + Suppress('{') # + WS_LOOP + Suppress('{') -TOKENS = Suppress('tokens') + Suppress('{') # + WS_LOOP + Suppress('{') -FRAGMENT = 'fragment'; -TREE_BEGIN = Suppress('^(') -ROOT = Suppress('^') -BANG = Suppress('!') +SCOPE = SCOPE_.suppress() +OPTIONS = OPTIONS_.suppress() + LBRACE # + WS_LOOP + Suppress('{') +TOKENS = TOKENS_.suppress() + LBRACE # + WS_LOOP + Suppress('{') +TREE_BEGIN = ROOT + LPAR RANGE = Suppress('..') REWRITE = Suppress('->') @@ -58,26 +80,36 @@ REWRITE = Suppress('->') # Grammar heading optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal('*').setName("s") -option = Group(id("id") + Suppress('=') + optionValue("value"))("option") -optionsSpec = OPTIONS + Group(OneOrMore(option + Suppress(';')))("options") + Suppress('}') -tokenSpec = Group(TOKEN_REF("token_ref") + (Suppress('=') + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + Suppress(';') -tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + Suppress('}') -attrScope = Suppress('scope') + id + ACTION -grammarType = Keyword('lexer') + Keyword('parser') + Keyword('tree') -actionScopeName = id | Keyword('lexer')("l") | Keyword('parser')("p") -action = Suppress('@') + Optional(actionScopeName + Suppress('::')) + id + ACTION - -grammarHeading = Optional(ML_COMMENT("ML_COMMENT")) + Optional(grammarType) + Suppress('grammar') + id("grammarName") + Suppress(';') + Optional(optionsSpec) + Optional(tokensSpec) + ZeroOrMore(attrScope) + ZeroOrMore(action) - -modifier = Keyword('protected') | Keyword('public') | Keyword('private') | Keyword('fragment') -ruleAction = Suppress('@') + id + ACTION -throwsSpec = Suppress('throws') + delimitedList(id) -ruleScopeSpec = (Suppress('scope') + ACTION) | (Suppress('scope') + delimitedList(id) + Suppress(';')) | (Suppress('scope') + ACTION + Suppress('scope') + delimitedList(id) + Suppress(';')) +option = Group(id("id") + EQ + optionValue("value"))("option") +optionsSpec = OPTIONS + Group(OneOrMore(option + SEMI))("options") + RBRACE +tokenSpec = Group(TOKEN_REF("token_ref") + + (EQ + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + SEMI +tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + RBRACE +attrScope = SCOPE_.suppress() + id + ACTION +grammarType = LEXER + PARSER + TREE +actionScopeName = id | LEXER("l") | PARSER("p") +action = AT + Optional(actionScopeName + Suppress('::')) + id + ACTION + +grammarHeading = (Optional(ML_COMMENT("ML_COMMENT")) + + Optional(grammarType) + + GRAMMAR + + id("grammarName") + SEMI + + Optional(optionsSpec) + + Optional(tokensSpec) + + ZeroOrMore(attrScope) + + ZeroOrMore(action)) + +modifier = PROTECTED | PUBLIC | PRIVATE | FRAGMENT +ruleAction = AT + id + ACTION +throwsSpec = THROWS.suppress() + delimitedList(id) +ruleScopeSpec = ((SCOPE_.suppress() + ACTION) + | (SCOPE_.suppress() + delimitedList(id) + SEMI) + | (SCOPE_.suppress() + ACTION + SCOPE_.suppress() + delimitedList(id) + SEMI)) unary_op = oneOf("^ !") notTerminal = CHAR_LITERAL | TOKEN_REF | STRING_LITERAL terminal = (CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | '.') + Optional(unary_op) block = Forward() -notSet = Suppress('~') + (notTerminal | block) +notSet = TIL + (notTerminal | block) rangeNotPython = CHAR_LITERAL("c1") + RANGE + CHAR_LITERAL("c2") atom = Group((rangeNotPython + Optional(unary_op)("op")) | terminal @@ -85,21 +117,44 @@ atom = Group((rangeNotPython + Optional(unary_op)("op")) | (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op")) ) element = Forward() -treeSpec = Suppress('^(') + element*(2,) + Suppress(')') +treeSpec = ROOT + LPAR + element*(2,) + RPAR ebnfSuffix = oneOf("? * +") ebnf = block + Optional(ebnfSuffix("op") | '=>') -elementNoOptionSpec = (id("result_name") + oneOf('= +=')("labelOp") + atom("atom") + Optional(ebnfSuffix)) | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix)) | atom("atom") + Optional(ebnfSuffix) | ebnf | ACTION | (treeSpec + Optional(ebnfSuffix)) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED ) -element << Group(elementNoOptionSpec)("element") -alternative = Group(Group(OneOrMore(element))("elements")) # Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure? +elementNoOptionSpec = ((id("result_name") + oneOf('= +=')("labelOp") + atom("atom") + Optional(ebnfSuffix)) + | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix)) + | atom("atom") + Optional(ebnfSuffix) + | ebnf + | ACTION + | (treeSpec + Optional(ebnfSuffix)) + ) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED ) +element <<= Group(elementNoOptionSpec)("element") +# Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure? +alternative = Group(Group(OneOrMore(element))("elements")) rewrite = Optional(Literal('TODO REWRITE RULES TODO')) -block << Suppress('(') + Optional(Optional(optionsSpec("opts")) + Suppress(':')) + Group(alternative('a1') + rewrite + Group(ZeroOrMore(Suppress('|') + alternative('a2') + rewrite))("alternatives"))("block") + Suppress(')') -altList = alternative('a1') + rewrite + Group(ZeroOrMore(Suppress('|') + alternative('a2') + rewrite))("alternatives") -exceptionHandler = Suppress('catch') + ARG_ACTION + ACTION -finallyClause = Suppress('finally') + ACTION +block <<= (LPAR + + Optional(Optional(optionsSpec("opts")) + COLON) + + Group(alternative('a1') + + rewrite + + Group(ZeroOrMore(VERT + + alternative('a2') + + rewrite))("alternatives"))("block") + + RPAR) +altList = alternative('a1') + rewrite + Group(ZeroOrMore(VERT + alternative('a2') + rewrite))("alternatives") +exceptionHandler = CATCH.suppress() + ARG_ACTION + ACTION +finallyClause = FINALLY.suppress() + ACTION exceptionGroup = (OneOrMore(exceptionHandler) + Optional(finallyClause)) | finallyClause -ruleHeading = Optional(ML_COMMENT)("ruleComment") + Optional(modifier)("modifier") + id("ruleName") + Optional("!") + Optional(ARG_ACTION("arg")) + Optional(Suppress('returns') + ARG_ACTION("rt")) + Optional(throwsSpec) + Optional(optionsSpec) + Optional(ruleScopeSpec) + ZeroOrMore(ruleAction) -rule = Group(ruleHeading + Suppress(':') + altList + Suppress(';') + Optional(exceptionGroup))("rule") +ruleHeading = (Optional(ML_COMMENT)("ruleComment") + + Optional(modifier)("modifier") + + id("ruleName") + + Optional("!") + + Optional(ARG_ACTION("arg")) + + Optional(Suppress('returns') + ARG_ACTION("rt")) + + Optional(throwsSpec) + + Optional(optionsSpec) + + Optional(ruleScopeSpec) + + ZeroOrMore(ruleAction)) +rule = Group(ruleHeading + COLON + altList + SEMI + Optional(exceptionGroup))("rule") grammarDef = grammarHeading + Group(OneOrMore(rule))("rules") @@ -149,7 +204,7 @@ def __antlrAlternativeConverter(pyparsingRules, antlrAlternative): rule = Group(And(elementList))("anonymous_and") else: rule = elementList[0] - assert rule != None + assert rule is not None return rule def __antlrRuleConverter(pyparsingRules, antlrRule): @@ -161,11 +216,13 @@ def __antlrRuleConverter(pyparsingRules, antlrRule): def antlrConverter(antlrGrammarTree): pyparsingRules = {} + antlrTokens = {} for antlrToken in antlrGrammarTree.tokens: antlrTokens[antlrToken.token_ref] = antlrToken.lit for antlrTokenName, antlrToken in list(antlrTokens.items()): pyparsingRules[antlrTokenName] = Literal(antlrToken) + antlrRules = {} for antlrRule in antlrGrammarTree.rules: antlrRules[antlrRule.ruleName] = antlrRule @@ -173,12 +230,14 @@ def antlrConverter(antlrGrammarTree): for antlrRuleName, antlrRule in list(antlrRules.items()): pyparsingRule = __antlrRuleConverter(pyparsingRules, antlrRule) assert pyparsingRule != None - pyparsingRules[antlrRuleName] << pyparsingRule + pyparsingRules[antlrRuleName] <<= pyparsingRule + return pyparsingRules if __name__ == "__main__": - text = """grammar SimpleCalc; + text = """\ +grammar SimpleCalc; options { language = Python; @@ -216,8 +275,8 @@ fragment DIGIT : '0'..'9' ; grammar().validate() antlrGrammarTree = grammar().parseString(text) - print(antlrGrammarTree.asXML("antlrGrammarTree")) + print(antlrGrammarTree.dump()) pyparsingRules = antlrConverter(antlrGrammarTree) pyparsingRule = pyparsingRules["expr"] pyparsingTree = pyparsingRule.parseString("2 - 5 * 42 + 7 / 25") - print(pyparsingTree.asXML("pyparsingTree")) + print(pyparsingTree.dump()) diff --git a/examples/antlr_grammar_tests.py b/examples/antlr_grammar_tests.py index 1df28f9..57d6cb6 100644 --- a/examples/antlr_grammar_tests.py +++ b/examples/antlr_grammar_tests.py @@ -80,7 +80,11 @@ fragment DIGIT : '0'..'9' ;""" pyparsingRules = antlr_grammar.antlrConverter(antlrGrammarTree) pyparsingRule = pyparsingRules["expr"] pyparsingTree = pyparsingRule.parseString("2 - 5 * 42 + 7 / 25") - self.assertNotEqual(None, pyparsingTree) + pyparsingTreeList = pyparsingTree.asList() + print(pyparsingTreeList) + self.assertEqual(pyparsingTreeList, + [[[['2'], []], [['-', [['5'], [['*', ['4', '2']]]]], ['+', [['7'], [['/', ['2', '5']]]]]]]] + ) if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.testOptionsSpec'] |