# This module tries to implement the ISO 14977 standard with pyparsing.
# pyparsing version 1.1 or greater is required.

# ISO 14977 standardizes the Extended Backus-Naur Form (EBNF) syntax.
# You can read a final draft version here:
# https://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html
#
# Submitted 2004 by Seo Sanghyeon
#

from pyparsing import *


# Names of the grammar elements defined below.  For every name listed here
# the setup loop near the end of the module attaches the parse action called
# ``do_<name>`` to the expression called ``<name>``.
all_names = """
integer
meta_identifier
terminal_string
optional_sequence
repeated_sequence
grouped_sequence
syntactic_primary
syntactic_factor
syntactic_term
single_definition
definitions_list
syntax_rule
syntax
""".split()


# --- Grammar of EBNF itself -------------------------------------------------

integer = Word(nums)
meta_identifier = Word(alphas, alphanums + "_")
# A terminal is quoted with either single or double quotes; the quotes
# themselves are dropped from the results.
terminal_string = (Suppress("'") + CharsNotIn("'") + Suppress("'")) ^ (
    Suppress('"') + CharsNotIn('"') + Suppress('"')
)
# definitions_list is recursive (it appears inside the bracketed forms), so
# it is declared first and filled in below.
definitions_list = Forward()
optional_sequence = Suppress("[") + definitions_list + Suppress("]")
repeated_sequence = Suppress("{") + definitions_list + Suppress("}")
grouped_sequence = Suppress("(") + definitions_list + Suppress(")")
syntactic_primary = (
    optional_sequence
    ^ repeated_sequence
    ^ grouped_sequence
    ^ meta_identifier
    ^ terminal_string
)
syntactic_factor = Optional(integer + Suppress("*")) + syntactic_primary
syntactic_term = syntactic_factor + Optional(Suppress("-") + syntactic_factor)
single_definition = delimitedList(syntactic_term, ",")
definitions_list << delimitedList(single_definition, "|")
syntax_rule = meta_identifier + Suppress("=") + definitions_list + Suppress(";")

# "(* ... *)" comments: any run of non-"*" characters, or a "*" that is not
# immediately followed by ")".
ebnfComment = (
    ("(*" + ZeroOrMore(CharsNotIn("*") | ("*" + ~Literal(")"))) + "*)")
    .streamline()
    .setName("ebnfComment")
)

syntax = OneOrMore(syntax_rule)
syntax.ignore(ebnfComment)


# --- Parse actions ----------------------------------------------------------
#
# Each action receives the (str, loc, toks) triple that pyparsing passes to
# parse actions and returns the pyparsing expression (or value) that replaces
# the matched tokens.  Parsing an EBNF text therefore *builds a parser* for
# the language that the EBNF text describes.


def do_integer(str, loc, toks):
    """Convert the matched digit string to an ``int``."""
    return int(toks[0])


def do_meta_identifier(str, loc, toks):
    """Return the expression registered for the identifier ``toks[0]``.

    An identifier seen for the first time gets a fresh ``Forward``
    placeholder stored in ``symbol_table``; ``forward_count`` is incremented
    so that ``parse`` can detect placeholders that never receive a
    definition.
    """
    if toks[0] in symbol_table:
        return symbol_table[toks[0]]
    else:
        forward_count.value += 1
        symbol_table[toks[0]] = Forward()
        return symbol_table[toks[0]]


def do_terminal_string(str, loc, toks):
    """Turn the text between the quotes into a ``Literal`` matcher."""
    return Literal(toks[0])


def do_optional_sequence(str, loc, toks):
    """``[ ... ]`` becomes ``Optional`` of the enclosed expression."""
    return Optional(toks[0])


def do_repeated_sequence(str, loc, toks):
    """``{ ... }`` becomes ``ZeroOrMore`` of the enclosed expression."""
    return ZeroOrMore(toks[0])


def do_grouped_sequence(str, loc, toks):
    """``( ... )`` becomes a ``Group`` of the enclosed expression."""
    return Group(toks[0])


def do_syntactic_primary(str, loc, toks):
    """Pass the single already-built primary expression through."""
    return toks[0]


def do_syntactic_factor(str, loc, toks):
    """Build ``n * primary`` as an ``And`` of ``n`` copies of the primary."""
    if len(toks) == 2:
        # integer * syntactic_primary
        return And([toks[1]] * toks[0])
    else:
        # syntactic_primary
        return [toks[0]]


def do_syntactic_term(str, loc, toks):
    """Build ``factor - exception`` as "not exception, then factor"."""
    if len(toks) == 2:
        # syntactic_factor - syntactic_factor
        return NotAny(toks[1]) + toks[0]
    else:
        # syntactic_factor
        return [toks[0]]


def do_single_definition(str, loc, toks):
    """Combine comma-separated terms into an ``And`` sequence."""
    toks = toks.asList()
    if len(toks) > 1:
        # syntactic_term , syntactic_term , ...
        return And(toks)
    else:
        # syntactic_term
        return [toks[0]]


def do_definitions_list(str, loc, toks):
    """Combine ``|``-separated definitions into an ``Or`` alternation."""
    toks = toks.asList()
    if len(toks) > 1:
        # single_definition | single_definition | ...
        return Or(toks)
    else:
        # single_definition
        return [toks[0]]


def do_syntax_rule(str, loc, toks):
    """Bind a rule body to the placeholder created for its name.

    Raises:
        AssertionError: "Duplicate definition" if the name is already bound,
            either by an earlier rule or by an entry supplied through
            ``given_table``.
    """
    # meta_identifier = definitions_list ;
    target = toks[0]
    # Only a still-empty Forward may be defined here.  Entries supplied by the
    # caller need not be Forward objects (and then may have no ``expr``
    # attribute at all), so check the type before touching ``expr``.
    assert isinstance(target, Forward) and target.expr is None, "Duplicate definition"
    forward_count.value -= 1
    target << toks[1]
    return [target]


def do_syntax(str, loc, toks):
    """Replace the parsed rules with the table of all defined symbols."""
    # syntax_rule syntax_rule ...
    return symbol_table


# Module-level state shared by the parse actions.  ``parse`` resets both
# before every run.
symbol_table = {}


class forward_count:
    """Holder for the number of placeholders still awaiting a definition."""

    value = 0


# Name every grammar element and attach its ``do_<name>`` parse action.
for name in all_names:
    expr = vars()[name]
    action = vars()["do_" + name]
    expr.setName(name)
    expr.setParseAction(action)
    # ~ expr.setDebug()


def parse(ebnf, given_table=None):
    """Parse EBNF text and return a parser for each rule it defines.

    Args:
        ebnf: The EBNF grammar text.
        given_table: Optional mapping of rule name to an already-built
            pyparsing expression.  These names may be referenced by the
            grammar without being defined in ``ebnf``.

    Returns:
        A new ``dict`` mapping every rule name (including the entries of
        ``given_table``) to its pyparsing expression.  The dict is a copy of
        the module's internal table, so it stays intact when ``parse`` is
        called again.

    Raises:
        AssertionError: "Missing definition" if an identifier is referenced
            but never defined, or "Duplicate definition" if a name is
            defined more than once.
    """
    symbol_table.clear()
    if given_table is not None:
        symbol_table.update(given_table)
    forward_count.value = 0
    # The ``syntax`` action returns the shared module-level table; copy it so
    # the caller's result is not wiped by the ``clear()`` of a later call.
    table = dict(syntax.parseString(ebnf)[0])
    assert forward_count.value == 0, "Missing definition"
    for name in table:
        expr = table[name]
        expr.setName(name)
        # ~ expr.setDebug()
    return table