diff options
Diffstat (limited to 'Parser/spark.py')
| -rw-r--r-- | Parser/spark.py | 849 | 
1 files changed, 0 insertions, 849 deletions
| diff --git a/Parser/spark.py b/Parser/spark.py deleted file mode 100644 index 88c1a89a27..0000000000 --- a/Parser/spark.py +++ /dev/null @@ -1,849 +0,0 @@ -#  Copyright (c) 1998-2002 John Aycock -# -#  Permission is hereby granted, free of charge, to any person obtaining -#  a copy of this software and associated documentation files (the -#  "Software"), to deal in the Software without restriction, including -#  without limitation the rights to use, copy, modify, merge, publish, -#  distribute, sublicense, and/or sell copies of the Software, and to -#  permit persons to whom the Software is furnished to do so, subject to -#  the following conditions: -# -#  The above copyright notice and this permission notice shall be -#  included in all copies or substantial portions of the Software. -# -#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -#  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -#  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -#  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -__version__ = 'SPARK-0.7 (pre-alpha-5)' - -import re - -# Compatibility with older pythons. -def output(string='', end='\n'): -    sys.stdout.write(string + end) - -try: -    sorted -except NameError: -    def sorted(seq): -        seq2 = seq[:] -        seq2.sort() -        return seq2 - -def _namelist(instance): -    namelist, namedict, classlist = [], {}, [instance.__class__] -    for c in classlist: -        for b in c.__bases__: -            classlist.append(b) -        for name in c.__dict__.keys(): -            if name not in namedict: -                namelist.append(name) -                namedict[name] = 1 -    return namelist - -class GenericScanner: -    def __init__(self, flags=0): -        pattern = self.reflect() -        self.re = re.compile(pattern, re.VERBOSE|flags) - -        self.index2func = {} -        for name, number in self.re.groupindex.items(): -            self.index2func[number-1] = getattr(self, 't_' + name) - -    def makeRE(self, name): -        doc = getattr(self, name).__doc__ -        rv = '(?P<%s>%s)' % (name[2:], doc) -        return rv - -    def reflect(self): -        rv = [] -        for name in _namelist(self): -            if name[:2] == 't_' and name != 't_default': -                rv.append(self.makeRE(name)) - -        rv.append(self.makeRE('t_default')) -        return '|'.join(rv) - -    def error(self, s, pos): -        output("Lexical error at position %s" % pos) -        raise SystemExit - -    def tokenize(self, s): -        pos = 0 -        n = len(s) -        while pos < n: -            m = self.re.match(s, pos) -            if m is None: -                self.error(s, pos) - -            groups = m.groups() -            for i in range(len(groups)): -                if groups[i] and i in self.index2func: -                    self.index2func[i](groups[i]) -            pos = m.end() - -    def t_default(self, s): -        r'( . | \n )+' -        output("Specification error: unmatched input") -        raise SystemExit - -# -#  Extracted from GenericParser and made global so that [un]picking works. -# -class _State: -    def __init__(self, stateno, items): -        self.T, self.complete, self.items = [], [], items -        self.stateno = stateno - -class GenericParser: -    # -    #  An Earley parser, as per J. Earley, "An Efficient Context-Free -    #  Parsing Algorithm", CACM 13(2), pp. 94-102.  Also J. C. Earley, -    #  "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, -    #  Carnegie-Mellon University, August 1968.  New formulation of -    #  the parser according to J. Aycock, "Practical Earley Parsing -    #  and the SPARK Toolkit", Ph.D. thesis, University of Victoria, -    #  2001, and J. Aycock and R. N. Horspool, "Practical Earley -    #  Parsing", unpublished paper, 2001. -    # - -    def __init__(self, start): -        self.rules = {} -        self.rule2func = {} -        self.rule2name = {} -        self.collectRules() -        self.augment(start) -        self.ruleschanged = 1 - -    _NULLABLE = '\e_' -    _START = 'START' -    _BOF = '|-' - -    # -    #  When pickling, take the time to generate the full state machine; -    #  some information is then extraneous, too.  Unfortunately we -    #  can't save the rule2func map. -    # -    def __getstate__(self): -        if self.ruleschanged: -            # -            #  XXX - duplicated from parse() -            # -            self.computeNull() -            self.newrules = {} -            self.new2old = {} -            self.makeNewRules() -            self.ruleschanged = 0 -            self.edges, self.cores = {}, {} -            self.states = { 0: self.makeState0() } -            self.makeState(0, self._BOF) -        # -        #  XXX - should find a better way to do this.. -        # -        changes = 1 -        while changes: -            changes = 0 -            for k, v in self.edges.items(): -                if v is None: -                    state, sym = k -                    if state in self.states: -                        self.goto(state, sym) -                        changes = 1 -        rv = self.__dict__.copy() -        for s in self.states.values(): -            del s.items -        del rv['rule2func'] -        del rv['nullable'] -        del rv['cores'] -        return rv - -    def __setstate__(self, D): -        self.rules = {} -        self.rule2func = {} -        self.rule2name = {} -        self.collectRules() -        start = D['rules'][self._START][0][1][1]        # Blech. -        self.augment(start) -        D['rule2func'] = self.rule2func -        D['makeSet'] = self.makeSet_fast -        self.__dict__ = D - -    # -    #  A hook for GenericASTBuilder and GenericASTMatcher.  Mess -    #  thee not with this; nor shall thee toucheth the _preprocess -    #  argument to addRule. -    # -    def preprocess(self, rule, func):       return rule, func - -    def addRule(self, doc, func, _preprocess=1): -        fn = func -        rules = doc.split() - -        index = [] -        for i in range(len(rules)): -            if rules[i] == '::=': -                index.append(i-1) -        index.append(len(rules)) - -        for i in range(len(index)-1): -            lhs = rules[index[i]] -            rhs = rules[index[i]+2:index[i+1]] -            rule = (lhs, tuple(rhs)) - -            if _preprocess: -                rule, fn = self.preprocess(rule, func) - -            if lhs in self.rules: -                self.rules[lhs].append(rule) -            else: -                self.rules[lhs] = [ rule ] -            self.rule2func[rule] = fn -            self.rule2name[rule] = func.__name__[2:] -        self.ruleschanged = 1 - -    def collectRules(self): -        for name in _namelist(self): -            if name[:2] == 'p_': -                func = getattr(self, name) -                doc = func.__doc__ -                self.addRule(doc, func) - -    def augment(self, start): -        rule = '%s ::= %s %s' % (self._START, self._BOF, start) -        self.addRule(rule, lambda args: args[1], 0) - -    def computeNull(self): -        self.nullable = {} -        tbd = [] - -        for rulelist in self.rules.values(): -            lhs = rulelist[0][0] -            self.nullable[lhs] = 0 -            for rule in rulelist: -                rhs = rule[1] -                if len(rhs) == 0: -                    self.nullable[lhs] = 1 -                    continue -                # -                #  We only need to consider rules which -                #  consist entirely of nonterminal symbols. -                #  This should be a savings on typical -                #  grammars. -                # -                for sym in rhs: -                    if sym not in self.rules: -                        break -                else: -                    tbd.append(rule) -        changes = 1 -        while changes: -            changes = 0 -            for lhs, rhs in tbd: -                if self.nullable[lhs]: -                    continue -                for sym in rhs: -                    if not self.nullable[sym]: -                        break -                else: -                    self.nullable[lhs] = 1 -                    changes = 1 - -    def makeState0(self): -        s0 = _State(0, []) -        for rule in self.newrules[self._START]: -            s0.items.append((rule, 0)) -        return s0 - -    def finalState(self, tokens): -        # -        #  Yuck. -        # -        if len(self.newrules[self._START]) == 2 and len(tokens) == 0: -            return 1 -        start = self.rules[self._START][0][1][1] -        return self.goto(1, start) - -    def makeNewRules(self): -        worklist = [] -        for rulelist in self.rules.values(): -            for rule in rulelist: -                worklist.append((rule, 0, 1, rule)) - -        for rule, i, candidate, oldrule in worklist: -            lhs, rhs = rule -            n = len(rhs) -            while i < n: -                sym = rhs[i] -                if sym not in self.rules or \ -                   not self.nullable[sym]: -                    candidate = 0 -                    i = i + 1 -                    continue - -                newrhs = list(rhs) -                newrhs[i] = self._NULLABLE+sym -                newrule = (lhs, tuple(newrhs)) -                worklist.append((newrule, i+1, -                                 candidate, oldrule)) -                candidate = 0 -                i = i + 1 -            else: -                if candidate: -                    lhs = self._NULLABLE+lhs -                    rule = (lhs, rhs) -                if lhs in self.newrules: -                    self.newrules[lhs].append(rule) -                else: -                    self.newrules[lhs] = [ rule ] -                self.new2old[rule] = oldrule - -    def typestring(self, token): -        return None - -    def error(self, token): -        output("Syntax error at or near `%s' token" % token) -        raise SystemExit - -    def parse(self, tokens): -        sets = [ [(1,0), (2,0)] ] -        self.links = {} - -        if self.ruleschanged: -            self.computeNull() -            self.newrules = {} -            self.new2old = {} -            self.makeNewRules() -            self.ruleschanged = 0 -            self.edges, self.cores = {}, {} -            self.states = { 0: self.makeState0() } -            self.makeState(0, self._BOF) - -        for i in range(len(tokens)): -            sets.append([]) - -            if sets[i] == []: -                break -            self.makeSet(tokens[i], sets, i) -        else: -            sets.append([]) -            self.makeSet(None, sets, len(tokens)) - -        #_dump(tokens, sets, self.states) - -        finalitem = (self.finalState(tokens), 0) -        if finalitem not in sets[-2]: -            if len(tokens) > 0: -                self.error(tokens[i-1]) -            else: -                self.error(None) - -        return self.buildTree(self._START, finalitem, -                              tokens, len(sets)-2) - -    def isnullable(self, sym): -        # -        #  For symbols in G_e only.  If we weren't supporting 1.5, -        #  could just use sym.startswith(). -        # -        return self._NULLABLE == sym[0:len(self._NULLABLE)] - -    def skip(self, hs, pos=0): -        n = len(hs[1]) -        while pos < n: -            if not self.isnullable(hs[1][pos]): -                break -            pos = pos + 1 -        return pos - -    def makeState(self, state, sym): -        assert sym is not None -        # -        #  Compute \epsilon-kernel state's core and see if -        #  it exists already. -        # -        kitems = [] -        for rule, pos in self.states[state].items: -            lhs, rhs = rule -            if rhs[pos:pos+1] == (sym,): -                kitems.append((rule, self.skip(rule, pos+1))) -        core = kitems - -        core.sort() -        tcore = tuple(core) -        if tcore in self.cores: -            return self.cores[tcore] -        # -        #  Nope, doesn't exist.  Compute it and the associated -        #  \epsilon-nonkernel state together; we'll need it right away. -        # -        k = self.cores[tcore] = len(self.states) -        K, NK = _State(k, kitems), _State(k+1, []) -        self.states[k] = K -        predicted = {} - -        edges = self.edges -        rules = self.newrules -        for X in K, NK: -            worklist = X.items -            for item in worklist: -                rule, pos = item -                lhs, rhs = rule -                if pos == len(rhs): -                    X.complete.append(rule) -                    continue - -                nextSym = rhs[pos] -                key = (X.stateno, nextSym) -                if nextSym not in rules: -                    if key not in edges: -                        edges[key] = None -                        X.T.append(nextSym) -                else: -                    edges[key] = None -                    if nextSym not in predicted: -                        predicted[nextSym] = 1 -                        for prule in rules[nextSym]: -                            ppos = self.skip(prule) -                            new = (prule, ppos) -                            NK.items.append(new) -            # -            #  Problem: we know K needs generating, but we -            #  don't yet know about NK.  Can't commit anything -            #  regarding NK to self.edges until we're sure.  Should -            #  we delay committing on both K and NK to avoid this -            #  hacky code?  This creates other problems.. -            # -            if X is K: -                edges = {} - -        if NK.items == []: -            return k - -        # -        #  Check for \epsilon-nonkernel's core.  Unfortunately we -        #  need to know the entire set of predicted nonterminals -        #  to do this without accidentally duplicating states. -        # -        core = sorted(predicted.keys()) -        tcore = tuple(core) -        if tcore in self.cores: -            self.edges[(k, None)] = self.cores[tcore] -            return k - -        nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno -        self.edges.update(edges) -        self.states[nk] = NK -        return k - -    def goto(self, state, sym): -        key = (state, sym) -        if key not in self.edges: -            # -            #  No transitions from state on sym. -            # -            return None - -        rv = self.edges[key] -        if rv is None: -            # -            #  Target state isn't generated yet.  Remedy this. -            # -            rv = self.makeState(state, sym) -            self.edges[key] = rv -        return rv - -    def gotoT(self, state, t): -        return [self.goto(state, t)] - -    def gotoST(self, state, st): -        rv = [] -        for t in self.states[state].T: -            if st == t: -                rv.append(self.goto(state, t)) -        return rv - -    def add(self, set, item, i=None, predecessor=None, causal=None): -        if predecessor is None: -            if item not in set: -                set.append(item) -        else: -            key = (item, i) -            if item not in set: -                self.links[key] = [] -                set.append(item) -            self.links[key].append((predecessor, causal)) - -    def makeSet(self, token, sets, i): -        cur, next = sets[i], sets[i+1] - -        ttype = token is not None and self.typestring(token) or None -        if ttype is not None: -            fn, arg = self.gotoT, ttype -        else: -            fn, arg = self.gotoST, token - -        for item in cur: -            ptr = (item, i) -            state, parent = item -            add = fn(state, arg) -            for k in add: -                if k is not None: -                    self.add(next, (k, parent), i+1, ptr) -                    nk = self.goto(k, None) -                    if nk is not None: -                        self.add(next, (nk, i+1)) - -            if parent == i: -                continue - -            for rule in self.states[state].complete: -                lhs, rhs = rule -                for pitem in sets[parent]: -                    pstate, pparent = pitem -                    k = self.goto(pstate, lhs) -                    if k is not None: -                        why = (item, i, rule) -                        pptr = (pitem, parent) -                        self.add(cur, (k, pparent), -                                 i, pptr, why) -                        nk = self.goto(k, None) -                        if nk is not None: -                            self.add(cur, (nk, i)) - -    def makeSet_fast(self, token, sets, i): -        # -        #  Call *only* when the entire state machine has been built! -        #  It relies on self.edges being filled in completely, and -        #  then duplicates and inlines code to boost speed at the -        #  cost of extreme ugliness. -        # -        cur, next = sets[i], sets[i+1] -        ttype = token is not None and self.typestring(token) or None - -        for item in cur: -            ptr = (item, i) -            state, parent = item -            if ttype is not None: -                k = self.edges.get((state, ttype), None) -                if k is not None: -                    #self.add(next, (k, parent), i+1, ptr) -                    #INLINED --v -                    new = (k, parent) -                    key = (new, i+1) -                    if new not in next: -                        self.links[key] = [] -                        next.append(new) -                    self.links[key].append((ptr, None)) -                    #INLINED --^ -                    #nk = self.goto(k, None) -                    nk = self.edges.get((k, None), None) -                    if nk is not None: -                        #self.add(next, (nk, i+1)) -                        #INLINED --v -                        new = (nk, i+1) -                        if new not in next: -                            next.append(new) -                        #INLINED --^ -            else: -                add = self.gotoST(state, token) -                for k in add: -                    if k is not None: -                        self.add(next, (k, parent), i+1, ptr) -                        #nk = self.goto(k, None) -                        nk = self.edges.get((k, None), None) -                        if nk is not None: -                            self.add(next, (nk, i+1)) - -            if parent == i: -                continue - -            for rule in self.states[state].complete: -                lhs, rhs = rule -                for pitem in sets[parent]: -                    pstate, pparent = pitem -                    #k = self.goto(pstate, lhs) -                    k = self.edges.get((pstate, lhs), None) -                    if k is not None: -                        why = (item, i, rule) -                        pptr = (pitem, parent) -                        #self.add(cur, (k, pparent), -                        #        i, pptr, why) -                        #INLINED --v -                        new = (k, pparent) -                        key = (new, i) -                        if new not in cur: -                            self.links[key] = [] -                            cur.append(new) -                        self.links[key].append((pptr, why)) -                        #INLINED --^ -                        #nk = self.goto(k, None) -                        nk = self.edges.get((k, None), None) -                        if nk is not None: -                            #self.add(cur, (nk, i)) -                            #INLINED --v -                            new = (nk, i) -                            if new not in cur: -                                cur.append(new) -                            #INLINED --^ - -    def predecessor(self, key, causal): -        for p, c in self.links[key]: -            if c == causal: -                return p -        assert 0 - -    def causal(self, key): -        links = self.links[key] -        if len(links) == 1: -            return links[0][1] -        choices = [] -        rule2cause = {} -        for p, c in links: -            rule = c[2] -            choices.append(rule) -            rule2cause[rule] = c -        return rule2cause[self.ambiguity(choices)] - -    def deriveEpsilon(self, nt): -        if len(self.newrules[nt]) > 1: -            rule = self.ambiguity(self.newrules[nt]) -        else: -            rule = self.newrules[nt][0] -        #output(rule) - -        rhs = rule[1] -        attr = [None] * len(rhs) - -        for i in range(len(rhs)-1, -1, -1): -            attr[i] = self.deriveEpsilon(rhs[i]) -        return self.rule2func[self.new2old[rule]](attr) - -    def buildTree(self, nt, item, tokens, k): -        state, parent = item - -        choices = [] -        for rule in self.states[state].complete: -            if rule[0] == nt: -                choices.append(rule) -        rule = choices[0] -        if len(choices) > 1: -            rule = self.ambiguity(choices) -        #output(rule) - -        rhs = rule[1] -        attr = [None] * len(rhs) - -        for i in range(len(rhs)-1, -1, -1): -            sym = rhs[i] -            if sym not in self.newrules: -                if sym != self._BOF: -                    attr[i] = tokens[k-1] -                    key = (item, k) -                    item, k = self.predecessor(key, None) -            #elif self.isnullable(sym): -            elif self._NULLABLE == sym[0:len(self._NULLABLE)]: -                attr[i] = self.deriveEpsilon(sym) -            else: -                key = (item, k) -                why = self.causal(key) -                attr[i] = self.buildTree(sym, why[0], -                                         tokens, why[1]) -                item, k = self.predecessor(key, why) -        return self.rule2func[self.new2old[rule]](attr) - -    def ambiguity(self, rules): -        # -        #  XXX - problem here and in collectRules() if the same rule -        #        appears in >1 method.  Also undefined results if rules -        #        causing the ambiguity appear in the same method. -        # -        sortlist = [] -        name2index = {} -        for i in range(len(rules)): -            lhs, rhs = rule = rules[i] -            name = self.rule2name[self.new2old[rule]] -            sortlist.append((len(rhs), name)) -            name2index[name] = i -        sortlist.sort() -        list = [b for a, b in sortlist] -        return rules[name2index[self.resolve(list)]] - -    def resolve(self, list): -        # -        #  Resolve ambiguity in favor of the shortest RHS. -        #  Since we walk the tree from the top down, this -        #  should effectively resolve in favor of a "shift". -        # -        return list[0] - -# -#  GenericASTBuilder automagically constructs a concrete/abstract syntax tree -#  for a given input.  The extra argument is a class (not an instance!) -#  which supports the "__setslice__" and "__len__" methods. -# -#  XXX - silently overrides any user code in methods. -# - -class GenericASTBuilder(GenericParser): -    def __init__(self, AST, start): -        GenericParser.__init__(self, start) -        self.AST = AST - -    def preprocess(self, rule, func): -        rebind = lambda lhs, self=self: \ -                        lambda args, lhs=lhs, self=self: \ -                                self.buildASTNode(args, lhs) -        lhs, rhs = rule -        return rule, rebind(lhs) - -    def buildASTNode(self, args, lhs): -        children = [] -        for arg in args: -            if isinstance(arg, self.AST): -                children.append(arg) -            else: -                children.append(self.terminal(arg)) -        return self.nonterminal(lhs, children) - -    def terminal(self, token):      return token - -    def nonterminal(self, type, args): -        rv = self.AST(type) -        rv[:len(args)] = args -        return rv - -# -#  GenericASTTraversal is a Visitor pattern according to Design Patterns.  For -#  each node it attempts to invoke the method n_<node type>, falling -#  back onto the default() method if the n_* can't be found.  The preorder -#  traversal also looks for an exit hook named n_<node type>_exit (no default -#  routine is called if it's not found).  To prematurely halt traversal -#  of a subtree, call the prune() method -- this only makes sense for a -#  preorder traversal.  Node type is determined via the typestring() method. -# - -class GenericASTTraversalPruningException: -    pass - -class GenericASTTraversal: -    def __init__(self, ast): -        self.ast = ast - -    def typestring(self, node): -        return node.type - -    def prune(self): -        raise GenericASTTraversalPruningException - -    def preorder(self, node=None): -        if node is None: -            node = self.ast - -        try: -            name = 'n_' + self.typestring(node) -            if hasattr(self, name): -                func = getattr(self, name) -                func(node) -            else: -                self.default(node) -        except GenericASTTraversalPruningException: -            return - -        for kid in node: -            self.preorder(kid) - -        name = name + '_exit' -        if hasattr(self, name): -            func = getattr(self, name) -            func(node) - -    def postorder(self, node=None): -        if node is None: -            node = self.ast - -        for kid in node: -            self.postorder(kid) - -        name = 'n_' + self.typestring(node) -        if hasattr(self, name): -            func = getattr(self, name) -            func(node) -        else: -            self.default(node) - - -    def default(self, node): -        pass - -# -#  GenericASTMatcher.  AST nodes must have "__getitem__" and "__cmp__" -#  implemented. -# -#  XXX - makes assumptions about how GenericParser walks the parse tree. -# - -class GenericASTMatcher(GenericParser): -    def __init__(self, start, ast): -        GenericParser.__init__(self, start) -        self.ast = ast - -    def preprocess(self, rule, func): -        rebind = lambda func, self=self: \ -                        lambda args, func=func, self=self: \ -                                self.foundMatch(args, func) -        lhs, rhs = rule -        rhslist = list(rhs) -        rhslist.reverse() - -        return (lhs, tuple(rhslist)), rebind(func) - -    def foundMatch(self, args, func): -        func(args[-1]) -        return args[-1] - -    def match_r(self, node): -        self.input.insert(0, node) -        children = 0 - -        for child in node: -            if children == 0: -                self.input.insert(0, '(') -            children = children + 1 -            self.match_r(child) - -        if children > 0: -            self.input.insert(0, ')') - -    def match(self, ast=None): -        if ast is None: -            ast = self.ast -        self.input = [] - -        self.match_r(ast) -        self.parse(self.input) - -    def resolve(self, list): -        # -        #  Resolve ambiguity in favor of the longest RHS. -        # -        return list[-1] - -def _dump(tokens, sets, states): -    for i in range(len(sets)): -        output('set %d' % i) -        for item in sets[i]: -            output('\t', item) -            for (lhs, rhs), pos in states[item[0]].items: -                output('\t\t', lhs, '::=', end='') -                output(' '.join(rhs[:pos]), end='') -                output('.', end='') -                output(' '.join(rhs[pos:])) -        if i < len(tokens): -            output() -            output('token %s' % str(tokens[i])) -            output() | 
