diff options
Diffstat (limited to 'python/mllib')
-rw-r--r-- | python/mllib/__init__.py | 85 | ||||
-rw-r--r-- | python/mllib/dom.py | 310 | ||||
-rw-r--r-- | python/mllib/parsers.py | 139 | ||||
-rw-r--r-- | python/mllib/transforms.py | 164 |
4 files changed, 0 insertions, 698 deletions
diff --git a/python/mllib/__init__.py b/python/mllib/__init__.py deleted file mode 100644 index 9aa1e56e66..0000000000 --- a/python/mllib/__init__.py +++ /dev/null @@ -1,85 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -""" -This module provides document parsing and transformation utilities for -both SGML and XML. -""" - -import os, dom, transforms, parsers, sys -import xml.sax, types -from xml.sax.handler import ErrorHandler -from xml.sax.xmlreader import InputSource -from cStringIO import StringIO - -def transform(node, *args): - result = node - for t in args: - if isinstance(t, types.ClassType): - t = t() - result = result.dispatch(t) - return result - -def sgml_parse(source): - if isinstance(source, basestring): - source = StringIO(source) - fname = "<string>" - elif hasattr(source, "name"): - fname = source.name - p = parsers.SGMLParser() - num = 1 - for line in source: - p.feed(line) - p.parser.line(fname, num, None) - num += 1 - p.close() - return p.parser.tree - -class Resolver: - - def __init__(self, path): - self.path = path - - def resolveEntity(self, publicId, systemId): - for p in self.path: - fname = os.path.join(p, systemId) - if os.path.exists(fname): - source = InputSource(systemId) - source.setByteStream(open(fname)) - return source - return InputSource(systemId) - -def xml_parse(filename, path=()): - if sys.version_info[0:2] == (2,3): - # XXX: this is for older versions of python - source = "file://%s" % os.path.abspath(filename) - else: - source = filename - h = parsers.XMLParser() - p = xml.sax.make_parser() - p.setContentHandler(h) - p.setErrorHandler(ErrorHandler()) - p.setEntityResolver(Resolver(path)) - p.parse(source) - return h.parser.tree - -def sexp(node): - s = transforms.Sexp() - node.dispatch(s) - return s.out diff --git a/python/mllib/dom.py b/python/mllib/dom.py deleted file mode 100644 index 486f7082e1..0000000000 --- a/python/mllib/dom.py +++ /dev/null @@ -1,310 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -""" -Simple DOM for both SGML and XML documents. -""" - -from __future__ import division -from __future__ import generators -from __future__ import nested_scopes - -import transforms - -class Container: - - def __init__(self): - self.children = [] - - def add(self, child): - child.parent = self - self.children.append(child) - - def extend(self, children): - for child in children: - child.parent = self - self.children.append(child) - -class Component: - - def __init__(self): - self.parent = None - - def index(self): - if self.parent: - return self.parent.children.index(self) - else: - return 0 - - def _line(self, file, line, column): - self.file = file - self.line = line - self.column = column - -class DispatchError(Exception): - - def __init__(self, scope, f): - msg = "no such attribtue" - -class Dispatcher: - - def is_type(self, type): - cls = self - while cls != None: - if cls.type == type: - return True - cls = cls.base - return False - - def dispatch(self, f, attrs = ""): - cls = self - while cls != None: - if hasattr(f, cls.type): - return getattr(f, cls.type)(self) - else: - cls = cls.base - - cls = self - while cls != None: - if attrs: - sep = ", " - if cls.base == None: - sep += "or " - else: - sep = "" - attrs += "%s'%s'" % (sep, cls.type) - cls = cls.base - - raise AttributeError("'%s' object has no attribute %s" % - (f.__class__.__name__, attrs)) - -class Node(Container, Component, Dispatcher): - - type = "node" - base = None - - def __init__(self): - Container.__init__(self) - Component.__init__(self) - self.query = Query([self]) - - def __getitem__(self, name): - for nd in self.query[name]: - return nd - - def text(self): - return self.dispatch(transforms.Text()) - - def tag(self, name, *attrs, **kwargs): - t = Tag(name, *attrs, **kwargs) - self.add(t) - return t - - def data(self, s): - d = Data(s) - self.add(d) - return d - - def entity(self, s): - e = Entity(s) - self.add(e) - return e - -class Tree(Node): - - type = "tree" - base = Node - -class Tag(Node): - - type = "tag" - base = Node - - def __init__(self, _name, *attrs, **kwargs): - Node.__init__(self) - self.name = _name - self.attrs = list(attrs) - self.attrs.extend(kwargs.items()) - self.singleton = False - - def get_attr(self, name): - for k, v in self.attrs: - if name == k: - return v - - def _idx(self, attr): - idx = 0 - for k, v in self.attrs: - if k == attr: - return idx - idx += 1 - return None - - def set_attr(self, name, value): - idx = self._idx(name) - if idx is None: - self.attrs.append((name, value)) - else: - self.attrs[idx] = (name, value) - - def dispatch(self, f): - try: - attr = "do_" + self.name - method = getattr(f, attr) - except AttributeError: - return Dispatcher.dispatch(self, f, "'%s'" % attr) - return method(self) - -class Leaf(Component, Dispatcher): - - type = "leaf" - base = None - - def __init__(self, data): - assert isinstance(data, basestring) - self.data = data - -class Data(Leaf): - type = "data" - base = Leaf - -class Entity(Leaf): - type = "entity" - base = Leaf - -class Character(Leaf): - type = "character" - base = Leaf - -class Comment(Leaf): - type = "comment" - base = Leaf - -################### -## Query Classes ## -########################################################################### - -class Adder: - - def __add__(self, other): - return Sum(self, other) - -class Sum(Adder): - - def __init__(self, left, right): - self.left = left - self.right = right - - def __iter__(self): - for x in self.left: - yield x - for x in self.right: - yield x - -class View(Adder): - - def __init__(self, source): - self.source = source - -class Filter(View): - - def __init__(self, predicate, source): - View.__init__(self, source) - self.predicate = predicate - - def __iter__(self): - for nd in self.source: - if self.predicate(nd): yield nd - -class Flatten(View): - - def __iter__(self): - sources = [iter(self.source)] - while sources: - try: - nd = sources[-1].next() - if isinstance(nd, Tree): - sources.append(iter(nd.children)) - else: - yield nd - except StopIteration: - sources.pop() - -class Children(View): - - def __iter__(self): - for nd in self.source: - for child in nd.children: - yield child - -class Attributes(View): - - def __iter__(self): - for nd in self.source: - for a in nd.attrs: - yield a - -class Values(View): - - def __iter__(self): - for name, value in self.source: - yield value - -def flatten_path(path): - if isinstance(path, basestring): - for part in path.split("/"): - yield part - elif callable(path): - yield path - else: - for p in path: - for fp in flatten_path(p): - yield fp - -class Query(View): - - def __iter__(self): - for nd in self.source: - yield nd - - def __getitem__(self, path): - query = self.source - for p in flatten_path(path): - if callable(p): - select = Query - pred = p - source = query - elif isinstance(p, basestring): - if p[0] == "@": - select = Values - pred = lambda x, n=p[1:]: x[0] == n - source = Attributes(query) - elif p[0] == "#": - select = Query - pred = lambda x, t=p[1:]: x.is_type(t) - source = Children(query) - else: - select = Query - pred = lambda x, n=p: isinstance(x, Tag) and x.name == n - source = Flatten(Children(query)) - else: - raise ValueError(p) - query = select(Filter(pred, source)) - - return query diff --git a/python/mllib/parsers.py b/python/mllib/parsers.py deleted file mode 100644 index 3e7cc10dc2..0000000000 --- a/python/mllib/parsers.py +++ /dev/null @@ -1,139 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -""" -Parsers for SGML and XML to dom. -""" - -import sgmllib, xml.sax.handler -from dom import * - -class Parser: - - def __init__(self): - self.tree = Tree() - self.node = self.tree - self.nodes = [] - - def line(self, id, lineno, colno): - while self.nodes: - n = self.nodes.pop() - n._line(id, lineno, colno) - - def add(self, node): - self.node.add(node) - self.nodes.append(node) - - def start(self, name, attrs): - tag = Tag(name, *attrs) - self.add(tag) - self.node = tag - - def end(self, name): - self.balance(name) - self.node = self.node.parent - - def data(self, data): - children = self.node.children - if children and isinstance(children[-1], Data): - children[-1].data += data - else: - self.add(Data(data)) - - def comment(self, comment): - self.add(Comment(comment)) - - def entity(self, ref): - self.add(Entity(ref)) - - def character(self, ref): - self.add(Character(ref)) - - def balance(self, name = None): - while self.node != self.tree and name != self.node.name: - self.node.parent.extend(self.node.children) - del self.node.children[:] - self.node.singleton = True - self.node = self.node.parent - - -class SGMLParser(sgmllib.SGMLParser): - - def __init__(self, entitydefs = None): - sgmllib.SGMLParser.__init__(self) - if entitydefs == None: - self.entitydefs = {} - else: - self.entitydefs = entitydefs - self.parser = Parser() - - def unknown_starttag(self, name, attrs): - self.parser.start(name, attrs) - - def handle_data(self, data): - self.parser.data(data) - - def handle_comment(self, comment): - self.parser.comment(comment) - - def unknown_entityref(self, ref): - self.parser.entity(ref) - - def unknown_charref(self, ref): - self.parser.character(ref) - - def unknown_endtag(self, name): - self.parser.end(name) - - def close(self): - sgmllib.SGMLParser.close(self) - self.parser.balance() - assert self.parser.node == self.parser.tree - -class XMLParser(xml.sax.handler.ContentHandler): - - def __init__(self): - self.parser = Parser() - self.locator = None - - def line(self): - if self.locator != None: - self.parser.line(self.locator.getSystemId(), - self.locator.getLineNumber(), - self.locator.getColumnNumber()) - - def setDocumentLocator(self, locator): - self.locator = locator - - def startElement(self, name, attrs): - self.parser.start(name, attrs.items()) - self.line() - - def endElement(self, name): - self.parser.end(name) - self.line() - - def characters(self, content): - self.parser.data(content) - self.line() - - def skippedEntity(self, name): - self.parser.entity(name) - self.line() - diff --git a/python/mllib/transforms.py b/python/mllib/transforms.py deleted file mode 100644 index 69d99125e3..0000000000 --- a/python/mllib/transforms.py +++ /dev/null @@ -1,164 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -""" -Useful transforms for dom objects. -""" - -import dom -from cStringIO import StringIO - -class Visitor: - - def descend(self, node): - for child in node.children: - child.dispatch(self) - - def node(self, node): - self.descend(node) - - def leaf(self, leaf): - pass - -class Identity: - - def descend(self, node): - result = [] - for child in node.children: - result.append(child.dispatch(self)) - return result - - def default(self, tag): - result = dom.Tag(tag.name, *tag.attrs) - result.extend(self.descend(tag)) - return result - - def tree(self, tree): - result = dom.Tree() - result.extend(self.descend(tree)) - return result - - def tag(self, tag): - return self.default(tag) - - def leaf(self, leaf): - return leaf.__class__(leaf.data) - -class Sexp(Identity): - - def __init__(self): - self.stack = [] - self.level = 0 - self.out = "" - - def open(self, s): - self.out += "(%s" % s - self.level += len(s) + 1 - self.stack.append(s) - - def line(self, s = ""): - self.out = self.out.rstrip() - self.out += "\n" + " "*self.level + s - - def close(self): - s = self.stack.pop() - self.level -= len(s) + 1 - self.out = self.out.rstrip() - self.out += ")" - - def tree(self, tree): - self.open("+ ") - for child in tree.children: - self.line(); child.dispatch(self) - self.close() - - def tag(self, tag): - self.open("Node(%s) " % tag.name) - for child in tag.children: - self.line(); child.dispatch(self) - self.close() - - def leaf(self, leaf): - self.line("%s(%s)" % (leaf.__class__.__name__, leaf.data)) - -class Output: - - def descend(self, node): - out = StringIO() - for child in node.children: - out.write(child.dispatch(self)) - return out.getvalue() - - def default(self, tag): - out = StringIO() - out.write("<%s" % tag.name) - for k, v in tag.attrs: - out.write(' %s="%s"' % (k, v)) - out.write(">") - out.write(self.descend(tag)) - if not tag.singleton: - out.write("</%s>" % tag.name) - return out.getvalue() - - def tree(self, tree): - return self.descend(tree) - - def tag(self, tag): - return self.default(tag) - - def data(self, leaf): - return leaf.data - - def entity(self, leaf): - return "&%s;" % leaf.data - - def character(self, leaf): - raise Exception("TODO") - - def comment(self, leaf): - return "<!-- %s -->" % leaf.data - -class Empty(Output): - - def tag(self, tag): - return self.descend(tag) - - def data(self, leaf): - return "" - - def entity(self, leaf): - return "" - - def character(self, leaf): - return "" - - def comment(self, leaf): - return "" - -class Text(Empty): - - def data(self, leaf): - return leaf.data - - def entity(self, leaf): - return "&%s;" % leaf.data - - def character(self, leaf): - # XXX: is this right? - return "&#%s;" % leaf.data |