summaryrefslogtreecommitdiff
path: root/python/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'python/mllib')
-rw-r--r--python/mllib/__init__.py85
-rw-r--r--python/mllib/dom.py310
-rw-r--r--python/mllib/parsers.py139
-rw-r--r--python/mllib/transforms.py164
4 files changed, 0 insertions, 698 deletions
diff --git a/python/mllib/__init__.py b/python/mllib/__init__.py
deleted file mode 100644
index 9aa1e56e66..0000000000
--- a/python/mllib/__init__.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-"""
-This module provides document parsing and transformation utilities for
-both SGML and XML.
-"""
-
-import os, dom, transforms, parsers, sys
-import xml.sax, types
-from xml.sax.handler import ErrorHandler
-from xml.sax.xmlreader import InputSource
-from cStringIO import StringIO
-
def transform(node, *args):
    """
    Apply a sequence of transforms to ``node`` and return the final result.

    Each transform is dispatched against the result of the previous one,
    starting with ``node`` itself.  A transform may be passed either as an
    instance or as a class; classes are instantiated with no arguments
    before being used.  With no transforms, ``node`` is returned unchanged.
    """
    import inspect

    result = node
    for t in args:
        # inspect.isclass recognises classic and new-style classes alike.
        # The previous types.ClassType check only matched classic classes,
        # so a new-style transform class was dispatched uninstantiated.
        if inspect.isclass(t):
            t = t()
        result = result.dispatch(t)
    return result
-
def sgml_parse(source):
    """
    Parse SGML and return the resulting tree.

    ``source`` is either a string containing the document or any object
    that yields lines when iterated (for example an open file).  The
    document is fed to the parser one line at a time, and after each line
    the nodes created so far are stamped with the source name and the
    current line number.
    """
    if isinstance(source, basestring):
        source = StringIO(source)
        fname = "<string>"
    else:
        # Line sources without a ``name`` attribute (lists of lines,
        # in-memory buffers) used to leave fname unbound and crash on the
        # first line; fall back to a placeholder instead.
        fname = getattr(source, "name", "<unknown>")
    p = parsers.SGMLParser()
    num = 1
    for line in source:
        p.feed(line)
        p.parser.line(fname, num, None)
        num += 1
    p.close()
    return p.parser.tree
-
class Resolver:
    """
    Entity resolver that looks system identifiers up on a search path.

    ``path`` is an iterable of directory names that are tried in order.
    """

    def __init__(self, path):
        self.path = path

    def resolveEntity(self, publicId, systemId):
        """
        Return an InputSource for ``systemId``.

        The first directory on the search path that contains ``systemId``
        supplies the byte stream.  If none does, a plain InputSource for
        ``systemId`` is returned with no stream attached.
        """
        for p in self.path:
            fname = os.path.join(p, systemId)
            if os.path.exists(fname):
                source = InputSource(systemId)
                # A byte stream must deliver raw bytes, so open in binary
                # mode; text mode would translate line endings (or decode)
                # before the parser sees the data.
                source.setByteStream(open(fname, "rb"))
                return source
        return InputSource(systemId)
-
def xml_parse(filename, path=()):
    """
    Parse the XML document ``filename`` and return the resulting tree.

    ``path`` is the list of directories handed to Resolver for locating
    external entities.
    """
    source = filename
    if sys.version_info[:2] == (2,3):
        # XXX: this is for older versions of python
        source = "file://%s" % os.path.abspath(filename)

    handler = parsers.XMLParser()
    reader = xml.sax.make_parser()
    reader.setContentHandler(handler)
    reader.setErrorHandler(ErrorHandler())
    reader.setEntityResolver(Resolver(path))
    reader.parse(source)
    return handler.parser.tree
-
def sexp(node):
    """Dispatch ``node`` to a transforms.Sexp and return the text it built."""
    printer = transforms.Sexp()
    node.dispatch(printer)
    return printer.out
diff --git a/python/mllib/dom.py b/python/mllib/dom.py
deleted file mode 100644
index 486f7082e1..0000000000
--- a/python/mllib/dom.py
+++ /dev/null
@@ -1,310 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-"""
-Simple DOM for both SGML and XML documents.
-"""
-
-from __future__ import division
-from __future__ import generators
-from __future__ import nested_scopes
-
-import transforms
-
class Container:
    """Mixin for objects that own an ordered list of children."""

    def __init__(self):
        self.children = []

    def add(self, child):
        """Make ``child`` the last child of this container."""
        child.parent = self
        self.children.append(child)

    def extend(self, children):
        """Adopt every item of ``children``, in order."""
        adopt = self.children.append
        for kid in children:
            kid.parent = self
            adopt(kid)
-
class Component:
    """Mixin for objects that can be placed inside a parent's children."""

    def __init__(self):
        self.parent = None

    def index(self):
        """Return this object's position among its parent's children.

        An object without a parent reports position 0.
        """
        if not self.parent:
            return 0
        return self.parent.children.index(self)

    def _line(self, file, line, column):
        """Record the source position associated with this object."""
        self.file, self.line, self.column = file, line, column
-
class DispatchError(Exception):
    """
    Error describing a failed dispatch.

    ``scope`` and ``f`` are kept on the instance so that handlers can
    inspect them.
    """

    def __init__(self, scope, f):
        # The message used to be misspelled and then thrown away because
        # the base initialiser was never called; pass it on so str() of
        # the exception is meaningful.
        msg = "no such attribute"
        Exception.__init__(self, msg)
        self.scope = scope
        self.f = f
-
class Dispatcher:
    """
    Mixin that routes an object to a handler method of a visitor.

    Users provide a ``type`` name and a ``base`` link (the next entry in
    the chain, or None at the root).  Lookups start at the object itself
    and follow ``base`` outward.
    """

    def is_type(self, type):
        """Return True if ``type`` names this object or one of its bases."""
        link = self
        while link is not None:
            if link.type == type:
                return True
            link = link.base
        return False

    def dispatch(self, f, attrs = ""):
        """
        Call the first method of ``f`` named after a type in the chain.

        The method receives this object as its only argument and its
        result is returned.  If ``f`` has none of the methods, an
        AttributeError listing every name tried is raised; ``attrs`` is
        text placed at the front of that list.
        """
        link = self
        while link is not None:
            if hasattr(f, link.type):
                return getattr(f, link.type)(self)
            link = link.base

        # Nothing matched: spell out the candidates as "'a', 'b', or 'c'".
        link = self
        while link is not None:
            if not attrs:
                sep = ""
            elif link.base is None:
                sep = ", or "
            else:
                sep = ", "
            attrs += "%s'%s'" % (sep, link.type)
            link = link.base

        raise AttributeError("'%s' object has no attribute %s" %
                             (f.__class__.__name__, attrs))
-
class Node(Container, Component, Dispatcher):
    """Document node that can hold children and be queried by path."""

    type = "node"
    base = None

    def __init__(self):
        Container.__init__(self)
        Component.__init__(self)
        # Query rooted at this node; used by __getitem__.
        self.query = Query([self])

    def __getitem__(self, name):
        """Return the first result of querying ``name``, or None."""
        for match in self.query[name]:
            return match
        return None

    def text(self):
        """Return the result of dispatching this node to transforms.Text."""
        return self.dispatch(transforms.Text())

    def tag(self, name, *attrs, **kwargs):
        """Create a Tag, append it as a child and return it."""
        child = Tag(name, *attrs, **kwargs)
        self.add(child)
        return child

    def data(self, s):
        """Create a Data leaf, append it as a child and return it."""
        child = Data(s)
        self.add(child)
        return child

    def entity(self, s):
        """Create an Entity leaf, append it as a child and return it."""
        child = Entity(s)
        self.add(child)
        return child
-
class Tree(Node):
    """Node dispatched under the name "tree"."""

    base = Node
    type = "tree"
-
class Tag(Node):
    """
    Named node carrying an ordered list of (name, value) attribute pairs.
    """

    type = "tag"
    base = Node

    def __init__(self, _name, *attrs, **kwargs):
        Node.__init__(self)
        self.name = _name
        # Positional pairs first, keyword attributes after them.
        self.attrs = list(attrs) + list(kwargs.items())
        self.singleton = False

    def get_attr(self, name):
        """Return the value of the first attribute called ``name``, or None."""
        for key, value in self.attrs:
            if name == key:
                return value
        return None

    def _idx(self, attr):
        """Return the position of attribute ``attr`` in attrs, or None."""
        for pos, (key, _value) in enumerate(self.attrs):
            if key == attr:
                return pos
        return None

    def set_attr(self, name, value):
        """Replace attribute ``name`` in place, or append it if absent."""
        pair = (name, value)
        pos = self._idx(name)
        if pos is None:
            self.attrs.append(pair)
        else:
            self.attrs[pos] = pair

    def dispatch(self, f):
        """
        Call ``f.do_<name>`` with this tag if ``f`` has such a method.

        Otherwise fall back to the type-based lookup in Dispatcher,
        passing the method name along for its error message.
        """
        try:
            attr = "do_" + self.name
            method = getattr(f, attr)
        except AttributeError:
            return Dispatcher.dispatch(self, f, "'%s'" % attr)
        else:
            return method(self)
-
class Leaf(Component, Dispatcher):
    """Childless document item holding a string in ``data``."""

    type = "leaf"
    base = None

    def __init__(self, data):
        assert isinstance(data, basestring)
        # Run the Component initialiser so ``parent`` exists (as None)
        # before the leaf is added to a container; without it, index()
        # on a detached leaf raised AttributeError.
        Component.__init__(self)
        self.data = data
-
class Data(Leaf):
    """Leaf dispatched under the name "data"."""

    base = Leaf
    type = "data"
-
class Entity(Leaf):
    """Leaf dispatched under the name "entity"."""

    base = Leaf
    type = "entity"
-
class Character(Leaf):
    """Leaf dispatched under the name "character"."""

    base = Leaf
    type = "character"
-
class Comment(Leaf):
    """Leaf dispatched under the name "comment"."""

    base = Leaf
    type = "comment"
-
-###################
-## Query Classes ##
-###########################################################################
-
class Adder:
    """Mixin that makes ``a + b`` build a Sum of the two operands."""

    def __add__(self, other):
        return Sum(left=self, right=other)
-
class Sum(Adder):
    """Iterable that yields everything in ``left`` followed by ``right``."""

    def __init__(self, left, right):
        self.left, self.right = left, right

    def __iter__(self):
        for side in (self.left, self.right):
            for item in side:
                yield item
-
class View(Adder):
    """Base for iterables derived from another iterable, ``source``."""

    def __init__(self, source):
        self.source = source
-
class Filter(View):
    """View yielding only the source items for which ``predicate`` is true."""

    def __init__(self, predicate, source):
        View.__init__(self, source)
        self.predicate = predicate

    def __iter__(self):
        for item in self.source:
            if not self.predicate(item):
                continue
            yield item
-
class Flatten(View):
    """
    View that replaces every Tree in the source by its children.

    The replacement is applied recursively and in document order; items
    that are not Tree instances are yielded as they are.
    """

    def __iter__(self):
        def walk(nodes):
            for nd in nodes:
                if isinstance(nd, Tree):
                    for inner in walk(nd.children):
                        yield inner
                else:
                    yield nd

        return walk(self.source)
-
class Children(View):
    """View over the children of every node in the source, in order."""

    def __iter__(self):
        for parent in self.source:
            for kid in parent.children:
                yield kid
-
class Attributes(View):
    """View over the ``attrs`` entries of every node in the source."""

    def __iter__(self):
        for owner in self.source:
            for pair in owner.attrs:
                yield pair
-
class Values(View):
    """View yielding the second element of each pair in the source."""

    def __iter__(self):
        for _name, value in self.source:
            yield value
-
def flatten_path(path):
    """
    Yield the individual steps of a query path.

    ``path`` may be a string of "/"-separated steps, a callable (yielded
    as a single step), or an iterable of further paths, which are
    flattened recursively in order.
    """
    if isinstance(path, basestring):
        for part in path.split("/"):
            # Leading, trailing or doubled slashes (and the empty string)
            # produce empty segments.  Query.__getitem__ inspects part[0]
            # and would fail with IndexError on them, so drop them here.
            if part:
                yield part
    elif callable(path):
        yield path
    else:
        for p in path:
            for fp in flatten_path(p):
                yield fp
-
class Query(View):
    """
    View that can be narrowed step by step with ``query[path]``.

    Each step of the path (see flatten_path) is one of:

      callable  -- keep the items of the current query it accepts
      "@name"   -- values of the attributes called ``name``
      "#type"   -- children for which is_type(type) is true
      "name"    -- Tag children called ``name``, with Tree children
                   replaced by their own children first
    """

    def __iter__(self):
        for item in self.source:
            yield item

    def __getitem__(self, path):
        query = self.source
        for p in flatten_path(path):
            if callable(p):
                select, pred, source = Query, p, query
            elif not isinstance(p, basestring):
                raise ValueError(p)
            elif p[0] == "@":
                select = Values
                pred = lambda pair, wanted=p[1:]: pair[0] == wanted
                source = Attributes(query)
            elif p[0] == "#":
                select = Query
                pred = lambda nd, wanted=p[1:]: nd.is_type(wanted)
                source = Children(query)
            else:
                select = Query
                pred = lambda nd, wanted=p: isinstance(nd, Tag) and nd.name == wanted
                source = Flatten(Children(query))
            query = select(Filter(pred, source))

        return query
diff --git a/python/mllib/parsers.py b/python/mllib/parsers.py
deleted file mode 100644
index 3e7cc10dc2..0000000000
--- a/python/mllib/parsers.py
+++ /dev/null
@@ -1,139 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-"""
-Parsers for SGML and XML to dom.
-"""
-
-import sgmllib, xml.sax.handler
-from dom import *
-
class Parser:
    """
    Builds a dom tree from a stream of start/end/data style events.

    ``tree`` is the root, ``node`` is the element currently being filled,
    and ``nodes`` collects nodes that have not yet been given a source
    position by line().
    """

    def __init__(self):
        self.tree = Tree()
        self.node = self.tree
        self.nodes = []

    def line(self, id, lineno, colno):
        """Stamp every node added since the last call with this position."""
        while self.nodes:
            n = self.nodes.pop()
            n._line(id, lineno, colno)

    def add(self, node):
        """Append ``node`` to the current element and queue it for line()."""
        self.node.add(node)
        self.nodes.append(node)

    def start(self, name, attrs):
        """Open a new Tag under the current element and descend into it."""
        tag = Tag(name, *attrs)
        self.add(tag)
        self.node = tag

    def end(self, name):
        """
        Close the innermost open element called ``name``.

        Elements opened inside it that were never closed are collapsed
        by balance().  An end tag with no matching open element is
        ignored.
        """
        # Without this check a stray end tag made balance() unwind all the
        # way to the root and then set self.node to the root's parent
        # (None), which broke every later event.
        if not self._is_open(name):
            return
        self.balance(name)
        self.node = self.node.parent

    def _is_open(self, name):
        """Return True if an element called ``name`` is currently open."""
        node = self.node
        while node != self.tree:
            if node.name == name:
                return True
            node = node.parent
        return False

    def data(self, data):
        """Add text, merging it into an immediately preceding Data child."""
        children = self.node.children
        if children and isinstance(children[-1], Data):
            children[-1].data += data
        else:
            self.add(Data(data))

    def comment(self, comment):
        """Add a Comment leaf to the current element."""
        self.add(Comment(comment))

    def entity(self, ref):
        """Add an Entity leaf to the current element."""
        self.add(Entity(ref))

    def character(self, ref):
        """Add a Character leaf to the current element."""
        self.add(Character(ref))

    def balance(self, name = None):
        """
        Collapse open elements until one called ``name`` (or the root).

        Each collapsed element hands its children to its parent and is
        marked as a singleton.
        """
        while self.node != self.tree and name != self.node.name:
            self.node.parent.extend(self.node.children)
            del self.node.children[:]
            self.node.singleton = True
            self.node = self.node.parent
-
-
class SGMLParser(sgmllib.SGMLParser):
    """sgmllib parser that forwards its events to a Parser tree builder."""

    def __init__(self, entitydefs = None):
        sgmllib.SGMLParser.__init__(self)
        if entitydefs is None:
            entitydefs = {}
        self.entitydefs = entitydefs
        self.parser = Parser()

    def unknown_starttag(self, name, attrs):
        self.parser.start(name, attrs)

    def unknown_endtag(self, name):
        self.parser.end(name)

    def handle_data(self, data):
        self.parser.data(data)

    def handle_comment(self, comment):
        self.parser.comment(comment)

    def unknown_entityref(self, ref):
        self.parser.entity(ref)

    def unknown_charref(self, ref):
        self.parser.character(ref)

    def close(self):
        """Finish parsing and collapse any elements that are still open."""
        sgmllib.SGMLParser.close(self)
        builder = self.parser
        builder.balance()
        assert builder.node == builder.tree
-
class XMLParser(xml.sax.handler.ContentHandler):
    """SAX content handler that forwards its events to a Parser tree builder."""

    def __init__(self):
        self.parser = Parser()
        self.locator = None

    def line(self):
        """Pass the locator's current position to the tree builder, if any."""
        loc = self.locator
        if loc is None:
            return
        self.parser.line(loc.getSystemId(),
                         loc.getLineNumber(),
                         loc.getColumnNumber())

    def setDocumentLocator(self, locator):
        self.locator = locator

    def startElement(self, name, attrs):
        self.parser.start(name, attrs.items())
        self.line()

    def endElement(self, name):
        self.parser.end(name)
        self.line()

    def characters(self, content):
        self.parser.data(content)
        self.line()

    def skippedEntity(self, name):
        self.parser.entity(name)
        self.line()
-
diff --git a/python/mllib/transforms.py b/python/mllib/transforms.py
deleted file mode 100644
index 69d99125e3..0000000000
--- a/python/mllib/transforms.py
+++ /dev/null
@@ -1,164 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-"""
-Useful transforms for dom objects.
-"""
-
-import dom
-from cStringIO import StringIO
-
class Visitor:
    """Transform that walks a whole tree and does nothing at the leaves."""

    def descend(self, node):
        """Dispatch this visitor to each child of ``node`` in order."""
        for kid in node.children:
            kid.dispatch(self)

    def node(self, node):
        """Handler for nodes: keep walking into the children."""
        self.descend(node)

    def leaf(self, leaf):
        """Handler for leaves: nothing to do."""
        return None
-
class Identity:
    """Transform that rebuilds the structure it is dispatched over."""

    def descend(self, node):
        """Return the list of results of dispatching to each child."""
        return [kid.dispatch(self) for kid in node.children]

    def default(self, tag):
        """Return a new dom.Tag with the same name, attrs and rebuilt children."""
        clone = dom.Tag(tag.name, *tag.attrs)
        clone.extend(self.descend(tag))
        return clone

    def tree(self, tree):
        """Return a new dom.Tree holding the rebuilt children."""
        clone = dom.Tree()
        clone.extend(self.descend(tree))
        return clone

    def tag(self, tag):
        return self.default(tag)

    def leaf(self, leaf):
        """Return a new leaf of the same class with the same data."""
        return leaf.__class__(leaf.data)
-
class Sexp(Identity):
    """
    Transform that writes an indented, parenthesised dump into ``out``.

    ``level`` is the current indentation in columns and ``stack`` holds
    the headings of the groups that are still open.
    """

    def __init__(self):
        self.stack = []
        self.level = 0
        self.out = ""

    def open(self, s):
        """Start a group headed by ``s`` and indent by its width."""
        self.out += "(%s" % s
        self.level += len(s) + 1
        self.stack.append(s)

    def line(self, s = ""):
        """Start a new line at the current indentation, followed by ``s``."""
        self.out = self.out.rstrip() + "\n" + " "*self.level + s

    def close(self):
        """End the innermost group and undo its indentation."""
        head = self.stack.pop()
        self.level -= len(head) + 1
        self.out = self.out.rstrip() + ")"

    def _group(self, head, node):
        """Write a group headed by ``head`` with one line per child."""
        self.open(head)
        for child in node.children:
            self.line()
            child.dispatch(self)
        self.close()

    def tree(self, tree):
        self._group("+ ", tree)

    def tag(self, tag):
        self._group("Node(%s) " % tag.name, tag)

    def leaf(self, leaf):
        self.line("%s(%s)" % (leaf.__class__.__name__, leaf.data))
-
class Output:
    """Transform that serialises a tree back to markup text."""

    def descend(self, node):
        """Return the concatenated output of every child of ``node``."""
        # "".join works for byte and unicode strings alike, whereas the
        # cStringIO buffer used previously cannot hold non-ASCII unicode.
        return "".join([child.dispatch(self) for child in node.children])

    def default(self, tag):
        """
        Return ``tag`` as markup: start tag, content and end tag.

        The end tag is omitted when ``tag.singleton`` is true.
        """
        parts = ["<%s" % tag.name]
        for k, v in tag.attrs:
            parts.append(' %s="%s"' % (k, v))
        parts.append(">")
        parts.append(self.descend(tag))
        if not tag.singleton:
            parts.append("</%s>" % tag.name)
        return "".join(parts)

    def tree(self, tree):
        return self.descend(tree)

    def tag(self, tag):
        return self.default(tag)

    def data(self, leaf):
        return leaf.data

    def entity(self, leaf):
        return "&%s;" % leaf.data

    def character(self, leaf):
        # Previously an unimplemented stub that raised Exception("TODO").
        # Emit the character reference in the same form Text.character
        # uses.
        return "&#%s;" % leaf.data

    def comment(self, leaf):
        return "<!-- %s -->" % leaf.data
-
class Empty(Output):
    """Output variant that drops tag markup and renders every leaf as ""."""

    def tag(self, tag):
        """Return only the output of the tag's children."""
        return self.descend(tag)

    def data(self, leaf):
        """Render data as nothing."""
        return ""

    def entity(self, leaf):
        """Render entities as nothing."""
        return ""

    def character(self, leaf):
        """Render character references as nothing."""
        return ""

    def comment(self, leaf):
        """Render comments as nothing."""
        return ""
-
class Text(Empty):
    """Empty variant that keeps data, entity and character leaves."""

    def data(self, leaf):
        """Return the leaf's text unchanged."""
        return leaf.data

    def entity(self, leaf):
        """Return the leaf as an entity reference."""
        return "&%s;" % leaf.data

    def character(self, leaf):
        """Return the leaf as a numeric character reference."""
        # XXX: is this right?
        return "&#%s;" % leaf.data