summary | refs | log | tree | commit | diff
path: root/Doc/tools/sgmlconv/latex2esis.py
diff options
context:
space:
mode:
Diffstat (limited to 'Doc/tools/sgmlconv/latex2esis.py')
-rwxr-xr-x Doc/tools/sgmlconv/latex2esis.py 566
1 files changed, 0 insertions, 566 deletions
diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py
deleted file mode 100755
index 3ee86e320e..0000000000
--- a/Doc/tools/sgmlconv/latex2esis.py
+++ /dev/null
@@ -1,566 +0,0 @@
-#! /usr/bin/env python
-
-"""Generate ESIS events based on a LaTeX source document and
-configuration data.
-
-The conversion is not strong enough to work with arbitrary LaTeX
-documents; it has only been designed to work with the highly stylized
-markup used in the standard Python documentation. A lot of
-information about specific markup is encoded in the control table
-passed to the convert() function; changing this table can allow this
-tool to support additional LaTeX markups.
-
-The format of the table is largely undocumented; see the commented
-headers where the table is specified in main(). There is no provision
-to load an alternate table from an external file.
-"""
-
-import errno
-import getopt
-import os
-import re
-import sys
-import xml.sax
-import xml.sax.saxutils
-
-from esistools import encode
-
-
# Debug level.  main() increments it once per -D/--debug option; when it is
# non-zero dbgmsg() writes to stderr and new_stack() returns a _Stack.
DEBUG = 0
-
-
class LaTeXFormatError(Exception):
    """Raised when the LaTeX input cannot be processed by the converter."""
-
-
class LaTeXStackError(LaTeXFormatError):
    """Raised when an environment close does not match the element stack.

    The environment name is kept in ``found`` and a copy of the stack, as
    it was when the error was created, in ``stack``.
    """

    def __init__(self, found, stack):
        self.found = found
        # Copy, so later changes to the caller's stack don't alter the record.
        self.stack = stack[:]
        message = ("environment close for %s doesn't match;\n stack = %s"
                   % (found, stack))
        super(LaTeXStackError, self).__init__(message)
-
-
# Patterns matched against the start of the remaining input by
# Conversion.subconvert().
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
_text_rx = re.compile(r"[^]~%\\{}]+")
# The brackets are written as "\[" and "\]" rather than "[[]" and "[]]":
# a character set that starts with "[" triggers a "Possible nested set"
# FutureWarning in newer versions of the re module.
_optional_rx = re.compile(r"\s*\[([^]]*)\]", re.MULTILINE)
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
_start_group_rx = re.compile("[ \n]*{")
_start_optional_rx = re.compile("[ \n]*\\[")


# Characters that subconvert() emits literally when preceded by a backslash.
ESCAPED_CHARS = "$%#^ {}&~"
-
-
def dbgmsg(msg):
    """Write *msg* followed by a newline to stderr when DEBUG is set."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
-
def pushing(name, point, depth):
    """Emit a debug message for opening element *name* at *point*.

    *depth* is accepted but not used.
    """
    trace = "pushing <%s> at %s" % (name, point)
    dbgmsg(trace)
-
def popping(name, point, depth):
    """Emit a debug message for closing element *name* at *point*.

    *depth* is accepted but not used.
    """
    trace = "popping </%s> at %s" % (name, point)
    dbgmsg(trace)
-
-
class _Stack(list):
    """List subclass used as the element stack when DEBUG is set.

    It only accepts strings in append(), so that a wrong push is reported
    at the point where it happens.
    """

    def append(self, entry):
        """Push *entry*; raise LaTeXFormatError if it is not a string."""
        if not isinstance(entry, str):
            raise LaTeXFormatError("cannot push non-string on stack: %r"
                                   % (entry, ))
        list.append(self, entry)

    def pop(self, index=-1):
        """Remove and return the entry at *index* (the top by default).

        Callers format the result into the output (``stack.pop()`` inside
        a ``%`` expression), so the removed entry must be returned just as
        list.pop() does.
        """
        entry = self[index]
        del self[index]
        return entry

    def __delitem__(self, index):
        """Delete the entry at *index*; delegates to list.__delitem__."""
        list.__delitem__(self, index)
-
-
def new_stack():
    """Return a new, empty element stack.

    A _Stack is used when DEBUG is set, a plain list otherwise.
    """
    return _Stack() if DEBUG else []
-
-
class Conversion:
    """Convert LaTeX text from a file object into ESIS events.

    The whole input is read when the object is created; events are written
    through ``ofp.write`` as the text is consumed.  *table* maps macro and
    environment names to TableEntry objects; default entries are added to
    it for names that are looked up but not defined.
    """

    def __init__(self, ifp, ofp, table):
        self.write = ofp.write
        self.ofp = ofp
        self.table = table
        # Trailing whitespace is stripped from every line; the extra ""
        # makes the joined text end with a newline whenever there is input.
        lines = [s.rstrip() for s in ifp.readlines()]
        lines.append("")
        self.line = "\n".join(lines)
        self.preamble = 1

    def convert(self):
        """Convert the complete input."""
        self.subconvert()

    def subconvert(self, endchar=None, depth=0):
        """Convert content starting at self.line, including sub-structures.

        Parsing stops when the character *endchar* is found (with no open
        structures), or at the end of the input data if *endchar* is None.
        *depth* is passed along by recursive calls but not otherwise used.

        Returns the unconsumed input.  When parsing stopped at *endchar*
        the result still starts with that character and is also stored in
        self.line; when the input ran out the result is empty.

        Raises LaTeXFormatError (or its subclass LaTeXStackError) for
        markup that cannot be handled.
        """
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                # Called for its effects only: it registers a default entry
                # for an unknown name and rejects names defined as macros.
                self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "c":
                    # Ugh!  This is a combining character...
                    endpos = m.end()
                    # Slice rather than index so that a missing character
                    # is reported by combining_char() as a format error.
                    self.combining_char("c", line[endpos:endpos + 1])
                    line = line[endpos + 1:]
                    continue
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!
                    terminator = "\\end{%s}" % macroname
                    pos = line.find(terminator)
                    if pos < 0:
                        # Without this check the -1 from find() would be
                        # used as a slice index and silently mangle the
                        # rest of the input.
                        raise LaTeXFormatError(
                            "missing %s for verbatim content" % terminator)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len(terminator):]
                    continue
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                if entry.outputname and entry.empty:
                    self.write("e\n")
                #
                params, _optional, empty = self.start_macro(macroname)
                # rip off the macroname
                if params or empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m and entry.outputname:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text and entry.outputname:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %r"
                                    % (pentry.name, macroname, line[:100]))
                            if entry.outputname:
                                self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            self.line = skip_white(line)[1:]
                            line = self.subconvert(
                                "}", len(stack) + depth + 1)[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            if line[0] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text" and pentry.text:
                        if entry.outputname and not opened:
                            opened = 1
                            stack.append(entry.name)
                            self.write("(%s\n" % entry.outputname)
                        self.write("-%s\n" % encode(pentry.text))
                    elif pentry.type == "entityref":
                        self.write("&%s\n" % pentry.name)
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                continue
            # (No second endchar test is needed here: neither line nor
            # stack has changed since the test at the top of the loop.)
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': %r" % (line[:100], ))
                macroname = stack[-1]
                if macroname:
                    conversion = self.table[macroname]
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "~":
                # don't worry about the "tie" aspect of this command
                line = line[1:]
                self.write("- \n")
                continue
            if line[0] == "{":
                # bare group; the empty name marks it on the stack
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            if line[:2] == r"\_":
                line = "_" + line[2:]
                continue
            if line[:2] in (r"\'", r'\"'):
                # combining characters...
                self.combining_char(line[1], line[2:3])
                line = line[3:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %r%s"
                                   % (line[:100], extra))
        # Out of input: close whatever may be closed implicitly.
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        # otherwise we just ran out of input here...
        # Return the (empty) remainder rather than None so that callers
        # which slice the result keep working.
        return ""

    # This is a really limited table of combinations, but it will have
    # to do for now.
    _combinations = {
        ("c", "c"): 0x00E7,
        ("'", "e"): 0x00E9,
        ('"', "o"): 0x00F6,
    }

    def combining_char(self, prefix, char):
        """Write the character reference for accent *prefix* applied to *char*.

        Raises LaTeXFormatError if the pair is not in _combinations.
        """
        try:
            ordinal = self._combinations[(prefix, char)]
        except KeyError:
            raise LaTeXFormatError(
                "unsupported combining character: \\%s%s" % (prefix, char))
        self.write("-\\%%%d;\n" % ordinal)

    def start_macro(self, name):
        """Return (parameters, first-parameter-optional, empty) for *name*."""
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty

    def get_entry(self, name):
        """Return the table entry for *name*.

        An unknown name gets a default entry with one "content" parameter,
        which is also stored in the table.
        """
        entry = self.table.get(name)
        if entry is None:
            dbgmsg("get_entry(%r) failing; building default entry!" % (name, ))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        """Return the table entry for environment *name*.

        An unknown name gets a default environment entry with one implied
        "content" parameter, which is also stored in the table.  Raises
        LaTeXFormatError if *name* is defined, but not as an environment.
        """
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        """Write an attribute event for *pentry* with *value*.

        Nothing is written when the parameter has no name or the value is
        empty.  The value is typed TOKEN when it matches _token_rx and
        CDATA otherwise.
        """
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
-
-
def convert(ifp, ofp, table):
    """Convert the LaTeX text in *ifp* to ESIS events written to *ofp*.

    *table* is the conversion table handed to Conversion.  An IOError
    whose errno is EPIPE is swallowed; any other IOError propagates.
    """
    c = Conversion(ifp, ofp, table)
    try:
        c.convert()
    except IOError as e:
        # Exception objects cannot be unpacked like a tuple on Python 3,
        # so read the error code from the errno attribute.
        if e.errno != errno.EPIPE:
            raise
-
-
def skip_white(line):
    """Return *line* without leading whitespace and "%" characters.

    Only the "%" character itself is dropped, not the text after it.
    """
    skippable = " %\n\t\r"
    while line:
        if line[0] not in skippable:
            break
        line = line[1:].lstrip()
    return line
-
-
-
class TableEntry:
    """Conversion-table record for one macro or environment."""

    def __init__(self, name, environment=0):
        # The output name starts out equal to the input name.
        self.name = self.outputname = name
        self.environment = environment
        # Anything that is not an environment starts out flagged as empty.
        self.empty = not environment
        self.has_content = self.verbatim = self.auto_close = 0
        # Each instance gets its own, separate lists.
        self.parameters, self.closes, self.endcloses = [], [], []
-
class Parameter:
    """Description of one parameter of a macro or environment."""

    def __init__(self, type, name=None, optional=0):
        self.type, self.name, self.optional = type, name, optional
        # No text and not implied until a caller says otherwise.
        self.text = ''
        self.implied = 0
-
-
class TableHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that builds the conversion table.

    Each element <tag> is dispatched to the methods start_<tag>(attrs)
    and end_<tag>(), when they exist.  get_table() returns the result.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.__table = {}
        self.__buffer = ''
        # Cache of tag -> (start method or None, end method or None).
        self.__methods = {}
        # Entry being built; set by start_macro()/start_environment().
        self.__current = None

    def get_table(self):
        """Return the table, mapping names to TableEntry objects.

        Environments without a content parameter get an implied one.
        """
        for entry in self.__table.values():
            if entry.environment and not entry.has_content:
                p = Parameter("content")
                p.implied = 1
                entry.parameters.append(p)
                entry.has_content = 1
        return self.__table

    def startElement(self, tag, attrs):
        try:
            start, end = self.__methods[tag]
        except KeyError:
            start = getattr(self, "start_" + tag, None)
            end = getattr(self, "end_" + tag, None)
            self.__methods[tag] = (start, end)
        if start:
            start(attrs)

    def endElement(self, tag):
        start, end = self.__methods[tag]
        if end:
            end()

    def endDocument(self):
        self.__methods.clear()

    def characters(self, data):
        self.__buffer += data

    def start_environment(self, attrs):
        name = attrs["name"]
        self.__current = TableEntry(name, environment=1)
        self.__current.verbatim = attrs.get("verbatim") == "yes"
        if "outputname" in attrs:
            self.__current.outputname = attrs.get("outputname")
        self.__current.endcloses = attrs.get("endcloses", "").split()

    def end_environment(self):
        # Environments are stored in the table the same way as macros.
        self.end_macro()

    def start_macro(self, attrs):
        name = attrs["name"]
        self.__current = TableEntry(name)
        self.__current.closes = attrs.get("closes", "").split()
        if "outputname" in attrs:
            self.__current.outputname = attrs.get("outputname")

    def end_macro(self):
        """Store the current entry; raise ValueError for a duplicate name."""
        name = self.__current.name
        if name in self.__table:
            raise ValueError("name %r already in use" % (name,))
        self.__table[name] = self.__current
        self.__current = None

    def start_attribute(self, attrs):
        name = attrs.get("name")
        optional = attrs.get("optional") == "yes"
        if name:
            p = Parameter("attribute", name, optional=optional)
        else:
            p = Parameter("attribute", optional=optional)
        self.__current.parameters.append(p)
        # Start collecting the element's character data afresh.
        self.__buffer = ''

    def end_attribute(self):
        # The element's character data becomes the parameter's text.
        self.__current.parameters[-1].text = self.__buffer

    def start_entityref(self, attrs):
        name = attrs["name"]
        p = Parameter("entityref", name)
        self.__current.parameters.append(p)

    def start_child(self, attrs):
        name = attrs["name"]
        p = Parameter("child", name, attrs.get("optional") == "yes")
        self.__current.parameters.append(p)
        self.__current.empty = 0

    def start_content(self, attrs):
        p = Parameter("content")
        p.implied = attrs.get("implied") == "yes"
        # Content of an environment is always implied.
        if self.__current.environment:
            p.implied = 1
        self.__current.parameters.append(p)
        self.__current.has_content = 1
        self.__current.empty = 0

    def start_text(self, attrs):
        self.__current.empty = 0
        self.__buffer = ''

    def end_text(self):
        p = Parameter("text")
        p.text = self.__buffer
        self.__current.parameters.append(p)
-
-
def load_table(fp):
    """Parse the XML conversion specification in *fp* and return the table."""
    handler = TableHandler()
    xml.sax.parse(fp, handler)
    return handler.get_table()
-
-
def main():
    """Command-line entry point.

    Usage: [-D|--debug] [infile [outfile]].  Input defaults to stdin and
    output to stdout.  The conversion table is loaded from
    'conversion.xml' in the directory given by sys.path[0].  Exits with
    status 2 when more than two file arguments are given.
    """
    global DEBUG
    #
    opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
    for opt, _arg in opts:
        if opt in ("-D", "--debug"):
            DEBUG += 1
    if len(args) > 2:
        # There is no usage() helper in this module; report inline.
        sys.stderr.write("usage: %s [-D|--debug] [infile [outfile]]\n"
                         % os.path.basename(sys.argv[0]))
        sys.exit(2)

    ifp = sys.stdin
    ofp = sys.stdout
    # Only the files opened here are closed here; stdin/stdout are left alone.
    opened = []
    try:
        if len(args) >= 1:
            ifp = open(args[0])
            opened.append(ifp)
        if len(args) == 2:
            ofp = open(args[1], "w")
            opened.append(ofp)
        with open(os.path.join(sys.path[0], 'conversion.xml')) as table_fp:
            table = load_table(table_fp)
        convert(ifp, ofp, table)
    finally:
        for fp in opened:
            fp.close()
-
-
-if __name__ == "__main__":
- main()