summaryrefslogtreecommitdiff
path: root/docutils
diff options
context:
space:
mode:
authormilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2015-04-10 23:48:51 +0000
committermilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2015-04-10 23:48:51 +0000
commit940459351be72e2ff352a2aee5ba8960fe2eeecb (patch)
tree6c7d3b0d008283a73b2d1951621ef38fdcca79c2 /docutils
parent0ef1a2a15705a4a9372510ef340addbab4ff5b9e (diff)
downloaddocutils-940459351be72e2ff352a2aee5ba8960fe2eeecb.tar.gz
Support some external TeX math -> MathML converters.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@7861 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
-rw-r--r--docutils/docs/dev/todo.txt48
-rw-r--r--docutils/docs/user/config.txt18
-rw-r--r--docutils/docutils/utils/math/tex2mathml_extern.py147
-rw-r--r--docutils/docutils/writers/html_base/__init__.py61
4 files changed, 233 insertions, 41 deletions
diff --git a/docutils/docs/dev/todo.txt b/docutils/docs/dev/todo.txt
index ed3591c29..467375d1c 100644
--- a/docutils/docs/dev/todo.txt
+++ b/docutils/docs/dev/todo.txt
@@ -1317,40 +1317,46 @@ MathML_
Converters from LaTeX to MathML include
* latex_math_ (Python) by Jens Jørgen Mortensen in the Docutils sandbox
- * Blahtex_ (C++)
- * MathToWeb_ (Java)
- * TeX4ht_ (TeX based)
- * LaTeXML_ (Perl) with the command latexmlmath_, e.g.::
- latexmlmath --contentmathml=- $\vec{\alpha}$
+ This is the base for Docutils latex2mathml_ module used by
+ default with ``--math-output=MathML``.
+
+ * blahtexml_ (C++), supported with ``--math-output=MathML blahtexml``.
+
+ Fast conversion, support for many symbols and environments, but
+ no "align" (or other equation-aligning) environment.
+
+ * LaTeXML_ (Perl), supported with ``--math-output=MathML latexml``.
+ Comprehensive macro support but very slow.
+
+ * TtM_ (C), ``--math-output=MathML ttm``, support may be removed.
+
+ No "matrix", "align" and "cases" environments.
+
+ * MathToWeb_ (Java)
+ * TeX4ht_ (TeX based)
* itex_ (also `used in Abiword`__)
- * TtM_ (C, non free, free binary for Linux) with an `online-trial page`__
* `Steve’s LATEX-to-MathML translator`_
('mini-language', javascript, Python)
- latex_math_ is the base for the current latex2mathml_ module used
- with ``--math-output=MathML``.
- * Write a new converter based on:
+ * Write a new converter? E.g. based on:
* a generic tokenizer (see e.g. a `latex-codec recipe`_,
`updated latex-codec`_, )
* the Unicode-Char <-> LaTeX mappings database unimathsymbols_
__ http://msevior.livejournal.com/26377.html
- __ http://hutchinson.belmont.ma.us/tth/mml/ttmmozform.html
.. _MathML: http://www.w3.org/TR/MathML2/
.. _latex_math: ../../../sandbox/jensj/latex_math/
.. _latex2mathml: ../../docutils/math/latex2mathml.py
- .. _Blahtex: http://gva.noekeon.org/blahtexml/
- .. _MathToWeb: http://www.mathtoweb.com/
- .. _TeX4ht: http://www.tug.org/applications/tex4ht/mn.html
+ .. _blahtexml: http://gva.noekeon.org/blahtexml/
.. _LaTeXML: http://dlmf.nist.gov/LaTeXML/
- .. _latexmlmath:
- http://dlmf.nist.gov/LaTeXML/manual/commands/latexmlmath.html
- .. _itex: http://golem.ph.utexas.edu/~distler/blog/itex2MMLcommands.html
.. _ttm: http://hutchinson.belmont.ma.us/tth/mml/
+ .. _TeX4ht: http://www.tug.org/applications/tex4ht/mn.html
+ .. _MathToWeb: http://www.mathtoweb.com/
+ .. _itex: http://golem.ph.utexas.edu/~distler/blog/itex2MMLcommands.html
.. _Steve’s LATEX-to-MathML translator:
http://www.gold-saucer.org/mathml/greasemonkey/dist/display-latex
.. _latex-codec recipe:
@@ -1367,19 +1373,19 @@ HTML/CSS
format math in standard HTML enhanced by CSS rules
(Overview__, `Examples and experiments`__).
- LaTeX-math to HTML/CSS converters include
+ The ``math-output=html`` option uses the converter from eLyXer_
+ (included with Docutils).
+
+ Alternatives: LaTeX-math to HTML/CSS converters include
* TtH_ (C)
* Hevea_ (Objective Caml)
- * eLyXer_ (Python)
-
- The ``math-output=html`` option uses the converter from eLyXer.
__ http://www.cs.tut.fi/~jkorpela/math/
__ http://www.zipcon.net/~swhite/docs/math/math.html
+ .. _elyxer: http://elyxer.nongnu.org/
 .. _TtH: http://hutchinson.belmont.ma.us/tth/index.html
.. _Hevea: http://para.inria.fr/~maranget/hevea/
- .. _elyxer: http://elyxer.nongnu.org/
images
(PNG or SVG) like e.g. Wikipedia. (e.g. with dvisvgm_ or the
diff --git a/docutils/docs/user/config.txt b/docutils/docs/user/config.txt
index 3d50d6d63..2d929518d 100644
--- a/docutils/docs/user/config.txt
+++ b/docutils/docs/user/config.txt
@@ -977,6 +977,21 @@ the output document. Supported values are (case insensitive):
Transitional. However, MathML-enabled browsers will render it
fine.
+ An external converter can be appended after whitespace:
+
+ * ``math-output: MathML blahtexml``
+
+ blahtexml_ (C++) Fast conversion, support for many symbols and
+ environments, but no "align" (or other equation-aligning) environment.
+
+ * ``math-output: MathML latexml``
+
+ LaTeXML_ (Perl) Comprehensive macro support but very slow.
+
+ * ``math-output: MathML ttm``
+
+ TtM_ (C) No "matrix", "align" and "cases" environments.
+ Support may be removed.
+
:LaTeX:
Include literal LaTeX code.
@@ -991,6 +1006,9 @@ New in Docutils 0.8.
.. _MathJax: http://www.mathjax.org/
.. _MathPlayer: http://www.dessci.com/en/products/mathplayer/
.. _MathML: http://www.w3.org/TR/MathML/
+.. _blahtexml: http://gva.noekeon.org/blahtexml/
+.. _LaTeXML: http://dlmf.nist.gov/LaTeXML/
+.. _ttm: http://hutchinson.belmont.ma.us/tth/mml/
option_limit
~~~~~~~~~~~~
diff --git a/docutils/docutils/utils/math/tex2mathml_extern.py b/docutils/docutils/utils/math/tex2mathml_extern.py
new file mode 100644
index 000000000..f315977b8
--- /dev/null
+++ b/docutils/docutils/utils/math/tex2mathml_extern.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# :Id: $Id$
+# :Copyright: © 2015 Günter Milde.
+# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+# This file is offered as-is, without any warranty.
+#
+# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
+
+# Wrappers for TeX->MathML conversion by external tools
+# =====================================================
+
+import subprocess
+
+# Minimal LaTeX document (article class with amsmath loaded) that is wrapped
+# around the math code before it is piped to a converter.  The single "%s"
+# placeholder receives the math code; used by latexml() and ttm() below.
+document_template = r"""\documentclass{article}
+\usepackage{amsmath}
+\begin{document}
+%s
+\end{document}
+"""
+
+def latexml(math_code, reporter=None):
+    """Convert LaTeX math code to MathML with LaTeXML_.
+
+    `math_code` is inserted into `document_template`, converted by
+    ``latexml`` and the intermediate result is post-processed to XHTML
+    by ``latexmlpost``.  The ``<math>`` element is then cut out of the
+    post-processor's output.
+
+    Arguments:
+      math_code: LaTeX math code (text string).
+      reporter:  optional object with an ``error(message)`` method that
+                 receives the converters' error output.
+                 With None (the default), diagnostics are dropped.
+
+    Return the text from ``<math`` up to and including ``</math>``.
+
+    Raise `SyntaxError` if the extracted MathML contains
+    ``class="ltx_ERROR``.
+
+    .. _LaTeXML: http://dlmf.nist.gov/LaTeXML/
+    """
+    p = subprocess.Popen(['latexml',
+                          '-', # read from stdin
+                          # '--preload=amsmath',
+                          '--inputencoding=utf8',
+                         ],
+                         stdin=subprocess.PIPE,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE,
+                         close_fds=True)
+    # communicate() writes the input and drains both output pipes in
+    # parallel.  Reading stdout to EOF before touching stderr can
+    # deadlock once the child has filled the stderr pipe buffer.
+    latexml_code, latexml_err = p.communicate(
+                        (document_template % math_code).encode('utf8'))
+    latexml_err = latexml_err.decode('utf8')
+    # The parentheses matter: "reporter and A or B" would call
+    # reporter.error() on a missing reporter whenever B is true.
+    if reporter and ('Error' in latexml_err or not latexml_code):
+        reporter.error(latexml_err)
+
+    post_p = subprocess.Popen(['latexmlpost',
+                               '-',
+                               '--nonumbersections',
+                               '--format=xhtml',
+                               # '--linelength=78', # experimental
+                               '--'
+                              ],
+                              stdin=subprocess.PIPE,
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE,
+                              close_fds=True)
+    # latexml_code is passed on as bytes, exactly as received.
+    result, post_p_err = post_p.communicate(latexml_code)
+    result = result.decode('utf8')
+    post_p_err = post_p_err.decode('utf8')
+    if reporter and ('Error' in post_p_err or not result):
+        reporter.error(post_p_err)
+
+    # extract MathML code (7 == len('</math>')):
+    start, end = result.find('<math'), result.find('</math>')+7
+    result = result[start:end]
+    if 'class="ltx_ERROR' in result:
+        raise SyntaxError(result)
+    return result
+
+def ttm(math_code, reporter=None):
+    """Convert LaTeX math code to MathML with TtM_.
+
+    `math_code` is inserted into `document_template` and piped through
+    ``ttm``; the ``<math>`` element is cut out of the output.
+
+    Arguments:
+      math_code: LaTeX math code (text string).
+      reporter:  optional object with an ``error(message)`` method that
+                 receives the converter's error output.
+                 With None (the default), diagnostics are dropped.
+
+    Return the text from ``<math`` up to and including ``</math>``.
+
+    Raise `SyntaxError` if the converter's error output contains
+    ``**** Unknown``; the message lists all lines starting with ``****``.
+
+    .. _TtM: http://hutchinson.belmont.ma.us/tth/mml/
+    """
+    p = subprocess.Popen(['ttm',
+                          # '-i', # italic font for equations. Default roman.
+                          '-u', # unicode character encoding. (Default iso-8859-1).
+                          '-r', # output raw MathML (no preamble or postlude)
+                         ],
+                         stdin=subprocess.PIPE,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE,
+                         close_fds=True)
+    # communicate() writes the input and drains both output pipes in
+    # parallel.  Reading stdout to EOF before touching stderr can
+    # deadlock once the child has filled the stderr pipe buffer.
+    result, err = p.communicate(
+                        (document_template % math_code).encode('utf8'))
+    # Return text like latexml() and blahtexml() do.
+    # NOTE(review): assumes the "-u" output is UTF-8 encoded -- confirm.
+    result = result.decode('utf8')
+    err = err.decode('utf8')
+    if '**** Unknown' in err:
+        msg = '\n'.join([line for line in err.splitlines()
+                         if line.startswith('****')])
+        raise SyntaxError('\nMessage from external converter TtM:\n'+ msg)
+    # The parentheses matter: "reporter and A or B" would call
+    # reporter.error() on a missing reporter whenever B is true.
+    if reporter and ('**** Error' in err or not result):
+        reporter.error(err)
+    # extract MathML code (7 == len('</math>')):
+    start, end = result.find('<math'), result.find('</math>')+7
+    result = result[start:end]
+    return result
+
+def blahtexml(math_code, inline=True, reporter=None):
+    """Convert LaTeX math code to MathML with blahtexml_.
+
+    `math_code` is piped to ``blahtexml`` as is (no document wrapper).
+    The content of the ``<markup>`` element in the converter's output
+    is wrapped in a new ``<math>`` element.
+
+    Arguments:
+      math_code: LaTeX math code (text string).
+      inline:    if False, pass ``--displaymath`` to the converter and
+                 add ``mode="display"`` to the ``<math>`` element.
+      reporter:  optional object with an ``error(message)`` method that
+                 receives the converter's error output.
+                 With None (the default), diagnostics are dropped.
+
+    Return the ``<math>`` element as text string.
+
+    Raise `SyntaxError` with the content of the ``<message>`` element
+    if the converter's output contains ``<error>``.
+
+    .. _blahtexml: http://gva.noekeon.org/blahtexml/
+    """
+    options = ['--mathml',
+               '--indented',
+               '--spacing', 'moderate',
+               '--mathml-encoding', 'raw',
+               '--other-encoding', 'raw',
+               '--doctype-xhtml+mathml',
+               '--annotate-TeX',
+              ]
+    if inline:
+        mathmode_arg = ''
+    else:
+        # Leading space: the value is appended directly after the xmlns
+        # attribute and XML requires whitespace between attributes.
+        mathmode_arg = ' mode="display"'
+        options.append('--displaymath')
+
+    p = subprocess.Popen(['blahtexml']+options,
+                         stdin=subprocess.PIPE,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE,
+                         close_fds=True)
+    # communicate() writes the input and drains both output pipes in
+    # parallel.  Reading stdout to EOF before touching stderr can
+    # deadlock once the child has filled the stderr pipe buffer.
+    result, err = p.communicate(math_code.encode('utf8'))
+    result = result.decode('utf8')
+    err = err.decode('utf8')
+
+    if '<error>' in result:
+        # 9 == len('<message>')
+        raise SyntaxError('\nMessage from external converter blahtexml:\n'
+                          +result[result.find('<message>')+9:result.find('</message>')])
+    if reporter and ('**** Error' in err or not result):
+        reporter.error(err)
+    # +9 skips the 8 characters of '<markup>' plus one more character --
+    # presumably the newline written because of "--indented" (TODO confirm).
+    start, end = result.find('<markup>')+9, result.find('</markup>')
+    result = ('<math xmlns="http://www.w3.org/1998/Math/MathML"%s>\n'
+              '%s</math>\n') % (mathmode_arg, result[start:end])
+    return result
+
+# self-test
+#
+# When run as a script, convert a sample formula (with non-ASCII text in
+# ``\text{}``) with blahtexml() and print the UTF-8 encoded result.
+# The calls for the other converters are kept as disabled alternatives.
+# NOTE(review): Python 2 only syntax (``ur''`` literal, print statement).
+
+if __name__ == "__main__":
+ example = ur'\frac{\partial \sin^2(\alpha)}{\partial \vec r} \varpi \, \text{Grüße}'
+ # print latexml(example).encode('utf8')
+ # print ttm(example)#.encode('utf8')
+ print blahtexml(example).encode('utf8')
diff --git a/docutils/docutils/writers/html_base/__init__.py b/docutils/docutils/writers/html_base/__init__.py
index 38b9a2644..108b7fbc1 100644
--- a/docutils/docutils/writers/html_base/__init__.py
+++ b/docutils/docutils/writers/html_base/__init__.py
@@ -44,8 +44,8 @@ import docutils
from docutils import frontend, nodes, utils, writers, languages, io
from docutils.utils.error_reporting import SafeString
from docutils.transforms import writer_aux
-from docutils.utils.math import unichar2tex, pick_math_environment, math2html
-from docutils.utils.math.latex2mathml import parse_latex_math
+from docutils.utils.math import (unichar2tex, pick_math_environment,
+ math2html, latex2mathml, tex2mathml_extern)
class Writer(writers.Writer):
@@ -132,8 +132,9 @@ class Writer(writers.Writer):
'Defined styles: "borderless". Default: ""',
['--table-style'],
{'default': ''}),
- ('Math output format (one of "MathML", "HTML", "MathJax" '
- 'or "LaTeX") and options(s). Default: "HTML math.css"',
+ ('Math output format (one of "MathML", "HTML", "MathJax", '
+ 'or "LaTeX") and option(s). '
+ 'Default: "HTML math.css"',
['--math-output'],
{'default': 'HTML math.css'}),
('Prepend an XML declaration. (Thwarts HTML5 conformance.) '
@@ -1141,15 +1142,9 @@ class HTMLTranslator(nodes.NodeVisitor):
def visit_math(self, node, math_env=''):
# If the method is called from visit_math_block(), math_env != ''.
- # As there is no native HTML math support, we provide alternatives:
- # LaTeX and MathJax math_output modes simply wrap the content,
- # HTML and MathML math_output modes also convert the math_code.
- if self.math_output not in ('mathml', 'html', 'mathjax', 'latex'):
- self.document.reporter.error(
- 'math-output format "%s" not supported '
- 'falling back to "latex"'% self.math_output)
- self.math_output = 'latex'
- #
+ # As there is no native HTML math support, we provide alternatives
+ # for the math-output: LaTeX and MathJax simply wrap the content,
+ # HTML and MathML also convert the math_code.
# HTML container
tags = {# math_output: (block, inline, class-arguments)
'mathml': ('div', '', ''),
@@ -1157,22 +1152,31 @@ class HTMLTranslator(nodes.NodeVisitor):
'mathjax': ('div', 'span', 'math'),
'latex': ('pre', 'tt', 'math'),
}
+ if self.math_output not in tags:
+ self.document.reporter.error(
+ 'math-output format "%s" not supported '
+ 'falling back to "latex"'% self.math_output)
+ self.math_output = 'latex'
tag = tags[self.math_output][math_env == '']
clsarg = tags[self.math_output][2]
# LaTeX container
wrappers = {# math_mode: (inline, block)
- 'mathml': (None, None),
+ 'mathml': ('$%s$', u'\\begin{%s}\n%s\n\\end{%s}'),
'html': ('$%s$', u'\\begin{%s}\n%s\n\\end{%s}'),
'mathjax': ('\(%s\)', u'\\begin{%s}\n%s\n\\end{%s}'),
'latex': (None, None),
}
wrapper = wrappers[self.math_output][math_env != '']
+ if self.math_output == 'mathml' and (not self.math_output_options or
+ self.math_output_options[0] == 'blahtexml'):
+ wrapper = None
# get and wrap content
math_code = node.astext().translate(unichar2tex.uni2tex_table)
- if wrapper and math_env:
- math_code = wrapper % (math_env, math_code, math_env)
- elif wrapper:
- math_code = wrapper % math_code
+ if wrapper:
+ try: # wrapper with three "%s"
+ math_code = wrapper % (math_env, math_code, math_env)
+ except TypeError: # wrapper with one "%s"
+ math_code = wrapper % math_code
# settings and conversion
if self.math_output in ('latex', 'mathjax'):
math_code = self.encode(math_code)
@@ -1191,9 +1195,26 @@ class HTMLTranslator(nodes.NodeVisitor):
elif self.math_output == 'mathml':
self.doctype = self.doctype_mathml
# self.content_type = self.content_type_mathml
+ converter = ' '.join(self.math_output_options).lower()
try:
- mathml_tree = parse_latex_math(math_code, inline=not(math_env))
- math_code = ''.join(mathml_tree.xml())
+ if converter == 'latexml':
+ math_code = tex2mathml_extern.latexml(math_code,
+ self.document.reporter)
+ elif converter == 'ttm':
+ math_code = tex2mathml_extern.ttm(math_code,
+ self.document.reporter)
+ elif converter == 'blahtexml':
+ math_code = tex2mathml_extern.blahtexml(math_code,
+ inline=not(math_env),
+ reporter=self.document.reporter)
+ elif not converter:
+ math_code = latex2mathml.tex2mathml(math_code,
+ inline=not(math_env))
+ else:
+ self.document.reporter.error('option "%s" not supported '
+ 'with math-output "MathML"')
+ except OSError:
+ raise OSError('is "latexmlmath" in your PATH?')
except SyntaxError, err:
err_node = self.document.reporter.error(err, base_node=node)
self.visit_system_message(err_node)