diff options
| author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2015-04-10 23:48:51 +0000 |
|---|---|---|
| committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2015-04-10 23:48:51 +0000 |
| commit | 940459351be72e2ff352a2aee5ba8960fe2eeecb (patch) | |
| tree | 6c7d3b0d008283a73b2d1951621ef38fdcca79c2 /docutils | |
| parent | 0ef1a2a15705a4a9372510ef340addbab4ff5b9e (diff) | |
| download | docutils-940459351be72e2ff352a2aee5ba8960fe2eeecb.tar.gz | |
Support some external TeX math -> MathML converters.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@7861 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'docutils')
| -rw-r--r-- | docutils/docs/dev/todo.txt | 48 | ||||
| -rw-r--r-- | docutils/docs/user/config.txt | 18 | ||||
| -rw-r--r-- | docutils/docutils/utils/math/tex2mathml_extern.py | 147 | ||||
| -rw-r--r-- | docutils/docutils/writers/html_base/__init__.py | 61 |
4 files changed, 233 insertions, 41 deletions
diff --git a/docutils/docs/dev/todo.txt b/docutils/docs/dev/todo.txt index ed3591c29..467375d1c 100644 --- a/docutils/docs/dev/todo.txt +++ b/docutils/docs/dev/todo.txt @@ -1317,40 +1317,46 @@ MathML_ Converters from LaTeX to MathML include * latex_math_ (Python) by Jens Jørgen Mortensen in the Docutils sandbox - * Blahtex_ (C++) - * MathToWeb_ (Java) - * TeX4ht_ (TeX based) - * LaTeXML_ (Perl) with the command latexmlmath_, e.g.:: - latexmlmath --contentmathml=- $\vec{\alpha}$ + This is the base for Docutils latex2mathml_ module used by + default with ``--math-output=MathML``. + + * blahtexml_ (C++), supported with ``--math-output=MathML blahtexml``. + + Fast conversion, support for many symbols and environments, but + no "align" (or other equation-aligning) environment. + + * LaTeXML_ (Perl), supported with ``--math-output=MathML latexml``. + Comprehensive macro support but very slow + + * TtM_ (C), ``--math-output=MathML ttm``, support may be removed. + + No "matrix", "align" and "cases" environments. + + * MathToWeb_ (Java) + * TeX4ht_ (TeX based) * itex_ (also `used in Abiword`__) - * TtM_ (C, non free, free binary for Linux) with an `online-trial page`__ * `Steve’s LATEX-to-MathML translator`_ ('mini-language', javascript, Python) - latex_math_ is the base for the current latex2mathml_ module used - with ``--math-output=MathML``. - * Write a new converter based on: + * Write a new converter? E.g. based on: * a generic tokenizer (see e.g. a `latex-codec recipe`_, `updated latex-codec`_, ) * the Unicode-Char <-> LaTeX mappings database unimathsymbols_ __ http://msevior.livejournal.com/26377.html - __ http://hutchinson.belmont.ma.us/tth/mml/ttmmozform.html .. _MathML: http://www.w3.org/TR/MathML2/ .. _latex_math: ../../../sandbox/jensj/latex_math/ .. _latex2mathml: ../../docutils/math/latex2mathml.py - .. _Blahtex: http://gva.noekeon.org/blahtexml/ - .. _MathToWeb: http://www.mathtoweb.com/ - .. _TeX4ht: http://www.tug.org/applications/tex4ht/mn.html + .. 
_blahtexml: http://gva.noekeon.org/blahtexml/ .. _LaTeXML: http://dlmf.nist.gov/LaTeXML/ - .. _latexmlmath: - http://dlmf.nist.gov/LaTeXML/manual/commands/latexmlmath.html - .. _itex: http://golem.ph.utexas.edu/~distler/blog/itex2MMLcommands.html .. _ttm: http://hutchinson.belmont.ma.us/tth/mml/ + .. _TeX4ht: http://www.tug.org/applications/tex4ht/mn.html + .. _MathToWeb: http://www.mathtoweb.com/ + .. _itex: http://golem.ph.utexas.edu/~distler/blog/itex2MMLcommands.html .. _Steve’s LATEX-to-MathML translator: http://www.gold-saucer.org/mathml/greasemonkey/dist/display-latex .. _latex-codec recipe: @@ -1367,19 +1373,19 @@ HTML/CSS format math in standard HTML enhanced by CSS rules (Overview__, `Examples and experiments`__). - LaTeX-math to HTML/CSS converters include + The ``math-output=html`` option uses the converter from eLyXer_ + (included with Docutils). + + Alternatives: LaTeX-math to HTML/CSS converters include * TtH_ (C) * Hevea_ (Objective Caml) - * eLyXer_ (Python) - - The ``math-output=html`` option uses the converter from eLyXer. __ http://www.cs.tut.fi/~jkorpela/math/ __ http://www.zipcon.net/~swhite/docs/math/math.html + .. _elyxer: http://elyxer.nongnu.org/ .. _TtH: http://hutchinson.belmont.ma.us/tth/index.html .. _Hevea: http://para.inria.fr/~maranget/hevea/ - .. _elyxer: http://elyxer.nongnu.org/ images (PNG or SVG) like e.g. Wikipedia. (e.g. with dvisvgm_ or the diff --git a/docutils/docs/user/config.txt b/docutils/docs/user/config.txt index 3d50d6d63..2d929518d 100644 --- a/docutils/docs/user/config.txt +++ b/docutils/docs/user/config.txt @@ -977,6 +977,21 @@ the output document. Supported values are (case insensitive): Transitional. However, MathML-enabled browsers will render it fine. + An external converter can be appended after whitespace: + + * ``math-output: MathML blahtexml`` + + blahtexml_ (C++) Fast conversion, support for many symbols and + environments, but no "align" (or other equation-aligning) environment. 
+ + * ``--math-output=MathML latexml``. + + LaTeXML_ (Perl) Comprehensive macro support but very slow + + * ``--math-output=MathML ttm``, support may be removed. + + No "matrix", "align" and "cases" environments. + :LaTeX: Include literal LaTeX code. @@ -991,6 +1006,9 @@ New in Docutils 0.8. .. _MathJax: http://www.mathjax.org/ .. _MathPlayer: http://www.dessci.com/en/products/mathplayer/ .. _MathML: http://www.w3.org/TR/MathML/ +.. _blahtexml: http://gva.noekeon.org/blahtexml/ +.. _LaTeXML: http://dlmf.nist.gov/LaTeXML/ +.. _ttm: http://hutchinson.belmont.ma.us/tth/mml/ option_limit ~~~~~~~~~~~~ diff --git a/docutils/docutils/utils/math/tex2mathml_extern.py b/docutils/docutils/utils/math/tex2mathml_extern.py new file mode 100644 index 000000000..f315977b8 --- /dev/null +++ b/docutils/docutils/utils/math/tex2mathml_extern.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# :Id: $Id$ +# :Copyright: © 2015 Günter Milde. +# :License: Released under the terms of the `2-Clause BSD license`_, in short: +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. +# This file is offered as-is, without any warranty. +# +# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause + +# Wrappers for TeX->MathML conversion by external tools +# ===================================================== + +import subprocess + +document_template = r"""\documentclass{article} +\usepackage{amsmath} +\begin{document} +%s +\end{document} +""" + +def latexml(math_code, reporter=None): + """Convert LaTeX math code to MathML with LaTeXML_ + + .. 
_LaTeXML: http://dlmf.nist.gov/LaTeXML/ + """ + p = subprocess.Popen(['latexml', + '-', # read from stdin + # '--preload=amsmath', + '--inputencoding=utf8', + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True) + p.stdin.write((document_template % math_code).encode('utf8')) + p.stdin.close() + latexml_code = p.stdout.read() + latexml_err = p.stderr.read().decode('utf8') + if reporter and latexml_err.find('Error') >= 0 or not latexml_code: + reporter.error(latexml_err) + + post_p = subprocess.Popen(['latexmlpost', + '-', + '--nonumbersections', + '--format=xhtml', + # '--linelength=78', # experimental + '--' + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True) + post_p.stdin.write(latexml_code) + post_p.stdin.close() + result = post_p.stdout.read().decode('utf8') + post_p_err = post_p.stderr.read().decode('utf8') + if reporter and post_p_err.find('Error') >= 0 or not result: + reporter.error(post_p_err) + + # extract MathML code: + start,end = result.find('<math'), result.find('</math>')+7 + result = result[start:end] + if 'class="ltx_ERROR' in result: + raise SyntaxError(result) + return result + +def ttm(math_code, reporter=None): + """Convert LaTeX math code to MathML with TtM_ + + .. _TtM: http://hutchinson.belmont.ma.us/tth/mml/ + """ + p = subprocess.Popen(['ttm', + # '-i', # italic font for equations. Default roman. + '-u', # unicode character encoding. (Default iso-8859-1). 
+ '-r', # output raw MathML (no preamble or postlude) + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True) + p.stdin.write((document_template % math_code).encode('utf8')) + p.stdin.close() + result = p.stdout.read() + err = p.stderr.read().decode('utf8') + if err.find('**** Unknown') >= 0: + msg = '\n'.join([line for line in err.splitlines() + if line.startswith('****')]) + raise SyntaxError('\nMessage from external converter TtM:\n'+ msg) + if reporter and err.find('**** Error') >= 0 or not result: + reporter.error(err) + start,end = result.find('<math'), result.find('</math>')+7 + result = result[start:end] + return result + +def blahtexml(math_code, inline=True, reporter=None): + """Convert LaTeX math code to MathML with blahtexml_ + + .. _blahtexml: http://gva.noekeon.org/blahtexml/ + """ + options = ['--mathml', + '--indented', + '--spacing', 'moderate', + '--mathml-encoding', 'raw', + '--other-encoding', 'raw', + '--doctype-xhtml+mathml', + '--annotate-TeX', + ] + if inline: + mathmode_arg = '' + else: + mathmode_arg = 'mode="display"' + options.append('--displaymath') + + p = subprocess.Popen(['blahtexml']+options, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True) + p.stdin.write(math_code.encode('utf8')) + p.stdin.close() + result = p.stdout.read().decode('utf8') + err = p.stderr.read().decode('utf8') + + print err + if result.find('<error>') >= 0: + raise SyntaxError('\nMessage from external converter blahtexml:\n' + +result[result.find('<message>')+9:result.find('</message>')]) + if reporter and (err.find('**** Error') >= 0 or not result): + reporter.error(err) + start,end = result.find('<markup>')+9, result.find('</markup>') + result = ('<math xmlns="http://www.w3.org/1998/Math/MathML"%s>\n' + '%s</math>\n') % (mathmode_arg, result[start:end]) + return result + +# self-test + +if __name__ == "__main__": + example = ur'\frac{\partial \sin^2(\alpha)}{\partial \vec 
r} \varpi \, \text{Grüße}' + # print latexml(example).encode('utf8') + # print ttm(example)#.encode('utf8') + print blahtexml(example).encode('utf8') diff --git a/docutils/docutils/writers/html_base/__init__.py b/docutils/docutils/writers/html_base/__init__.py index 38b9a2644..108b7fbc1 100644 --- a/docutils/docutils/writers/html_base/__init__.py +++ b/docutils/docutils/writers/html_base/__init__.py @@ -44,8 +44,8 @@ import docutils from docutils import frontend, nodes, utils, writers, languages, io from docutils.utils.error_reporting import SafeString from docutils.transforms import writer_aux -from docutils.utils.math import unichar2tex, pick_math_environment, math2html -from docutils.utils.math.latex2mathml import parse_latex_math +from docutils.utils.math import (unichar2tex, pick_math_environment, + math2html, latex2mathml, tex2mathml_extern) class Writer(writers.Writer): @@ -132,8 +132,9 @@ class Writer(writers.Writer): 'Defined styles: "borderless". Default: ""', ['--table-style'], {'default': ''}), - ('Math output format (one of "MathML", "HTML", "MathJax" ' - 'or "LaTeX") and options(s). Default: "HTML math.css"', + ('Math output format (one of "MathML", "HTML", "MathJax", ' + 'or "LaTeX") and option(s). ' + 'Default: "HTML math.css"', ['--math-output'], {'default': 'HTML math.css'}), ('Prepend an XML declaration. (Thwarts HTML5 conformance.) ' @@ -1141,15 +1142,9 @@ class HTMLTranslator(nodes.NodeVisitor): def visit_math(self, node, math_env=''): # If the method is called from visit_math_block(), math_env != ''. - # As there is no native HTML math support, we provide alternatives: - # LaTeX and MathJax math_output modes simply wrap the content, - # HTML and MathML math_output modes also convert the math_code. 
- if self.math_output not in ('mathml', 'html', 'mathjax', 'latex'): - self.document.reporter.error( - 'math-output format "%s" not supported ' - 'falling back to "latex"'% self.math_output) - self.math_output = 'latex' - # + # As there is no native HTML math support, we provide alternatives + # for the math-output: LaTeX and MathJax simply wrap the content, + # HTML and MathML also convert the math_code. # HTML container tags = {# math_output: (block, inline, class-arguments) 'mathml': ('div', '', ''), @@ -1157,22 +1152,31 @@ class HTMLTranslator(nodes.NodeVisitor): 'mathjax': ('div', 'span', 'math'), 'latex': ('pre', 'tt', 'math'), } + if self.math_output not in tags: + self.document.reporter.error( + 'math-output format "%s" not supported ' + 'falling back to "latex"'% self.math_output) + self.math_output = 'latex' tag = tags[self.math_output][math_env == ''] clsarg = tags[self.math_output][2] # LaTeX container wrappers = {# math_mode: (inline, block) - 'mathml': (None, None), + 'mathml': ('$%s$', u'\\begin{%s}\n%s\n\\end{%s}'), 'html': ('$%s$', u'\\begin{%s}\n%s\n\\end{%s}'), 'mathjax': ('\(%s\)', u'\\begin{%s}\n%s\n\\end{%s}'), 'latex': (None, None), } wrapper = wrappers[self.math_output][math_env != ''] + if self.math_output == 'mathml' and (not self.math_output_options or + self.math_output_options[0] == 'blahtexml'): + wrapper = None # get and wrap content math_code = node.astext().translate(unichar2tex.uni2tex_table) - if wrapper and math_env: - math_code = wrapper % (math_env, math_code, math_env) - elif wrapper: - math_code = wrapper % math_code + if wrapper: + try: # wrapper with three "%s" + math_code = wrapper % (math_env, math_code, math_env) + except TypeError: # wrapper with one "%s" + math_code = wrapper % math_code # settings and conversion if self.math_output in ('latex', 'mathjax'): math_code = self.encode(math_code) @@ -1191,9 +1195,26 @@ class HTMLTranslator(nodes.NodeVisitor): elif self.math_output == 'mathml': self.doctype = 
self.doctype_mathml # self.content_type = self.content_type_mathml + converter = ' '.join(self.math_output_options).lower() try: - mathml_tree = parse_latex_math(math_code, inline=not(math_env)) - math_code = ''.join(mathml_tree.xml()) + if converter == 'latexml': + math_code = tex2mathml_extern.latexml(math_code, + self.document.reporter) + elif converter == 'ttm': + math_code = tex2mathml_extern.ttm(math_code, + self.document.reporter) + elif converter == 'blahtexml': + math_code = tex2mathml_extern.blahtexml(math_code, + inline=not(math_env), + reporter=self.document.reporter) + elif not converter: + math_code = latex2mathml.tex2mathml(math_code, + inline=not(math_env)) + else: + self.document.reporter.error('option "%s" not supported ' + 'with math-output "MathML"') + except OSError: + raise OSError('is "latexmlmath" in your PATH?') except SyntaxError, err: err_node = self.document.reporter.error(err, base_node=node) self.visit_system_message(err_node) |
