diff options
| author | Waylan Limberg <waylan.limberg@icloud.com> | 2020-06-23 16:04:31 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-06-23 16:04:31 -0400 |
| commit | 10058fae6577e79b786f4e185218aebb1b53e937 (patch) | |
| tree | e57b70fa1f83d89bd9e8db31bc1ab18f4f91ddf2 /markdown/extensions | |
| parent | dbb9b3d766e847f1909fa0c92c6b997f0e7c868d (diff) | |
| download | python-markdown-10058fae6577e79b786f4e185218aebb1b53e937.tar.gz | |
Refactor fenced_code & codehilite options (#816)
* Add `language-` prefix to output when syntax highlighting is
disabled for both codehilite and fenced_code extensions.
* Add `lang_prefix` config option to customize the prefix.
* Add a 'pygments' env to tox which runs the tests with Pygments
installed. Pygments is locked to a specific version in the env.
* Updated codehilite to accept any Pygments options.
* Refactor fenced code attributes.
- ID attr is defined on `pre` tag.
- Add support for attr_list extension, which allows setting arbitrary
attributes.
- When syntax highlighting is enabled, any pygments options can
be defined per block in the attr list.
- For backward compatibility, continue to support `hl_lines` outside
of an attr_list. That is the only attr other than lang which is allowed
without the brackets (`{}`) of an attr list. Note that if the brackets
exist, then everything, including lang and hl_lines, must be within
them.
* Resolves #775. Resolves #334. Addresses #652.
Diffstat (limited to 'markdown/extensions')
| -rw-r--r-- | markdown/extensions/attr_list.py | 1 | ||||
| -rw-r--r-- | markdown/extensions/codehilite.py | 155 | ||||
| -rw-r--r-- | markdown/extensions/fenced_code.py | 139 |
3 files changed, 201 insertions, 94 deletions
diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 23c6ad0..2a39fc0 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -161,6 +161,7 @@ class AttrListTreeprocessor(Treeprocessor): class AttrListExtension(Extension): def extendMarkdown(self, md): md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8) + md.registerExtension(self) def makeExtension(**kwargs): # pragma: no cover diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index ac45ede..915dfcf 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -17,13 +17,14 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php) from . import Extension from ..treeprocessors import Treeprocessor +from ..util import parseBoolValue -try: +try: # pragma: no cover from pygments import highlight from pygments.lexers import get_lexer_by_name, guess_lexer from pygments.formatters import get_formatter_by_name pygments = True -except ImportError: +except ImportError: # pragma: no cover pygments = False @@ -38,52 +39,78 @@ def parse_hl_lines(expr): try: return list(map(int, expr.split())) - except ValueError: + except ValueError: # pragma: no cover return [] # ------------------ The Main CodeHilite Class ---------------------- class CodeHilite: """ - Determine language of source code, and pass it into pygments hilighter. + Determine language of source code, and pass it on to the Pygments highlighter. - Basic Usage: - >>> code = CodeHilite(src = 'some text') - >>> html = code.hilite() + Usage: + code = CodeHilite(src=some_code, lang='python') + html = code.hilite() + Arguments: * src: Source string or any object with a .readline attribute. - * linenums: (Boolean) Set line numbering to 'on' (True), - 'off' (False) or 'auto'(None). Set to 'auto' by default. + * lang: String name of Pygments lexer to use for highlighting. Default: `None`. 
- * guess_lang: (Boolean) Turn language auto-detection - 'on' or 'off' (on by default). + * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid + value. Default: `True`. - * css_class: Set class name of wrapper div ('codehilite' by default). + * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is + instead wrapped for highlighting by a JavaScript library. Default: `True`. - * hl_lines: (List of integers) Lines to emphasize, 1-indexed. + * linenums: An alias to Pygments `linenos` formatter option. Default: `None`. - Low Level Usage: - >>> code = CodeHilite() - >>> code.src = 'some text' # String or anything with a .readline attr. - >>> code.linenos = True # Turns line numbering on or of. - >>> html = code.hilite() + * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'. + + * lang_prefix: Prefix prepended to the language when `use_pygments` is `False`. + Default: "language-". + + Other Options: + Any other options are accepted and passed on to the lexer and formatter. Therefore, + valid options include any options which are accepted by the `html` formatter or + whichever lexer the code's language uses. Note that most lexers do not have any + options. However, a few have very useful options, such as PHP's `startinline` option. + Any invalid options are ignored without error. + + Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter + Lexer Options: https://pygments.org/docs/lexers/ + + Advanced Usage: + code = CodeHilite( + src = some_code, + lang = 'php', + startinline = True, # Lexer option. Snippet does not start with `<?php`. + linenostart = 42, # Formatter option. Snippet starts on line 42. + hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50. + linenos = 'inline' # Formatter option. Avoid alignment problems. 
+ ) + html = code.hilite() """ - def __init__(self, src=None, linenums=None, guess_lang=True, - css_class="codehilite", lang=None, style='default', - noclasses=False, tab_length=4, hl_lines=None, use_pygments=True): + def __init__(self, src, **options): self.src = src - self.lang = lang - self.linenums = linenums - self.guess_lang = guess_lang - self.css_class = css_class - self.style = style - self.noclasses = noclasses - self.tab_length = tab_length - self.hl_lines = hl_lines or [] - self.use_pygments = use_pygments + self.lang = options.pop('lang', None) + self.guess_lang = options.pop('guess_lang', True) + self.use_pygments = options.pop('use_pygments', True) + self.lang_prefix = options.pop('lang_prefix', 'language-') + + if 'linenos' not in options: + options['linenos'] = options.pop('linenums', None) + if 'cssclass' not in options: + options['cssclass'] = options.pop('css_class', 'codehilite') + if 'wrapcode' not in options: + # Override pygments default + options['wrapcode'] = True + # Disallow use of `full` option + options['full'] = False + + self.options = options def hilite(self): """ @@ -103,22 +130,16 @@ class CodeHilite: if pygments and self.use_pygments: try: - lexer = get_lexer_by_name(self.lang) + lexer = get_lexer_by_name(self.lang, **self.options) except ValueError: try: if self.guess_lang: - lexer = guess_lexer(self.src) + lexer = guess_lexer(self.src, **self.options) else: - lexer = get_lexer_by_name('text') - except ValueError: - lexer = get_lexer_by_name('text') - formatter = get_formatter_by_name('html', - linenos=self.linenums, - cssclass=self.css_class, - style=self.style, - noclasses=self.noclasses, - hl_lines=self.hl_lines, - wrapcode=True) + lexer = get_lexer_by_name('text', **self.options) + except ValueError: # pragma: no cover + lexer = get_lexer_by_name('text', **self.options) + formatter = get_formatter_by_name('html', **self.options) return highlight(self.src, lexer, formatter) else: # just escape and build markup usable by JS 
highlighting libs @@ -128,14 +149,17 @@ class CodeHilite: txt = txt.replace('"', '"') classes = [] if self.lang: - classes.append('language-%s' % self.lang) - if self.linenums: + classes.append('{}{}'.format(self.lang_prefix, self.lang)) + if self.options['linenos']: classes.append('linenums') class_str = '' if classes: - class_str = ' class="%s"' % ' '.join(classes) - return '<pre class="%s"><code%s>%s</code></pre>\n' % \ - (self.css_class, class_str, txt) + class_str = ' class="{}"'.format(' '.join(classes)) + return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format( + self.options['cssclass'], + class_str, + txt + ) def _parseHeader(self): """ @@ -176,16 +200,16 @@ class CodeHilite: # we have a match try: self.lang = m.group('lang').lower() - except IndexError: + except IndexError: # pragma: no cover self.lang = None if m.group('path'): # path exists - restore first line lines.insert(0, fl) - if self.linenums is None and m.group('shebang'): + if self.options['linenos'] is None and m.group('shebang'): # Overridable and Shebang exists - use line numbers - self.linenums = True + self.options['linenos'] = True - self.hl_lines = parse_hl_lines(m.group('hl_lines')) + self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines')) else: # No match lines.insert(0, fl) @@ -213,13 +237,9 @@ class HiliteTreeprocessor(Treeprocessor): if len(block) == 1 and block[0].tag == 'code': code = CodeHilite( self.code_unescape(block[0].text), - linenums=self.config['linenums'], - guess_lang=self.config['guess_lang'], - css_class=self.config['css_class'], - style=self.config['pygments_style'], - noclasses=self.config['noclasses'], tab_length=self.md.tab_length, - use_pygments=self.config['use_pygments'] + style=self.config.pop('pygments_style', 'default'), + **self.config ) placeholder = self.md.htmlStash.store(code.hilite()) # Clear codeblock in etree instance @@ -237,7 +257,7 @@ class CodeHiliteExtension(Extension): # define default configs self.config = { 'linenums': [None, - 
"Use lines numbers. True=yes, False=no, None=auto"], + "Use lines numbers. True|table|inline=yes, False=no, None=auto"], 'guess_lang': [True, "Automatic language detection - Default: True"], 'css_class': ["codehilite", @@ -252,10 +272,25 @@ class CodeHiliteExtension(Extension): 'use_pygments': [True, 'Use Pygments to Highlight code blocks. ' 'Disable if using a JavaScript library. ' - 'Default: True'] + 'Default: True'], + 'lang_prefix': [ + 'language-', + 'Prefix prepended to the language when use_pygments is false. Default: "language-"' + ] } - super().__init__(**kwargs) + for key, value in kwargs.items(): + if key in self.config: + self.setConfig(key, value) + else: + # manually set unknown keywords. + if isinstance(value, str): + try: + # Attempt to parse str as a bool value + value = parseBoolValue(value, preserve_none=True) + except ValueError: + pass # Assume it's not a bool value. Use as-is. + self.config[key] = [value, ''] def extendMarkdown(self, md): """ Add HilitePostprocessor to Markdown instance. """ diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index 71fac1a..e3b3f1b 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -15,78 +15,131 @@ All changes Copyright 2008-2014 The Python Markdown Project License: [BSD](https://opensource.org/licenses/bsd-license.php) """ + +from textwrap import dedent from . import Extension from ..preprocessors import Preprocessor from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines +from .attr_list import get_attrs, AttrListExtension +from ..util import parseBoolValue import re class FencedCodeExtension(Extension): + def __init__(self, **kwargs): + self.config = { + 'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"'] + } + super().__init__(**kwargs) def extendMarkdown(self, md): """ Add FencedBlockPreprocessor to the Markdown instance. 
""" md.registerExtension(self) - md.preprocessors.register(FencedBlockPreprocessor(md), 'fenced_code_block', 25) + md.preprocessors.register(FencedBlockPreprocessor(md, self.getConfigs()), 'fenced_code_block', 25) class FencedBlockPreprocessor(Preprocessor): - FENCED_BLOCK_RE = re.compile(r''' -(?P<fence>^(?:~{3,}|`{3,}))[ ]* # Opening ``` or ~~~ -(\{?\.?(?P<lang>[\w#.+-]*))?[ ]* # Optional {, and lang -# Optional highlight lines, single- or double-quote-delimited -(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]* -}?[ ]*\n # Optional closing } -(?P<code>.*?)(?<=\n) -(?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE) - CODE_WRAP = '<pre><code%s>%s</code></pre>' - LANG_TAG = ' class="%s"' - - def __init__(self, md): + FENCED_BLOCK_RE = re.compile( + dedent(r''' + (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence + ((\{(?P<attrs>[^\}\n]*)\})?| # (optional {attrs} or + (\.?(?P<lang>[\w#.+-]*))?[ ]* # optional (.)lang + (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?) # optional hl_lines) + [ ]*\n # newline (end of opening fence) + (?P<code>.*?)(?<=\n) # the code block + (?P=fence)[ ]*$ # closing fence + '''), + re.MULTILINE | re.DOTALL | re.VERBOSE + ) + + def __init__(self, md, config): super().__init__(md) - - self.checked_for_codehilite = False + self.config = config + self.checked_for_deps = False self.codehilite_conf = {} + self.use_attr_list = False + # List of options to convert to bool values + self.bool_options = [ + 'linenums', + 'guess_lang', + 'noclasses', + 'use_pygments' + ] def run(self, lines): """ Match and store Fenced Code Blocks in the HtmlStash. 
""" - # Check for code hilite extension - if not self.checked_for_codehilite: + # Check for dependent extensions + if not self.checked_for_deps: for ext in self.md.registeredExtensions: if isinstance(ext, CodeHiliteExtension): - self.codehilite_conf = ext.config - break + self.codehilite_conf = ext.getConfigs() + if isinstance(ext, AttrListExtension): + self.use_attr_list = True - self.checked_for_codehilite = True + self.checked_for_deps = True text = "\n".join(lines) while 1: m = self.FENCED_BLOCK_RE.search(text) if m: - lang = '' - if m.group('lang'): - lang = self.LANG_TAG % m.group('lang') + lang, id, classes, config = None, '', [], {} + if m.group('attrs'): + id, classes, config = self.handle_attrs(get_attrs(m.group('attrs'))) + if len(classes): + lang = classes[0] + else: + if m.group('lang'): + lang = m.group('lang') + classes.append(lang) + if m.group('hl_lines'): + # Support hl_lines outside of attrs for backward-compatibility + config['hl_lines'] = parse_hl_lines(m.group('hl_lines')) # If config is not empty, then the codehighlite extension # is enabled, so we call it to highlight the code - if self.codehilite_conf: + if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True): + local_config = self.codehilite_conf.copy() + local_config.update(config) + # Combine classes with cssclass. Ensure cssclass is at end + # as pygments appends a suffix under certain circumstances. + # Ignore ID as Pygments does not offer an option to set it. 
+ if classes: + local_config['css_class'] = '{} {}'.format( + ' '.join(classes), + local_config['css_class'] + ) highliter = CodeHilite( m.group('code'), - linenums=self.codehilite_conf['linenums'][0], - guess_lang=self.codehilite_conf['guess_lang'][0], - css_class=self.codehilite_conf['css_class'][0], - style=self.codehilite_conf['pygments_style'][0], - use_pygments=self.codehilite_conf['use_pygments'][0], - lang=(m.group('lang') or None), - noclasses=self.codehilite_conf['noclasses'][0], - hl_lines=parse_hl_lines(m.group('hl_lines')) + lang=lang, + style=local_config.pop('pygments_style', 'default'), + **local_config ) code = highliter.hilite() else: - code = self.CODE_WRAP % (lang, - self._escape(m.group('code'))) + id_attr = class_attr = kv_pairs = '' + if classes: + class_attr = ' class="{}{}"'.format( + self.config.get('lang_prefix', 'language-'), + ' '.join(classes) + ) + if id: + id_attr = ' id="{}"'.format(id) + if self.use_attr_list and config and not config.get('use_pygments', False): + # Only assign key/value pairs to code element if attr_list ext is enabled, key/value pairs + # were defined on the code block, and the `use_pygments` key was not set to True. The + # `use_pygments` key could be either set to False or not defined. It is omitted from output. 
+ kv_pairs = ' ' + ' '.join( + '{k}="{v}"'.format(k=k, v=v) for k, v in config.items() if k != 'use_pygments' + ) + code = '<pre{id}><code{cls}{kv}>{code}</code></pre>'.format( + id=id_attr, + cls=class_attr, + kv=kv_pairs, + code=self._escape(m.group('code')) + ) placeholder = self.md.htmlStash.store(code) text = '{}\n{}\n{}'.format(text[:m.start()], @@ -96,6 +149,24 @@ class FencedBlockPreprocessor(Preprocessor): break return text.split("\n") + def handle_attrs(self, attrs): + """ Return tuple: (id, [list, of, classes], {configs}) """ + id = '' + classes = [] + configs = {} + for k, v in attrs: + if k == 'id': + id = v + elif k == '.': + classes.append(v) + elif k == 'hl_lines': + configs[k] = parse_hl_lines(v) + elif k in self.bool_options: + configs[k] = parseBoolValue(v, fail_on_errors=False, preserve_none=True) + else: + configs[k] = v + return id, classes, configs + def _escape(self, txt): """ basic html escaping """ txt = txt.replace('&', '&') |
