summaryrefslogtreecommitdiff
path: root/markdown/extensions
diff options
context:
space:
mode:
author Waylan Limberg <waylan.limberg@icloud.com> 2020-06-23 16:04:31 -0400
committer GitHub <noreply@github.com> 2020-06-23 16:04:31 -0400
commit 10058fae6577e79b786f4e185218aebb1b53e937 (patch)
tree e57b70fa1f83d89bd9e8db31bc1ab18f4f91ddf2 /markdown/extensions
parent dbb9b3d766e847f1909fa0c92c6b997f0e7c868d (diff)
download python-markdown-10058fae6577e79b786f4e185218aebb1b53e937.tar.gz
Refactor fenced_code & codehilite options (#816)
* Add `language-` prefix to output when syntax highlighting is disabled for both codehilite and fenced_code extensions.
* Add `lang_prefix` config option to customize the prefix.
* Add a 'pygments' env to tox which runs the tests with Pygments installed. Pygments is locked to a specific version in the env.
* Updated codehilite to accept any Pygments options.
* Refactor fenced code attributes.
    - ID attr is defined on `pre` tag.
    - Add support for attr_list extension, which allows setting arbitrary attributes.
    - When syntax highlighting is enabled, any pygments options can be defined per block in the attr list.
    - For backward compatibility, continue to support `hl_lines` outside of an attr_list. That is the only attr other than lang which is allowed without the brackets (`{}`) of an attr list. Note that if the brackets exist, then everything, including lang and hl_lines, must be within them.
* Resolves #775. Resolves #334. Addresses #652.
Diffstat (limited to 'markdown/extensions')
-rw-r--r-- markdown/extensions/attr_list.py 1
-rw-r--r-- markdown/extensions/codehilite.py 155
-rw-r--r-- markdown/extensions/fenced_code.py 139
3 files changed, 201 insertions, 94 deletions
diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
index 23c6ad0..2a39fc0 100644
--- a/markdown/extensions/attr_list.py
+++ b/markdown/extensions/attr_list.py
@@ -161,6 +161,7 @@ class AttrListTreeprocessor(Treeprocessor):
class AttrListExtension(Extension):
def extendMarkdown(self, md):
md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
+ md.registerExtension(self)
def makeExtension(**kwargs): # pragma: no cover
diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py
index ac45ede..915dfcf 100644
--- a/markdown/extensions/codehilite.py
+++ b/markdown/extensions/codehilite.py
@@ -17,13 +17,14 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
from . import Extension
from ..treeprocessors import Treeprocessor
+from ..util import parseBoolValue
-try:
+try: # pragma: no cover
from pygments import highlight
from pygments.lexers import get_lexer_by_name, guess_lexer
from pygments.formatters import get_formatter_by_name
pygments = True
-except ImportError:
+except ImportError: # pragma: no cover
pygments = False
@@ -38,52 +39,78 @@ def parse_hl_lines(expr):
try:
return list(map(int, expr.split()))
- except ValueError:
+ except ValueError: # pragma: no cover
return []
# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
"""
- Determine language of source code, and pass it into pygments hilighter.
+ Determine language of source code, and pass it on to the Pygments highlighter.
- Basic Usage:
- >>> code = CodeHilite(src = 'some text')
- >>> html = code.hilite()
+ Usage:
+ code = CodeHilite(src=some_code, lang='python')
+ html = code.hilite()
+ Arguments:
* src: Source string or any object with a .readline attribute.
- * linenums: (Boolean) Set line numbering to 'on' (True),
- 'off' (False) or 'auto'(None). Set to 'auto' by default.
+ * lang: String name of Pygments lexer to use for highlighting. Default: `None`.
- * guess_lang: (Boolean) Turn language auto-detection
- 'on' or 'off' (on by default).
+ * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
+ value. Default: `True`.
- * css_class: Set class name of wrapper div ('codehilite' by default).
+ * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
+ instead wrapped for highlighting by a JavaScript library. Default: `True`.
- * hl_lines: (List of integers) Lines to emphasize, 1-indexed.
+ * linenums: An alias to Pygments `linenos` formatter option. Default: `None`.
- Low Level Usage:
- >>> code = CodeHilite()
- >>> code.src = 'some text' # String or anything with a .readline attr.
- >>> code.linenos = True # Turns line numbering on or of.
- >>> html = code.hilite()
+ * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
+
+ * lang_prefix: Prefix prepended to the language when `use_pygments` is `False`.
+ Default: "language-".
+
+ Other Options:
+ Any other options are accepted and passed on to the lexer and formatter. Therefore,
+ valid options include any options which are accepted by the `html` formatter or
+ whichever lexer the code's language uses. Note that most lexers do not have any
+ options. However, a few have very useful options, such as PHP's `startinline` option.
+ Any invalid options are ignored without error.
+
+ Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
+ Lexer Options: https://pygments.org/docs/lexers/
+
+ Advanced Usage:
+ code = CodeHilite(
+ src = some_code,
+ lang = 'php',
+ startinline = True, # Lexer option. Snippet does not start with `<?php`.
+ linenostart = 42, # Formatter option. Snippet starts on line 42.
+ hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
+ linenos = 'inline' # Formatter option. Avoid alignment problems.
+ )
+ html = code.hilite()
"""
- def __init__(self, src=None, linenums=None, guess_lang=True,
- css_class="codehilite", lang=None, style='default',
- noclasses=False, tab_length=4, hl_lines=None, use_pygments=True):
+ def __init__(self, src, **options):
self.src = src
- self.lang = lang
- self.linenums = linenums
- self.guess_lang = guess_lang
- self.css_class = css_class
- self.style = style
- self.noclasses = noclasses
- self.tab_length = tab_length
- self.hl_lines = hl_lines or []
- self.use_pygments = use_pygments
+ self.lang = options.pop('lang', None)
+ self.guess_lang = options.pop('guess_lang', True)
+ self.use_pygments = options.pop('use_pygments', True)
+ self.lang_prefix = options.pop('lang_prefix', 'language-')
+
+ if 'linenos' not in options:
+ options['linenos'] = options.pop('linenums', None)
+ if 'cssclass' not in options:
+ options['cssclass'] = options.pop('css_class', 'codehilite')
+ if 'wrapcode' not in options:
+ # Override pygments default
+ options['wrapcode'] = True
+ # Disallow use of `full` option
+ options['full'] = False
+
+ self.options = options
def hilite(self):
"""
@@ -103,22 +130,16 @@ class CodeHilite:
if pygments and self.use_pygments:
try:
- lexer = get_lexer_by_name(self.lang)
+ lexer = get_lexer_by_name(self.lang, **self.options)
except ValueError:
try:
if self.guess_lang:
- lexer = guess_lexer(self.src)
+ lexer = guess_lexer(self.src, **self.options)
else:
- lexer = get_lexer_by_name('text')
- except ValueError:
- lexer = get_lexer_by_name('text')
- formatter = get_formatter_by_name('html',
- linenos=self.linenums,
- cssclass=self.css_class,
- style=self.style,
- noclasses=self.noclasses,
- hl_lines=self.hl_lines,
- wrapcode=True)
+ lexer = get_lexer_by_name('text', **self.options)
+ except ValueError: # pragma: no cover
+ lexer = get_lexer_by_name('text', **self.options)
+ formatter = get_formatter_by_name('html', **self.options)
return highlight(self.src, lexer, formatter)
else:
# just escape and build markup usable by JS highlighting libs
@@ -128,14 +149,17 @@ class CodeHilite:
txt = txt.replace('"', '&quot;')
classes = []
if self.lang:
- classes.append('language-%s' % self.lang)
- if self.linenums:
+ classes.append('{}{}'.format(self.lang_prefix, self.lang))
+ if self.options['linenos']:
classes.append('linenums')
class_str = ''
if classes:
- class_str = ' class="%s"' % ' '.join(classes)
- return '<pre class="%s"><code%s>%s</code></pre>\n' % \
- (self.css_class, class_str, txt)
+ class_str = ' class="{}"'.format(' '.join(classes))
+ return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
+ self.options['cssclass'],
+ class_str,
+ txt
+ )
def _parseHeader(self):
"""
@@ -176,16 +200,16 @@ class CodeHilite:
# we have a match
try:
self.lang = m.group('lang').lower()
- except IndexError:
+ except IndexError: # pragma: no cover
self.lang = None
if m.group('path'):
# path exists - restore first line
lines.insert(0, fl)
- if self.linenums is None and m.group('shebang'):
+ if self.options['linenos'] is None and m.group('shebang'):
# Overridable and Shebang exists - use line numbers
- self.linenums = True
+ self.options['linenos'] = True
- self.hl_lines = parse_hl_lines(m.group('hl_lines'))
+ self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
else:
# No match
lines.insert(0, fl)
@@ -213,13 +237,9 @@ class HiliteTreeprocessor(Treeprocessor):
if len(block) == 1 and block[0].tag == 'code':
code = CodeHilite(
self.code_unescape(block[0].text),
- linenums=self.config['linenums'],
- guess_lang=self.config['guess_lang'],
- css_class=self.config['css_class'],
- style=self.config['pygments_style'],
- noclasses=self.config['noclasses'],
tab_length=self.md.tab_length,
- use_pygments=self.config['use_pygments']
+ style=self.config.pop('pygments_style', 'default'),
+ **self.config
)
placeholder = self.md.htmlStash.store(code.hilite())
# Clear codeblock in etree instance
@@ -237,7 +257,7 @@ class CodeHiliteExtension(Extension):
# define default configs
self.config = {
'linenums': [None,
- "Use lines numbers. True=yes, False=no, None=auto"],
+ "Use lines numbers. True|table|inline=yes, False=no, None=auto"],
'guess_lang': [True,
"Automatic language detection - Default: True"],
'css_class': ["codehilite",
@@ -252,10 +272,25 @@ class CodeHiliteExtension(Extension):
'use_pygments': [True,
'Use Pygments to Highlight code blocks. '
'Disable if using a JavaScript library. '
- 'Default: True']
+ 'Default: True'],
+ 'lang_prefix': [
+ 'language-',
+ 'Prefix prepended to the language when use_pygments is false. Default: "language-"'
+ ]
}
- super().__init__(**kwargs)
+ for key, value in kwargs.items():
+ if key in self.config:
+ self.setConfig(key, value)
+ else:
+ # manually set unknown keywords.
+ if isinstance(value, str):
+ try:
+ # Attempt to parse str as a bool value
+ value = parseBoolValue(value, preserve_none=True)
+ except ValueError:
+ pass # Assume it's not a bool value. Use as-is.
+ self.config[key] = [value, '']
def extendMarkdown(self, md):
""" Add HilitePostprocessor to Markdown instance. """
diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
index 71fac1a..e3b3f1b 100644
--- a/markdown/extensions/fenced_code.py
+++ b/markdown/extensions/fenced_code.py
@@ -15,78 +15,131 @@ All changes Copyright 2008-2014 The Python Markdown Project
License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
+
+from textwrap import dedent
from . import Extension
from ..preprocessors import Preprocessor
from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines
+from .attr_list import get_attrs, AttrListExtension
+from ..util import parseBoolValue
import re
class FencedCodeExtension(Extension):
+ def __init__(self, **kwargs):
+ self.config = {
+ 'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"']
+ }
+ super().__init__(**kwargs)
def extendMarkdown(self, md):
""" Add FencedBlockPreprocessor to the Markdown instance. """
md.registerExtension(self)
- md.preprocessors.register(FencedBlockPreprocessor(md), 'fenced_code_block', 25)
+ md.preprocessors.register(FencedBlockPreprocessor(md, self.getConfigs()), 'fenced_code_block', 25)
class FencedBlockPreprocessor(Preprocessor):
- FENCED_BLOCK_RE = re.compile(r'''
-(?P<fence>^(?:~{3,}|`{3,}))[ ]* # Opening ``` or ~~~
-(\{?\.?(?P<lang>[\w#.+-]*))?[ ]* # Optional {, and lang
-# Optional highlight lines, single- or double-quote-delimited
-(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]*
-}?[ ]*\n # Optional closing }
-(?P<code>.*?)(?<=\n)
-(?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE)
- CODE_WRAP = '<pre><code%s>%s</code></pre>'
- LANG_TAG = ' class="%s"'
-
- def __init__(self, md):
+ FENCED_BLOCK_RE = re.compile(
+ dedent(r'''
+ (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
+ ((\{(?P<attrs>[^\}\n]*)\})?| # (optional {attrs} or
+ (\.?(?P<lang>[\w#.+-]*))?[ ]* # optional (.)lang
+ (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?) # optional hl_lines)
+ [ ]*\n # newline (end of opening fence)
+ (?P<code>.*?)(?<=\n) # the code block
+ (?P=fence)[ ]*$ # closing fence
+ '''),
+ re.MULTILINE | re.DOTALL | re.VERBOSE
+ )
+
+ def __init__(self, md, config):
super().__init__(md)
-
- self.checked_for_codehilite = False
+ self.config = config
+ self.checked_for_deps = False
self.codehilite_conf = {}
+ self.use_attr_list = False
+ # List of options to convert to bool values
+ self.bool_options = [
+ 'linenums',
+ 'guess_lang',
+ 'noclasses',
+ 'use_pygments'
+ ]
def run(self, lines):
""" Match and store Fenced Code Blocks in the HtmlStash. """
- # Check for code hilite extension
- if not self.checked_for_codehilite:
+ # Check for dependent extensions
+ if not self.checked_for_deps:
for ext in self.md.registeredExtensions:
if isinstance(ext, CodeHiliteExtension):
- self.codehilite_conf = ext.config
- break
+ self.codehilite_conf = ext.getConfigs()
+ if isinstance(ext, AttrListExtension):
+ self.use_attr_list = True
- self.checked_for_codehilite = True
+ self.checked_for_deps = True
text = "\n".join(lines)
while 1:
m = self.FENCED_BLOCK_RE.search(text)
if m:
- lang = ''
- if m.group('lang'):
- lang = self.LANG_TAG % m.group('lang')
+ lang, id, classes, config = None, '', [], {}
+ if m.group('attrs'):
+ id, classes, config = self.handle_attrs(get_attrs(m.group('attrs')))
+ if len(classes):
+ lang = classes[0]
+ else:
+ if m.group('lang'):
+ lang = m.group('lang')
+ classes.append(lang)
+ if m.group('hl_lines'):
+ # Support hl_lines outside of attrs for backward-compatibility
+ config['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
# If config is not empty, then the codehighlite extension
# is enabled, so we call it to highlight the code
- if self.codehilite_conf:
+ if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True):
+ local_config = self.codehilite_conf.copy()
+ local_config.update(config)
+ # Combine classes with cssclass. Ensure cssclass is at end
+ # as pygments appends a suffix under certain circumstances.
+ # Ignore ID as Pygments does not offer an option to set it.
+ if classes:
+ local_config['css_class'] = '{} {}'.format(
+ ' '.join(classes),
+ local_config['css_class']
+ )
highliter = CodeHilite(
m.group('code'),
- linenums=self.codehilite_conf['linenums'][0],
- guess_lang=self.codehilite_conf['guess_lang'][0],
- css_class=self.codehilite_conf['css_class'][0],
- style=self.codehilite_conf['pygments_style'][0],
- use_pygments=self.codehilite_conf['use_pygments'][0],
- lang=(m.group('lang') or None),
- noclasses=self.codehilite_conf['noclasses'][0],
- hl_lines=parse_hl_lines(m.group('hl_lines'))
+ lang=lang,
+ style=local_config.pop('pygments_style', 'default'),
+ **local_config
)
code = highliter.hilite()
else:
- code = self.CODE_WRAP % (lang,
- self._escape(m.group('code')))
+ id_attr = class_attr = kv_pairs = ''
+ if classes:
+ class_attr = ' class="{}{}"'.format(
+ self.config.get('lang_prefix', 'language-'),
+ ' '.join(classes)
+ )
+ if id:
+ id_attr = ' id="{}"'.format(id)
+ if self.use_attr_list and config and not config.get('use_pygments', False):
+ # Only assign key/value pairs to code element if attr_list ext is enabled, key/value pairs
+ # were defined on the code block, and the `use_pygments` key was not set to True. The
+ # `use_pygments` key could be either set to False or not defined. It is omitted from output.
+ kv_pairs = ' ' + ' '.join(
+ '{k}="{v}"'.format(k=k, v=v) for k, v in config.items() if k != 'use_pygments'
+ )
+ code = '<pre{id}><code{cls}{kv}>{code}</code></pre>'.format(
+ id=id_attr,
+ cls=class_attr,
+ kv=kv_pairs,
+ code=self._escape(m.group('code'))
+ )
placeholder = self.md.htmlStash.store(code)
text = '{}\n{}\n{}'.format(text[:m.start()],
@@ -96,6 +149,24 @@ class FencedBlockPreprocessor(Preprocessor):
break
return text.split("\n")
+ def handle_attrs(self, attrs):
+ """ Return tuple: (id, [list, of, classes], {configs}) """
+ id = ''
+ classes = []
+ configs = {}
+ for k, v in attrs:
+ if k == 'id':
+ id = v
+ elif k == '.':
+ classes.append(v)
+ elif k == 'hl_lines':
+ configs[k] = parse_hl_lines(v)
+ elif k in self.bool_options:
+ configs[k] = parseBoolValue(v, fail_on_errors=False, preserve_none=True)
+ else:
+ configs[k] = v
+ return id, classes, configs
+
def _escape(self, txt):
""" basic html escaping """
txt = txt.replace('&', '&amp;')