diff options
| author | Waylan Limberg <waylan.limberg@icloud.com> | 2020-10-20 14:06:48 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-10-20 14:06:48 -0400 |
| commit | 6b6cd8bc2f0a870ed309f8b8036492af535e75a1 (patch) | |
| tree | 4641036a9df302c211f51a07971c4b483b777b8d /markdown/extensions | |
| parent | 56b03b21f50d2b28b7ab87df7d8015e1f1b62184 (diff) | |
| download | python-markdown-6b6cd8bc2f0a870ed309f8b8036492af535e75a1.tar.gz | |
Unify all block-level tags. (#1048)
Have the md_in_html extension use the list of block-level tags defined in the core.
This ensures that the lists do not diverge and allows users and/or
extensions to expand the list in the core and have that change affect
the extension. Fixes #1047.
Diffstat (limited to 'markdown/extensions')
| -rw-r--r-- | markdown/extensions/md_in_html.py | 43 |
1 file changed, 19 insertions, 24 deletions
```diff
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
index f635563..489c3fe 100644
--- a/markdown/extensions/md_in_html.py
+++ b/markdown/extensions/md_in_html.py
@@ -23,27 +23,22 @@ from ..htmlparser import HTMLExtractor
 import xml.etree.ElementTree as etree
 
 
-# Block-level tags in which the content only gets span level parsing
-span_tags = ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
-
-# Block-level tags in which the content gets parsed as blocks
-block_tags = [
-    'address', 'article', 'aside', 'blockquote', 'body', 'colgroup', 'details', 'div', 'dl', 'fieldset',
-    'figcaption', 'figure', 'footer', 'form', 'iframe', 'header', 'hr', 'main', 'menu', 'nav', 'map',
-    'noscript', 'object', 'ol', 'section', 'table', 'tbody', 'thead', 'tfoot', 'tr', 'ul'
-]
-
-# Block-level tags which never get their content parsed.
-raw_tags = ['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea']
-
-block_level_tags = span_tags + block_tags + raw_tags
-
-
 class HTMLExtractorExtra(HTMLExtractor):
     """
     Override HTMLExtractor and create etree Elements for any elements which should have content parsed as Markdown.
     """
 
+    def __init__(self, md, *args, **kwargs):
+        # All block-level tags.
+        self.block_level_tags = md.block_level_elements.copy()
+        # Block-level tags in which the content only gets span level parsing
+        self.span_tags = ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
+        # Block-level tags which never get their content parsed.
+        self.raw_tags = ['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea']
+        # Block-level tags in which the content gets parsed as blocks
+        self.block_tags = [tag for tag in self.block_level_tags if tag not in self.span_tags + self.raw_tags]
+        super().__init__(md, *args, **kwargs)
+
     def reset(self):
         """Reset this instance.  Loses all unprocessed data."""
         self.mdstack = []  # When markdown=1, stack contains a list of tags
@@ -75,13 +70,13 @@ class HTMLExtractorExtra(HTMLExtractor):
         if parent_state == 'off' or (parent_state == 'span' and md_attr != '0'):
             # Only use the parent state if it is more restrictive than the markdown attribute.
             md_attr = parent_state
-        if ((md_attr == '1' and tag in block_tags) or
-                (md_attr == 'block' and tag in span_tags + block_tags)):
+        if ((md_attr == '1' and tag in self.block_tags) or
+                (md_attr == 'block' and tag in self.span_tags + self.block_tags)):
             return 'block'
-        elif ((md_attr == '1' and tag in span_tags) or
-              (md_attr == 'span' and tag in span_tags + block_tags)):
+        elif ((md_attr == '1' and tag in self.span_tags) or
+              (md_attr == 'span' and tag in self.span_tags + self.block_tags)):
             return 'span'
-        elif tag in block_level_tags:
+        elif tag in self.block_level_tags:
             return 'off'
         else:  # pragma: no cover
             return None
@@ -95,7 +90,7 @@ class HTMLExtractorExtra(HTMLExtractor):
         return value
 
     def handle_starttag(self, tag, attrs):
-        if tag in block_level_tags:
+        if tag in self.block_level_tags:
             # Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
             # Convert to `{'checked': 'checked'}`.
             attrs = {key: value if value is not None else key for key, value in attrs}
@@ -106,7 +101,7 @@ class HTMLExtractorExtra(HTMLExtractor):
                 attrs.pop('markdown', None)
                 super().handle_starttag(tag, attrs)
             else:
-                if 'p' in self.mdstack and tag in block_level_tags:
+                if 'p' in self.mdstack and tag in self.block_level_tags:
                     # Close unclosed 'p' tag
                     self.handle_endtag('p')
                 self.mdstate.append(state)
@@ -125,7 +120,7 @@ class HTMLExtractorExtra(HTMLExtractor):
                     self.handle_data(text)
 
     def handle_endtag(self, tag):
-        if tag in block_level_tags:
+        if tag in self.block_level_tags:
             if self.inraw:
                 super().handle_endtag(tag)
             elif tag in self.mdstack:
```
