summaryrefslogtreecommitdiff
path: root/markdown/extensions
diff options
context:
space:
mode:
authorWaylan Limberg <waylan.limberg@icloud.com>2020-10-20 14:06:48 -0400
committerGitHub <noreply@github.com>2020-10-20 14:06:48 -0400
commit6b6cd8bc2f0a870ed309f8b8036492af535e75a1 (patch)
tree4641036a9df302c211f51a07971c4b483b777b8d /markdown/extensions
parent56b03b21f50d2b28b7ab87df7d8015e1f1b62184 (diff)
downloadpython-markdown-6b6cd8bc2f0a870ed309f8b8036492af535e75a1.tar.gz
Unify all block-level tags. (#1048)
Use the list of tags defined in the core by the md_in_html extension. This ensures that the lists do not diverge and allows users and/or extensions to expand the list in the core and have that change affect the extension. Fixes #1047.
Diffstat (limited to 'markdown/extensions')
-rw-r--r--markdown/extensions/md_in_html.py43
1 files changed, 19 insertions, 24 deletions
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
index f635563..489c3fe 100644
--- a/markdown/extensions/md_in_html.py
+++ b/markdown/extensions/md_in_html.py
@@ -23,27 +23,22 @@ from ..htmlparser import HTMLExtractor
import xml.etree.ElementTree as etree
-# Block-level tags in which the content only gets span level parsing
-span_tags = ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
-
-# Block-level tags in which the content gets parsed as blocks
-block_tags = [
- 'address', 'article', 'aside', 'blockquote', 'body', 'colgroup', 'details', 'div', 'dl', 'fieldset',
- 'figcaption', 'figure', 'footer', 'form', 'iframe', 'header', 'hr', 'main', 'menu', 'nav', 'map',
- 'noscript', 'object', 'ol', 'section', 'table', 'tbody', 'thead', 'tfoot', 'tr', 'ul'
-]
-
-# Block-level tags which never get their content parsed.
-raw_tags = ['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea']
-
-block_level_tags = span_tags + block_tags + raw_tags
-
-
class HTMLExtractorExtra(HTMLExtractor):
"""
Override HTMLExtractor and create etree Elements for any elements which should have content parsed as Markdown.
"""
+ def __init__(self, md, *args, **kwargs):
+ # All block-level tags.
+ self.block_level_tags = md.block_level_elements.copy()
+ # Block-level tags in which the content only gets span level parsing
+ self.span_tags = ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
+ # Block-level tags which never get their content parsed.
+ self.raw_tags = ['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea']
+ # Block-level tags in which the content gets parsed as blocks
+ self.block_tags = [tag for tag in self.block_level_tags if tag not in self.span_tags + self.raw_tags]
+ super().__init__(md, *args, **kwargs)
+
def reset(self):
"""Reset this instance. Loses all unprocessed data."""
self.mdstack = [] # When markdown=1, stack contains a list of tags
@@ -75,13 +70,13 @@ class HTMLExtractorExtra(HTMLExtractor):
if parent_state == 'off' or (parent_state == 'span' and md_attr != '0'):
# Only use the parent state if it is more restrictive than the markdown attribute.
md_attr = parent_state
- if ((md_attr == '1' and tag in block_tags) or
- (md_attr == 'block' and tag in span_tags + block_tags)):
+ if ((md_attr == '1' and tag in self.block_tags) or
+ (md_attr == 'block' and tag in self.span_tags + self.block_tags)):
return 'block'
- elif ((md_attr == '1' and tag in span_tags) or
- (md_attr == 'span' and tag in span_tags + block_tags)):
+ elif ((md_attr == '1' and tag in self.span_tags) or
+ (md_attr == 'span' and tag in self.span_tags + self.block_tags)):
return 'span'
- elif tag in block_level_tags:
+ elif tag in self.block_level_tags:
return 'off'
else: # pragma: no cover
return None
@@ -95,7 +90,7 @@ class HTMLExtractorExtra(HTMLExtractor):
return value
def handle_starttag(self, tag, attrs):
- if tag in block_level_tags:
+ if tag in self.block_level_tags:
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
# Convert to `{'checked': 'checked'}`.
attrs = {key: value if value is not None else key for key, value in attrs}
@@ -106,7 +101,7 @@ class HTMLExtractorExtra(HTMLExtractor):
attrs.pop('markdown', None)
super().handle_starttag(tag, attrs)
else:
- if 'p' in self.mdstack and tag in block_level_tags:
+ if 'p' in self.mdstack and tag in self.block_level_tags:
# Close unclosed 'p' tag
self.handle_endtag('p')
self.mdstate.append(state)
@@ -125,7 +120,7 @@ class HTMLExtractorExtra(HTMLExtractor):
self.handle_data(text)
def handle_endtag(self, tag):
- if tag in block_level_tags:
+ if tag in self.block_level_tags:
if self.inraw:
super().handle_endtag(tag)
elif tag in self.mdstack: