summaryrefslogtreecommitdiff
path: root/markdown/extensions
diff options
context:
space:
mode:
authorIsaac Muse <faceless.shop@gmail.com>2020-10-24 19:34:51 -0600
committerGitHub <noreply@github.com>2020-10-24 21:34:51 -0400
commit11c9e179390ba4e3fbc5ed35b9af16ea93f7d5ca (patch)
treec9a2e7d7c1a349127c9ef2fd9b865e33e2f4fb41 /markdown/extensions
parent18b17e1bf5efa22ed06f09df14cc4c3ff8d7b5f8 (diff)
downloadpython-markdown-11c9e179390ba4e3fbc5ed35b9af16ea93f7d5ca.tar.gz
Fix issues related to hr tags
Ensure that the start/end tag handler does not include tags in the previous paragraph. Provide special handling for tags like hr that never have content. Use sets for the block tag lists, because membership tests (checking whether a tag is in the collection) are much faster on a set than on a list. Fixes #1053.
Diffstat (limited to 'markdown/extensions')
-rw-r--r--markdown/extensions/md_in_html.py41
1 files changed, 35 insertions, 6 deletions
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
index a2137c7..eb8902e 100644
--- a/markdown/extensions/md_in_html.py
+++ b/markdown/extensions/md_in_html.py
@@ -30,15 +30,19 @@ class HTMLExtractorExtra(HTMLExtractor):
def __init__(self, md, *args, **kwargs):
# All block-level tags.
- self.block_level_tags = md.block_level_elements.copy()
+ self.block_level_tags = set(md.block_level_elements.copy())
# Block-level tags in which the content only gets span level parsing
- self.span_tags = ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
+ self.span_tags = set(
+ ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
+ )
# Block-level tags which never get their content parsed.
- self.raw_tags = ['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea']
+ self.raw_tags = set(['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea'])
# Block-level tags in which the content gets parsed as blocks
- self.block_tags = [tag for tag in self.block_level_tags if tag not in self.span_tags + self.raw_tags]
super().__init__(md, *args, **kwargs)
+ self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags)
+ self.span_and_blocks_tags = self.block_tags | self.span_tags
+
def reset(self):
"""Reset this instance. Loses all unprocessed data."""
self.mdstack = [] # When markdown=1, stack contains a list of tags
@@ -71,10 +75,10 @@ class HTMLExtractorExtra(HTMLExtractor):
# Only use the parent state if it is more restrictive than the markdown attribute.
md_attr = parent_state
if ((md_attr == '1' and tag in self.block_tags) or
- (md_attr == 'block' and tag in self.span_tags + self.block_tags)):
+ (md_attr == 'block' and tag in self.span_and_blocks_tags)):
return 'block'
elif ((md_attr == '1' and tag in self.span_tags) or
- (md_attr == 'span' and tag in self.span_tags + self.block_tags)):
+ (md_attr == 'span' and tag in self.span_and_blocks_tags)):
return 'span'
elif tag in self.block_level_tags:
return 'off'
@@ -90,6 +94,18 @@ class HTMLExtractorExtra(HTMLExtractor):
return value
def handle_starttag(self, tag, attrs):
+ # Handle tags that should always be empty and do not specify a closing tag
+ if tag in self.empty_tags:
+ attrs = {key: value if value is not None else key for key, value in attrs}
+ if "markdown" in attrs:
+ attrs.pop('markdown')
+ element = etree.Element(tag, attrs)
+ data = etree.tostring(element, encoding='unicode', method='html')
+ else:
+ data = self.get_starttag_text()
+ self.handle_empty_tag(data, True)
+ return
+
if tag in self.block_level_tags:
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
# Convert to `{'checked': 'checked'}`.
@@ -161,6 +177,19 @@ class HTMLExtractorExtra(HTMLExtractor):
else:
self.handle_data(text)
+ def handle_startendtag(self, tag, attrs):
+ if tag in self.empty_tags:
+ attrs = {key: value if value is not None else key for key, value in attrs}
+ if "markdown" in attrs:
+ attrs.pop('markdown')
+ element = etree.Element(tag, attrs)
+ data = etree.tostring(element, encoding='unicode', method='html')
+ else:
+ data = self.get_starttag_text()
+ else:
+ data = self.get_starttag_text()
+ self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))
+
def handle_data(self, data):
if self.inraw or not self.mdstack:
super().handle_data(data)