summary | refs | log | tree | commit | diff
path: root/markdown/extensions
diff options
context:
space:
mode:
author: Waylan Limberg <waylan.limberg@icloud.com> 2020-11-18 13:33:20 -0500
committer: GitHub <noreply@github.com> 2020-11-18 13:33:20 -0500
commit: 81cc5b8bf1ad2a44b0a042d059caab3ed802ed33 (patch)
tree: 78eeb3ceaeb05a4518fe45156e864e6e2c7877cf /markdown/extensions
parent: 447da662b0c9548941a44a911e45c7cf6ad32861 (diff)
download: python-markdown-81cc5b8bf1ad2a44b0a042d059caab3ed802ed33.tar.gz
Properly parse code spans in md_in_html (#1069)
This reverts part of 2766698 and re-implements handling of tails in the same manner as the core. Also, ensure line_offset doesn't raise an error on bad input (see #1066) and properly handle script tags in code spans (same as in the core). Fixes #1068.
Diffstat (limited to 'markdown/extensions')
-rw-r--r-- markdown/extensions/md_in_html.py 27
1 file changed, 14 insertions, 13 deletions
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
index eb8902e..b8848ef 100644
--- a/markdown/extensions/md_in_html.py
+++ b/markdown/extensions/md_in_html.py
@@ -19,7 +19,7 @@ from ..blockprocessors import BlockProcessor
from ..preprocessors import Preprocessor
from ..postprocessors import RawHtmlPostprocessor
from .. import util
-from ..htmlparser import HTMLExtractor
+from ..htmlparser import HTMLExtractor, blank_line_re
import xml.etree.ElementTree as etree
@@ -85,17 +85,9 @@ class HTMLExtractorExtra(HTMLExtractor):
else: # pragma: no cover
return None
- def at_line_start(self):
- """At line start."""
-
- value = super().at_line_start()
- if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
- value = True
- return value
-
def handle_starttag(self, tag, attrs):
# Handle tags that should always be empty and do not specify a closing tag
- if tag in self.empty_tags:
+ if tag in self.empty_tags and (self.at_line_start() or self.intail):
attrs = {key: value if value is not None else key for key, value in attrs}
if "markdown" in attrs:
attrs.pop('markdown')
@@ -106,13 +98,12 @@ class HTMLExtractorExtra(HTMLExtractor):
self.handle_empty_tag(data, True)
return
- if tag in self.block_level_tags:
+ if tag in self.block_level_tags and (self.at_line_start() or self.intail):
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
# Convert to `{'checked': 'checked'}`.
attrs = {key: value if value is not None else key for key, value in attrs}
state = self.get_state(tag, attrs)
-
- if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
+ if self.inraw or (state in [None, 'off'] and not self.mdstack):
# fall back to default behavior
attrs.pop('markdown', None)
super().handle_starttag(tag, attrs)
@@ -134,6 +125,9 @@ class HTMLExtractorExtra(HTMLExtractor):
self.handle_data(self.md.htmlStash.store(text))
else:
self.handle_data(text)
+ if tag in self.CDATA_CONTENT_ELEMENTS:
+ # This is presumably a standalone tag in a code span (see #1036).
+ self.clear_cdata_mode()
def handle_endtag(self, tag):
if tag in self.block_level_tags:
@@ -159,6 +153,11 @@ class HTMLExtractorExtra(HTMLExtractor):
self.cleandoc.append(self.md.htmlStash.store(element))
self.cleandoc.append('\n\n')
self.state = []
+ # Check if element has a tail
+ if not blank_line_re.match(
+ self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]):
+ # More content exists after endtag.
+ self.intail = True
else:
# Treat orphan closing tag as a span level tag.
text = self.get_endtag_text(tag)
@@ -191,6 +190,8 @@ class HTMLExtractorExtra(HTMLExtractor):
self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))
def handle_data(self, data):
+ if self.intail and '\n' in data:
+ self.intail = False
if self.inraw or not self.mdstack:
super().handle_data(data)
else: