diff options
| author | Waylan Limberg <waylan.limberg@icloud.com> | 2020-11-18 13:33:20 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-11-18 13:33:20 -0500 |
| commit | 81cc5b8bf1ad2a44b0a042d059caab3ed802ed33 (patch) | |
| tree | 78eeb3ceaeb05a4518fe45156e864e6e2c7877cf /markdown/extensions | |
| parent | 447da662b0c9548941a44a911e45c7cf6ad32861 (diff) | |
| download | python-markdown-81cc5b8bf1ad2a44b0a042d059caab3ed802ed33.tar.gz | |
Properly parse code spans in md_in_html (#1069)
This reverts part of 2766698 and re-implements handling
of tails in the same manner as the core.
Also, ensure line_offset doesn't raise an error on bad input
(see #1066) and properly handle script tags in code
spans (same as in the core).
Fixes #1068.
Diffstat (limited to 'markdown/extensions')
| -rw-r--r-- | markdown/extensions/md_in_html.py | 27 |
1 files changed, 14 insertions, 13 deletions
```diff
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
index eb8902e..b8848ef 100644
--- a/markdown/extensions/md_in_html.py
+++ b/markdown/extensions/md_in_html.py
@@ -19,7 +19,7 @@ from ..blockprocessors import BlockProcessor
 from ..preprocessors import Preprocessor
 from ..postprocessors import RawHtmlPostprocessor
 from .. import util
-from ..htmlparser import HTMLExtractor
+from ..htmlparser import HTMLExtractor, blank_line_re
 import xml.etree.ElementTree as etree
 
 
@@ -85,17 +85,9 @@ class HTMLExtractorExtra(HTMLExtractor):
         else:  # pragma: no cover
             return None
 
-    def at_line_start(self):
-        """At line start."""
-
-        value = super().at_line_start()
-        if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
-            value = True
-        return value
-
     def handle_starttag(self, tag, attrs):
         # Handle tags that should always be empty and do not specify a closing tag
-        if tag in self.empty_tags:
+        if tag in self.empty_tags and (self.at_line_start() or self.intail):
             attrs = {key: value if value is not None else key for key, value in attrs}
             if "markdown" in attrs:
                 attrs.pop('markdown')
@@ -106,13 +98,12 @@ class HTMLExtractorExtra(HTMLExtractor):
             self.handle_empty_tag(data, True)
             return
 
-        if tag in self.block_level_tags:
+        if tag in self.block_level_tags and (self.at_line_start() or self.intail):
             # Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
             # Convert to `{'checked': 'checked'}`.
             attrs = {key: value if value is not None else key for key, value in attrs}
             state = self.get_state(tag, attrs)
-
-            if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
+            if self.inraw or (state in [None, 'off'] and not self.mdstack):
                 # fall back to default behavior
                 attrs.pop('markdown', None)
                 super().handle_starttag(tag, attrs)
@@ -134,6 +125,9 @@ class HTMLExtractorExtra(HTMLExtractor):
                     self.handle_data(self.md.htmlStash.store(text))
                 else:
                     self.handle_data(text)
+                if tag in self.CDATA_CONTENT_ELEMENTS:
+                    # This is presumably a standalone tag in a code span (see #1036).
+                    self.clear_cdata_mode()
 
     def handle_endtag(self, tag):
         if tag in self.block_level_tags:
@@ -159,6 +153,11 @@ class HTMLExtractorExtra(HTMLExtractor):
                     self.cleandoc.append(self.md.htmlStash.store(element))
                     self.cleandoc.append('\n\n')
                     self.state = []
+                    # Check if element has a tail
+                    if not blank_line_re.match(
+                            self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]):
+                        # More content exists after endtag.
+                        self.intail = True
             else:
                 # Treat orphan closing tag as a span level tag.
                 text = self.get_endtag_text(tag)
@@ -191,6 +190,8 @@ class HTMLExtractorExtra(HTMLExtractor):
         self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))
 
     def handle_data(self, data):
+        if self.intail and '\n' in data:
+            self.intail = False
         if self.inraw or not self.mdstack:
             super().handle_data(data)
         else:
```
