summary | refs | log | tree | commit | diff
path: root/markdown/extensions
diff options
context:
space:
mode:
author: Jim Porter <826865+jimporter@users.noreply.github.com> 2020-01-31 17:08:49 +0100
committer: GitHub <noreply@github.com> 2020-01-31 11:08:49 -0500
commit: ccf56edd1404c828c8c2f4e5492ae854e3b06dd7 (patch)
tree: e4c3fa1828ed07534872ee0b89f65a35aa2a108a /markdown/extensions
parent: 66517465786b90ccb7fcffeef7996b593374d889 (diff)
download: python-markdown-ccf56edd1404c828c8c2f4e5492ae854e3b06dd7.tar.gz
Don't emit stashed HTML tag placeholders in `.toc_tokens` (#901)
Note: this slightly changes existing behavior in that raw HTML tags are no longer included in the HTML `.toc`. However, the fact that this worked before was an oversight; the intention was always to strip all markup. Resolves #899.
Diffstat (limited to 'markdown/extensions')
-rw-r--r-- markdown/extensions/toc.py 13
1 files changed, 9 insertions, 4 deletions
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
index d490a06..e42836a 100644
--- a/markdown/extensions/toc.py
+++ b/markdown/extensions/toc.py
@@ -43,7 +43,7 @@ def unique(id, ids):
return id
-def stashedHTML2text(text, md):
+def stashedHTML2text(text, md, strip_entities=True):
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
def _html_sub(m):
""" Substitute raw html with plain text. """
@@ -51,8 +51,11 @@ def stashedHTML2text(text, md):
raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
except (IndexError, TypeError): # pragma: no cover
return m.group(0)
- # Strip out tags and entities - leaveing text
- return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
+ # Strip out tags and/or entities - leaving text
+ res = re.sub(r'(<[^>]+>)', '', raw)
+ if strip_entities:
+ res = re.sub(r'(&[\#a-zA-Z0-9]+;)', '', res)
+ return res
return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
@@ -259,7 +262,9 @@ class TocTreeprocessor(Treeprocessor):
toc_tokens.append({
'level': int(el.tag[-1]),
'id': el.attrib["id"],
- 'name': el.attrib.get('data-toc-label', text)
+ 'name': unescape(stashedHTML2text(
+ el.attrib.get('data-toc-label', text), self.md, strip_entities=False
+ ))
})
# Remove the data-toc-label attribute as it is no longer needed