diff options
| author | Jim Porter <826865+jimporter@users.noreply.github.com> | 2020-01-31 17:08:49 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-01-31 11:08:49 -0500 |
| commit | ccf56edd1404c828c8c2f4e5492ae854e3b06dd7 (patch) | |
| tree | e4c3fa1828ed07534872ee0b89f65a35aa2a108a /markdown/extensions | |
| parent | 66517465786b90ccb7fcffeef7996b593374d889 (diff) | |
| download | python-markdown-ccf56edd1404c828c8c2f4e5492ae854e3b06dd7.tar.gz | |
Don't emit stashed HTML tag placeholders in `.toc_tokens` (#901)
Note: this slightly changes existing behavior in that raw HTML tags are no
longer included in the HTML `.toc`. However, the fact that this worked before
was an oversight; the intention was always to strip all markup. Resolves #899.
Diffstat (limited to 'markdown/extensions')
| -rw-r--r-- | markdown/extensions/toc.py | 13 |
1 files changed, 9 insertions, 4 deletions
```diff
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
index d490a06..e42836a 100644
--- a/markdown/extensions/toc.py
+++ b/markdown/extensions/toc.py
@@ -43,7 +43,7 @@ def unique(id, ids):
     return id
 
 
-def stashedHTML2text(text, md):
+def stashedHTML2text(text, md, strip_entities=True):
     """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
     def _html_sub(m):
         """ Substitute raw html with plain text. """
@@ -51,8 +51,11 @@ def stashedHTML2text(text, md):
             raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
         except (IndexError, TypeError):  # pragma: no cover
             return m.group(0)
-        # Strip out tags and entities - leaveing text
-        return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
+        # Strip out tags and/or entities - leaving text
+        res = re.sub(r'(<[^>]+>)', '', raw)
+        if strip_entities:
+            res = re.sub(r'(&[\#a-zA-Z0-9]+;)', '', res)
+        return res
 
     return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
 
@@ -259,7 +262,9 @@ class TocTreeprocessor(Treeprocessor):
                     toc_tokens.append({
                         'level': int(el.tag[-1]),
                         'id': el.attrib["id"],
-                        'name': el.attrib.get('data-toc-label', text)
+                        'name': unescape(stashedHTML2text(
+                            el.attrib.get('data-toc-label', text), self.md, strip_entities=False
+                        ))
                     })
 
                 # Remove the data-toc-label attribute as it is no longer needed
```
