diff options
author | Justin Mayer <entroP@gmail.com> | 2020-10-19 20:43:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-19 20:43:18 +0200 |
commit | 197cd1e12ef9f14b2ba277e23504c8ac26a0467f (patch) | |
tree | 6af2cacdcb10275d55831d4c23408acbe2d77812 | |
parent | 20a1ac0e6f26f5ee07e48160cfe2d7c2d8f810a0 (diff) | |
parent | fd0923d2f24c9833021c01f247f97f4d1d8e67b4 (diff) | |
download | pelican-197cd1e12ef9f14b2ba277e23504c8ac26a0467f.tar.gz |
Merge pull request #2812 from avaris/2646
Try unescaped paths in intrasite link discovery
-rw-r--r-- | pelican/contents.py | 80 | ||||
-rw-r--r-- | pelican/tests/test_contents.py | 57 |
2 files changed, 106 insertions, 31 deletions
diff --git a/pelican/contents.py b/pelican/contents.py index 6470ee45..75cedcdc 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -4,7 +4,8 @@ import locale import logging import os import re -from urllib.parse import urljoin, urlparse, urlunparse +from html import unescape +from urllib.parse import unquote, urljoin, urlparse, urlunparse import pytz @@ -250,38 +251,55 @@ class Content: # XXX Put this in a different location. if what in {'filename', 'static', 'attach'}: - if path.startswith('/'): - path = path[1:] + def _get_linked_content(key, url): + nonlocal value + + def _find_path(path): + if path.startswith('/'): + path = path[1:] + else: + # relative to the source path of this content + path = self.get_relative_source_path( + os.path.join(self.relative_dir, path) + ) + return self._context[key].get(path, None) + + # try path + result = _find_path(url.path) + if result is not None: + return result + + # try unquoted path + result = _find_path(unquote(url.path)) + if result is not None: + return result + + # try html unescaped url + unescaped_url = urlparse(unescape(url.geturl())) + result = _find_path(unescaped_url.path) + if result is not None: + value = unescaped_url + return result + + # check if a static file is linked with {filename} + if what == 'filename' and key == 'generated_content': + linked_content = _get_linked_content('static_content', value) + if linked_content: + logger.warning( + '{filename} used for linking to static' + ' content %s in %s. 
Use {static} instead', + value.path, + self.get_relative_source_path()) + return linked_content + + return None + + if what == 'filename': + key = 'generated_content' else: - # relative to the source path of this content - path = self.get_relative_source_path( - os.path.join(self.relative_dir, path) - ) + key = 'static_content' - key = 'static_content' if what in ('static', 'attach')\ - else 'generated_content' - - def _get_linked_content(key, path): - try: - return self._context[key][path] - except KeyError: - try: - # Markdown escapes spaces, try unescaping - return self._context[key][path.replace('%20', ' ')] - except KeyError: - if what == 'filename' and key == 'generated_content': - key = 'static_content' - linked_content = _get_linked_content(key, path) - if linked_content: - logger.warning( - '{filename} used for linking to static' - ' content %s in %s. Use {static} instead', - path, - self.get_relative_source_path()) - return linked_content - return None - - linked_content = _get_linked_content(key, path) + linked_content = _get_linked_content(key, value) if linked_content: if what == 'attach': linked_content.attach_to(self) diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 1a520bc7..32012d4f 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -30,6 +30,9 @@ class TestBase(LoggedTestCase): 'content': TEST_CONTENT, 'context': { 'localsiteurl': '', + 'generated_content': {}, + 'static_content': {}, + 'static_links': set() }, 'metadata': { 'summary': TEST_SUMMARY, @@ -519,6 +522,60 @@ class TestPage(TestBase): '<img src="http://static.cool.site/images/poster.jpg"/>' ) + def test_intrasite_link_escape(self): + article = type( + '_DummyArticle', (object,), {'url': 'article-spaces.html'}) + asset = type( + '_DummyAsset', (object,), {'url': 'name@example.com'}) + + args = self.page_kwargs.copy() + args['settings'] = get_settings() + args['source_path'] = 'content' + 
args['context']['generated_content'] = {'article spaces.rst': article} + args['context']['static_content'] = {'name@example.com': asset} + + expected_output = ( + 'A simple test with a ' + '<a href="http://notmyidea.org/article-spaces.html#anchor">link</a> ' + '<a href="http://notmyidea.org/name@example.com#anchor">file</a>' + ) + + # not escaped + args['content'] = ( + 'A simple test with a ' + '<a href="{filename}article spaces.rst#anchor">link</a> ' + '<a href="{static}name@example.com#anchor">file</a>' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual(content, expected_output) + + # html escaped + args['content'] = ( + 'A simple test with a ' + '<a href="{filename}article&#32;spaces.rst#anchor">link</a> ' + '<a href="{static}name&#64;example.com#anchor">file</a>' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual(content, expected_output) + + # url escaped + args['content'] = ( + 'A simple test with a ' + '<a href="{filename}article%20spaces.rst#anchor">link</a> ' + '<a href="{static}name%40example.com#anchor">file</a>' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual(content, expected_output) + + # html and url escaped + args['content'] = ( + 'A simple test with a ' + '<a href="{filename}article%20spaces.rst#anchor">link</a> ' + '<a href="{static}name&#64;example.com#anchor">file</a>' + ) + content = Page(**args).get_content('http://notmyidea.org') + self.assertEqual(content, expected_output) + def test_intrasite_link_markdown_spaces(self): cls_name = '_DummyArticle' article = type(cls_name, (object,), {'url': 'article-spaces.html'}) |