summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Justin Mayer <entroP@gmail.com> 2020-10-19 20:43:18 +0200
committer: GitHub <noreply@github.com> 2020-10-19 20:43:18 +0200
commit: 197cd1e12ef9f14b2ba277e23504c8ac26a0467f (patch)
tree: 6af2cacdcb10275d55831d4c23408acbe2d77812
parent: 20a1ac0e6f26f5ee07e48160cfe2d7c2d8f810a0 (diff)
parent: fd0923d2f24c9833021c01f247f97f4d1d8e67b4 (diff)
download: pelican-197cd1e12ef9f14b2ba277e23504c8ac26a0467f.tar.gz
Merge pull request #2812 from avaris/2646
Try unescaped paths in intrasite link discovery
-rw-r--r-- pelican/contents.py 80
-rw-r--r-- pelican/tests/test_contents.py 57
2 files changed, 106 insertions, 31 deletions
diff --git a/pelican/contents.py b/pelican/contents.py
index 6470ee45..75cedcdc 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -4,7 +4,8 @@ import locale
import logging
import os
import re
-from urllib.parse import urljoin, urlparse, urlunparse
+from html import unescape
+from urllib.parse import unquote, urljoin, urlparse, urlunparse
import pytz
@@ -250,38 +251,55 @@ class Content:
# XXX Put this in a different location.
if what in {'filename', 'static', 'attach'}:
- if path.startswith('/'):
- path = path[1:]
+ def _get_linked_content(key, url):
+ nonlocal value
+
+ def _find_path(path):
+ if path.startswith('/'):
+ path = path[1:]
+ else:
+ # relative to the source path of this content
+ path = self.get_relative_source_path(
+ os.path.join(self.relative_dir, path)
+ )
+ return self._context[key].get(path, None)
+
+ # try path
+ result = _find_path(url.path)
+ if result is not None:
+ return result
+
+ # try unquoted path
+ result = _find_path(unquote(url.path))
+ if result is not None:
+ return result
+
+ # try html unescaped url
+ unescaped_url = urlparse(unescape(url.geturl()))
+ result = _find_path(unescaped_url.path)
+ if result is not None:
+ value = unescaped_url
+ return result
+
+ # check if a static file is linked with {filename}
+ if what == 'filename' and key == 'generated_content':
+ linked_content = _get_linked_content('static_content', value)
+ if linked_content:
+ logger.warning(
+ '{filename} used for linking to static'
+ ' content %s in %s. Use {static} instead',
+ value.path,
+ self.get_relative_source_path())
+ return linked_content
+
+ return None
+
+ if what == 'filename':
+ key = 'generated_content'
else:
- # relative to the source path of this content
- path = self.get_relative_source_path(
- os.path.join(self.relative_dir, path)
- )
+ key = 'static_content'
- key = 'static_content' if what in ('static', 'attach')\
- else 'generated_content'
-
- def _get_linked_content(key, path):
- try:
- return self._context[key][path]
- except KeyError:
- try:
- # Markdown escapes spaces, try unescaping
- return self._context[key][path.replace('%20', ' ')]
- except KeyError:
- if what == 'filename' and key == 'generated_content':
- key = 'static_content'
- linked_content = _get_linked_content(key, path)
- if linked_content:
- logger.warning(
- '{filename} used for linking to static'
- ' content %s in %s. Use {static} instead',
- path,
- self.get_relative_source_path())
- return linked_content
- return None
-
- linked_content = _get_linked_content(key, path)
+ linked_content = _get_linked_content(key, value)
if linked_content:
if what == 'attach':
linked_content.attach_to(self)
diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py
index 1a520bc7..32012d4f 100644
--- a/pelican/tests/test_contents.py
+++ b/pelican/tests/test_contents.py
@@ -30,6 +30,9 @@ class TestBase(LoggedTestCase):
'content': TEST_CONTENT,
'context': {
'localsiteurl': '',
+ 'generated_content': {},
+ 'static_content': {},
+ 'static_links': set()
},
'metadata': {
'summary': TEST_SUMMARY,
@@ -519,6 +522,60 @@ class TestPage(TestBase):
'<img src="http://static.cool.site/images/poster.jpg"/>'
)
+ def test_intrasite_link_escape(self):
+ article = type(
+ '_DummyArticle', (object,), {'url': 'article-spaces.html'})
+ asset = type(
+ '_DummyAsset', (object,), {'url': 'name@example.com'})
+
+ args = self.page_kwargs.copy()
+ args['settings'] = get_settings()
+ args['source_path'] = 'content'
+ args['context']['generated_content'] = {'article spaces.rst': article}
+ args['context']['static_content'] = {'name@example.com': asset}
+
+ expected_output = (
+ 'A simple test with a '
+ '<a href="http://notmyidea.org/article-spaces.html#anchor">link</a> '
+ '<a href="http://notmyidea.org/name@example.com#anchor">file</a>'
+ )
+
+ # not escaped
+ args['content'] = (
+ 'A simple test with a '
+ '<a href="{filename}article spaces.rst#anchor">link</a> '
+ '<a href="{static}name@example.com#anchor">file</a>'
+ )
+ content = Page(**args).get_content('http://notmyidea.org')
+ self.assertEqual(content, expected_output)
+
+ # html escaped
+ args['content'] = (
+ 'A simple test with a '
+ '<a href="{filename}article spaces.rst#anchor">link</a> '
+ '<a href="{static}name&#64;example.com#anchor">file</a>'
+ )
+ content = Page(**args).get_content('http://notmyidea.org')
+ self.assertEqual(content, expected_output)
+
+ # url escaped
+ args['content'] = (
+ 'A simple test with a '
+ '<a href="{filename}article%20spaces.rst#anchor">link</a> '
+ '<a href="{static}name%40example.com#anchor">file</a>'
+ )
+ content = Page(**args).get_content('http://notmyidea.org')
+ self.assertEqual(content, expected_output)
+
+ # html and url escaped
+ args['content'] = (
+ 'A simple test with a '
+ '<a href="{filename}article%20spaces.rst#anchor">link</a> '
+ '<a href="{static}name&#64;example.com#anchor">file</a>'
+ )
+ content = Page(**args).get_content('http://notmyidea.org')
+ self.assertEqual(content, expected_output)
+
def test_intrasite_link_markdown_spaces(self):
cls_name = '_DummyArticle'
article = type(cls_name, (object,), {'url': 'article-spaces.html'})