diff options
author | Takeshi KOMIYA <i.tkomiya@gmail.com> | 2016-10-17 16:06:45 +0900 |
---|---|---|
committer | Takeshi KOMIYA <i.tkomiya@gmail.com> | 2016-10-17 16:08:37 +0900 |
commit | 53ea1cb2808e90b51f0ed9468740a34c00decc2a (patch) | |
tree | e442ad075ba6ba89ced0ce6e4ee7abc7e9cf4277 | |
parent | 78d96b4abb1ac98efb96e73b79f70375f3496194 (diff) | |
download | sphinx-git-53ea1cb2808e90b51f0ed9468740a34c00decc2a.tar.gz |
Fix #3045: HTML search index creator should ignore "raw" content if not html
-rw-r--r-- | CHANGES | 1 | ||||
-rw-r--r-- | sphinx/search/__init__.py | 15 | ||||
-rw-r--r-- | tests/roots/test-search/index.rst | 10 | ||||
-rw-r--r-- | tests/test_search.py | 10 |
4 files changed, 27 insertions, 9 deletions
@@ -53,6 +53,7 @@ Bugs fixed * #3031: incompatibility with LaTeX package ``tocloft`` * #3003: literal blocks in footnotes are not supported by Latex * #3047: spacing before footnote in pdf output is not coherent and allows breaks +* #3045: HTML search index creator should ignore "raw" content if now html Testing -------- diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py index 09430876b..d3c6c0eba 100644 --- a/sphinx/search/__init__.py +++ b/sphinx/search/__init__.py @@ -196,13 +196,14 @@ class WordCollector(NodeVisitor): if issubclass(nodetype, comment): raise SkipNode if issubclass(nodetype, raw): - # Some people might put content in raw HTML that should be searched, - # so we just amateurishly strip HTML tags and index the remaining - # content - nodetext = re.sub(r'(?is)<style.*?</style>', '', node.astext()) - nodetext = re.sub(r'(?is)<script.*?</script>', '', nodetext) - nodetext = re.sub(r'<[^<]+?>', '', nodetext) - self.found_words.extend(self.lang.split(nodetext)) + if 'html' in node.get('format', '').split(): + # Some people might put content in raw HTML that should be searched, + # so we just amateurishly strip HTML tags and index the remaining + # content + nodetext = re.sub(r'(?is)<style.*?</style>', '', node.astext()) + nodetext = re.sub(r'(?is)<script.*?</script>', '', nodetext) + nodetext = re.sub(r'<[^<]+?>', '', nodetext) + self.found_words.extend(self.lang.split(nodetext)) raise SkipNode if issubclass(nodetype, Text): self.found_words.extend(self.lang.split(node.astext())) diff --git a/tests/roots/test-search/index.rst b/tests/roots/test-search/index.rst index 21fcdf53c..b593c6cad 100644 --- a/tests/roots/test-search/index.rst +++ b/tests/roots/test-search/index.rst @@ -17,4 +17,12 @@ textinheading .. toctree:: - tocitem
\ No newline at end of file + tocitem + +.. raw:: html + + <span class="raw">rawword"</span> + +.. raw:: latex + + latex_keyword diff --git a/tests/test_search.py b/tests/test_search.py index a363b30be..d4b8817de 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -114,4 +114,12 @@ def test_term_in_heading_and_section(app, status, warning): # both documents should be a hit in the search index as a title, # respectively text hit assert 'textinhead:1' in searchindex - assert 'textinhead:0' in searchindex
\ No newline at end of file + assert 'textinhead:0' in searchindex + + +@with_app(testroot='search') +def test_term_in_raw_directive(app, status, warning): + searchindex = jsload(app.outdir / 'searchindex.js') + assert not is_registered_term(searchindex, 'raw') + assert is_registered_term(searchindex, 'rawword') + assert not is_registered_term(searchindex, 'latex_keyword') |