summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTakeshi KOMIYA <i.tkomiya@gmail.com>2016-10-17 16:06:45 +0900
committerTakeshi KOMIYA <i.tkomiya@gmail.com>2016-10-17 16:08:37 +0900
commit53ea1cb2808e90b51f0ed9468740a34c00decc2a (patch)
treee442ad075ba6ba89ced0ce6e4ee7abc7e9cf4277
parent78d96b4abb1ac98efb96e73b79f70375f3496194 (diff)
downloadsphinx-git-53ea1cb2808e90b51f0ed9468740a34c00decc2a.tar.gz
Fix #3045: HTML search index creator should ignore "raw" content if not html
-rw-r--r--CHANGES1
-rw-r--r--sphinx/search/__init__.py15
-rw-r--r--tests/roots/test-search/index.rst10
-rw-r--r--tests/test_search.py10
4 files changed, 27 insertions, 9 deletions
diff --git a/CHANGES b/CHANGES
index cdc428cd0..c5c06efb3 100644
--- a/CHANGES
+++ b/CHANGES
@@ -53,6 +53,7 @@ Bugs fixed
* #3031: incompatibility with LaTeX package ``tocloft``
* #3003: literal blocks in footnotes are not supported by Latex
* #3047: spacing before footnote in pdf output is not coherent and allows breaks
+* #3045: HTML search index creator should ignore "raw" content if not html
Testing
--------
diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py
index 09430876b..d3c6c0eba 100644
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -196,13 +196,14 @@ class WordCollector(NodeVisitor):
if issubclass(nodetype, comment):
raise SkipNode
if issubclass(nodetype, raw):
- # Some people might put content in raw HTML that should be searched,
- # so we just amateurishly strip HTML tags and index the remaining
- # content
- nodetext = re.sub(r'(?is)<style.*?</style>', '', node.astext())
- nodetext = re.sub(r'(?is)<script.*?</script>', '', nodetext)
- nodetext = re.sub(r'<[^<]+?>', '', nodetext)
- self.found_words.extend(self.lang.split(nodetext))
+ if 'html' in node.get('format', '').split():
+ # Some people might put content in raw HTML that should be searched,
+ # so we just amateurishly strip HTML tags and index the remaining
+ # content
+ nodetext = re.sub(r'(?is)<style.*?</style>', '', node.astext())
+ nodetext = re.sub(r'(?is)<script.*?</script>', '', nodetext)
+ nodetext = re.sub(r'<[^<]+?>', '', nodetext)
+ self.found_words.extend(self.lang.split(nodetext))
raise SkipNode
if issubclass(nodetype, Text):
self.found_words.extend(self.lang.split(node.astext()))
diff --git a/tests/roots/test-search/index.rst b/tests/roots/test-search/index.rst
index 21fcdf53c..b593c6cad 100644
--- a/tests/roots/test-search/index.rst
+++ b/tests/roots/test-search/index.rst
@@ -17,4 +17,12 @@ textinheading
.. toctree::
- tocitem \ No newline at end of file
+ tocitem
+
+.. raw:: html
+
+ <span class="raw">rawword"</span>
+
+.. raw:: latex
+
+ latex_keyword
diff --git a/tests/test_search.py b/tests/test_search.py
index a363b30be..d4b8817de 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -114,4 +114,12 @@ def test_term_in_heading_and_section(app, status, warning):
# both documents should be a hit in the search index as a title,
# respectively text hit
assert 'textinhead:1' in searchindex
- assert 'textinhead:0' in searchindex \ No newline at end of file
+ assert 'textinhead:0' in searchindex
+
+
+@with_app(testroot='search')
+def test_term_in_raw_directive(app, status, warning):
+ searchindex = jsload(app.outdir / 'searchindex.js')
+ assert not is_registered_term(searchindex, 'raw')
+ assert is_registered_term(searchindex, 'rawword')
+ assert not is_registered_term(searchindex, 'latex_keyword')