1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
# -*- coding: utf-8 -*-
"""
test_search
~~~~~~~~~~~
Test the search index builder.
:copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import os
from docutils import frontend, utils
from docutils.parsers import rst
from sphinx.search import IndexBuilder
from sphinx.util import jsdump
from util import with_app
# Docutils settings object and reST parser used by test_wordcollector();
# both start out as None and are assigned by setup_module().
settings = parser = None
def setup_module():
    """Create the docutils default settings and the reST parser.

    The results are stored in the module-level ``settings`` and ``parser``
    globals.
    """
    global settings, parser
    parser = rst.Parser()
    option_parser = frontend.OptionParser(components=(rst.Parser,))
    settings = option_parser.get_default_values()
def jsload(path):
    """Load the search index stored in the JavaScript file at *path*.

    The file content must start with ``Search.setIndex(``.  That prefix and
    the final two characters are stripped, and the remainder is decoded with
    ``jsdump.loads``.
    """
    prefix = 'Search.setIndex('
    content = path.text()
    assert content.startswith(prefix)
    payload = content[len(prefix):-2]
    return jsdump.loads(payload)
def is_registered_term(index, keyword):
    """Return True if *keyword* has a non-empty entry in ``index['terms']``.

    A missing keyword is treated like an empty list.  Any value other than
    an empty list (including the integer ``0``) counts as registered.
    """
    entry = index['terms'].get(keyword, [])
    return entry != []
# reST fixture parsed by test_wordcollector(): a comment followed by a
# paragraph.  The blank line between them is required so that the comment
# (an explicit markup block) is terminated cleanly before the paragraph.
FILE_CONTENTS = '''\
.. test that comments are not indexed: boson

test that non-comments are indexed: fermion
'''
def test_wordcollector():
    """Feed a parsed document to IndexBuilder and check its word mapping.

    The word that only occurs in the reST comment must be absent from the
    builder's mapping; the word from the paragraph must be present.
    """
    document = utils.new_document(b'test data', settings)
    document['file'] = 'dummy'
    parser.parse(FILE_CONTENTS, document)

    builder = IndexBuilder(None, 'en', {}, None)
    builder.feed('docname', 'filename', 'title', document)

    indexed_words = builder._mapping
    assert 'boson' not in indexed_words
    assert 'fermion' in indexed_words
@with_app(testroot='ext-viewcode')
def test_objects_are_escaped(app, status, warning):
    """Check that object names are stored HTML-escaped in the search index."""
    app.builder.build_all()
    # Reuse the shared loader instead of duplicating the prefix handling.
    index = jsload(app.outdir / 'searchindex.js')
    # n::Array<T, d> must appear in its escaped form, not as raw markup.
    assert 'n::Array&lt;T, d&gt;' in index.get('objects').get('')
@with_app(testroot='search')
def test_meta_keys_are_handled_for_language_en(app, status, warning):
    """Check which terms are registered when no search language is overridden."""
    app.builder.build_all()
    index = jsload(app.outdir / 'searchindex.js')

    # (term, whether it is expected to be registered in the index)
    expectations = (
        ('thisnoteith', False),
        ('thisonetoo', True),
        ('findthiskei', True),
        ('thistoo', True),
        ('onlygerman', False),
        ('notgerman', True),
        ('onlytoogerman', False),
    )
    for term, registered in expectations:
        assert is_registered_term(index, term) == registered, term
@with_app(testroot='search', confoverrides={'html_search_language': 'de'})
def test_meta_keys_are_handled_for_language_de(app, status, warning):
    """Check which terms are registered with ``html_search_language = 'de'``."""
    app.builder.build_all()
    index = jsload(app.outdir / 'searchindex.js')

    # (term, whether it is expected to be registered in the index)
    expectations = (
        ('thisnoteith', False),
        ('thisonetoo', True),
        ('findthiskei', False),
        ('thistoo', False),
        ('onlygerman', True),
        ('notgerman', False),
        ('onlytoogerman', True),
    )
    for term, registered in expectations:
        assert is_registered_term(index, term) == registered, term
@with_app(testroot='search')
def test_stemmer_does_not_remove_short_words(app, status, warning):
    """The short word ``zfs`` must appear in the generated search index."""
    app.builder.build_all()
    index_path = app.outdir / 'searchindex.js'
    raw_index = index_path.text()
    assert 'zfs' in raw_index
@with_app(testroot='search')
def test_stemmer(app, status, warning):
    """Check that the expected stemmed terms are registered in the index."""
    # Build explicitly so the test does not depend on output left behind by
    # a previously executed test.
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    assert is_registered_term(searchindex, 'findthisstemmedkei')
    assert is_registered_term(searchindex, 'intern')
@with_app(testroot='search')
def test_term_in_heading_and_section(app, status, warning):
    """Check that a term is indexed both as a title hit and as a text hit."""
    # Build explicitly so the test does not depend on output left behind by
    # a previously executed test.
    app.builder.build_all()
    searchindex = (app.outdir / 'searchindex.js').text()
    # If a search term is in the title of one document and in the text of
    # another, both documents should be hits in the search index: one as a
    # title hit, the other as a text hit.
    assert 'textinhead:1' in searchindex
    assert 'textinhead:0' in searchindex
@with_app(testroot='search')
def test_term_in_raw_directive(app, status, warning):
    """Check which of the terms tied to the raw directive get registered."""
    # Build explicitly so the test does not depend on output left behind by
    # a previously executed test.
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    assert not is_registered_term(searchindex, 'raw')
    assert is_registered_term(searchindex, 'rawword')
    assert not is_registered_term(searchindex, 'latex_keyword')
|