summary | refs | log | tree | commit | diff
path: root/sphinx/search.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/search.py')
-rw-r--r-- sphinx/search.py 105
1 files changed, 63 insertions, 42 deletions
diff --git a/sphinx/search.py b/sphinx/search.py
index c0d3ab3c6..0d07fd72f 100644
--- a/sphinx/search.py
+++ b/sphinx/search.py
@@ -13,8 +13,14 @@ import cPickle as pickle
from docutils.nodes import comment, Text, NodeVisitor, SkipNode
-from sphinx.util.stemmer import PorterStemmer
from sphinx.util import jsdump, rpartition
+try:
+ # http://bitbucket.org/methane/porterstemmer/
+ from porterstemmer import Stemmer as CStemmer
+ CSTEMMER = True
+except ImportError:
+ from sphinx.util.stemmer import PorterStemmer
+ CSTEMMER = False
word_re = re.compile(r'\w+(?u)')
@@ -61,15 +67,23 @@ class _JavaScriptIndex(object):
js_index = _JavaScriptIndex()
-class Stemmer(PorterStemmer):
- """
- All those porter stemmer implementations look hideous.
- make at least the stem method nicer.
- """
+if CSTEMMER:
+ class Stemmer(CStemmer):
+
+ def stem(self, word):
+ return self(word.lower())
+
+else:
+ class Stemmer(PorterStemmer):
+ """
+ All those porter stemmer implementations look hideous.
+ make at least the stem method nicer.
+ """
+
+ def stem(self, word):
+ word = word.lower()
+ return PorterStemmer.stem(self, word, 0, len(word) - 1)
- def stem(self, word):
- word = word.lower()
- return PorterStemmer.stem(self, word, 0, len(word) - 1)
class WordCollector(NodeVisitor):
@@ -105,8 +119,10 @@ class IndexBuilder(object):
self._titles = {}
# stemmed word -> set(filenames)
self._mapping = {}
- # desctypes -> index
- self._desctypes = {}
+ # objtype -> index
+ self._objtypes = {}
+ # objtype index -> objname (localized)
+ self._objnames = {}
def load(self, stream, format):
"""Reconstruct from frozen data."""
@@ -124,7 +140,7 @@ class IndexBuilder(object):
self._mapping[k] = set([index2fn[v]])
else:
self._mapping[k] = set(index2fn[i] for i in v)
- # no need to load keywords/desctypes
+ # no need to load keywords/objtypes
def dump(self, stream, format):
"""Dump the frozen index to a stream."""
@@ -132,27 +148,33 @@ class IndexBuilder(object):
format = self.formats[format]
format.dump(self.freeze(), stream)
- def get_modules(self, fn2index):
- rv = {}
- for name, (doc, _, _, _) in self.env.modules.iteritems():
- if doc in fn2index:
- rv[name] = fn2index[doc]
- return rv
-
- def get_descrefs(self, fn2index):
+ def get_objects(self, fn2index):
rv = {}
- dt = self._desctypes
- for fullname, (doc, desctype) in self.env.descrefs.iteritems():
- if doc not in fn2index:
- continue
- prefix, name = rpartition(fullname, '.')
- pdict = rv.setdefault(prefix, {})
- try:
- i = dt[desctype]
- except KeyError:
- i = len(dt)
- dt[desctype] = i
- pdict[name] = (fn2index[doc], i)
+ otypes = self._objtypes
+ onames = self._objnames
+ for domainname, domain in self.env.domains.iteritems():
+ for fullname, dispname, type, docname, anchor, prio in \
+ domain.get_objects():
+ # XXX use dispname?
+ if docname not in fn2index:
+ continue
+ if prio < 0:
+ continue
+ # XXX splitting at dot is kind of Python specific
+ prefix, name = rpartition(fullname, '.')
+ pdict = rv.setdefault(prefix, {})
+ try:
+ i = otypes[domainname, type]
+ except KeyError:
+ i = len(otypes)
+ otypes[domainname, type] = i
+ otype = domain.object_types.get(type)
+ if otype:
+ # use str() to fire translation proxies
+ onames[i] = str(domain.get_type_name(otype))
+ else:
+ onames[i] = type
+ pdict[name] = (fn2index[docname], i, prio)
return rv
def get_terms(self, fn2index):
@@ -171,14 +193,13 @@ class IndexBuilder(object):
filenames = self._titles.keys()
titles = self._titles.values()
fn2index = dict((f, i) for (i, f) in enumerate(filenames))
- return dict(
- filenames=filenames,
- titles=titles,
- terms=self.get_terms(fn2index),
- descrefs=self.get_descrefs(fn2index),
- modules=self.get_modules(fn2index),
- desctypes=dict((v, k) for (k, v) in self._desctypes.items()),
- )
+ terms = self.get_terms(fn2index)
+ objects = self.get_objects(fn2index) # populates _objtypes
+ objtypes = dict((v, k[0] + ':' + k[1])
+ for (k, v) in self._objtypes.iteritems())
+ objnames = self._objnames
+ return dict(filenames=filenames, titles=titles, terms=terms,
+ objects=objects, objtypes=objtypes, objnames=objnames)
def prune(self, filenames):
"""Remove data for all filenames not in the list."""
@@ -197,11 +218,11 @@ class IndexBuilder(object):
visitor = WordCollector(doctree)
doctree.walk(visitor)
- def add_term(word, prefix='', stem=self._stemmer.stem):
+ def add_term(word, stem=self._stemmer.stem):
word = stem(word)
if len(word) < 3 or word in stopwords or word.isdigit():
return
- self._mapping.setdefault(prefix + word, set()).add(filename)
+ self._mapping.setdefault(word, set()).add(filename)
for word in word_re.findall(title):
add_term(word)