summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sphinx/search/en.py28
-rw-r--r--sphinx/search/zh.py28
-rw-r--r--sphinx/util/stemmer/__init__.py51
-rw-r--r--sphinx/util/stemmer/porter.py (renamed from sphinx/util/stemmer.py)4
4 files changed, 57 insertions, 54 deletions
diff --git a/sphinx/search/en.py b/sphinx/search/en.py
index 22d4e5acb..c6658ffdc 100644
--- a/sphinx/search/en.py
+++ b/sphinx/search/en.py
@@ -10,13 +10,7 @@
"""
from sphinx.search import SearchLanguage
-
-try:
- from Stemmer import Stemmer as PyStemmer
- PYSTEMMER = True
-except ImportError:
- from sphinx.util.stemmer import PorterStemmer
- PYSTEMMER = False
+from sphinx.util.stemmer import get_stemmer
english_stopwords = set("""
a and are as at
@@ -225,25 +219,7 @@ class SearchEnglish(SearchLanguage):
def init(self, options):
# type: (Dict) -> None
- if PYSTEMMER:
- class Stemmer(object):
- def __init__(self):
- # type: () -> None
- self.stemmer = PyStemmer('porter')
-
- def stem(self, word):
- # type: (unicode) -> unicode
- return self.stemmer.stemWord(word)
- else:
- class Stemmer(PorterStemmer):
- """All those porter stemmer implementations look hideous;
- make at least the stem method nicer.
- """
- def stem(self, word):
- # type: (unicode) -> unicode
- return PorterStemmer.stem(self, word, 0, len(word) - 1)
-
- self.stemmer = Stemmer()
+ self.stemmer = get_stemmer()
def stem(self, word):
# type: (unicode) -> unicode
diff --git a/sphinx/search/zh.py b/sphinx/search/zh.py
index bd4787506..520dd6493 100644
--- a/sphinx/search/zh.py
+++ b/sphinx/search/zh.py
@@ -13,13 +13,7 @@ import os
import re
from sphinx.search import SearchLanguage
-
-try:
- from Stemmer import Stemmer as PyStemmer
- PYSTEMMER = True
-except ImportError:
- from sphinx.util.stemmer import PorterStemmer
- PYSTEMMER = False
+from sphinx.util.stemmer import get_stemmer
try:
import jieba
@@ -244,25 +238,7 @@ class SearchChinese(SearchLanguage):
if dict_path and os.path.isfile(dict_path):
jieba.set_dictionary(dict_path)
- if PYSTEMMER:
- class Stemmer(object):
- def __init__(self):
- # type: () -> None
- self.stemmer = PyStemmer('porter')
-
- def stem(self, word):
- # type: (unicode) -> unicode
- return self.stemmer.stemWord(word)
- else:
- class Stemmer(PorterStemmer):
- """All those porter stemmer implementations look hideous;
- make at least the stem method nicer.
- """
- def stem(self, word):
- # type: (unicode) -> unicode
- return PorterStemmer.stem(self, word, 0, len(word) - 1)
-
- self.stemmer = Stemmer()
+ self.stemmer = get_stemmer()
def split(self, input):
# type: (unicode) -> List[unicode]
diff --git a/sphinx/util/stemmer/__init__.py b/sphinx/util/stemmer/__init__.py
new file mode 100644
index 000000000..ae9f76f1b
--- /dev/null
+++ b/sphinx/util/stemmer/__init__.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+"""
+ sphinx.util.stemmer
+ ~~~~~~~~~~~~~~~~~~~
+
+ Word stemming utilities for Sphinx.
+
+ :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+from sphinx.util.stemmer.porter import PorterStemmer
+
+# ``Stemmer`` is an optional import; PYSTEMMER records whether it succeeded.
+try:
+    from Stemmer import Stemmer as _PyStemmer
+except ImportError:
+    PYSTEMMER = False
+else:
+    PYSTEMMER = True
+
+
+class BaseStemmer(object):
+    """Common base class for the stemmers defined in this module.
+
+    Subclasses are expected to override :meth:`stem`.
+    """
+
+    def stem(self, word):
+        # type: (unicode) -> unicode
+        """Return the stem of *word*; must be overridden by subclasses.
+
+        :raises NotImplementedError: always, in this base class.
+        """
+        # ``NotImplemented`` is the rich-comparison sentinel, not an
+        # exception: raising it fails with TypeError.  NotImplementedError
+        # is the exception intended for un-overridden abstract methods.
+        raise NotImplementedError()
+
+
+class PyStemmer(BaseStemmer):
+    """Stemmer that delegates to a ``Stemmer`` object created with 'porter'."""
+
+    def __init__(self):
+        # type: () -> None
+        porter = _PyStemmer('porter')
+        self.stemmer = porter
+
+    def stem(self, word):
+        # type: (unicode) -> unicode
+        """Return the result of ``stemWord(word)`` on the wrapped stemmer."""
+        stemmed = self.stemmer.stemWord(word)
+        return stemmed
+
+
+class StandardStemmer(BaseStemmer, PorterStemmer):
+    """Porter stemmer exposing a one-argument ``stem`` method.
+
+    ``PorterStemmer.stem`` takes two extra positional arguments, passed here
+    as ``0`` and ``len(word) - 1``, so callers only supply the word.
+    """
+
+    def stem(self, word):
+        # type: (unicode) -> unicode
+        """Stem *word*, passing 0 and ``len(word) - 1`` to PorterStemmer."""
+        # Call PorterStemmer.stem by name: BaseStemmer is listed first in the
+        # bases, so a super() lookup would reach BaseStemmer.stem instead.
+        first, last = 0, len(word) - 1
+        return PorterStemmer.stem(self, word, first, last)
+
+
+def get_stemmer():
+    # type: () -> BaseStemmer
+    """Return a new PyStemmer if ``PYSTEMMER`` is true, else a StandardStemmer."""
+    stemmer_class = PyStemmer if PYSTEMMER else StandardStemmer
+    return stemmer_class()
diff --git a/sphinx/util/stemmer.py b/sphinx/util/stemmer/porter.py
index 47fc41e87..7cff74b6c 100644
--- a/sphinx/util/stemmer.py
+++ b/sphinx/util/stemmer/porter.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
- sphinx.util.stemmer
- ~~~~~~~~~~~~~~~~~~~
+ sphinx.util.stemmer.porter
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
Porter Stemming Algorithm