summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: sme <s-m-e@users.noreply.github.com> 2018-04-26 05:20:35 +0200
committer: Val Neekman <un33kvu@gmail.com> 2018-04-25 23:20:35 -0400
commit: 5c766dd99d5675f70b34cfabc3a8b8556a0065f1 (patch)
tree: e75950d1ddbf986b175590ff587dfee5d5bf7f3b
parent: 4c1a344652dfef32f786a96d94b351f4277da964 (diff)
download: python-slugify-5c766dd99d5675f70b34cfabc3a8b8556a0065f1.tar.gz
Support for case sensitivity (#54)
* added support for case sensitivity
* remove empty line
* ignore W605 warning from pycodestyle
* doc string for lowercase keyword argument
-rwxr-xr-x pycodestyle.sh 4
-rw-r--r-- slugify/slugify.py 21
-rw-r--r-- test.py 5
3 files changed, 23 insertions, 7 deletions
diff --git a/pycodestyle.sh b/pycodestyle.sh
index ab6766e..cd6122d 100755
--- a/pycodestyle.sh
+++ b/pycodestyle.sh
@@ -7,5 +7,7 @@
# -- E261 at least two spaces before inline comment
# -- E225 missing whitespace around operator
# -- E501 line too long
+# Ignoring warning codes
+# -- W605 invalid escape sequence '\d'
-pycodestyle --ignore=E128,E261,E225,E501 slugify test.py setup.py
+pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py
diff --git a/slugify/slugify.py b/slugify/slugify.py
index 99afb7f..192bbd3 100644
--- a/slugify/slugify.py
+++ b/slugify/slugify.py
@@ -26,6 +26,7 @@ DECIMAL_PATTERN = re.compile('&#(\d+);')
HEX_PATTERN = re.compile('&#x([\da-fA-F]+);')
QUOTE_PATTERN = re.compile(r'[\']+')
ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+')
+ALLOWED_CHARS_PATTERN_WITH_UPPERCASE = re.compile(r'[^-a-zA-Z0-9]+')
DUPLICATE_DASH_PATTERN = re.compile('-{2,}')
NUMBERS_PATTERN = re.compile('(?<=\d),(?=\d)')
DEFAULT_SEPARATOR = '-'
@@ -74,7 +75,7 @@ def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', s
def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
- separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None):
+ separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True):
"""
Make a slug from the given text.
:param text (str): initial text
@@ -87,6 +88,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
:param separator (str): separator between words
:param stopwords (iterable): words to discount
:param regex_pattern (str): regex pattern for allowed characters
+ :param lowercase (bool): activate case sensitivity by setting it to False
:return (str):
"""
@@ -127,8 +129,9 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
if sys.version_info < (3,):
text = text.encode('ascii', 'ignore')
- # make the text lowercase
- text = text.lower()
+ # make the text lowercase (optional)
+ if lowercase:
+ text = text.lower()
# remove generated quotes -- post-process
text = QUOTE_PATTERN.sub('', text)
@@ -137,7 +140,10 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
text = NUMBERS_PATTERN.sub('', text)
# replace all other unwanted characters
- pattern = regex_pattern or ALLOWED_CHARS_PATTERN
+ if lowercase:
+ pattern = regex_pattern or ALLOWED_CHARS_PATTERN
+ else:
+ pattern = regex_pattern or ALLOWED_CHARS_PATTERN_WITH_UPPERCASE
text = re.sub(pattern, DEFAULT_SEPARATOR, text)
# remove redundant
@@ -145,8 +151,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
# remove stopwords
if stopwords:
- stopwords_lower = [s.lower() for s in stopwords]
- words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
+ if lowercase:
+ stopwords_lower = [s.lower() for s in stopwords]
+ words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
+ else:
+ words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]
text = DEFAULT_SEPARATOR.join(words)
# smart truncate if requested
diff --git a/test.py b/test.py
index 3d37a0b..9ff9ec0 100644
--- a/test.py
+++ b/test.py
@@ -113,6 +113,11 @@ class TestSlugification(unittest.TestCase):
r = slugify(txt, stopwords=['stopword'])
self.assertEqual(r, 'this-has-a')
+ def test_stopword_removal_casesensitive(self):
+ txt = 'thIs Has a stopword Stopword'
+ r = slugify(txt, stopwords=['Stopword'], lowercase=False)
+ self.assertEqual(r, 'thIs-Has-a-stopword')
+
def test_multiple_stopword_occurances(self):
txt = 'the quick brown fox jumps over the lazy dog'
r = slugify(txt, stopwords=['the'])