# -*- coding: utf-8 -*- __all__ = ['slugify'] import re import unicodedata import types import sys from htmlentitydefs import name2codepoint from unidecode import unidecode # character entity reference CHAR_ENTITY_REXP = re.compile('&(%s);' % '|'.join(name2codepoint)) # decimal character reference DECIMAL_REXP = re.compile('&#(\d+);') # hexadecimal character reference HEX_REXP = re.compile('&#x([\da-fA-F]+);') REPLACE1_REXP = re.compile(r'[\']+') REPLACE2_REXP = re.compile(r'[^-a-z0-9]+') REMOVE_REXP = re.compile('-{2,}') def smart_truncate(string, max_length=0, word_boundaries=False, separator=' '): """ Truncate a string """ string = string.strip(separator) if not max_length: return string if len(string) < max_length: return string if not word_boundaries: return string[:max_length].strip(separator) if separator not in string: return string[:max_length] truncated = '' for word in string.split(separator): if word: next_len = len(truncated) + len(word) + len(separator) if next_len <= max_length: truncated += '{0}{1}'.format(word, separator) if not truncated: truncated = string[:max_length] return truncated.strip(separator) def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False, separator='-'): """ Make a slug from the given text """ # text to unicode if not isinstance(text, types.UnicodeType): text = unicode(text, 'utf-8', 'ignore') # decode unicode ( 影師嗎 = Ying Shi Ma) text = unidecode(text) # text back to unicode if not isinstance(text, types.UnicodeType): text = unicode(text, 'utf-8', 'ignore') # character entity reference if entities: text = CHAR_ENTITY_REXP.sub(lambda m: unichr(name2codepoint[m.group(1)]), text) # decimal character reference if decimal: try: text = DECIMAL_REXP.sub(lambda m: unichr(int(m.group(1))), text) except: pass # hexadecimal character reference if hexadecimal: try: text = HEX_REXP.sub(lambda m: unichr(int(m.group(1), 16)), text) except: pass # translate text = unicodedata.normalize('NFKD', text) if sys.version_info < (3,): text = text.encode('ascii', 'ignore') # replace unwanted characters text = REPLACE1_REXP.sub('', text.lower()) # replace ' with nothing instead with - text = REPLACE2_REXP.sub('-', text.lower()) # remove redundant - text = REMOVE_REXP.sub('-', text).strip('-') # smart truncate if requested if max_length > 0: text = smart_truncate(text, max_length, word_boundary, '-') if separator != '-': text = text.replace('-', separator) return text def main(): if len(sys.argv) < 2: print "Usage %s TEXT TO SLUGIFY" % sys.argv[0] return text = ' '.join(sys.argv[1:]) print slugify(text)