"""Backports for individual classes and functions.""" import os import re import sys import codecs __all__ = ['any', 'detect_encoding', 'fsencode', 'python_implementation', 'wraps'] try: any = any except NameError: def any(seq): for elem in seq: if elem: return True return False _cookie_re = re.compile("coding[:=]\s*([-\w.]+)") def _get_normal_name(orig_enc): """Imitates get_normal_name in tokenizer.c.""" # Only care about the first 12 characters. enc = orig_enc[:12].lower().replace("_", "-") if enc == "utf-8" or enc.startswith("utf-8-"): return "utf-8" if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): return "iso-8859-1" return orig_enc def detect_encoding(readline): """ The detect_encoding() function is used to detect the encoding that should be used to decode a Python source file. It requires one argment, readline, in the same way as the tokenize() generator. It will call readline a maximum of twice, and return the encoding used (as a string) and a list of any lines (left as bytes) it has read in. It detects the encoding from the presence of a utf-8 bom or an encoding cookie as specified in pep-0263. If both a bom and a cookie are present, but disagree, a SyntaxError will be raised. If the encoding cookie is an invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found, 'utf-8-sig' is returned. If no encoding is specified, then the default of 'utf-8' will be returned. """ bom_found = False encoding = None default = 'utf-8' def read_or_stop(): try: return readline() except StopIteration: return '' def find_cookie(line): try: line_string = line.decode('ascii') except UnicodeDecodeError: return None matches = _cookie_re.findall(line_string) if not matches: return None encoding = _get_normal_name(matches[0]) try: codec = codecs.lookup(encoding) except LookupError: # This behaviour mimics the Python interpreter raise SyntaxError("unknown encoding: " + encoding) if bom_found: if codec.name != 'utf-8': # This behaviour mimics the Python interpreter raise SyntaxError('encoding problem: utf-8') encoding += '-sig' return encoding first = read_or_stop() if first.startswith(codecs.BOM_UTF8): bom_found = True first = first[3:] default = 'utf-8-sig' if not first: return default, [] encoding = find_cookie(first) if encoding: return encoding, [first] second = read_or_stop() if not second: return default, [first] encoding = find_cookie(second) if encoding: return encoding, [first, second] return default, [first, second] def fsencode(filename): if isinstance(filename, str): return filename elif isinstance(filename, unicode): return filename.encode(sys.getfilesystemencoding()) else: raise TypeError("expect bytes or str, not %s" % type(filename).__name__) try: from functools import wraps except ImportError: def wraps(func=None): """No-op replacement for functools.wraps""" def wrapped(func): return func return wrapped try: from platform import python_implementation except ImportError: def python_implementation(): """Return a string identifying the Python implementation.""" if 'PyPy' in sys.version: return 'PyPy' if os.name == 'java': return 'Jython' if sys.version.startswith('IronPython'): return 'IronPython' return 'CPython'