Merge pull request #287 from phdru/master

Convert string literals to unicode using u() for Py27
author: Vik <vmuriart@gmail.com> 2016-09-12 00:14:01 -0700
committer: GitHub <noreply@github.com> 2016-09-12 00:14:01 -0700
commit: 47fef6b7cb1fa8edd83dd31eb73aaf3c9afff086 (patch)
tree: cda75d04543a02e20bf04ab01c8dfc5e670a269d /sqlparse
parent: 791a3312a247670cdeed61e52e8ca449dbb27afa (diff)
parent: 843499915e91e0ee324a0407c78ac6f570806370 (diff)
download: sqlparse-47fef6b7cb1fa8edd83dd31eb73aaf3c9afff086.tar.gz
7 files changed, 32 insertions, 32 deletions
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index b5b8435..8aef5b1 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -57,7 +57,7 @@ def format(sql, encoding=None, **options):
     options = formatter.validate_options(options)
     stack = formatter.build_filter_stack(stack, options)
     stack.postprocess.append(filters.SerializerUnicode())
-    return ''.join(stack.run(sql, encoding))
+    return u''.join(stack.run(sql, encoding))
 
 
 def split(sql, encoding=None):
diff --git a/sqlparse/cli.py b/sqlparse/cli.py
index 80d547d..c329fdb 100644..100755
--- a/sqlparse/cli.py
+++ b/sqlparse/cli.py
@@ -123,7 +123,7 @@ def create_parser():
 
 def _error(msg):
     """Print msg and optionally exit with return code exit_."""
-    sys.stderr.write('[ERROR] {0}\n'.format(msg))
+    sys.stderr.write(u'[ERROR] {0}\n'.format(msg))
     return 1
 
 
@@ -138,13 +138,14 @@ def main(args=None):
             # TODO: Needs to deal with encoding
             data = ''.join(open(args.filename).readlines())
         except IOError as e:
-            return _error('Failed to read {0}: {1}'.format(args.filename, e))
+            return _error(
+                u'Failed to read {0}: {1}'.format(args.filename, e))
 
     if args.outfile:
         try:
             stream = open(args.outfile, 'w')
         except IOError as e:
-            return _error('Failed to open {0}: {1}'.format(args.outfile, e))
+            return _error(u'Failed to open {0}: {1}'.format(args.outfile, e))
     else:
         stream = sys.stdout
 
@@ -152,7 +153,7 @@ def main(args=None):
     try:
         formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
     except SQLParseError as e:
-        return _error('Invalid options: {0}'.format(e))
+        return _error(u'Invalid options: {0}'.format(e))
 
     s = sqlparse.format(data, **formatter_opts)
     if PY2:
diff --git a/sqlparse/compat.py b/sqlparse/compat.py
index d6a9144..933e0be 100644
--- a/sqlparse/compat.py
+++ b/sqlparse/compat.py
@@ -23,14 +23,10 @@ PY3 = sys.version_info[0] == 3
 
 
 if PY3:
-    def u(s, encoding=None):
-        return str(s)
-
-
     def unicode_compatible(cls):
         return cls
 
-
+    bytes_type = bytes
     text_type = str
     string_types = (str,)
     from io import StringIO
@@ -38,20 +34,12 @@ if PY3:
 
 
 elif PY2:
-    def u(s, encoding=None):
-        encoding = encoding or 'unicode-escape'
-        try:
-            return unicode(s)
-        except UnicodeDecodeError:
-            return unicode(s, encoding)
-
-
     def unicode_compatible(cls):
         cls.__unicode__ = cls.__str__
         cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
         return cls
 
-
+    bytes_type = str
     text_type = unicode
     string_types = (str, unicode,)
     from StringIO import StringIO
diff --git a/sqlparse/filters/output.py b/sqlparse/filters/output.py
index 77a7ac8..a029251 100644
--- a/sqlparse/filters/output.py
+++ b/sqlparse/filters/output.py
@@ -22,7 +22,7 @@ class OutputFilter(object):
     def process(self, stmt):
         self.count += 1
         if self.count > 1:
-            varname = '{f.varname}{f.count}'.format(f=self)
+            varname = u'{f.varname}{f.count}'.format(f=self)
         else:
             varname = self.varname
 
diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py
index 3d93441..70f3936 100644
--- a/sqlparse/filters/reindent.py
+++ b/sqlparse/filters/reindent.py
@@ -36,7 +36,7 @@ class ReindentFilter(object):
         return self.offset + self.indent * self.width
 
     def _get_offset(self, token):
-        raw = ''.join(map(text_type, self._flatten_up_to_token(token)))
+        raw = u''.join(map(text_type, self._flatten_up_to_token(token)))
         line = (raw or '\n').splitlines()[-1]
         # Now take current offset into account and return relative offset.
         return len(line) - len(self.char * self.leading_ws)
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index e7996b2..15a9aef 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -14,7 +14,7 @@
 
 from sqlparse import tokens
 from sqlparse.keywords import SQL_REGEX
-from sqlparse.compat import file_types, string_types, u
+from sqlparse.compat import bytes_type, text_type, file_types
 from sqlparse.utils import consume
 
 
@@ -37,10 +37,21 @@ class Lexer(object):
 
         ``stack`` is the inital stack (default: ``['root']``)
         """
-        if isinstance(text, string_types):
-            text = u(text, encoding)
-        elif isinstance(text, file_types):
-            text = u(text.read(), encoding)
+        if isinstance(text, file_types):
+            text = text.read()
+
+        if isinstance(text, text_type):
+            pass
+        elif isinstance(text, bytes_type):
+            try:
+                text = text.decode()
+            except UnicodeDecodeError:
+                if not encoding:
+                    encoding = 'unicode-escape'
+                text = text.decode(encoding)
+        else:
+            raise TypeError(u"Expected text or file-like object, got {!r}".
+                            format(type(text)))
 
         iterable = enumerate(text)
         for pos, char in iterable:
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index f780090..06347a0 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -48,8 +48,8 @@ class Token(object):
         cls = self._get_repr_name()
         value = self._get_repr_value()
 
-        q = '"' if value.startswith("'") and value.endswith("'") else "'"
-        return "<{cls} {q}{value}{q} at 0x{id:2X}>".format(
+        q = u'"' if value.startswith("'") and value.endswith("'") else u"'"
+        return u"<{cls} {q}{value}{q} at 0x{id:2X}>".format(
             id=id(self), **locals())
 
     def _get_repr_name(self):
@@ -143,7 +143,7 @@ class TokenList(Token):
         self.is_group = True
 
     def __str__(self):
-        return ''.join(token.value for token in self.flatten())
+        return u''.join(token.value for token in self.flatten())
 
     # weird bug
     # def __len__(self):
@@ -160,13 +160,13 @@ class TokenList(Token):
 
     def _pprint_tree(self, max_depth=None, depth=0, f=None):
         """Pretty-print the object tree."""
-        indent = ' | ' * depth
+        indent = u' | ' * depth
         for idx, token in enumerate(self.tokens):
             cls = token._get_repr_name()
             value = token._get_repr_value()
 
-            q = '"' if value.startswith("'") and value.endswith("'") else "'"
-            print("{indent}{idx:2d} {cls} {q}{value}{q}"
+            q = u'"' if value.startswith("'") and value.endswith("'") else u"'"
+            print(u"{indent}{idx:2d} {cls} {q}{value}{q}"
                   .format(**locals()), file=f)
 
             if token.is_group and (max_depth is None or depth < max_depth):
author	Vik <vmuriart@gmail.com>	2016-09-12 00:14:01 -0700
committer	GitHub <noreply@github.com>	2016-09-12 00:14:01 -0700
commit	47fef6b7cb1fa8edd83dd31eb73aaf3c9afff086 (patch)
tree	cda75d04543a02e20bf04ab01c8dfc5e670a269d /sqlparse
parent	791a3312a247670cdeed61e52e8ca449dbb27afa (diff)
parent	843499915e91e0ee324a0407c78ac6f570806370 (diff)
download	sqlparse-47fef6b7cb1fa8edd83dd31eb73aaf3c9afff086.tar.gz