2 files changed, 9 insertions, 11 deletions
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index dd15212..0fb8936 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -14,7 +14,7 @@
 
 from sqlparse import tokens
 from sqlparse.keywords import SQL_REGEX
-from sqlparse.compat import StringIO, string_types, text_type
+from sqlparse.compat import StringIO, string_types, u
 from sqlparse.utils import consume
 
 
@@ -37,17 +37,10 @@ class Lexer(object):
 
         ``stack`` is the inital stack (default: ``['root']``)
         """
-        encoding = encoding or 'utf-8'
-
         if isinstance(text, string_types):
-            text = StringIO(text)
-
-        text = text.read()
-        if not isinstance(text, text_type):
-            try:
-                text = text.decode(encoding)
-            except UnicodeDecodeError:
-                text = text.decode('unicode-escape')
+            text = u(text, encoding)
+        elif isinstance(text, StringIO):
+            text = u(text.read(), encoding)
 
         iterable = enumerate(text)
         for pos, char in iterable:
diff --git a/tests/test_split.py b/tests/test_split.py
index f6d5f50..7c2645d 100644
--- a/tests/test_split.py
+++ b/tests/test_split.py
@@ -133,6 +133,11 @@ class SQLSplitTest(TestCaseBase):
         stmts = list(sqlparse.parsestream(stream))
         self.assertEqual(type(stmts[0].tokens[0].value), text_type)
 
+    def test_unicode_parsestream(self):  # parsestream on non-ASCII text
+        stream = StringIO(u"SELECT ö")  # stream holding a non-ASCII char
+        stmts = list(sqlparse.parsestream(stream))
+        self.assertEqual(str(stmts[0]), "SELECT ö")  # renders back intact
+
 
 def test_split_simple():
     stmts = sqlparse.split('select * from foo; select * from bar;')