summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Anthony Sottile <asottile@umich.edu> 2018-10-23 22:52:27 +0000
committer: Anthony Sottile <asottile@umich.edu> 2018-10-23 22:52:27 +0000
commit: 0115fedbded6881ac320e878290595e57b44c17e (patch)
tree: 25af8225c39d45abaff7199cc03d13f735c0a93f
parent: ba2fb9c53a8316642249598f599bbf7608d54260 (diff)
parent: 308a0bdb50509987e437b6cb68469de5784c9e2c (diff)
download: flake8-0115fedbded6881ac320e878290595e57b44c17e.tar.gz
Merge branch 'match_newlines_py3_v2' into 'master'
Fix inconsistent newlines read from a file in python3 (try 2)

Closes #457

See merge request pycqa/flake8!255
-rw-r--r-- src/flake8/processor.py 9
-rw-r--r-- tests/unit/test_file_processor.py 27
2 files changed, 30 insertions, 6 deletions
diff --git a/src/flake8/processor.py b/src/flake8/processor.py
index fdf0a98..18f9f1d 100644
--- a/src/flake8/processor.py
+++ b/src/flake8/processor.py
@@ -1,6 +1,5 @@
"""Module containing our file processor that tokenizes a file for checks."""
import contextlib
-import io
import logging
import sys
import tokenize
@@ -308,11 +307,9 @@ class FileProcessor(object):
def _readlines_py3(self):
# type: () -> List[str]
try:
- with open(self.filename, "rb") as fd:
- (coding, lines) = tokenize.detect_encoding(fd.readline)
- textfd = io.TextIOWrapper(fd, coding, line_buffering=True)
- return [l.decode(coding) for l in lines] + textfd.readlines()
- except (LookupError, SyntaxError, UnicodeError):
+ with tokenize.open(self.filename) as fd:
+ return fd.readlines()
+ except (SyntaxError, UnicodeError):
# If we can't detect the codec with tokenize.detect_encoding, or
# the detected encoding is incorrect, just fallback to latin-1.
with open(self.filename, encoding="latin-1") as fd:
diff --git a/tests/unit/test_file_processor.py b/tests/unit/test_file_processor.py
index 312827b..1323011 100644
--- a/tests/unit/test_file_processor.py
+++ b/tests/unit/test_file_processor.py
@@ -27,6 +27,33 @@ def test_read_lines_splits_lines():
for line in lines)
+def lines_from_file(tmpdir, contents):
+ f = tmpdir.join('f.py')
+ # be careful to write the bytes exactly to avoid newline munging
+ f.write_binary(contents)
+ return processor.FileProcessor(f.strpath, options_from()).lines
+
+
+def test_read_lines_universal_newlines(tmpdir):
+ r"""Verify that line endings are translated to \n."""
+ lines = lines_from_file(tmpdir, b'# coding: utf-8\r\nx = 1\r\n')
+ assert lines == ['# coding: utf-8\n', 'x = 1\n']
+
+
+def test_read_lines_incorrect_utf_16(tmpdir):
+ """Verify that a file which incorrectly claims it is utf16 is still read
+ as latin-1.
+ """
+ lines = lines_from_file(tmpdir, b'# coding: utf16\nx = 1\n')
+ assert lines == ['# coding: utf16\n', 'x = 1\n']
+
+
+def test_read_lines_unknown_encoding(tmpdir):
+ """Verify that an unknown encoding is still read as latin-1."""
+ lines = lines_from_file(tmpdir, b'# coding: fake-encoding\nx = 1\n')
+ assert lines == ['# coding: fake-encoding\n', 'x = 1\n']
+
+
@pytest.mark.parametrize('first_line', [
'\xEF\xBB\xBF"""Module docstring."""\n',
u'\uFEFF"""Module docstring."""\n',