diff options
| author | Anthony Sottile <asottile@umich.edu> | 2018-10-23 22:52:27 +0000 |
|---|---|---|
| committer | Anthony Sottile <asottile@umich.edu> | 2018-10-23 22:52:27 +0000 |
| commit | 0115fedbded6881ac320e878290595e57b44c17e (patch) | |
| tree | 25af8225c39d45abaff7199cc03d13f735c0a93f | |
| parent | ba2fb9c53a8316642249598f599bbf7608d54260 (diff) | |
| parent | 308a0bdb50509987e437b6cb68469de5784c9e2c (diff) | |
| download | flake8-0115fedbded6881ac320e878290595e57b44c17e.tar.gz | |
Merge branch 'match_newlines_py3_v2' into 'master'
Fix inconsistent newlines read from a file in python3 (try 2)
Closes #457
See merge request pycqa/flake8!255
| -rw-r--r-- | src/flake8/processor.py | 9 | ||||
| -rw-r--r-- | tests/unit/test_file_processor.py | 27 |
2 files changed, 30 insertions, 6 deletions
```diff
diff --git a/src/flake8/processor.py b/src/flake8/processor.py
index fdf0a98..18f9f1d 100644
--- a/src/flake8/processor.py
+++ b/src/flake8/processor.py
@@ -1,6 +1,5 @@
 """Module containing our file processor that tokenizes a file for checks."""
 import contextlib
-import io
 import logging
 import sys
 import tokenize
@@ -308,11 +307,9 @@ class FileProcessor(object):
     def _readlines_py3(self):
         # type: () -> List[str]
         try:
-            with open(self.filename, "rb") as fd:
-                (coding, lines) = tokenize.detect_encoding(fd.readline)
-                textfd = io.TextIOWrapper(fd, coding, line_buffering=True)
-                return [l.decode(coding) for l in lines] + textfd.readlines()
-        except (LookupError, SyntaxError, UnicodeError):
+            with tokenize.open(self.filename) as fd:
+                return fd.readlines()
+        except (SyntaxError, UnicodeError):
             # If we can't detect the codec with tokenize.detect_encoding, or
             # the detected encoding is incorrect, just fallback to latin-1.
             with open(self.filename, encoding="latin-1") as fd:
diff --git a/tests/unit/test_file_processor.py b/tests/unit/test_file_processor.py
index 312827b..1323011 100644
--- a/tests/unit/test_file_processor.py
+++ b/tests/unit/test_file_processor.py
@@ -27,6 +27,33 @@ def test_read_lines_splits_lines():
                for line in lines)
 
 
+def lines_from_file(tmpdir, contents):
+    f = tmpdir.join('f.py')
+    # be careful to write the bytes exactly to avoid newline munging
+    f.write_binary(contents)
+    return processor.FileProcessor(f.strpath, options_from()).lines
+
+
+def test_read_lines_universal_newlines(tmpdir):
+    r"""Verify that line endings are translated to \n."""
+    lines = lines_from_file(tmpdir, b'# coding: utf-8\r\nx = 1\r\n')
+    assert lines == ['# coding: utf-8\n', 'x = 1\n']
+
+
+def test_read_lines_incorrect_utf_16(tmpdir):
+    """Verify that a file which incorrectly claims it is utf16 is still read
+    as latin-1.
+    """
+    lines = lines_from_file(tmpdir, b'# coding: utf16\nx = 1\n')
+    assert lines == ['# coding: utf16\n', 'x = 1\n']
+
+
+def test_read_lines_unknown_encoding(tmpdir):
+    """Verify that an unknown encoding is still read as latin-1."""
+    lines = lines_from_file(tmpdir, b'# coding: fake-encoding\nx = 1\n')
+    assert lines == ['# coding: fake-encoding\n', 'x = 1\n']
+
+
 @pytest.mark.parametrize('first_line', [
     '\xEF\xBB\xBF"""Module docstring."""\n',
     u'\uFEFF"""Module docstring."""\n',
```
