Merge pull request #1735 from pypa/bugfix/1702-utf8-config

When reading config files, require them to be encoded with UTF-8.
author: Jason R. Coombs <jaraco@jaraco.com> 2019-04-05 13:20:16 -0400
committer: GitHub <noreply@github.com> 2019-04-05 13:20:16 -0400
commit: 4edd0d57228da37795f9600ec06363d0a24cada8 (patch)
tree: 77d004a97ec9f95fa1dcfe75308772b55e5a313d
parent: 393809a02ed4d0f07faec5c1f23384233e6cd68e (diff)
parent: f36781084f8f870ea747d477bd742057ea022421 (diff)
download: python-setuptools-git-4edd0d57228da37795f9600ec06363d0a24cada8.tar.gz
5 files changed, 48 insertions, 42 deletions
diff --git a/changelog.d/1735.breaking.rst b/changelog.d/1735.breaking.rst
new file mode 100644
index 00000000..448730c4
--- /dev/null
+++ b/changelog.d/1735.breaking.rst
@@ -0,0 +1 @@
+When parsing setup.cfg files, setuptools now requires the files to be encoded as UTF-8. Any other encoding will lead to a UnicodeDecodeError. This change removes support for specifying an encoding using a 'coding: ' directive in the header of the file, a feature that was introduced in 40.7. Given the recent release of the aforementioned feature, it is assumed that few if any projects are utilizing the feature to specify an encoding other than UTF-8.
diff --git a/setuptools/dist.py b/setuptools/dist.py
index ae380290..9a165de0 100644
--- a/setuptools/dist.py
+++ b/setuptools/dist.py
@@ -35,7 +35,6 @@ from setuptools.depends import Require
 from setuptools import windows_support
 from setuptools.monkey import get_unpatched
 from setuptools.config import parse_configuration
-from .unicode_utils import detect_encoding
 import pkg_resources
 
 __import__('setuptools.extern.packaging.specifiers')
@@ -587,13 +586,9 @@ class Distribution(_Distribution):
 
         parser = ConfigParser()
         for filename in filenames:
-            with io.open(filename, 'rb') as fp:
-                encoding = detect_encoding(fp)
+            with io.open(filename, encoding='utf-8') as reader:
                 if DEBUG:
-                    self.announce("  reading %s [%s]" % (
-                        filename, encoding or 'locale')
-                    )
-                reader = io.TextIOWrapper(fp, encoding=encoding)
+                    self.announce("  reading {filename}".format(**locals()))
                 (parser.read_file if six.PY3 else parser.readfp)(reader)
             for section in parser.sections():
                 options = parser.options(section)
diff --git a/setuptools/tests/test_config.py b/setuptools/tests/test_config.py
index 4daf1df1..bc97664d 100644
--- a/setuptools/tests/test_config.py
+++ b/setuptools/tests/test_config.py
@@ -9,7 +9,6 @@ from mock import patch
 from setuptools.dist import Distribution, _Distribution
 from setuptools.config import ConfigHandler, read_configuration
 from setuptools.extern.six.moves import configparser
-from setuptools.tests import is_ascii
 from . import py2_only, py3_only
 from .textwrap import DALS
 
@@ -446,10 +445,6 @@ class TestMetadata:
             with get_dist(tmpdir):
                 pass
 
-    skip_if_not_ascii = pytest.mark.skipif(
-        not is_ascii, reason='Test not supported with this locale')
-
-    @skip_if_not_ascii
     def test_non_ascii_1(self, tmpdir):
         fake_env(
             tmpdir,
@@ -457,18 +452,8 @@ class TestMetadata:
             'description = éàïôñ\n',
             encoding='utf-8'
         )
-        with pytest.raises(UnicodeDecodeError):
-            with get_dist(tmpdir):
-                pass
-
-    def test_non_ascii_2(self, tmpdir):
-        fake_env(
-            tmpdir,
-            '# -*- coding: invalid\n'
-        )
-        with pytest.raises(LookupError):
-            with get_dist(tmpdir):
-                pass
+        with get_dist(tmpdir):
+            pass
 
     def test_non_ascii_3(self, tmpdir):
         fake_env(
@@ -479,7 +464,6 @@ class TestMetadata:
         with get_dist(tmpdir):
             pass
 
-    @skip_if_not_ascii
     def test_non_ascii_4(self, tmpdir):
         fake_env(
             tmpdir,
@@ -491,8 +475,10 @@ class TestMetadata:
         with get_dist(tmpdir) as dist:
             assert dist.metadata.description == 'éàïôñ'
 
-    @skip_if_not_ascii
-    def test_non_ascii_5(self, tmpdir):
+    def test_not_utf8(self, tmpdir):
+        """
+        Config files not encoded in UTF-8 will fail
+        """
         fake_env(
             tmpdir,
             '# vim: set fileencoding=iso-8859-15 :\n'
@@ -500,8 +486,9 @@ class TestMetadata:
             'description = éàïôñ\n',
             encoding='iso-8859-15'
         )
-        with get_dist(tmpdir) as dist:
-            assert dist.metadata.description == 'éàïôñ'
+        with pytest.raises(UnicodeDecodeError):
+            with get_dist(tmpdir):
+                pass
 
 
 class TestOptions:
diff --git a/setuptools/tests/test_setopt.py b/setuptools/tests/test_setopt.py
new file mode 100644
index 00000000..3fb04fb4
--- /dev/null
+++ b/setuptools/tests/test_setopt.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import io
+
+import six
+
+from setuptools.command import setopt
+from setuptools.extern.six.moves import configparser
+
+
+class TestEdit:
+    @staticmethod
+    def parse_config(filename):
+        parser = configparser.ConfigParser()
+        with io.open(filename, encoding='utf-8') as reader:
+            (parser.read_file if six.PY3 else parser.readfp)(reader)
+        return parser
+
+    @staticmethod
+    def write_text(file, content):
+        with io.open(file, 'wb') as strm:
+            strm.write(content.encode('utf-8'))
+
+    def test_utf8_encoding_retained(self, tmpdir):
+        """
+        When editing a file, non-ASCII characters encoded in
+        UTF-8 should be retained.
+        """
+        config = tmpdir.join('setup.cfg')
+        self.write_text(str(config), '[names]\njaraco=джарако')
+        setopt.edit_config(str(config), dict(names=dict(other='yes')))
+        parser = self.parse_config(str(config))
+        assert parser.get('names', 'jaraco') == 'джарако'
+        assert parser.get('names', 'other') == 'yes'
diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py
index 3b8179a8..7c63efd2 100644
--- a/setuptools/unicode_utils.py
+++ b/setuptools/unicode_utils.py
@@ -1,6 +1,5 @@
 import unicodedata
 import sys
-import re
 
 from setuptools.extern import six
 
@@ -43,15 +42,3 @@ def try_encode(string, enc):
         return string.encode(enc)
     except UnicodeEncodeError:
         return None
-
-
-CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
-
-
-def detect_encoding(fp):
-    first_line = fp.readline()
-    fp.seek(0)
-    m = CODING_RE.match(first_line)
-    if m is None:
-        return None
-    return m.group(1).decode('ascii')
author	Jason R. Coombs <jaraco@jaraco.com>	2019-04-05 13:20:16 -0400
committer	GitHub <noreply@github.com>	2019-04-05 13:20:16 -0400
commit	4edd0d57228da37795f9600ec06363d0a24cada8 (patch)
tree	77d004a97ec9f95fa1dcfe75308772b55e5a313d
parent	393809a02ed4d0f07faec5c1f23384233e6cd68e (diff)
parent	f36781084f8f870ea747d477bd742057ea022421 (diff)
download	python-setuptools-git-4edd0d57228da37795f9600ec06363d0a24cada8.tar.gz