summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jason R. Coombs <jaraco@jaraco.com> 2019-04-05 13:20:16 -0400
committer GitHub <noreply@github.com> 2019-04-05 13:20:16 -0400
commit 4edd0d57228da37795f9600ec06363d0a24cada8 (patch)
tree 77d004a97ec9f95fa1dcfe75308772b55e5a313d
parent 393809a02ed4d0f07faec5c1f23384233e6cd68e (diff)
parent f36781084f8f870ea747d477bd742057ea022421 (diff)
download python-setuptools-git-4edd0d57228da37795f9600ec06363d0a24cada8.tar.gz
Merge pull request #1735 from pypa/bugfix/1702-utf8-config
When reading config files, require them to be encoded with UTF-8.
-rw-r--r-- changelog.d/1735.breaking.rst 1
-rw-r--r-- setuptools/dist.py 9
-rw-r--r-- setuptools/tests/test_config.py 31
-rw-r--r-- setuptools/tests/test_setopt.py 36
-rw-r--r-- setuptools/unicode_utils.py 13
5 files changed, 48 insertions, 42 deletions
diff --git a/changelog.d/1735.breaking.rst b/changelog.d/1735.breaking.rst
new file mode 100644
index 00000000..448730c4
--- /dev/null
+++ b/changelog.d/1735.breaking.rst
@@ -0,0 +1 @@
+When parsing setup.cfg files, setuptools now requires the files to be encoded as UTF-8. Any other encoding will lead to a UnicodeDecodeError. This change removes support for specifying an encoding using a 'coding: ' directive in the header of the file, a feature that was introduced in 40.7. Given the recent release of the aforementioned feature, it is assumed that few if any projects are utilizing the feature to specify an encoding other than UTF-8.
diff --git a/setuptools/dist.py b/setuptools/dist.py
index ae380290..9a165de0 100644
--- a/setuptools/dist.py
+++ b/setuptools/dist.py
@@ -35,7 +35,6 @@ from setuptools.depends import Require
from setuptools import windows_support
from setuptools.monkey import get_unpatched
from setuptools.config import parse_configuration
-from .unicode_utils import detect_encoding
import pkg_resources
__import__('setuptools.extern.packaging.specifiers')
@@ -587,13 +586,9 @@ class Distribution(_Distribution):
parser = ConfigParser()
for filename in filenames:
- with io.open(filename, 'rb') as fp:
- encoding = detect_encoding(fp)
+ with io.open(filename, encoding='utf-8') as reader:
if DEBUG:
- self.announce(" reading %s [%s]" % (
- filename, encoding or 'locale')
- )
- reader = io.TextIOWrapper(fp, encoding=encoding)
+ self.announce(" reading {filename}".format(**locals()))
(parser.read_file if six.PY3 else parser.readfp)(reader)
for section in parser.sections():
options = parser.options(section)
diff --git a/setuptools/tests/test_config.py b/setuptools/tests/test_config.py
index 4daf1df1..bc97664d 100644
--- a/setuptools/tests/test_config.py
+++ b/setuptools/tests/test_config.py
@@ -9,7 +9,6 @@ from mock import patch
from setuptools.dist import Distribution, _Distribution
from setuptools.config import ConfigHandler, read_configuration
from setuptools.extern.six.moves import configparser
-from setuptools.tests import is_ascii
from . import py2_only, py3_only
from .textwrap import DALS
@@ -446,10 +445,6 @@ class TestMetadata:
with get_dist(tmpdir):
pass
- skip_if_not_ascii = pytest.mark.skipif(
- not is_ascii, reason='Test not supported with this locale')
-
- @skip_if_not_ascii
def test_non_ascii_1(self, tmpdir):
fake_env(
tmpdir,
@@ -457,18 +452,8 @@ class TestMetadata:
'description = éàïôñ\n',
encoding='utf-8'
)
- with pytest.raises(UnicodeDecodeError):
- with get_dist(tmpdir):
- pass
-
- def test_non_ascii_2(self, tmpdir):
- fake_env(
- tmpdir,
- '# -*- coding: invalid\n'
- )
- with pytest.raises(LookupError):
- with get_dist(tmpdir):
- pass
+ with get_dist(tmpdir):
+ pass
def test_non_ascii_3(self, tmpdir):
fake_env(
@@ -479,7 +464,6 @@ class TestMetadata:
with get_dist(tmpdir):
pass
- @skip_if_not_ascii
def test_non_ascii_4(self, tmpdir):
fake_env(
tmpdir,
@@ -491,8 +475,10 @@ class TestMetadata:
with get_dist(tmpdir) as dist:
assert dist.metadata.description == 'éàïôñ'
- @skip_if_not_ascii
- def test_non_ascii_5(self, tmpdir):
+ def test_not_utf8(self, tmpdir):
+ """
+ Config files encoded not in UTF-8 will fail
+ """
fake_env(
tmpdir,
'# vim: set fileencoding=iso-8859-15 :\n'
@@ -500,8 +486,9 @@ class TestMetadata:
'description = éàïôñ\n',
encoding='iso-8859-15'
)
- with get_dist(tmpdir) as dist:
- assert dist.metadata.description == 'éàïôñ'
+ with pytest.raises(UnicodeDecodeError):
+ with get_dist(tmpdir):
+ pass
class TestOptions:
diff --git a/setuptools/tests/test_setopt.py b/setuptools/tests/test_setopt.py
new file mode 100644
index 00000000..3fb04fb4
--- /dev/null
+++ b/setuptools/tests/test_setopt.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import io
+
+import six
+
+from setuptools.command import setopt
+from setuptools.extern.six.moves import configparser
+
+
+class TestEdit:
+ @staticmethod
+ def parse_config(filename):
+ parser = configparser.ConfigParser()
+ with io.open(filename, encoding='utf-8') as reader:
+ (parser.read_file if six.PY3 else parser.readfp)(reader)
+ return parser
+
+ @staticmethod
+ def write_text(file, content):
+ with io.open(file, 'wb') as strm:
+ strm.write(content.encode('utf-8'))
+
+ def test_utf8_encoding_retained(self, tmpdir):
+ """
+ When editing a file, non-ASCII characters encoded in
+ UTF-8 should be retained.
+ """
+ config = tmpdir.join('setup.cfg')
+ self.write_text(str(config), '[names]\njaraco=джарако')
+ setopt.edit_config(str(config), dict(names=dict(other='yes')))
+ parser = self.parse_config(str(config))
+ assert parser.get('names', 'jaraco') == 'джарако'
+ assert parser.get('names', 'other') == 'yes'
diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py
index 3b8179a8..7c63efd2 100644
--- a/setuptools/unicode_utils.py
+++ b/setuptools/unicode_utils.py
@@ -1,6 +1,5 @@
import unicodedata
import sys
-import re
from setuptools.extern import six
@@ -43,15 +42,3 @@ def try_encode(string, enc):
return string.encode(enc)
except UnicodeEncodeError:
return None
-
-
-CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
-
-
-def detect_encoding(fp):
- first_line = fp.readline()
- fp.seek(0)
- m = CODING_RE.match(first_line)
- if m is None:
- return None
- return m.group(1).decode('ascii')