summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com>, 2012-03-14 02:59:51 -0400
committer: R David Murray <rdmurray@bitdance.com>, 2012-03-14 02:59:51 -0400
commit: 7441a7aedd251d529eb14eff9a16708e9cb32409 (patch)
tree: 1d525eb5ac468752cacf460b4228a0150ee48814
parent: 21c71bac5f684b0ec1665d841d05f91e078c3964 (diff)
download: cpython-git-7441a7aedd251d529eb14eff9a16708e9cb32409.tar.gz
#14291: if a header has non-ascii unicode, default to CTE using utf-8
In Python 2, if a unicode string was assigned as the value of a header, email would automatically CTE-encode it (apply a content transfer encoding) using the UTF-8 charset. This capability was lost in the translation to Python 3, and this patch restores it. Patch by Ali Ikinci, assisted by R. David Murray. I also fixed the mailbox test, which depended on non-ASCII input causing message_from_string to raise an error (the test's own comment noted that depending on this was a bad idea). The test now uses support.patch to induce an error during message serialization.
-rw-r--r-- Lib/email/header.py 7
-rw-r--r-- Lib/email/test/test_email.py 21
-rw-r--r-- Lib/test/test_mailbox.py 8
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS 3
5 files changed, 33 insertions, 7 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 2e687b7a6f..3250d367ed 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -283,7 +283,12 @@ class Header:
# character set, otherwise an early error is thrown.
output_charset = charset.output_codec or 'us-ascii'
if output_charset != _charset.UNKNOWN8BIT:
- s.encode(output_charset, errors)
+ try:
+ s.encode(output_charset, errors)
+ except UnicodeEncodeError:
+ if output_charset!='us-ascii':
+ raise
+ charset = UTF8
self._chunks.append((s, charset))
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 102e15b9ff..f43bb38aa8 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -619,6 +619,19 @@ class TestMessageAPI(TestEmailBase):
msg['Dummy'] = 'dummy\nX-Injected-Header: test'
self.assertRaises(errors.HeaderParseError, msg.as_string)
+ def test_unicode_header_defaults_to_utf8_encoding(self):
+ # Issue 14291
+ m = MIMEText('abc\n')
+ m['Subject'] = 'É test'
+ self.assertEqual(str(m),textwrap.dedent("""\
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Subject: =?utf-8?q?=C3=89_test?=
+
+ abc
+ """))
+
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
@@ -1060,9 +1073,13 @@ Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-W
'f\xfcr Offshore-Windkraftprojekte '
'<a-very-long-address@example.com>')
msg['Reply-To'] = header_string
- self.assertRaises(UnicodeEncodeError, msg.as_string)
+ eq(msg.as_string(maxheaderlen=78), """\
+Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
+ =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
+
+""")
msg = Message()
- msg['Reply-To'] = Header(header_string, 'utf-8',
+ msg['Reply-To'] = Header(header_string,
header_name='Reply-To')
eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py
index e09aea41d1..8f76e18c73 100644
--- a/Lib/test/test_mailbox.py
+++ b/Lib/test/test_mailbox.py
@@ -111,10 +111,10 @@ class TestMailbox(TestBase):
self.assertMailboxEmpty()
def test_add_that_raises_leaves_mailbox_empty(self):
- # XXX This test will start failing when Message learns to handle
- # non-ASCII string headers, and a different internal failure will
- # need to be found or manufactured.
- with self.assertRaises(ValueError):
+ def raiser(*args, **kw):
+ raise Exception("a fake error")
+ support.patch(self, email.generator.BytesGenerator, 'flatten', raiser)
+ with self.assertRaises(Exception):
self._box.add(email.message_from_string("From: Alphöso"))
self.assertEqual(len(self._box), 0)
self._box.close()
diff --git a/Misc/ACKS b/Misc/ACKS
index a7d89d39c4..2b3dad5424 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -430,6 +430,7 @@ Jeremy Hylton
Gerhard Häring
Fredrik Håård
Mihai Ibanescu
+Ali Ikinci
Lars Immisch
Bobby Impollonia
Meador Inge
diff --git a/Misc/NEWS b/Misc/NEWS
index eea3a17e7d..b4dcf82608 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -22,6 +22,9 @@ Core and Builtins
Library
-------
+- Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
+ instead of raising an error. This fixes a regression relative to 2.7.
+
- Issue #5219: Prevent event handler cascade in IDLE.
- Issue #14184: Increase the default stack size for secondary threads on