diff options
author | Barry Warsaw <barry@python.org> | 2006-07-17 23:07:51 +0000 |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2006-07-17 23:07:51 +0000 |
commit | 18d2f39af71608162b28fe1f41aa3e76efd83410 (patch) | |
tree | a60572e1b4f8cb549c2d1f1a467dc181695e3334 | |
parent | a2f60a47b5e5138f8a7c46226183f372174166c9 (diff) | |
download | cpython-git-18d2f39af71608162b28fe1f41aa3e76efd83410.tar.gz |
decode_rfc2231(): Be more robust against buggy RFC 2231 encodings.
Specifically, instead of raising a ValueError when there is a single tick in
the parameter, simply return the entire string unquoted, with None for
both the charset and the language. Also, if there are more than 2 ticks in
the parameter, interpret the first three parts as the standard RFC 2231 parts,
then the rest of the parts as the encoded string.
Test cases added.
Original fewer-than-3-parts fix by Tokio Kikuchi.
Resolves SF bug #1218081. I will backport the fix and tests to Python 2.4
(email 3.0) and Python 2.3 (email 2.5).
Also, bump the version number to email 4.0.1, removing the 'alpha' moniker.
-rw-r--r-- | Lib/email/__init__.py | 2 | ||||
-rw-r--r-- | Lib/email/test/test_email_renamed.py | 34 | ||||
-rw-r--r-- | Lib/email/utils.py | 11 |
3 files changed, 43 insertions, 4 deletions
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py index f01260f57f..8d230fdeb7 100644 --- a/Lib/email/__init__.py +++ b/Lib/email/__init__.py @@ -4,7 +4,7 @@ """A package for parsing, handling, and generating email messages.""" -__version__ = '4.0a2' +__version__ = '4.0.1' __all__ = [ # Old names diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py index 95d06cb66f..4cfca6679c 100644 --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -3060,6 +3060,40 @@ Content-Disposition: inline; filename*0=X-UNKNOWN''myfile.txt msg = email.message_from_string(m) self.assertEqual(msg.get_filename(), 'myfile.txt') + def test_rfc2231_single_tick_in_filename(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, None) + eq(language, None) + eq(s, "Frank's Document") + + def test_rfc2231_tick_attack(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; +\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" + +""" + msg = email.message_from_string(m) + charset, language, s = msg.get_param('name') + eq(charset, 'us-ascii') + eq(language, 'en-us') + eq(s, "Frank's Document") + + def test_rfc2231_no_extended_values(self): + eq = self.assertEqual + m = """\ +Content-Type: application/x-foo; name=\"Frank's Document\" + +""" + msg = email.message_from_string(m) + eq(msg.get_param('name'), "Frank's Document") + def _testclasses(): diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 250eb19d93..ea59c27d52 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -45,6 +45,7 @@ COMMASPACE = ', ' EMPTYSTRING = '' UEMPTYSTRING = u'' CRLF = '\r\n' +TICK = "'" specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[][\\()"]') @@ -231,10 +232,14 @@ def unquote(str): def decode_rfc2231(s): """Decode 
string according to RFC 2231""" import urllib - parts = s.split("'", 2) - if len(parts) == 1: + parts = s.split(TICK, 2) + if len(parts) <= 2: return None, None, urllib.unquote(s) - charset, language, s = parts + if len(parts) > 3: + charset, language = parts[:2] + s = TICK.join(parts[2:]) + else: + charset, language, s = parts return charset, language, urllib.unquote(s) |