summaryrefslogtreecommitdiff
path: root/Lib/email
diff options
context:
space:
mode:
authorBarry Warsaw <barry@python.org>2006-07-17 23:07:51 +0000
committerBarry Warsaw <barry@python.org>2006-07-17 23:07:51 +0000
commit18d2f39af71608162b28fe1f41aa3e76efd83410 (patch)
treea60572e1b4f8cb549c2d1f1a467dc181695e3334 /Lib/email
parenta2f60a47b5e5138f8a7c46226183f372174166c9 (diff)
downloadcpython-git-18d2f39af71608162b28fe1f41aa3e76efd83410.tar.gz
decode_rfc2231(): Be more robust against buggy RFC 2231 encodings.
Specifically, instead of raising a ValueError when there is a single tick in the parameter, simply return the entire string unquoted, with None for both the charset and the language. Also, if there are more than 2 ticks in the parameter, interpret the first three parts as the standard RFC 2231 parts, then the rest of the parts as the encoded string. Test cases added. Original fewer-than-3-parts fix by Tokio Kikuchi. Resolves SF bug # 1218081. I will back port the fix and tests to Python 2.4 (email 3.0) and Python 2.3 (email 2.5). Also, bump the version number to email 4.0.1, removing the 'alpha' moniker.
Diffstat (limited to 'Lib/email')
-rw-r--r--Lib/email/__init__.py2
-rw-r--r--Lib/email/test/test_email_renamed.py34
-rw-r--r--Lib/email/utils.py11
3 files changed, 43 insertions, 4 deletions
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py
index f01260f57f..8d230fdeb7 100644
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -4,7 +4,7 @@
"""A package for parsing, handling, and generating email messages."""
-__version__ = '4.0a2'
+__version__ = '4.0.1'
__all__ = [
# Old names
diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py
index 95d06cb66f..4cfca6679c 100644
--- a/Lib/email/test/test_email_renamed.py
+++ b/Lib/email/test/test_email_renamed.py
@@ -3060,6 +3060,40 @@ Content-Disposition: inline; filename*0=X-UNKNOWN''myfile.txt
msg = email.message_from_string(m)
self.assertEqual(msg.get_filename(), 'myfile.txt')
+ def test_rfc2231_single_tick_in_filename(self):
+ eq = self.assertEqual
+ m = """\
+Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
+
+"""
+ msg = email.message_from_string(m)
+ charset, language, s = msg.get_param('name')
+ eq(charset, None)
+ eq(language, None)
+ eq(s, "Frank's Document")
+
+ def test_rfc2231_tick_attack(self):
+ eq = self.assertEqual
+ m = """\
+Content-Type: application/x-foo;
+\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
+
+"""
+ msg = email.message_from_string(m)
+ charset, language, s = msg.get_param('name')
+ eq(charset, 'us-ascii')
+ eq(language, 'en-us')
+ eq(s, "Frank's Document")
+
+ def test_rfc2231_no_extended_values(self):
+ eq = self.assertEqual
+ m = """\
+Content-Type: application/x-foo; name=\"Frank's Document\"
+
+"""
+ msg = email.message_from_string(m)
+ eq(msg.get_param('name'), "Frank's Document")
+
def _testclasses():
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 250eb19d93..ea59c27d52 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -45,6 +45,7 @@ COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
+TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[][\\()"]')
@@ -231,10 +232,14 @@ def unquote(str):
def decode_rfc2231(s):
"""Decode string according to RFC 2231"""
import urllib
- parts = s.split("'", 2)
- if len(parts) == 1:
+ parts = s.split(TICK, 2)
+ if len(parts) <= 2:
return None, None, urllib.unquote(s)
- charset, language, s = parts
+ if len(parts) > 3:
+ charset, language = parts[:2]
+ s = TICK.join(parts[2:])
+ else:
+ charset, language, s = parts
return charset, language, urllib.unquote(s)