Merged revisions 81675 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r81675 | r.david.murray | 2010-06-03 11:43:20 -0400 (Thu, 03 Jun 2010) | 10 lines

  #5610: use \Z not $ so we don't eat extra chars when body part ends with \r\n.

  If a body part ended with \r\n, feedparser, using '$' to terminate its search
  for the newline, would match on the \r\n, and think that it needed to strip
  two characters in order to account for the line end before the boundary.
  That made it chop one too many characters off the end of the body part.
  Using \Z makes the match correct.

  Patch and test by Tony Nelson.
........
author: R. David Murray <rdmurray@bitdance.com> 2010-06-16 00:57:25 +0000
committer: R. David Murray <rdmurray@bitdance.com> 2010-06-16 00:57:25 +0000
commit: ff4a01dfaf246bca560f16be35b21130cab43547 (patch)
tree: 8babf5213894d1d9e66880e8f7f2ad9e09c1f9de
parent: 27e33fcd52ee2e15c0693c0e57344336df245c1c (diff)
download: cpython-git-ff4a01dfaf246bca560f16be35b21130cab43547.tar.gz
3 files changed, 22 insertions, 1 deletion
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index afb02b32b2..163fadafd4 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -28,7 +28,7 @@ from email import message
 
 NLCRE = re.compile('\r\n|\r|\n')
 NLCRE_bol = re.compile('(\r\n|\r|\n)')
-NLCRE_eol = re.compile('(\r\n|\r|\n)$')
+NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
 NLCRE_crack = re.compile('(\r\n|\r|\n)')
 # RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
 # except controls, SP, and ":".
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 350d0b0e92..6c23693cf1 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -2590,6 +2590,24 @@ Here's the message body
         eq(headers, ['A', 'B', 'CC'])
         eq(msg.get_payload(), 'body')
 
+    def test_CRLFLF_at_end_of_part(self):
+        # issue 5610: feedparser should not eat two chars from body part ending
+        # with "\r\n\n".
+        m = (
+            "From: foo@bar.com\n"
+            "To: baz\n"
+            "Mime-Version: 1.0\n"
+            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
+            "\n"
+            "--BOUNDARY\n"
+            "Content-Type: text/plain\n"
+            "\n"
+            "body ending with CRLF newline\r\n"
+            "\n"
+            "--BOUNDARY--\n"
+          )
+        msg = email.message_from_string(m)
+        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
 
 
 class TestBase64(unittest.TestCase):
diff --git a/Misc/NEWS b/Misc/NEWS
index fee5682cd4..a9cd9bdf71 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -71,6 +71,9 @@ C-API
 Library
 -------
 
+- Issue #5610: feedparser no longer eats extra characters at the end of
+  a body part if the body part ends with a \r\n.
+
 - Issue #8924: logging: Improved error handling for Unicode in exception text.
 
 - Fix codecs.escape_encode to return the correct consumed size.
author	R. David Murray <rdmurray@bitdance.com>	2010-06-16 00:57:25 +0000
committer	R. David Murray <rdmurray@bitdance.com>	2010-06-16 00:57:25 +0000
commit	ff4a01dfaf246bca560f16be35b21130cab43547 (patch)
tree	8babf5213894d1d9e66880e8f7f2ad9e09c1f9de
parent	27e33fcd52ee2e15c0693c0e57344336df245c1c (diff)
download	cpython-git-ff4a01dfaf246bca560f16be35b21130cab43547.tar.gz