diff options
author | Martin Panter <vadmium+py@gmail.com> | 2016-08-24 06:33:33 +0000 |
---|---|---|
committer | Martin Panter <vadmium+py@gmail.com> | 2016-08-24 06:33:33 +0000 |
commit | 3c0d0baf2badfad7deb346d1043f7d83bb92691f (patch) | |
tree | 968ca71729f519aaf6ebb38477efdc73e5ae3ae9 /Lib/test | |
parent | a790fe7ff86f193670b3d8287b22c72cbe675c7b (diff) | |
download | cpython-git-3c0d0baf2badfad7deb346d1043f7d83bb92691f.tar.gz |
Issue #12319: Support for chunked encoding of HTTP request bodies
When the body object is a file, its size is no longer determined with
fstat(), since that can report the wrong result (e.g. reading from a pipe).
Instead, determine the size using seek(), or fall back to chunked encoding
for unseekable files.
Also, change the logic for detecting text files to check for TextIOBase
inheritance, rather than inspecting the “mode” attribute, which may not
exist (e.g. BytesIO and StringIO). The Content-Length for text files is no
longer determined ahead of time, because the original logic could have been
wrong depending on the codec and newline translation settings.
Patch by Demian Brecht and Rolf Krahl, with a few tweaks by me.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_httplib.py | 151 | ||||
-rw-r--r-- | Lib/test/test_urllib2.py | 103 |
2 files changed, 228 insertions, 26 deletions
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 1768a34308..a1796123e4 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -314,6 +314,124 @@ class HeaderTests(TestCase): conn.putheader(name, value) +class TransferEncodingTest(TestCase): + expected_body = b"It's just a flesh wound" + + def test_endheaders_chunked(self): + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(b'') + conn.putrequest('POST', '/') + conn.endheaders(self._make_body(), encode_chunked=True) + + _, _, body = self._parse_request(conn.sock.data) + body = self._parse_chunked(body) + self.assertEqual(body, self.expected_body) + + def test_explicit_headers(self): + # explicit chunked + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(b'') + # this shouldn't actually be automatically chunk-encoded because the + # calling code has explicitly stated that it's taking care of it + conn.request( + 'POST', '/', self._make_body(), {'Transfer-Encoding': 'chunked'}) + + _, headers, body = self._parse_request(conn.sock.data) + self.assertNotIn('content-length', [k.lower() for k in headers.keys()]) + self.assertEqual(headers['Transfer-Encoding'], 'chunked') + self.assertEqual(body, self.expected_body) + + # explicit chunked, string body + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(b'') + conn.request( + 'POST', '/', self.expected_body.decode('latin-1'), + {'Transfer-Encoding': 'chunked'}) + + _, headers, body = self._parse_request(conn.sock.data) + self.assertNotIn('content-length', [k.lower() for k in headers.keys()]) + self.assertEqual(headers['Transfer-Encoding'], 'chunked') + self.assertEqual(body, self.expected_body) + + # User-specified TE, but request() does the chunk encoding + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(b'') + conn.request('POST', '/', + headers={'Transfer-Encoding': 'gzip, chunked'}, + encode_chunked=True, + body=self._make_body()) + _, headers, body = self._parse_request(conn.sock.data) + self.assertNotIn('content-length', [k.lower() for k in headers]) + self.assertEqual(headers['Transfer-Encoding'], 'gzip, chunked') + self.assertEqual(self._parse_chunked(body), self.expected_body) + + def test_request(self): + for empty_lines in (False, True,): + conn = client.HTTPConnection('example.com') + conn.sock = FakeSocket(b'') + conn.request( + 'POST', '/', self._make_body(empty_lines=empty_lines)) + + _, headers, body = self._parse_request(conn.sock.data) + body = self._parse_chunked(body) + self.assertEqual(body, self.expected_body) + self.assertEqual(headers['Transfer-Encoding'], 'chunked') + + # Content-Length and Transfer-Encoding SHOULD not be sent in the + # same request + self.assertNotIn('content-length', [k.lower() for k in headers]) + + def _make_body(self, empty_lines=False): + lines = self.expected_body.split(b' ') + for idx, line in enumerate(lines): + # for testing handling empty lines + if empty_lines and idx % 2: + yield b'' + if idx < len(lines) - 1: + yield line + b' ' + else: + yield line + + def _parse_request(self, data): + lines = data.split(b'\r\n') + request = lines[0] + headers = {} + n = 1 + while n < len(lines) and len(lines[n]) > 0: + key, val = lines[n].split(b':') + key = key.decode('latin-1').strip() + headers[key] = val.decode('latin-1').strip() + n += 1 + + return request, headers, b'\r\n'.join(lines[n + 1:]) + + def _parse_chunked(self, data): + body = [] + trailers = {} + n = 0 + lines = data.split(b'\r\n') + # parse body + while True: + size, chunk = lines[n:n+2] + size = int(size, 16) + + if size == 0: + n += 1 + break + + self.assertEqual(size, len(chunk)) + body.append(chunk) + + n += 2 + # we /should/ hit the end chunk, but check against the size of + # lines so we're not stuck in an infinite loop should we get + # malformed data + if n > len(lines): + break + + return b''.join(body) + + class BasicTest(TestCase): def test_status_lines(self): # Test HTTP status lines @@ -564,11 +682,11 @@ class BasicTest(TestCase): yield None yield 'data_two' - class UpdatingFile(): + class UpdatingFile(io.TextIOBase): mode = 'r' d = data() def read(self, blocksize=-1): - return self.d.__next__() + return next(self.d) expected = b'data' @@ -1546,6 +1664,26 @@ class RequestBodyTest(TestCase): message = client.parse_headers(f) return message, f + def test_list_body(self): + # Note that no content-length is automatically calculated for + # an iterable. The request will fall back to send chunked + # transfer encoding. + cases = ( + ([b'foo', b'bar'], b'3\r\nfoo\r\n3\r\nbar\r\n0\r\n\r\n'), + ((b'foo', b'bar'), b'3\r\nfoo\r\n3\r\nbar\r\n0\r\n\r\n'), + ) + for body, expected in cases: + with self.subTest(body): + self.conn = client.HTTPConnection('example.com') + self.conn.sock = self.sock = FakeSocket('') + + self.conn.request('PUT', '/url', body) + msg, f = self.get_headers_and_fp() + self.assertNotIn('Content-Type', msg) + self.assertNotIn('Content-Length', msg) + self.assertEqual(msg.get('Transfer-Encoding'), 'chunked') + self.assertEqual(expected, f.read()) + def test_manual_content_length(self): # Set an incorrect content-length so that we can verify that # it will not be over-ridden by the library. @@ -1588,8 +1726,13 @@ class RequestBodyTest(TestCase): message, f = self.get_headers_and_fp() self.assertEqual("text/plain", message.get_content_type()) self.assertIsNone(message.get_charset()) - self.assertEqual("4", message.get("content-length")) - self.assertEqual(b'body', f.read()) + # Note that the length of text files is unpredictable + # because it depends on character encoding and line ending + # translation. No content-length will be set, the body + # will be sent using chunked transfer encoding. + self.assertIsNone(message.get("content-length")) + self.assertEqual("chunked", message.get("transfer-encoding")) + self.assertEqual(b'4\r\nbody\r\n0\r\n\r\n', f.read()) def test_binary_file_body(self): self.addCleanup(support.unlink, support.TESTFN) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index eda7cccc60..0eea0c7f98 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -7,6 +7,8 @@ import io import socket import array import sys +import tempfile +import subprocess import urllib.request # The proxy bypass method imported below has logic specific to the OSX @@ -335,7 +337,8 @@ class MockHTTPClass: else: self._tunnel_headers.clear() - def request(self, method, url, body=None, headers=None): + def request(self, method, url, body=None, headers=None, *, + encode_chunked=False): self.method = method self.selector = url if headers is not None: @@ -343,6 +346,7 @@ class MockHTTPClass: self.req_headers.sort() if body: self.data = body + self.encode_chunked = encode_chunked if self.raise_on_endheaders: raise OSError() @@ -908,41 +912,96 @@ class HandlerTests(unittest.TestCase): self.assertEqual(req.unredirected_hdrs["Host"], "baz") self.assertEqual(req.unredirected_hdrs["Spam"], "foo") - # Check iterable body support - def iterable_body(): - yield b"one" - yield b"two" - yield b"three" + def test_http_body_file(self): + # A regular file - Content Length is calculated unless already set. - for headers in {}, {"Content-Length": 11}: - req = Request("http://example.com/", iterable_body(), headers) - if not headers: - # Having an iterable body without a Content-Length should - # raise an exception - self.assertRaises(ValueError, h.do_request_, req) - else: + h = urllib.request.AbstractHTTPHandler() + o = h.parent = MockOpener() + + file_obj = tempfile.NamedTemporaryFile(mode='w+b', delete=False) + file_path = file_obj.name + file_obj.write(b"Something\nSomething\nSomething\n") + file_obj.close() + + for headers in {}, {"Content-Length": 30}: + with open(file_path, "rb") as f: + req = Request("http://example.com/", f, headers) newreq = h.do_request_(req) + self.assertEqual(int(newreq.get_header('Content-length')), 30) - # A file object. - # Test only Content-Length attribute of request. + os.unlink(file_path) + + def test_http_body_fileobj(self): + # A file object - Content Length is calculated unless already set. + # (Note that there are some subtle differences to a regular + # file, that is why we are testing both cases.) + + h = urllib.request.AbstractHTTPHandler() + o = h.parent = MockOpener() file_obj = io.BytesIO() file_obj.write(b"Something\nSomething\nSomething\n") for headers in {}, {"Content-Length": 30}: + file_obj.seek(0) req = Request("http://example.com/", file_obj, headers) - if not headers: - # Having an iterable body without a Content-Length should - # raise an exception - self.assertRaises(ValueError, h.do_request_, req) - else: - newreq = h.do_request_(req) - self.assertEqual(int(newreq.get_header('Content-length')), 30) + newreq = h.do_request_(req) + self.assertEqual(int(newreq.get_header('Content-length')), 30) file_obj.close() + def test_http_body_pipe(self): + # A file reading from a pipe. + # A pipe cannot be seek'ed. There is no way to determine the + # content length up front. Thus, do_request_() should fall + # back to Transfer-encoding chunked. + + h = urllib.request.AbstractHTTPHandler() + o = h.parent = MockOpener() + + cmd = [sys.executable, "-c", + r"import sys; " + r"sys.stdout.buffer.write(b'Something\nSomething\nSomething\n')"] + for headers in {}, {"Content-Length": 30}: + with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc: + req = Request("http://example.com/", proc.stdout, headers) + newreq = h.do_request_(req) + if not headers: + self.assertEqual(newreq.get_header('Content-length'), None) + self.assertEqual(newreq.get_header('Transfer-encoding'), + 'chunked') + else: + self.assertEqual(int(newreq.get_header('Content-length')), + 30) + + def test_http_body_iterable(self): + # Generic iterable. There is no way to determine the content + # length up front. Fall back to Transfer-encoding chunked. + + h = urllib.request.AbstractHTTPHandler() + o = h.parent = MockOpener() + + def iterable_body(): + yield b"one" + yield b"two" + yield b"three" + + for headers in {}, {"Content-Length": 11}: + req = Request("http://example.com/", iterable_body(), headers) + newreq = h.do_request_(req) + if not headers: + self.assertEqual(newreq.get_header('Content-length'), None) + self.assertEqual(newreq.get_header('Transfer-encoding'), + 'chunked') + else: + self.assertEqual(int(newreq.get_header('Content-length')), 11) + + def test_http_body_array(self): # array.array Iterable - Content Length is calculated + h = urllib.request.AbstractHTTPHandler() + o = h.parent = MockOpener() + iterable_array = array.array("I",[1,2,3,4]) for headers in {}, {"Content-Length": 16}: |