diff options
Diffstat (limited to 'Lib/httplib.py')
-rw-r--r-- | Lib/httplib.py | 219 |
1 files changed, 134 insertions, 85 deletions
diff --git a/Lib/httplib.py b/Lib/httplib.py index 680e875041..6e72e2c37e 100644 --- a/Lib/httplib.py +++ b/Lib/httplib.py @@ -67,6 +67,7 @@ Req-sent-unread-response _CS_REQ_SENT <response_class> """ from array import array +import os import socket from sys import py3kwarning from urlparse import urlsplit @@ -211,6 +212,9 @@ responses = { # maximal amount of data to read at one time in _safe_read MAXAMOUNT = 1048576 +# maximal line length when calling readline(). +_MAXLINE = 65536 + class HTTPMessage(mimetools.Message): def addheader(self, key, value): @@ -273,7 +277,9 @@ class HTTPMessage(mimetools.Message): except IOError: startofline = tell = None self.seekable = 0 - line = self.fp.readline() + line = self.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("header line") if not line: self.status = 'EOF in headers' break @@ -326,8 +332,18 @@ class HTTPResponse: # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. - def __init__(self, sock, debuglevel=0, strict=0, method=None): - self.fp = sock.makefile('rb', 0) + def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False): + if buffering: + # The caller won't be using any sock.recv() calls, so buffering + # is fine and recommended for performance. + self.fp = sock.makefile('rb') + else: + # The buffer size is specified as zero, because the headers of + # the response are read with readline(). If the reads were + # buffered the readline() calls could consume some of the + # response, which make be read via a recv() on the underlying + # socket. + self.fp = sock.makefile('rb', 0) self.debuglevel = debuglevel self.strict = strict self._method = method @@ -393,7 +409,10 @@ class HTTPResponse: break # skip the header from the 100 response while True: - skip = self.fp.readline().strip() + skip = self.fp.readline(_MAXLINE + 1) + if len(skip) > _MAXLINE: + raise LineTooLong("header line") + skip = skip.strip() if not skip: break if self.debuglevel > 0: @@ -552,7 +571,9 @@ class HTTPResponse: value = [] while True: if chunk_left is None: - line = self.fp.readline() + line = self.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("chunk size") i = line.find(';') if i >= 0: line = line[:i] # strip chunk-extensions @@ -587,7 +608,9 @@ class HTTPResponse: # read and discard trailer up to the CRLF terminator ### note: we shouldn't have any trailers! while True: - line = self.fp.readline() + line = self.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("trailer line") if not line: # a vanishingly small number of sites EOF without # sending the trailer @@ -614,6 +637,11 @@ class HTTPResponse: reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ + # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never + # return less than x bytes unless EOF is encountered. It now handles + # signal interruptions (socket.error EINTR) internally. This code + # never caught that exception anyways. It seems largely pointless. + # self.fp.read(amt) will work fine. s = [] while amt > 0: chunk = self.fp.read(min(amt, MAXAMOUNT)) @@ -623,6 +651,9 @@ class HTTPResponse: amt -= len(chunk) return ''.join(s) + def fileno(self): + return self.fp.fileno() + def getheader(self, name, default=None): if self.msg is None: raise ResponseNotReady() @@ -647,8 +678,9 @@ class HTTPConnection: strict = 0 def __init__(self, host, port=None, strict=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): self.timeout = timeout + self.source_address = source_address self.sock = None self._buffer = [] self.__response = None @@ -662,7 +694,7 @@ class HTTPConnection: if strict is not None: self.strict = strict - def _set_tunnel(self, host, port=None, headers=None): + def set_tunnel(self, host, port=None, headers=None): """ Sets up the host and the port for the HTTP CONNECT Tunnelling. The headers argument should be a mapping of extra HTTP headers @@ -710,14 +742,16 @@ class HTTPConnection: raise socket.error("Tunnel connection failed: %d %s" % (code, message.strip())) while True: - line = response.fp.readline() + line = response.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("header line") if line == '\r\n': break def connect(self): """Connect to the host and port specified in __init__.""" self.sock = socket.create_connection((self.host,self.port), - self.timeout) + self.timeout, self.source_address) if self._tunnel_host: self._tunnel() @@ -732,35 +766,25 @@ class HTTPConnection: self.__response = None self.__state = _CS_IDLE - def send(self, str): - """Send `str' to the server.""" + def send(self, data): + """Send `data' to the server.""" if self.sock is None: if self.auto_open: self.connect() else: raise NotConnected() - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - # - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. if self.debuglevel > 0: - print "send:", repr(str) - try: - blocksize=8192 - if hasattr(str,'read') and not isinstance(str, array): - if self.debuglevel > 0: print "sendIng a read()able" - data=str.read(blocksize) - while data: - self.sock.sendall(data) - data=str.read(blocksize) - else: - self.sock.sendall(str) - except socket.error, v: - if v.args[0] == 32: # Broken pipe - self.close() - raise + print "send:", repr(data) + blocksize = 8192 + if hasattr(data,'read') and not isinstance(data, array): + if self.debuglevel > 0: print "sendIng a read()able" + datablock = data.read(blocksize) + while datablock: + self.sock.sendall(datablock) + datablock = data.read(blocksize) + else: + self.sock.sendall(data) def _output(self, s): """Add a line of output to the current request buffer. @@ -769,15 +793,26 @@ class HTTPConnection: """ self._buffer.append(s) - def _send_output(self): + def _send_output(self, message_body=None): """Send the currently buffered request and clear the buffer. Appends an extra \\r\\n to the buffer. + A message_body may be specified, to be appended to the request. """ self._buffer.extend(("", "")) msg = "\r\n".join(self._buffer) del self._buffer[:] + # If msg and message_body are sent in a single send() call, + # it will avoid performance problems caused by the interaction + # between delayed ack and the Nagle algorithm. + if isinstance(message_body, str): + msg += message_body + message_body = None self.send(msg) + if message_body is not None: + #message_body was not a string (i.e. it is a file) and + #we must run the risk of Nagle + self.send(message_body) def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): """Send a request to the server. @@ -821,9 +856,9 @@ class HTTPConnection: self._method = method if not url: url = '/' - str = '%s %s %s' % (method, url, self._http_vsn_str) + hdr = '%s %s %s' % (method, url, self._http_vsn_str) - self._output(str) + self._output(hdr) if self._http_vsn == 11: # Issue some standard headers for better HTTP/1.1 compliance @@ -858,6 +893,9 @@ class HTTPConnection: host_enc = self.host.encode("ascii") except UnicodeEncodeError: host_enc = self.host.encode("idna") + # Wrap the IPv6 Host Header with [] (RFC 2732) + if host_enc.find(':') >= 0: + host_enc = "[" + host_enc + "]" if self.port == self.default_port: self.putheader('Host', host_enc) else: @@ -886,7 +924,7 @@ class HTTPConnection: # For HTTP/1.0, the server will assume "not chunked" pass - def putheader(self, header, value): + def putheader(self, header, *values): """Send a request header line to the server. For example: h.putheader('Accept', 'text/html') @@ -894,33 +932,47 @@ class HTTPConnection: if self.__state != _CS_REQ_STARTED: raise CannotSendHeader() - str = '%s: %s' % (header, value) - self._output(str) + hdr = '%s: %s' % (header, '\r\n\t'.join([str(v) for v in values])) + self._output(hdr) - def endheaders(self): - """Indicate that the last header line has been sent to the server.""" + def endheaders(self, message_body=None): + """Indicate that the last header line has been sent to the server. + This method sends the request to the server. The optional + message_body argument can be used to pass message body + associated with the request. The message body will be sent in + the same packet as the message headers if possible. The + message_body should be a string. + """ if self.__state == _CS_REQ_STARTED: self.__state = _CS_REQ_SENT else: raise CannotSendHeader() - - self._send_output() + self._send_output(message_body) def request(self, method, url, body=None, headers={}): """Send a complete request to the server.""" + self._send_request(method, url, body, headers) + def _set_content_length(self, body): + # Set the content-length based on the body. + thelen = None try: - self._send_request(method, url, body, headers) - except socket.error, v: - # trap 'Broken pipe' if we're allowed to automatically reconnect - if v.args[0] != 32 or not self.auto_open: - raise - # try one more time - self._send_request(method, url, body, headers) + thelen = str(len(body)) + except TypeError, te: + # If this is a file-like object, try to + # fstat its file descriptor + try: + thelen = str(os.fstat(body.fileno()).st_size) + except (AttributeError, OSError): + # Don't send a length if this failed + if self.debuglevel > 0: print "Cannot stat!!" + + if thelen is not None: + self.putheader('Content-Length', thelen) def _send_request(self, method, url, body, headers): - # honour explicitly requested Host: and Accept-Encoding headers + # Honor explicitly requested Host: and Accept-Encoding: headers. header_names = dict.fromkeys([k.lower() for k in headers]) skips = {} if 'host' in header_names: @@ -931,29 +983,12 @@ class HTTPConnection: self.putrequest(method, url, **skips) if body and ('content-length' not in header_names): - thelen=None - try: - thelen=str(len(body)) - except TypeError, te: - # If this is a file-like object, try to - # fstat its file descriptor - import os - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print "Cannot stat!!" - - if thelen is not None: - self.putheader('Content-Length',thelen) + self._set_content_length(body) for hdr, value in headers.iteritems(): self.putheader(hdr, value) - self.endheaders() - - if body: - self.send(body) + self.endheaders(body) - def getresponse(self): + def getresponse(self, buffering=False): "Get the response from the server." # if a prior response has been completed, then forget about it. @@ -979,13 +1014,15 @@ class HTTPConnection: if self.__state != _CS_REQ_SENT or self.__response: raise ResponseNotReady() + args = (self.sock,) + kwds = {"strict":self.strict, "method":self._method} if self.debuglevel > 0: - response = self.response_class(self.sock, self.debuglevel, - strict=self.strict, - method=self._method) - else: - response = self.response_class(self.sock, strict=self.strict, - method=self._method) + args += (self.debuglevel,) + if buffering: + #only add this keyword if non-default, for compatibility with + #other response_classes. + kwds["buffering"] = True; + response = self.response_class(*args, **kwds) response.begin() assert response.will_close != _UNKNOWN @@ -1029,6 +1066,7 @@ class HTTP: # set up delegation to flesh out interface self.send = conn.send self.putrequest = conn.putrequest + self.putheader = conn.putheader self.endheaders = conn.endheaders self.set_debuglevel = conn.set_debuglevel @@ -1048,11 +1086,7 @@ class HTTP: "Provide a getfile, since the superclass' does not use this concept." return self.file - def putheader(self, header, *values): - "The superclass allows only one value argument." - self._conn.putheader(header, '\r\n\t'.join(values)) - - def getreply(self): + def getreply(self, buffering=False): """Compat definition since superclass does not define it. Returns a tuple consisting of: @@ -1061,7 +1095,12 @@ class HTTP: - any RFC822 headers in the response from the server """ try: - response = self._conn.getresponse() + if not buffering: + response = self._conn.getresponse() + else: + #only add this keyword if non-default for compatibility + #with other connection classes + response = self._conn.getresponse(buffering) except BadStatusLine, e: ### hmm. if getresponse() ever closes the socket on a bad request, ### then we are going to have problems with self.sock @@ -1101,15 +1140,18 @@ else: default_port = HTTPS_PORT def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - HTTPConnection.__init__(self, host, port, strict, timeout) + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + HTTPConnection.__init__(self, host, port, strict, timeout, + source_address) self.key_file = key_file self.cert_file = cert_file def connect(self): "Connect to a host on a given (SSL) port." - sock = socket.create_connection((self.host, self.port), self.timeout) + sock = socket.create_connection((self.host, self.port), + self.timeout, self.source_address) if self._tunnel_host: self.sock = sock self._tunnel() @@ -1200,9 +1242,16 @@ class ResponseNotReady(ImproperConnectionState): class BadStatusLine(HTTPException): def __init__(self, line): + if not line: + line = repr(line) self.args = line, self.line = line +class LineTooLong(HTTPException): + def __init__(self, line_type): + HTTPException.__init__(self, "got more than %d bytes when reading %s" + % (_MAXLINE, line_type)) + # for backwards compatibility error = HTTPException |