from __future__ import absolute_import

import collections
import logging

import kafka.errors as Errors
from kafka.protocol.commit import GroupCoordinatorResponse
from kafka.protocol.frame import KafkaBytes
from kafka.protocol.types import Int32, TaggedFields
from kafka.version import __version__

log = logging.getLogger(__name__)


class KafkaProtocol(object):
    """Manage the kafka network protocol

    Use an instance of KafkaProtocol to manage bytes send/recv'd
    from a network socket to a broker.

    The instance performs no socket I/O itself: callers queue requests with
    send_request(), drain the encoded bytes with send_bytes(), and feed
    whatever arrives from the network into receive_bytes().

    Arguments:
        client_id (str): identifier string to be included in each request
        api_version (tuple): Optional tuple to specify api_version to use.
            Currently only used to check for 0.8.2 protocol quirks, but
            may be used for more in the future.
    """
    def __init__(self, client_id=None, api_version=None):
        if client_id is None:
            client_id = self._gen_client_id()
        self._client_id = client_id
        self._api_version = api_version
        self._correlation_id = 0
        # 4-byte scratch buffer for the size prefix of the incoming frame
        self._header = KafkaBytes(4)
        # Payload buffer for the frame being received; None between frames
        self._rbuffer = None
        # False while reading the size prefix, True while reading the payload
        self._receiving = False
        # FIFO of (correlation_id, request) pairs awaiting a response
        self.in_flight_requests = collections.deque()
        # Encoded requests queued by send_request() until send_bytes() is called
        self.bytes_to_send = []

    def _next_correlation_id(self):
        """Advance the correlation id counter (wrapping at 2**31) and return it."""
        self._correlation_id = (self._correlation_id + 1) % 2**31
        return self._correlation_id

    def _gen_client_id(self):
        """Return the default client id used when none is supplied."""
        return 'kafka-python' + __version__

    def send_request(self, request, correlation_id=None):
        """Encode and queue a kafka api request for sending.

        The encoded request is appended to bytes_to_send; nothing is written
        to the network here. Requests that expect a response are also
        recorded in in_flight_requests so the response can be matched later.

        Arguments:
            request (object): An un-encoded kafka request.
            correlation_id (int, optional): Optionally specify an ID to
                correlate requests with responses. If not provided, an ID will
                be generated automatically.

        Returns:
            correlation_id
        """
        log.debug('Sending request %s', request)
        if correlation_id is None:
            correlation_id = self._next_correlation_id()

        header = request.build_request_header(correlation_id=correlation_id,
                                              client_id=self._client_id)
        message = b''.join([header.encode(), request.encode()])
        # Each frame on the wire is the Int32-encoded message length
        # followed by the message itself.
        size = Int32.encode(len(message))
        data = size + message
        self.bytes_to_send.append(data)
        if request.expect_response():
            ifr = (correlation_id, request)
            self.in_flight_requests.append(ifr)
        return correlation_id

    def send_bytes(self):
        """Retrieve all pending bytes to send on the network

        Returns:
            bytes: concatenation of every queued request, in the order queued.
                The queue is emptied as a side effect.
        """
        data = b''.join(self.bytes_to_send)
        self.bytes_to_send = []
        return data

    def receive_bytes(self, data):
        """Process bytes received from the network.

        Partial frames are buffered on the instance, so `data` may end at any
        point inside a size prefix or payload; the remainder is picked up on
        the next call.

        Arguments:
            data (bytes): any length bytes received from a network connection
                to a kafka broker.

        Returns:
            responses (list of (correlation_id, response)): any/all completed
                responses, decoded from bytes to python objects.

        Raises:
             KafkaProtocolError: if the bytes received could not be decoded,
                 or if a frame announces a negative size.
             CorrelationIdError: if the response does not match the request
                 correlation id.
        """
        i = 0
        n = len(data)
        responses = []
        while i < n:

            # Not receiving is the state of reading the payload header
            if not self._receiving:
                bytes_to_read = min(4 - self._header.tell(), n - i)
                self._header.write(data[i:i+bytes_to_read])
                i += bytes_to_read

                if self._header.tell() == 4:
                    self._header.seek(0)
                    nbytes = Int32.decode(self._header)
                    if nbytes < 0:
                        # A frame cannot have a negative length; report it as
                        # a protocol error and leave the parser in a clean
                        # state instead of trying to size a buffer with it.
                        self._reset_buffer()
                        raise Errors.KafkaProtocolError(
                            'Invalid frame size: %d' % (nbytes,))
                    # reset buffer and switch state to receiving payload bytes
                    self._rbuffer = KafkaBytes(nbytes)
                    self._receiving = True
                elif self._header.tell() > 4:
                    raise Errors.KafkaError('this should not happen - are you threading?')

            if self._receiving:
                total_bytes = len(self._rbuffer)
                staged_bytes = self._rbuffer.tell()
                bytes_to_read = min(total_bytes - staged_bytes, n - i)
                self._rbuffer.write(data[i:i+bytes_to_read])
                i += bytes_to_read

                staged_bytes = self._rbuffer.tell()
                if staged_bytes > total_bytes:
                    raise Errors.KafkaError('Receive buffer has more bytes than expected?')

                # Payload still incomplete: keep the partial state for the
                # next call.
                if staged_bytes != total_bytes:
                    break

                self._receiving = False
                self._rbuffer.seek(0)
                try:
                    resp = self._process_response(self._rbuffer)
                finally:
                    # Always return to the "expect a new size prefix" state,
                    # even when decoding fails, so a later call never resumes
                    # from a frame that has already been consumed.
                    self._reset_buffer()
                responses.append(resp)
        return responses

    def _process_response(self, read_buffer):
        """Decode one complete response payload.

        The oldest in-flight request is removed from in_flight_requests and
        used to parse the response header and body.

        Arguments:
            read_buffer: buffer holding exactly one response payload,
                positioned at its start.

        Returns:
            tuple: (correlation_id, response), where correlation_id is the id
                recorded for the request when it was sent.

        Raises:
            CorrelationIdError: if there is no in-flight request, or the
                received correlation id does not match the oldest one.
            KafkaProtocolError: if the response body cannot be decoded.
        """
        if not self.in_flight_requests:
            raise Errors.CorrelationIdError('No in-flight-request found for server response')
        (correlation_id, request) = self.in_flight_requests.popleft()
        response_header = request.parse_response_header(read_buffer)
        recv_correlation_id = response_header.correlation_id
        log.debug('Received correlation id: %d', recv_correlation_id)
        # 0.8.2 quirk
        if (recv_correlation_id == 0 and
            correlation_id != 0 and
            request.RESPONSE_TYPE is GroupCoordinatorResponse[0] and
            (self._api_version == (0, 8, 2) or self._api_version is None)):
            log.warning('Kafka 0.8.2 quirk -- GroupCoordinatorResponse'
                        ' Correlation ID does not match request. This'
                        ' should go away once at least one topic has been'
                        ' initialized on the broker.')

        elif correlation_id != recv_correlation_id:
            # return or raise?
            raise Errors.CorrelationIdError(
                'Correlation IDs do not match: sent %d, recv %d'
                % (correlation_id, recv_correlation_id))

        # decode response
        log.debug('Processing response %s', request.RESPONSE_TYPE.__name__)
        try:
            response = request.RESPONSE_TYPE.decode(read_buffer)
        except ValueError:
            # Log the whole raw payload to help diagnose the decode failure
            read_buffer.seek(0)
            buf = read_buffer.read()
            log.error('Response %d [ResponseType: %s Request: %s]:'
                      ' Unable to decode %d-byte buffer: %r',
                      correlation_id, request.RESPONSE_TYPE,
                      request, len(buf), buf)
            raise Errors.KafkaProtocolError('Unable to decode response')

        return (correlation_id, response)

    def _reset_buffer(self):
        """Return the receive state machine to waiting for a new size prefix."""
        self._receiving = False
        self._header.seek(0)
        self._rbuffer = None