diff options
author | Mahendra M <mahendra.m@gmail.com> | 2013-05-29 14:20:58 +0530 |
---|---|---|
committer | David Arthur <mumrah@gmail.com> | 2013-05-29 16:38:39 -0400 |
commit | 2c257eeb1f02748840a8f4535d8d2a88ef5235f2 (patch) | |
tree | 73ff852a845bdb947f0820ea1ff64c7b72436c9b /kafka/protocol.py | |
parent | 97962d4af3c0e57a549f432a7604f8ba0c5914fd (diff) | |
download | kafka-python-2c257eeb1f02748840a8f4535d8d2a88ef5235f2.tar.gz |
PEP8-ify most of the files
consumer.py and conn.py will be done later after pending merges
Diffstat (limited to 'kafka/protocol.py')
-rw-r--r-- | kafka/protocol.py | 231 |
1 files changed, 166 insertions, 65 deletions
diff --git a/kafka/protocol.py b/kafka/protocol.py index 94a7f2a..6bd5c73 100644 --- a/kafka/protocol.py +++ b/kafka/protocol.py @@ -18,10 +18,12 @@ from kafka.util import ( log = logging.getLogger("kafka") + class KafkaProtocol(object): """ - Class to encapsulate all of the protocol encoding/decoding. This class does not - have any state associated with it, it is purely for organization. + Class to encapsulate all of the protocol encoding/decoding. + This class does not have any state associated with it, it is purely + for organization. """ PRODUCE_KEY = 0 FETCH_KEY = 1 @@ -44,18 +46,18 @@ class KafkaProtocol(object): """ Encode the common request envelope """ - return struct.pack('>hhih%ds' % len(client_id), + return struct.pack('>hhih%ds' % len(client_id), request_key, # ApiKey 0, # ApiVersion correlation_id, # CorrelationId - len(client_id), # + len(client_id), client_id) # ClientId @classmethod def _encode_message_set(cls, messages): """ - Encode a MessageSet. Unlike other arrays in the protocol, MessageSets are - not length-prefixed + Encode a MessageSet. Unlike other arrays in the protocol, + MessageSets are not length-prefixed Format ====== @@ -66,7 +68,8 @@ class KafkaProtocol(object): message_set = "" for message in messages: encoded_message = KafkaProtocol._encode_message(message) - message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message) + message_set += struct.pack('>qi%ds' % len(encoded_message), 0, + len(encoded_message), encoded_message) return message_set @classmethod @@ -74,10 +77,10 @@ class KafkaProtocol(object): """ Encode a single message. - The magic number of a message is a format version number. The only supported - magic number right now is zero + The magic number of a message is a format version number. + The only supported magic number right now is zero - Format + Format ====== Message => Crc MagicByte Attributes Key Value Crc => int32 @@ -96,24 +99,27 @@ class KafkaProtocol(object): raise Exception("Unexpected magic number: %d" % message.magic) return msg - @classmethod def _decode_message_set_iter(cls, data): """ Iteratively decode a MessageSet - Reads repeated elements of (offset, message), calling decode_message to decode a - single message. Since compressed messages contain futher MessageSets, these two methods - have been decoupled so that they may recurse easily. + Reads repeated elements of (offset, message), calling decode_message + to decode a single message. Since compressed messages contain futher + MessageSets, these two methods have been decoupled so that they may + recurse easily. """ cur = 0 while cur < len(data): try: ((offset, ), cur) = relative_unpack('>q', data, cur) (msg, cur) = read_int_string(data, cur) - for (offset, message) in KafkaProtocol._decode_message(msg, offset): + for (offset, message) in KafkaProtocol._decode_message(msg, + offset): yield OffsetAndMessage(offset, message) - except BufferUnderflowError: # If we get a partial read of a message, stop + + except BufferUnderflowError: + # If we get a partial read of a message, stop raise StopIteration() @classmethod @@ -121,9 +127,10 @@ class KafkaProtocol(object): """ Decode a single Message - The only caller of this method is decode_message_set_iter. They are decoupled to - support nested messages (compressed MessageSets). The offset is actually read from - decode_message_set_iter (it is part of the MessageSet payload). + The only caller of this method is decode_message_set_iter. + They are decoupled to support nested messages (compressed MessageSets). + The offset is actually read from decode_message_set_iter (it is part + of the MessageSet payload). """ ((crc, magic, att), cur) = relative_unpack('>iBB', data, 0) if crc != zlib.crc32(data[4:]): @@ -131,23 +138,29 @@ class KafkaProtocol(object): (key, cur) = read_int_string(data, cur) (value, cur) = read_int_string(data, cur) - if att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_NONE: + + codec = att & KafkaProtocol.ATTRIBUTE_CODEC_MASK + + if codec == KafkaProtocol.CODEC_NONE: yield (offset, Message(magic, att, key, value)) - elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_GZIP: + + elif codec == KafkaProtocol.CODEC_GZIP: gz = gzip_decode(value) - for (offset, message) in KafkaProtocol._decode_message_set_iter(gz): - yield (offset, message) - elif att & KafkaProtocol.ATTRIBUTE_CODEC_MASK == KafkaProtocol.CODEC_SNAPPY: + for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz): + yield (offset, msg) + + elif codec == KafkaProtocol.CODEC_SNAPPY: snp = snappy_decode(value) - for (offset, message) in KafkaProtocol._decode_message_set_iter(snp): - yield (offset, message) + for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp): + yield (offset, msg) ################## # Public API # ################## @classmethod - def encode_produce_request(cls, client_id, correlation_id, payloads=[], acks=1, timeout=1000): + def encode_produce_request(cls, client_id, correlation_id, + payloads=None, acks=1, timeout=1000): """ Encode some ProduceRequest structs @@ -161,39 +174,53 @@ class KafkaProtocol(object): 1: written to disk by the leader 2+: waits for this many number of replicas to sync -1: waits for all replicas to be in sync - timeout: Maximum time the server will wait for acks from replicas. This is _not_ a socket timeout + timeout: Maximum time the server will wait for acks from replicas. + This is _not_ a socket timeout """ + payloads = [] if payloads is None else payloads grouped_payloads = group_by_topic_and_partition(payloads) - message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.PRODUCE_KEY) + + message = cls._encode_message_header(client_id, correlation_id, + KafkaProtocol.PRODUCE_KEY) + message += struct.pack('>hii', acks, timeout, len(grouped_payloads)) + for topic, topic_payloads in grouped_payloads.items(): - message += struct.pack('>h%dsi' % len(topic), len(topic), topic, len(topic_payloads)) + message += struct.pack('>h%dsi' % len(topic), + len(topic), topic, len(topic_payloads)) + for partition, payload in topic_payloads.items(): - message_set = KafkaProtocol._encode_message_set(payload.messages) - message += struct.pack('>ii%ds' % len(message_set), partition, len(message_set), message_set) + msg_set = KafkaProtocol._encode_message_set(payload.messages) + message += struct.pack('>ii%ds' % len(msg_set), partition, + len(msg_set), msg_set) + return struct.pack('>i%ds' % len(message), len(message), message) @classmethod def decode_produce_response(cls, data): """ - Decode bytes to a ProduceResponse + Decode bytes to a ProduceResponse Params ====== data: bytes to decode """ ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0) + for i in range(num_topics): ((strlen,), cur) = relative_unpack('>h', data, cur) - topic = data[cur:cur+strlen] + topic = data[cur:cur + strlen] cur += strlen ((num_partitions,), cur) = relative_unpack('>i', data, cur) for i in range(num_partitions): - ((partition, error, offset), cur) = relative_unpack('>ihq', data, cur) + ((partition, error, offset), cur) = relative_unpack('>ihq', + data, cur) + yield ProduceResponse(topic, partition, error, offset) @classmethod - def encode_fetch_request(cls, client_id, correlation_id, payloads=[], max_wait_time=100, min_bytes=4096): + def encode_fetch_request(cls, client_id, correlation_id, payloads=None, + max_wait_time=100, min_bytes=4096): """ Encodes some FetchRequest structs @@ -203,17 +230,27 @@ class KafkaProtocol(object): correlation_id: string payloads: list of FetchRequest max_wait_time: int, how long to block waiting on min_bytes of data - min_bytes: int, the minimum number of bytes to accumulate before returning the response + min_bytes: int, the minimum number of bytes to accumulate before + returning the response """ - + + payloads = [] if payloads is None else payloads grouped_payloads = group_by_topic_and_partition(payloads) - message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.FETCH_KEY) - message += struct.pack('>iiii', -1, max_wait_time, min_bytes, len(grouped_payloads)) # -1 is the replica id + + message = cls._encode_message_header(client_id, correlation_id, + KafkaProtocol.FETCH_KEY) + + # -1 is the replica id + message += struct.pack('>iiii', -1, max_wait_time, min_bytes, + len(grouped_payloads)) + for topic, topic_payloads in grouped_payloads.items(): message += write_short_string(topic) message += struct.pack('>i', len(topic_payloads)) for partition, payload in topic_payloads.items(): - message += struct.pack('>iqi', partition, payload.offset, payload.max_bytes) + message += struct.pack('>iqi', partition, payload.offset, + payload.max_bytes) + return struct.pack('>i%ds' % len(message), len(message), message) @classmethod @@ -226,25 +263,41 @@ class KafkaProtocol(object): data: bytes to decode """ ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0) + for i in range(num_topics): (topic, cur) = read_short_string(data, cur) ((num_partitions,), cur) = relative_unpack('>i', data, cur) + for i in range(num_partitions): - ((partition, error, highwater_mark_offset), cur) = relative_unpack('>ihq', data, cur) + ((partition, error, highwater_mark_offset), cur) = \ + relative_unpack('>ihq', data, cur) + (message_set, cur) = read_int_string(data, cur) - yield FetchResponse(topic, partition, error, highwater_mark_offset, + + yield FetchResponse( + topic, partition, error, + highwater_mark_offset, KafkaProtocol._decode_message_set_iter(message_set)) @classmethod - def encode_offset_request(cls, client_id, correlation_id, payloads=[]): + def encode_offset_request(cls, client_id, correlation_id, payloads=None): + payloads = [] if payloads is None else payloads grouped_payloads = group_by_topic_and_partition(payloads) - message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_KEY) - message += struct.pack('>ii', -1, len(grouped_payloads)) # -1 is the replica id + + message = cls._encode_message_header(client_id, correlation_id, + KafkaProtocol.OFFSET_KEY) + + # -1 is the replica id + message += struct.pack('>ii', -1, len(grouped_payloads)) + for topic, topic_payloads in grouped_payloads.items(): message += write_short_string(topic) message += struct.pack('>i', len(topic_payloads)) + for partition, payload in topic_payloads.items(): - message += struct.pack('>iqi', partition, payload.time, payload.max_offsets) + message += struct.pack('>iqi', partition, payload.time, + payload.max_offsets) + return struct.pack('>i%ds' % len(message), len(message), message) @classmethod @@ -257,19 +310,24 @@ class KafkaProtocol(object): data: bytes to decode """ ((correlation_id, num_topics), cur) = relative_unpack('>ii', data, 0) + for i in range(num_topics): (topic, cur) = read_short_string(data, cur) ((num_partitions,), cur) = relative_unpack('>i', data, cur) + for i in range(num_partitions): - ((partition, error, num_offsets,), cur) = relative_unpack('>ihi', data, cur) + ((partition, error, num_offsets,), cur) = \ + relative_unpack('>ihi', data, cur) + offsets = [] for j in range(num_offsets): ((offset,), cur) = relative_unpack('>q', data, cur) offsets.append(offset) + yield OffsetResponse(topic, partition, error, tuple(offsets)) @classmethod - def encode_metadata_request(cls, client_id, correlation_id, topics=[]): + def encode_metadata_request(cls, client_id, correlation_id, topics=None): """ Encode a MetadataRequest @@ -279,10 +337,15 @@ class KafkaProtocol(object): correlation_id: string topics: list of strings """ - message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.METADATA_KEY) + topics = [] if topics is None else topics + message = cls._encode_message_header(client_id, correlation_id, + KafkaProtocol.METADATA_KEY) + message += struct.pack('>i', len(topics)) + for topic in topics: message += struct.pack('>h%ds' % len(topic), len(topic), topic) + return write_int_string(message) @classmethod @@ -307,22 +370,34 @@ class KafkaProtocol(object): # Topic info ((num_topics,), cur) = relative_unpack('>i', data, cur) topicMetadata = {} + for i in range(num_topics): ((topicError,), cur) = relative_unpack('>h', data, cur) (topicName, cur) = read_short_string(data, cur) ((num_partitions,), cur) = relative_unpack('>i', data, cur) partitionMetadata = {} + for j in range(num_partitions): - ((partitionErrorCode, partition, leader, numReplicas), cur) = relative_unpack('>hiii', data, cur) - (replicas, cur) = relative_unpack('>%di' % numReplicas, data, cur) + ((partitionErrorCode, partition, leader, numReplicas), cur) = \ + relative_unpack('>hiii', data, cur) + + (replicas, cur) = relative_unpack('>%di' % numReplicas, + data, cur) + ((numIsr,), cur) = relative_unpack('>i', data, cur) (isr, cur) = relative_unpack('>%di' % numIsr, data, cur) - partitionMetadata[partition] = PartitionMetadata(topicName, partition, leader, replicas, isr) + + partitionMetadata[partition] = \ + PartitionMetadata(topicName, partition, leader, + replicas, isr) + topicMetadata[topicName] = partitionMetadata + return (brokers, topicMetadata) @classmethod - def encode_offset_commit_request(cls, client_id, correlation_id, group, payloads): + def encode_offset_commit_request(cls, client_id, correlation_id, + group, payloads): """ Encode some OffsetCommitRequest structs @@ -333,16 +408,21 @@ class KafkaProtocol(object): group: string, the consumer group you are committing offsets for payloads: list of OffsetCommitRequest """ - grouped_payloads= group_by_topic_and_partition(payloads) - message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_COMMIT_KEY) + grouped_payloads = group_by_topic_and_partition(payloads) + + message = cls._encode_message_header(client_id, correlation_id, + KafkaProtocol.OFFSET_COMMIT_KEY) message += write_short_string(group) message += struct.pack('>i', len(grouped_payloads)) + for topic, topic_payloads in grouped_payloads.items(): message += write_short_string(topic) message += struct.pack('>i', len(topic_payloads)) + for partition, payload in topic_payloads.items(): message += struct.pack('>iq', partition, payload.offset) message += write_short_string(payload.metadata) + return struct.pack('>i%ds' % len(message), len(message), message) @classmethod @@ -357,15 +437,18 @@ class KafkaProtocol(object): ((correlation_id,), cur) = relative_unpack('>i', data, 0) (client_id, cur) = read_short_string(data, cur) ((num_topics,), cur) = relative_unpack('>i', data, cur) + for i in xrange(num_topics): (topic, cur) = read_short_string(data, cur) ((num_partitions,), cur) = relative_unpack('>i', data, cur) + for i in xrange(num_partitions): ((partition, error), cur) = relative_unpack('>ih', data, cur) yield OffsetCommitResponse(topic, partition, error) @classmethod - def encode_offset_fetch_request(cls, client_id, correlation_id, group, payloads): + def encode_offset_fetch_request(cls, client_id, correlation_id, + group, payloads): """ Encode some OffsetFetchRequest structs @@ -377,14 +460,19 @@ class KafkaProtocol(object): payloads: list of OffsetFetchRequest """ grouped_payloads = group_by_topic_and_partition(payloads) - message = cls._encode_message_header(client_id, correlation_id, KafkaProtocol.OFFSET_FETCH_KEY) + message = cls._encode_message_header(client_id, correlation_id, + KafkaProtocol.OFFSET_FETCH_KEY) + message += write_short_string(group) message += struct.pack('>i', len(grouped_payloads)) + for topic, topic_payloads in grouped_payloads.items(): message += write_short_string(topic) message += struct.pack('>i', len(topic_payloads)) + for partition, payload in topic_payloads.items(): message += struct.pack('>i', partition) + return struct.pack('>i%ds' % len(message), len(message), message) @classmethod @@ -400,14 +488,19 @@ class KafkaProtocol(object): ((correlation_id,), cur) = relative_unpack('>i', data, 0) (client_id, cur) = read_short_string(data, cur) ((num_topics,), cur) = relative_unpack('>i', data, cur) + for i in range(num_topics): (topic, cur) = read_short_string(data, cur) ((num_partitions,), cur) = relative_unpack('>i', data, cur) + for i in range(num_partitions): ((partition, offset), cur) = relative_unpack('>iq', data, cur) (metadata, cur) = read_short_string(data, cur) ((error,), cur) = relative_unpack('>h', data, cur) - yield OffsetFetchResponse(topic, partition, offset, metadata, error) + + yield OffsetFetchResponse(topic, partition, offset, + metadata, error) + def create_message(payload, key=None): """ @@ -420,6 +513,7 @@ def create_message(payload, key=None): """ return Message(0, 0, key, payload) + def create_gzip_message(payloads, key=None): """ Construct a Gzipped Message containing multiple Messages @@ -433,9 +527,13 @@ def create_gzip_message(payloads, key=None): key: bytes, a key used for partition routing (optional) """ message_set = KafkaProtocol._encode_message_set( - [create_message(payload) for payload in payloads]) - gzipped = gzip_encode(message_set) - return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP), key, gzipped) + [create_message(payload) for payload in payloads]) + + gzipped = gzip_encode(message_set) + codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP + + return Message(0, 0x00 | codec, key, gzipped) + def create_snappy_message(payloads, key=None): """ @@ -450,6 +548,9 @@ def create_snappy_message(payloads, key=None): key: bytes, a key used for partition routing (optional) """ message_set = KafkaProtocol._encode_message_set( - [create_message(payload) for payload in payloads]) - snapped = snappy_encode(message_set) - return Message(0, 0x00 | (KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY), key, snapped) + [create_message(payload) for payload in payloads]) + + snapped = snappy_encode(message_set) + codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY + + return Message(0, 0x00 | codec, key, snapped) |