-rw-r--r-- | kafka/producer.py    | 42
-rw-r--r-- | kafka/protocol.py    | 23
-rw-r--r-- | test/test_protocol.py | 9
3 files changed, 49 insertions, 25 deletions
diff --git a/kafka/producer.py b/kafka/producer.py
index 8f35963..9ecb341 100644
--- a/kafka/producer.py
+++ b/kafka/producer.py
@@ -11,7 +11,10 @@ from multiprocessing import Queue, Process

 from kafka.common import ProduceRequest, TopicAndPartition
 from kafka.partitioner import HashedPartitioner
-from kafka.protocol import create_message
+from kafka.protocol import (
+    CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, ALL_CODECS,
+    create_message, create_gzip_message, create_snappy_message,
+)

 log = logging.getLogger("kafka")

@@ -21,7 +24,7 @@ BATCH_SEND_MSG_COUNT = 20
 STOP_ASYNC_PRODUCER = -1


-def _send_upstream(queue, client, batch_time, batch_size,
+def _send_upstream(queue, client, codec, batch_time, batch_size,
                    req_acks, ack_timeout):
     """
     Listen on the queue for a specified number of messages or till
@@ -62,7 +65,14 @@ def _send_upstream(queue, client, batch_time, batch_size,

         # Send collected requests upstream
         reqs = []
-        for topic_partition, messages in msgset.items():
+        for topic_partition, msg in msgset.items():
+            if codec == CODEC_GZIP:
+                messages = [create_gzip_message(msg)]
+            elif codec == CODEC_SNAPPY:
+                messages = [create_snappy_message(msg)]
+            else:
+                messages = [create_message(m) for m in msg]
+
             req = ProduceRequest(topic_partition.topic,
                                  topic_partition.partition,
                                  messages)
@@ -102,6 +112,7 @@ class Producer(object):
     def __init__(self, client, async=False,
                  req_acks=ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
@@ -119,11 +130,17 @@ class Producer(object):
         self.req_acks = req_acks
         self.ack_timeout = ack_timeout

+        if codec is None:
+            codec = CODEC_NONE
+        assert codec in ALL_CODECS
+        self.codec = codec
+
         if self.async:
             self.queue = Queue()  # Messages are sent through this queue
             self.proc = Process(target=_send_upstream,
                                 args=(self.queue,
                                       self.client.copy(),
+                                      self.codec,
                                       batch_send_every_t,
                                       batch_send_every_n,
                                       self.req_acks,
@@ -139,11 +156,16 @@ class Producer(object):
         """
         if self.async:
             for m in msg:
-                self.queue.put((TopicAndPartition(topic, partition),
-                                create_message(m)))
+                self.queue.put((TopicAndPartition(topic, partition), m))
             resp = []
         else:
-            messages = [create_message(m) for m in msg]
+            if self.codec == CODEC_GZIP:
+                messages = [create_gzip_message(msg)]
+            elif self.codec == CODEC_SNAPPY:
+                messages = [create_snappy_message(msg)]
+            else:
+                messages = [create_message(m) for m in msg]
+
             req = ProduceRequest(topic, partition, messages)
             try:
                 resp = self.client.send_produce_request([req], acks=self.req_acks,
@@ -168,7 +190,7 @@ class Producer(object):

 class SimpleProducer(Producer):
     """
-    A simple, round-robbin producer. Each message goes to exactly one partition
+    A simple, round-robin producer. Each message goes to exactly one partition

     Params:
     client - The Kafka client instance to use
@@ -189,6 +211,7 @@ class SimpleProducer(Producer):
     def __init__(self, client, async=False,
                  req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL,
@@ -196,7 +219,7 @@
         self.partition_cycles = {}
         self.random_start = random_start
         super(SimpleProducer, self).__init__(client, async, req_acks,
-                                             ack_timeout, batch_send,
+                                             ack_timeout, codec, batch_send,
                                              batch_send_every_n,
                                              batch_send_every_t)

@@ -241,6 +264,7 @@ class KeyedProducer(Producer):
     def __init__(self, client, partitioner=None, async=False,
                  req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
                  ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
+                 codec=None,
                  batch_send=False,
                  batch_send_every_n=BATCH_SEND_MSG_COUNT,
                  batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
@@ -250,7 +274,7 @@
         self.partitioners = {}

         super(KeyedProducer, self).__init__(client, async, req_acks,
-                                            ack_timeout, batch_send,
+                                            ack_timeout, codec, batch_send,
                                             batch_send_every_n,
                                             batch_send_every_t)
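For context, a minimal usage sketch (not part of the patch) of how a caller selects a codec once this change lands. The broker address, topic name, and the KafkaClient constructor arguments are assumptions for illustration and may differ across versions of the library:

# Usage sketch: enabling compression via the new codec parameter.
from kafka.client import KafkaClient
from kafka.producer import SimpleProducer
from kafka.protocol import CODEC_GZIP

kafka = KafkaClient("localhost", 9092)  # assumed broker host/port

# All payloads from one send_messages() call are wrapped into a single
# gzip-compressed Message by the codec branch in Producer.send_messages().
producer = SimpleProducer(kafka, codec=CODEC_GZIP)
producer.send_messages("my-topic", "message 1", "message 2")

Omitting codec falls back to CODEC_NONE (one plain Message per payload), and any value outside ALL_CODECS trips the assert in Producer.__init__.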
diff --git a/kafka/protocol.py b/kafka/protocol.py
index 7ec7946..730ae6b 100644
--- a/kafka/protocol.py
+++ b/kafka/protocol.py
@@ -18,6 +18,12 @@ from kafka.util import (

 log = logging.getLogger("kafka")

+ATTRIBUTE_CODEC_MASK = 0x03
+CODEC_NONE = 0x00
+CODEC_GZIP = 0x01
+CODEC_SNAPPY = 0x02
+ALL_CODECS = (CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY)
+

 class KafkaProtocol(object):
     """
@@ -32,11 +38,6 @@ class KafkaProtocol(object):
     OFFSET_COMMIT_KEY = 8
     OFFSET_FETCH_KEY = 9

-    ATTRIBUTE_CODEC_MASK = 0x03
-    CODEC_NONE = 0x00
-    CODEC_GZIP = 0x01
-    CODEC_SNAPPY = 0x02
-
     ###################
     #   Private API   #
     ###################
@@ -150,17 +151,17 @@ class KafkaProtocol(object):
             (key, cur) = read_int_string(data, cur)
             (value, cur) = read_int_string(data, cur)

-            codec = att & KafkaProtocol.ATTRIBUTE_CODEC_MASK
+            codec = att & ATTRIBUTE_CODEC_MASK

-            if codec == KafkaProtocol.CODEC_NONE:
+            if codec == CODEC_NONE:
                 yield (offset, Message(magic, att, key, value))

-            elif codec == KafkaProtocol.CODEC_GZIP:
+            elif codec == CODEC_GZIP:
                 gz = gzip_decode(value)
                 for (offset, msg) in KafkaProtocol._decode_message_set_iter(gz):
                     yield (offset, msg)

-            elif codec == KafkaProtocol.CODEC_SNAPPY:
+            elif codec == CODEC_SNAPPY:
                 snp = snappy_decode(value)
                 for (offset, msg) in KafkaProtocol._decode_message_set_iter(snp):
                     yield (offset, msg)
@@ -543,7 +544,7 @@ def create_gzip_message(payloads, key=None):
         [create_message(payload) for payload in payloads])

     gzipped = gzip_encode(message_set)
-    codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_GZIP
+    codec = ATTRIBUTE_CODEC_MASK & CODEC_GZIP

     return Message(0, 0x00 | codec, key, gzipped)

@@ -564,6 +565,6 @@ def create_snappy_message(payloads, key=None):
         [create_message(payload) for payload in payloads])

     snapped = snappy_encode(message_set)
-    codec = KafkaProtocol.ATTRIBUTE_CODEC_MASK & KafkaProtocol.CODEC_SNAPPY
+    codec = ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY

     return Message(0, 0x00 | codec, key, snapped)
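With the codec constants promoted to module level, the attribute-byte layout can be checked without reaching into KafkaProtocol. A minimal sketch, mirroring the assertions in test_protocol.py below:

# The low two bits of Message.attributes (ATTRIBUTE_CODEC_MASK) carry the codec.
from kafka.protocol import (
    ATTRIBUTE_CODEC_MASK, CODEC_NONE, CODEC_GZIP,
    create_message, create_gzip_message,
)

plain = create_message("v1")
assert plain.attributes & ATTRIBUTE_CODEC_MASK == CODEC_NONE

wrapped = create_gzip_message(["v1", "v2"])  # one Message wrapping a message set
assert wrapped.magic == 0
assert wrapped.attributes & ATTRIBUTE_CODEC_MASK == CODEC_GZIP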
diff --git a/test/test_protocol.py b/test/test_protocol.py
index 8bd2f5e..854a439 100644
--- a/test/test_protocol.py
+++ b/test/test_protocol.py
@@ -16,7 +16,8 @@ from kafka.codec import (
     snappy_encode, snappy_decode
 )
 from kafka.protocol import (
-    create_gzip_message, create_message, create_snappy_message, KafkaProtocol
+    create_gzip_message, create_message, create_snappy_message, KafkaProtocol,
+    ATTRIBUTE_CODEC_MASK, CODEC_GZIP, CODEC_SNAPPY
 )

 class TestProtocol(unittest2.TestCase):
@@ -33,8 +34,7 @@ class TestProtocol(unittest2.TestCase):
         payloads = ["v1", "v2"]
         msg = create_gzip_message(payloads)
         self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, KafkaProtocol.ATTRIBUTE_CODEC_MASK &
-                                         KafkaProtocol.CODEC_GZIP)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_GZIP)
         self.assertEqual(msg.key, None)
         # Need to decode to check since gzipped payload is non-deterministic
         decoded = gzip_decode(msg.value)
@@ -63,8 +63,7 @@ class TestProtocol(unittest2.TestCase):
         payloads = ["v1", "v2"]
         msg = create_snappy_message(payloads)
         self.assertEqual(msg.magic, 0)
-        self.assertEqual(msg.attributes, KafkaProtocol.ATTRIBUTE_CODEC_MASK &
-                                         KafkaProtocol.CODEC_SNAPPY)
+        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
         self.assertEqual(msg.key, None)
         decoded = snappy_decode(msg.value)
         expect = "".join([