summary refs log tree commit diff
path: root/kafka/producer.py
diff options
context:
space:
mode:
author    Patrick Lucas <plucas@yelp.com>  2014-05-03 11:27:57 -0700
committer Patrick Lucas <plucas@yelp.com>  2014-05-03 16:09:33 -0700
commit   671b74ab2e035a2f2ba7f90419794f6dbec08366 (patch)
tree     001041ce79d2f87b6d2d8b20dcaa28aed3588967 /kafka/producer.py
parent   2415609ce0899ef53e761e7ccb13177782d46c4e (diff)
download kafka-python-671b74ab2e035a2f2ba7f90419794f6dbec08366.tar.gz
Add 'codec' parameter to Producer
Adds a codec parameter to Producer.__init__ that lets the user choose a compression codec to use for all messages sent by it.
Diffstat (limited to 'kafka/producer.py')
-rw-r--r--  kafka/producer.py | 42 ++++++++++++++++++++++++++++++----------
1 file changed, 33 insertions(+), 9 deletions(-)
diff --git a/kafka/producer.py b/kafka/producer.py
index 12a2934..563d160 100644
--- a/kafka/producer.py
+++ b/kafka/producer.py
@@ -10,7 +10,10 @@ from multiprocessing import Queue, Process
from kafka.common import ProduceRequest, TopicAndPartition
from kafka.partitioner import HashedPartitioner
-from kafka.protocol import create_message
+from kafka.protocol import (
+ CODEC_NONE, CODEC_GZIP, CODEC_SNAPPY, ALL_CODECS,
+ create_message, create_gzip_message, create_snappy_message,
+)
log = logging.getLogger("kafka")
@@ -20,7 +23,7 @@ BATCH_SEND_MSG_COUNT = 20
STOP_ASYNC_PRODUCER = -1
-def _send_upstream(queue, client, batch_time, batch_size,
+def _send_upstream(queue, client, codec, batch_time, batch_size,
req_acks, ack_timeout):
"""
Listen on the queue for a specified number of messages or till
@@ -61,7 +64,14 @@ def _send_upstream(queue, client, batch_time, batch_size,
# Send collected requests upstream
reqs = []
- for topic_partition, messages in msgset.items():
+ for topic_partition, msg in msgset.items():
+ if codec == CODEC_GZIP:
+ messages = [create_gzip_message(msg)]
+ elif codec == CODEC_SNAPPY:
+ messages = [create_snappy_message(msg)]
+ else:
+ messages = [create_message(m) for m in msg]
+
req = ProduceRequest(topic_partition.topic,
topic_partition.partition,
messages)
@@ -101,6 +111,7 @@ class Producer(object):
def __init__(self, client, async=False,
req_acks=ACK_AFTER_LOCAL_WRITE,
ack_timeout=DEFAULT_ACK_TIMEOUT,
+ codec=None,
batch_send=False,
batch_send_every_n=BATCH_SEND_MSG_COUNT,
batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
@@ -118,11 +129,17 @@ class Producer(object):
self.req_acks = req_acks
self.ack_timeout = ack_timeout
+ if codec is None:
+ codec = CODEC_NONE
+ assert codec in ALL_CODECS
+ self.codec = codec
+
if self.async:
self.queue = Queue() # Messages are sent through this queue
self.proc = Process(target=_send_upstream,
args=(self.queue,
self.client.copy(),
+ self.codec,
batch_send_every_t,
batch_send_every_n,
self.req_acks,
@@ -138,11 +155,16 @@ class Producer(object):
"""
if self.async:
for m in msg:
- self.queue.put((TopicAndPartition(topic, partition),
- create_message(m)))
+ self.queue.put((TopicAndPartition(topic, partition), m))
resp = []
else:
- messages = [create_message(m) for m in msg]
+ if self.codec == CODEC_GZIP:
+ messages = [create_gzip_message(msg)]
+ elif self.codec == CODEC_SNAPPY:
+ messages = [create_snappy_message(msg)]
+ else:
+ messages = [create_message(m) for m in msg]
+
req = ProduceRequest(topic, partition, messages)
try:
resp = self.client.send_produce_request([req], acks=self.req_acks,
@@ -167,7 +189,7 @@ class Producer(object):
class SimpleProducer(Producer):
"""
- A simple, round-robbin producer. Each message goes to exactly one partition
+ A simple, round-robin producer. Each message goes to exactly one partition
Params:
client - The Kafka client instance to use
@@ -184,12 +206,13 @@ class SimpleProducer(Producer):
def __init__(self, client, async=False,
req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
+ codec=None,
batch_send=False,
batch_send_every_n=BATCH_SEND_MSG_COUNT,
batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
self.partition_cycles = {}
super(SimpleProducer, self).__init__(client, async, req_acks,
- ack_timeout, batch_send,
+ ack_timeout, codec, batch_send,
batch_send_every_n,
batch_send_every_t)
@@ -227,6 +250,7 @@ class KeyedProducer(Producer):
def __init__(self, client, partitioner=None, async=False,
req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
ack_timeout=Producer.DEFAULT_ACK_TIMEOUT,
+ codec=None,
batch_send=False,
batch_send_every_n=BATCH_SEND_MSG_COUNT,
batch_send_every_t=BATCH_SEND_DEFAULT_INTERVAL):
@@ -236,7 +260,7 @@ class KeyedProducer(Producer):
self.partitioners = {}
super(KeyedProducer, self).__init__(client, async, req_acks,
- ack_timeout, batch_send,
+ ack_timeout, codec, batch_send,
batch_send_every_n,
batch_send_every_t)