summaryrefslogtreecommitdiff
path: root/kafka/partitioner
diff options
context:
space:
mode:
authorDana Powers <dana.powers@rd.io>2014-09-09 16:42:23 -0700
committerDana Powers <dana.powers@rd.io>2014-09-10 22:14:29 -0700
commit27e812e66bd04d3acf59ad6792b07d1c7056c036 (patch)
treea9960a0ec8d7fd4e1452a09f0ab7fcd69f6d0494 /kafka/partitioner
parentf806c24d849928c95342d2b82a9d2d9507ba175a (diff)
downloadkafka-python-27e812e66bd04d3acf59ad6792b07d1c7056c036.tar.gz
Separate consumers/producers/partitioners
Diffstat (limited to 'kafka/partitioner')
-rw-r--r--kafka/partitioner/__init__.py6
-rw-r--r--kafka/partitioner/base.py23
-rw-r--r--kafka/partitioner/hashed.py12
-rw-r--r--kafka/partitioner/roundrobin.py23
4 files changed, 64 insertions, 0 deletions
diff --git a/kafka/partitioner/__init__.py b/kafka/partitioner/__init__.py
new file mode 100644
index 0000000..fdb19bb
--- /dev/null
+++ b/kafka/partitioner/__init__.py
@@ -0,0 +1,6 @@
+from .roundrobin import RoundRobinPartitioner
+from .hashed import HashedPartitioner
+
+__all__ = [
+ 'RoundRobinPartitioner', 'HashedPartitioner'
+]
diff --git a/kafka/partitioner/base.py b/kafka/partitioner/base.py
new file mode 100644
index 0000000..c62b7ed
--- /dev/null
+++ b/kafka/partitioner/base.py
@@ -0,0 +1,23 @@
+
+class Partitioner(object):
+ """
+ Base class for a partitioner
+ """
+ def __init__(self, partitions):
+ """
+ Initialize the partitioner
+
+ partitions - A list of available partitions (during startup)
+ """
+ self.partitions = partitions
+
+ def partition(self, key, partitions):
+ """
+ Takes a string key and num_partitions as argument and returns
+ a partition to be used for the message
+
+ partitions - The list of partitions is passed in every call. This
+ may look like an overhead, but it will be useful
+ (in future) when we handle cases like rebalancing
+ """
+ raise NotImplementedError('partition function has to be implemented')
diff --git a/kafka/partitioner/hashed.py b/kafka/partitioner/hashed.py
new file mode 100644
index 0000000..587a3de
--- /dev/null
+++ b/kafka/partitioner/hashed.py
@@ -0,0 +1,12 @@
+from .base import Partitioner
+
+class HashedPartitioner(Partitioner):
+ """
+ Implements a partitioner which selects the target partition based on
+ the hash of the key
+ """
+ def partition(self, key, partitions):
+ size = len(partitions)
+ idx = hash(key) % size
+
+ return partitions[idx]
diff --git a/kafka/partitioner/roundrobin.py b/kafka/partitioner/roundrobin.py
new file mode 100644
index 0000000..54d00da
--- /dev/null
+++ b/kafka/partitioner/roundrobin.py
@@ -0,0 +1,23 @@
+from itertools import cycle
+
+from .base import Partitioner
+
+class RoundRobinPartitioner(Partitioner):
+ """
+ Implements a round robin partitioner which sends data to partitions
+ in a round robin fashion
+ """
+ def __init__(self, partitions):
+ super(RoundRobinPartitioner, self).__init__(partitions)
+ self.iterpart = cycle(partitions)
+
+ def _set_partitions(self, partitions):
+ self.partitions = partitions
+ self.iterpart = cycle(partitions)
+
+ def partition(self, key, partitions):
+ # Refresh the partition list if necessary
+ if self.partitions != partitions:
+ self._set_partitions(partitions)
+
+ return next(self.iterpart)