diff options
Diffstat (limited to 'kafka/partitioner.py')
-rw-r--r-- | kafka/partitioner.py | 52 |
1 files changed, 52 insertions, 0 deletions
"""Partitioners that choose which partition a message is sent to."""

from itertools import cycle


class Partitioner(object):
    """
    Base class for a partitioner
    """
    def __init__(self, partitions):
        """
        Initialize the partitioner

        partitions - A list of available partitions (during startup)
        """
        self.partitions = partitions

    def partition(self, key, partitions):
        """
        Takes a key and the current list of partitions as arguments and
        returns a partition to be used for the message

        key        - The message key; subclasses decide whether to use it
        partitions - The list of partitions is passed in every call. This
                     may look like an overhead, but it will be useful
                     (in future) when we handle cases like rebalancing

        Raises NotImplementedError; subclasses must override this method.
        """
        # NotImplementedError is the exception; the NotImplemented constant
        # is not callable and would surface as a confusing TypeError.
        raise NotImplementedError('partition function has to be implemented')


class RoundRobinPartitioner(Partitioner):
    """
    Implements a round robin partitioner which sends data to partitions
    in a round robin fashion
    """
    def __init__(self, partitions):
        """
        Initialize the partitioner

        partitions - A list of available partitions (during startup)
        """
        self._set_partitions(partitions)

    def _set_partitions(self, partitions):
        """
        Remember the partition list and restart the rotation over it
        """
        # The list and the iterator are kept apart: the list is what later
        # calls are compared against, the iterator holds the rotation state.
        self.partitions = partitions
        self._iterpart = cycle(partitions)

    def partition(self, key, partitions):
        """
        Returns the next partition in the rotation; the key is ignored

        partitions - The current list of partitions. When it differs from
                     the list seen last, the rotation restarts from the
                     first element of the new list.
        """
        # Refresh the partition list if necessary
        if self.partitions != partitions:
            self._set_partitions(partitions)

        # Builtin next() works on both Python 2.6+ and Python 3.
        return next(self._iterpart)


class HashedPartitioner(Partitioner):
    """
    Implements a partitioner which selects the target partition based on
    the hash of the key
    """
    def partition(self, key, partitions):
        """
        Returns partitions[hash(key) % len(partitions)]

        key        - Any hashable object
        partitions - The current list of partitions; must not be empty,
                     otherwise the modulo raises ZeroDivisionError

        NOTE: this relies on the built-in hash(), which on Python 3 is
        salted per process for str/bytes keys unless PYTHONHASHSEED is
        fixed, so the key-to-partition mapping may differ between processes.
        """
        size = len(partitions)
        # Python's % takes the sign of the divisor, so idx is always in
        # range(size) even when hash(key) is negative.
        idx = hash(key) % size
        return partitions[idx]