author     Dana Powers <dana.powers@gmail.com>   2017-10-05 10:15:33 -0700
committer  Dana Powers <dana.powers@gmail.com>   2018-02-05 13:49:57 -0800
commit     619895d1216cfc3b5da4270d3869cd31b7b55a14 (patch)
tree       79ae89e8a5da9bdf92912774accfc724770c5a10
parent     618c5051493693c1305aa9f08e8a0583d5fcf0e3 (diff)
download   kafka-python-619895d1216cfc3b5da4270d3869cd31b7b55a14.tar.gz
Fix Fetcher.PartitionRecords to handle fetch_offset in the middle of compressed messageset
-rw-r--r--  kafka/consumer/fetcher.py |  9
-rw-r--r--  test/test_fetcher.py      | 23
2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py
index f9fcb37..6597448 100644
--- a/kafka/consumer/fetcher.py
+++ b/kafka/consumer/fetcher.py
@@ -835,12 +835,17 @@ class Fetcher(six.Iterator):
 
         return parsed_records
 
-    class PartitionRecords(six.Iterator):
+    class PartitionRecords(object):
         def __init__(self, fetch_offset, tp, messages):
             self.fetch_offset = fetch_offset
             self.topic_partition = tp
             self.messages = messages
-            self.message_idx = 0
+            # When fetching an offset that is in the middle of a
+            # compressed batch, we will get all messages in the batch.
+            # But we want to start 'take' at the fetch_offset
+            for i, msg in enumerate(messages):
+                if msg.offset == fetch_offset:
+                    self.message_idx = i
 
         # For truthiness evaluation we need to define __len__ or __nonzero__
         def __len__(self):
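
The idea behind the patch: when the requested fetch offset falls inside a compressed messageset, the broker returns (and the client decompresses) the whole batch, so the consumer itself must skip the leading records and start handing out data at fetch_offset. A minimal standalone sketch of that behavior, not kafka-python's actual classes (Record and SkipForwardRecords are illustrative names):

```python
from collections import namedtuple

# Illustrative stand-in for a decoded message; only `offset` matters here.
Record = namedtuple('Record', ['offset', 'value'])


class SkipForwardRecords(object):
    """Sketch of the skip-forward logic the patch adds (not the real class)."""

    def __init__(self, fetch_offset, records):
        self.fetch_offset = fetch_offset
        self.records = records
        self.idx = 0
        # A compressed batch is decompressed and returned whole, so advance
        # past any records that precede the requested fetch offset.
        for i, rec in enumerate(records):
            if rec.offset == fetch_offset:
                self.idx = i
                break

    def take(self, n):
        taken = self.records[self.idx:self.idx + n]
        self.idx += len(taken)
        if taken:
            # The next fetch should resume after the last record handed out.
            self.fetch_offset = taken[-1].offset + 1
        return taken


batch = [Record(o, b'v') for o in range(120, 130)]   # batch covers offsets 120-129
recs = SkipForwardRecords(123, batch)                # fetch starts mid-batch
assert recs.take(1)[0].offset == 123                 # earlier records are skipped
assert recs.fetch_offset == 124
```
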
diff --git a/test/test_fetcher.py b/test/test_fetcher.py
index 429071a..aa8e9c3 100644
--- a/test/test_fetcher.py
+++ b/test/test_fetcher.py
@@ -498,3 +498,26 @@ def test__parse_fetched_data__out_of_range(fetcher, topic, mocker):
     partition_record = fetcher._parse_fetched_data(completed_fetch)
     assert partition_record is None
     assert fetcher._subscriptions.assignment[tp].awaiting_reset is True
+
+
+def test_partition_records_offset():
+    """Test that compressed messagesets are handled correctly
+    when fetch offset is in the middle of the message list
+    """
+    batch_start = 120
+    batch_end = 130
+    fetch_offset = 123
+    tp = TopicPartition('foo', 0)
+    messages = [ConsumerRecord(tp.topic, tp.partition, i,
+                               None, None, 'key', 'value', 'checksum', 0, 0)
+                for i in range(batch_start, batch_end)]
+    records = Fetcher.PartitionRecords(fetch_offset, None, messages)
+    assert records.has_more()
+    msgs = records.take(1)
+    assert msgs[0].offset == 123
+    assert records.fetch_offset == 124
+    msgs = records.take(2)
+    assert len(msgs) == 2
+    assert records.has_more()
+    records.discard()
+    assert not records.has_more()
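
At the consumer API level, the user-visible effect of the fix is that seeking into the middle of a compressed batch resumes iteration at exactly the requested offset instead of replaying earlier records from the same batch. A hedged end-to-end sketch (the broker address and topic name are placeholders, and the cluster is assumed to already hold compressed messages around these offsets):

```python
from kafka import KafkaConsumer, TopicPartition

# Placeholder connection details; point these at a real cluster and topic.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                         enable_auto_commit=False)
tp = TopicPartition('foo', 0)
consumer.assign([tp])

# Seek to an offset that may fall inside a compressed messageset.
consumer.seek(tp, 123)

# With the fix, the first record yielded has offset 123 even if the broker
# returned the whole compressed batch starting at an earlier offset.
first = next(iter(consumer))
print(first.offset, first.value)
```
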