summaryrefslogtreecommitdiff
path: root/kafka/consumer/fetcher.py
diff options
context:
space:
mode:
authorDana Powers <dana.powers@gmail.com>2018-02-05 16:25:18 -0800
committerGitHub <noreply@github.com>2018-02-05 16:25:18 -0800
commit441aeb864519d2f574650e24a327423308adca03 (patch)
tree8f834926048f56755bbd146b3e97a1370aa6afb4 /kafka/consumer/fetcher.py
parent618c5051493693c1305aa9f08e8a0583d5fcf0e3 (diff)
downloadkafka-python-441aeb864519d2f574650e24a327423308adca03.tar.gz
Avoid consuming duplicate compressed messages from mid-batch (#1367)
Diffstat (limited to 'kafka/consumer/fetcher.py')
-rw-r--r--kafka/consumer/fetcher.py13
1 files changed, 11 insertions, 2 deletions
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py
index f9fcb37..c9bbb97 100644
--- a/kafka/consumer/fetcher.py
+++ b/kafka/consumer/fetcher.py
@@ -835,12 +835,21 @@ class Fetcher(six.Iterator):
return parsed_records
- class PartitionRecords(six.Iterator):
+ class PartitionRecords(object):
def __init__(self, fetch_offset, tp, messages):
self.fetch_offset = fetch_offset
self.topic_partition = tp
self.messages = messages
- self.message_idx = 0
+ # When fetching an offset that is in the middle of a
+ # compressed batch, we will get all messages in the batch.
+ # But we want to start 'take' at the fetch_offset
+ for i, msg in enumerate(messages):
+ if msg.offset == fetch_offset:
+ self.message_idx = i
+ break
+ else:
+ self.message_idx = 0
+ self.messages = None
# For truthiness evaluation we need to define __len__ or __nonzero__
def __len__(self):