diff options
author | Dana Powers <dana.powers@gmail.com> | 2018-02-05 16:25:18 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-05 16:25:18 -0800 |
commit | 441aeb864519d2f574650e24a327423308adca03 (patch) | |
tree | 8f834926048f56755bbd146b3e97a1370aa6afb4 /kafka/consumer/fetcher.py | |
parent | 618c5051493693c1305aa9f08e8a0583d5fcf0e3 (diff) | |
download | kafka-python-441aeb864519d2f574650e24a327423308adca03.tar.gz |
Avoid consuming duplicate compressed messages from mid-batch (#1367)
Diffstat (limited to 'kafka/consumer/fetcher.py')
-rw-r--r-- | kafka/consumer/fetcher.py | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index f9fcb37..c9bbb97 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -835,12 +835,21 @@ class Fetcher(six.Iterator): return parsed_records - class PartitionRecords(six.Iterator): + class PartitionRecords(object): def __init__(self, fetch_offset, tp, messages): self.fetch_offset = fetch_offset self.topic_partition = tp self.messages = messages - self.message_idx = 0 + # When fetching an offset that is in the middle of a + # compressed batch, we will get all messages in the batch. + # But we want to start 'take' at the fetch_offset + for i, msg in enumerate(messages): + if msg.offset == fetch_offset: + self.message_idx = i + break + else: + self.message_idx = 0 + self.messages = None # For truthiness evaluation we need to define __len__ or __nonzero__ def __len__(self): |