Diffstat (limited to 'kafka/consumer/fetcher.py')
 kafka/consumer/fetcher.py | 12 ++++++++++++
 1 file changed, 12 insertions(+), 0 deletions(-)
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py
index 62e28d6..34ff4cb 100644
--- a/kafka/consumer/fetcher.py
+++ b/kafka/consumer/fetcher.py
@@ -39,6 +39,7 @@ class Fetcher(six.Iterator):
         'fetch_max_wait_ms': 500,
         'max_partition_fetch_bytes': 1048576,
         'check_crcs': True,
+        'skip_double_compressed_messages': False,
         'iterator_refetch_records': 1,  # undocumented -- interface may change
         'api_version': (0, 8, 0),
     }
@@ -71,6 +72,13 @@ class Fetcher(six.Iterator):
                 consumed. This ensures no on-the-wire or on-disk corruption to
                 the messages occurred. This check adds some overhead, so it may
                 be disabled in cases seeking extreme performance. Default: True
+            skip_double_compressed_messages (bool): A bug in KafkaProducer
+                caused some messages to be corrupted via double-compression.
+                By default, the fetcher will return the messages as a compressed
+                blob of bytes with a single offset, i.e. how the message was
+                actually published to the cluster. If you prefer to have the
+                fetcher automatically detect corrupt messages and skip them,
+                set this option to True. Default: False.
         """
         self.config = copy.copy(self.DEFAULT_CONFIG)
         for key in self.config:
@@ -368,6 +376,10 @@ class Fetcher(six.Iterator):
                                     ' double-compressed. This should not'
                                     ' happen -- check your producers!',
                                     tp, offset)
+                        if self.config['skip_double_compressed_messages']:
+                            log.warning('Skipping double-compressed message at'
+                                        ' %s %d', tp, offset)
+                            continue
 
                     if msg.magic > 0:
                         last_offset, _, _ = inner_mset[-1]
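For context, here is roughly how a consumer could opt in to the new behavior. This is a minimal sketch, assuming kafka-python's KafkaConsumer accepts the same configuration key and forwards it to the Fetcher; the topic name and broker address are illustrative placeholders:

    from kafka import KafkaConsumer

    # Enable skipping of double-compressed message sets (the option added above).
    # 'example-topic' and 'localhost:9092' are hypothetical placeholders.
    consumer = KafkaConsumer(
        'example-topic',
        bootstrap_servers='localhost:9092',
        skip_double_compressed_messages=True,
    )

    for message in consumer:
        # With the option enabled, corrupt double-compressed message sets are
        # logged and skipped by the fetcher instead of being returned as a
        # single compressed blob of bytes.
        print(message.offset, message.value)

With the default (False), an affected message set is still returned as the compressed blob that was actually written to the cluster, so applications that want the data can decompress it manually.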