 kafka/consumer/fetcher.py | 12 ++++++++++++
 kafka/consumer/group.py   |  8 ++++++++
 2 files changed, 20 insertions(+), 0 deletions(-)
diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py
index 62e28d6..34ff4cb 100644
--- a/kafka/consumer/fetcher.py
+++ b/kafka/consumer/fetcher.py
@@ -39,6 +39,7 @@ class Fetcher(six.Iterator):
         'fetch_max_wait_ms': 500,
         'max_partition_fetch_bytes': 1048576,
         'check_crcs': True,
+        'skip_double_compressed_messages': False,
         'iterator_refetch_records': 1,  # undocumented -- interface may change
         'api_version': (0, 8, 0),
     }
@@ -71,6 +72,13 @@ class Fetcher(six.Iterator):
                 consumed. This ensures no on-the-wire or on-disk corruption to
                 the messages occurred. This check adds some overhead, so it may
                 be disabled in cases seeking extreme performance. Default: True
+            skip_double_compressed_messages (bool): A bug in KafkaProducer
+                caused some messages to be corrupted via double-compression.
+                By default, the fetcher will return the messages as a compressed
+                blob of bytes with a single offset, i.e. how the message was
+                actually published to the cluster. If you prefer to have the
+                fetcher automatically detect corrupt messages and skip them,
+                set this option to True. Default: False.
         """
         self.config = copy.copy(self.DEFAULT_CONFIG)
         for key in self.config:
@@ -368,6 +376,10 @@ class Fetcher(six.Iterator):
                                 ' double-compressed. This should not'
                                 ' happen -- check your producers!',
                                 tp, offset)
+                    if self.config['skip_double_compressed_messages']:
+                        log.warning('Skipping double-compressed message at'
+                                    ' %s %d', tp, offset)
+                        continue
 
                     if msg.magic > 0:
                         last_offset, _, _ = inner_mset[-1]
diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py
index 8509999..7fe509a 100644
--- a/kafka/consumer/group.py
+++ b/kafka/consumer/group.py
@@ -123,6 +123,13 @@ class KafkaConsumer(six.Iterator):
         consumer_timeout_ms (int): number of milliseconds to block during
             message iteration before raising StopIteration (i.e., ending the
             iterator). Default -1 (block forever).
+        skip_double_compressed_messages (bool): A bug in KafkaProducer <= 1.2.4
+            caused some messages to be corrupted via double-compression.
+            By default, the fetcher will return these messages as a compressed
+            blob of bytes with a single offset, i.e. how the message was
+            actually published to the cluster. If you prefer to have the
+            fetcher automatically detect corrupt messages and skip them,
+            set this option to True. Default: False.
         security_protocol (str): Protocol used to communicate with brokers.
             Valid values are: PLAINTEXT, SSL. Default: PLAINTEXT.
         ssl_context (ssl.SSLContext): pre-configured SSLContext for wrapping
@@ -189,6 +196,7 @@ class KafkaConsumer(six.Iterator):
         'send_buffer_bytes': None,
         'receive_buffer_bytes': None,
         'consumer_timeout_ms': -1,
+        'skip_double_compressed_messages': False,
         'security_protocol': 'PLAINTEXT',
         'ssl_context': None,
         'ssl_check_hostname': True,
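
For context, a minimal sketch of how a consumer might opt in to the behavior added by this patch. The topic name and broker address below are placeholders for illustration, not part of the change itself:

    from kafka import KafkaConsumer

    # Placeholder topic and broker address -- substitute your own.
    consumer = KafkaConsumer(
        'my-topic',
        bootstrap_servers='localhost:9092',
        # Option added by this patch: when True, the fetcher detects messages
        # corrupted by the producer double-compression bug and skips them
        # instead of returning them as a single compressed blob of bytes.
        skip_double_compressed_messages=True,
    )

    for message in consumer:
        print(message.topic, message.offset, message.value)

Note that the default (False) preserves the old behavior, so existing consumers are unaffected unless they explicitly enable skipping.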