| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
 | from __future__ import absolute_import
import collections
import copy
import logging
import threading
import time
import six
from . import errors as Errors
from .future import Future
from .structs import BrokerMetadata, PartitionMetadata, TopicPartition
log = logging.getLogger(__name__)
class ClusterMetadata(object):
    DEFAULT_CONFIG = {
        'retry_backoff_ms': 100,
        'metadata_max_age_ms': 300000,
    }
    def __init__(self, **configs):
        self._brokers = {}  # node_id -> BrokerMetadata
        self._partitions = {}  # topic -> partition -> PartitionMetadata
        self._broker_partitions = collections.defaultdict(set)  # node_id -> {TopicPartition...}
        self._groups = {}  # group_name -> node_id
        self._last_refresh_ms = 0
        self._last_successful_refresh_ms = 0
        self._need_update = False
        self._future = None
        self._listeners = set()
        self._lock = threading.Lock()
        self.need_all_topic_metadata = False
        self.unauthorized_topics = set()
        self.internal_topics = set()
        self.controller = None
        self.config = copy.copy(self.DEFAULT_CONFIG)
        for key in self.config:
            if key in configs:
                self.config[key] = configs[key]
    def brokers(self):
        """Get all BrokerMetadata
        Returns:
            set: {BrokerMetadata, ...}
        """
        return set(self._brokers.values())
    def broker_metadata(self, broker_id):
        """Get BrokerMetadata
        Arguments:
            broker_id (int): node_id for a broker to check
        Returns:
            BrokerMetadata or None if not found
        """
        return self._brokers.get(broker_id)
    def partitions_for_topic(self, topic):
        """Return set of all partitions for topic (whether available or not)
        Arguments:
            topic (str): topic to check for partitions
        Returns:
            set: {partition (int), ...}
        """
        if topic not in self._partitions:
            return None
        return set(self._partitions[topic].keys())
    def available_partitions_for_topic(self, topic):
        """Return set of partitions with known leaders
        Arguments:
            topic (str): topic to check for partitions
        Returns:
            set: {partition (int), ...}
        """
        if topic not in self._partitions:
            return None
        return set([partition for partition, metadata
                              in six.iteritems(self._partitions[topic])
                              if metadata.leader != -1])
    def leader_for_partition(self, partition):
        """Return node_id of leader, -1 unavailable, None if unknown."""
        if partition.topic not in self._partitions:
            return None
        elif partition.partition not in self._partitions[partition.topic]:
            return None
        return self._partitions[partition.topic][partition.partition].leader
    def partitions_for_broker(self, broker_id):
        """Return TopicPartitions for which the broker is a leader.
        Arguments:
            broker_id (int): node id for a broker
        Returns:
            set: {TopicPartition, ...}
        """
        return self._broker_partitions.get(broker_id)
    def coordinator_for_group(self, group):
        """Return node_id of group coordinator.
        Arguments:
            group (str): name of consumer group
        Returns:
            int: node_id for group coordinator
        """
        return self._groups.get(group)
    def ttl(self):
        """Milliseconds until metadata should be refreshed"""
        now = time.time() * 1000
        if self._need_update:
            ttl = 0
        else:
            metadata_age = now - self._last_successful_refresh_ms
            ttl = self.config['metadata_max_age_ms'] - metadata_age
        retry_age = now - self._last_refresh_ms
        next_retry = self.config['retry_backoff_ms'] - retry_age
        return max(ttl, next_retry, 0)
    def refresh_backoff(self):
        """Return milliseconds to wait before attempting to retry after failure"""
        return self.config['retry_backoff_ms']
    def request_update(self):
        """Flags metadata for update, return Future()
        Actual update must be handled separately. This method will only
        change the reported ttl()
        Returns:
            kafka.future.Future (value will be the cluster object after update)
        """
        with self._lock:
            self._need_update = True
            if not self._future or self._future.is_done:
              self._future = Future()
            return self._future
    def topics(self, exclude_internal_topics=True):
        """Get set of known topics.
        Arguments:
            exclude_internal_topics (bool): Whether records from internal topics
                (such as offsets) should be exposed to the consumer. If set to
                True the only way to receive records from an internal topic is
                subscribing to it. Default True
        Returns:
            set: {topic (str), ...}
        """
        topics = set(self._partitions.keys())
        if exclude_internal_topics:
            return topics - self.internal_topics
        else:
            return topics
    def failed_update(self, exception):
        """Update cluster state given a failed MetadataRequest."""
        f = None
        with self._lock:
            if self._future:
                f = self._future
                self._future = None
        if f:
            f.failure(exception)
        self._last_refresh_ms = time.time() * 1000
    def update_metadata(self, metadata):
        """Update cluster state given a MetadataResponse.
        Arguments:
            metadata (MetadataResponse): broker response to a metadata request
        Returns: None
        """
        # In the common case where we ask for a single topic and get back an
        # error, we should fail the future
        if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
            error_code, topic = metadata.topics[0][:2]
            error = Errors.for_code(error_code)(topic)
            return self.failed_update(error)
        if not metadata.brokers:
            log.warning("No broker metadata found in MetadataResponse")
        for broker in metadata.brokers:
            if metadata.API_VERSION == 0:
                node_id, host, port = broker
                rack = None
            else:
                node_id, host, port, rack = broker
            self._brokers.update({
                node_id: BrokerMetadata(node_id, host, port, rack)
            })
        if metadata.API_VERSION == 0:
            self.controller = None
        else:
            self.controller = self._brokers.get(metadata.controller_id)
        _new_partitions = {}
        _new_broker_partitions = collections.defaultdict(set)
        _new_unauthorized_topics = set()
        _new_internal_topics = set()
        for topic_data in metadata.topics:
            if metadata.API_VERSION == 0:
                error_code, topic, partitions = topic_data
                is_internal = False
            else:
                error_code, topic, is_internal, partitions = topic_data
            if is_internal:
                _new_internal_topics.add(topic)
            error_type = Errors.for_code(error_code)
            if error_type is Errors.NoError:
                _new_partitions[topic] = {}
                for p_error, partition, leader, replicas, isr in partitions:
                    _new_partitions[topic][partition] = PartitionMetadata(
                        topic=topic, partition=partition, leader=leader,
                        replicas=replicas, isr=isr, error=p_error)
                    if leader != -1:
                        _new_broker_partitions[leader].add(
                            TopicPartition(topic, partition))
            elif error_type is Errors.LeaderNotAvailableError:
                log.warning("Topic %s is not available during auto-create"
                            " initialization", topic)
            elif error_type is Errors.UnknownTopicOrPartitionError:
                log.error("Topic %s not found in cluster metadata", topic)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.error("Topic %s is not authorized for this client", topic)
                _new_unauthorized_topics.add(topic)
            elif error_type is Errors.InvalidTopicError:
                log.error("'%s' is not a valid topic name", topic)
            else:
                log.error("Error fetching metadata for topic %s: %s",
                          topic, error_type)
        with self._lock:
            self._partitions = _new_partitions
            self._broker_partitions = _new_broker_partitions
            self.unauthorized_topics = _new_unauthorized_topics
            self.internal_topics = _new_internal_topics
            f = None
            if self._future:
                f = self._future
            self._future = None
            self._need_update = False
        now = time.time() * 1000
        self._last_refresh_ms = now
        self._last_successful_refresh_ms = now
        if f:
            f.success(self)
        log.debug("Updated cluster metadata to %s", self)
        for listener in self._listeners:
            listener(self)
    def add_listener(self, listener):
        """Add a callback function to be called on each metadata update"""
        self._listeners.add(listener)
    def remove_listener(self, listener):
        """Remove a previously added listener callback"""
        self._listeners.remove(listener)
    def add_group_coordinator(self, group, response):
        """Update with metadata for a group coordinator
        Arguments:
            group (str): name of group from GroupCoordinatorRequest
            response (GroupCoordinatorResponse): broker response
        Returns:
            bool: True if metadata is updated, False on error
        """
        log.debug("Updating coordinator for %s: %s", group, response)
        error_type = Errors.for_code(response.error_code)
        if error_type is not Errors.NoError:
            log.error("GroupCoordinatorResponse error: %s", error_type)
            self._groups[group] = -1
            return False
        node_id = response.coordinator_id
        coordinator = BrokerMetadata(
            response.coordinator_id,
            response.host,
            response.port,
            None)
        # Assume that group coordinators are just brokers
        # (this is true now, but could diverge in future)
        if node_id not in self._brokers:
            self._brokers[node_id] = coordinator
        # If this happens, either brokers have moved without
        # changing IDs, or our assumption above is wrong
        else:
            node = self._brokers[node_id]
            if coordinator.host != node.host or coordinator.port != node.port:
                log.error("GroupCoordinator metadata conflicts with existing"
                          " broker metadata. Coordinator: %s, Broker: %s",
                          coordinator, node)
                self._groups[group] = node_id
                return False
        log.info("Group coordinator for %s is %s", group, coordinator)
        self._groups[group] = node_id
        return True
    def __str__(self):
        return 'Cluster(brokers: %d, topics: %d, groups: %d)' % \
               (len(self._brokers), len(self._partitions), len(self._groups))
 |