summaryrefslogtreecommitdiff
path: root/kafka
diff options
context:
space:
mode:
authorZack Dever <zdever@pandora.com>2016-04-07 17:46:55 -0700
committerZack Dever <zdever@pandora.com>2016-04-13 17:26:39 -0700
commite010669b602ffdfddde6fa2a381dad6c3be1f05d (patch)
tree453f771664ac89d18a1962891580c1a1d7b19b3f /kafka
parentcaf4cdefe4f41b444d44ddef8f40f5ddeccf65b9 (diff)
downloadkafka-python-e010669b602ffdfddde6fa2a381dad6c3be1f05d.tar.gz
Beginnings of metrics instrumentation in kafka consumer.
This adds the parent metrics instance to kafka consumer, which will eventually be used to instrument everything under consumer. To start I ported the java consumer coordinator metrics.
Diffstat (limited to 'kafka')
-rw-r--r--kafka/consumer/group.py29
-rw-r--r--kafka/coordinator/consumer.py71
2 files changed, 53 insertions, 47 deletions
diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py
index 0a78e7f..afcc996 100644
--- a/kafka/consumer/group.py
+++ b/kafka/consumer/group.py
@@ -12,6 +12,7 @@ from kafka.consumer.subscription_state import SubscriptionState
from kafka.coordinator.consumer import ConsumerCoordinator
from kafka.coordinator.assignors.range import RangePartitionAssignor
from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor
+from kafka.metrics import DictReporter, MetricConfig, Metrics
from kafka.protocol.offset import OffsetResetStrategy
from kafka.structs import TopicPartition
from kafka.version import __version__
@@ -143,6 +144,13 @@ class KafkaConsumer(six.Iterator):
offset commits; 0.8.0 is what is left. If set to 'auto', will
attempt to infer the broker version by probing various APIs.
Default: auto
+ metric_reporters (list): A list of classes to use as metrics reporters.
+ Implementing the AbstractMetricsReporter interface allows plugging
+ in classes that will be notified of new metric creation. Default: []
+ metrics_num_samples (int): The number of samples maintained to compute
+ metrics. Default: 2
+ metrics_sample_window_ms (int): The number of samples maintained to
+ compute metrics. Default: 30000
Note:
Configuration parameters are described in more detail at
@@ -181,9 +189,9 @@ class KafkaConsumer(six.Iterator):
'ssl_keyfile': None,
'api_version': 'auto',
'connections_max_idle_ms': 9 * 60 * 1000, # not implemented yet
- #'metric_reporters': None,
- #'metrics_num_samples': 2,
- #'metrics_sample_window_ms': 30000,
+ 'metric_reporters': [],
+ 'metrics_num_samples': 2,
+ 'metrics_sample_window_ms': 30000,
}
def __init__(self, *topics, **configs):
@@ -202,6 +210,16 @@ class KafkaConsumer(six.Iterator):
new_config, self.config['auto_offset_reset'])
self.config['auto_offset_reset'] = new_config
+ metrics_tags = {'client-id': self.config['client_id']}
+ metric_config = MetricConfig(samples=self.config['metrics_num_samples'],
+ time_window_ms=self.config['metrics_sample_window_ms'],
+ tags=metrics_tags)
+ reporters = [reporter() for reporter in self.config['metric_reporters']]
+ reporters.append(DictReporter('kafka.consumer'))
+ self._metrics = Metrics(metric_config, reporters)
+ metric_group_prefix = 'consumer'
+ # TODO _metrics likely needs to be passed to KafkaClient, Fetcher, etc.
+
self._client = KafkaClient(**self.config)
# Check Broker Version if not set explicitly
@@ -217,14 +235,13 @@ class KafkaConsumer(six.Iterator):
self._fetcher = Fetcher(
self._client, self._subscription, **self.config)
self._coordinator = ConsumerCoordinator(
- self._client, self._subscription,
+ self._client, self._subscription, self._metrics, metric_group_prefix,
assignors=self.config['partition_assignment_strategy'],
**self.config)
self._closed = False
self._iterator = None
self._consumer_timeout = float('inf')
- #self.metrics = None
if topics:
self._subscription.subscribe(topics=topics)
self._client.set_topics(topics)
@@ -277,7 +294,7 @@ class KafkaConsumer(six.Iterator):
log.debug("Closing the KafkaConsumer.")
self._closed = True
self._coordinator.close()
- #self.metrics.close()
+ self._metrics.close()
self._client.close()
try:
self.config['key_deserializer'].close()
diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py
index cd3d48a..50d2806 100644
--- a/kafka/coordinator/consumer.py
+++ b/kafka/coordinator/consumer.py
@@ -14,6 +14,8 @@ from .assignors.roundrobin import RoundRobinPartitionAssignor
from .protocol import ConsumerProtocol
from .. import errors as Errors
from ..future import Future
+from ..metrics import AnonMeasurable
+from ..metrics.stats import Avg, Count, Max, Rate
from ..protocol.commit import OffsetCommitRequest, OffsetFetchRequest
from ..structs import OffsetAndMetadata, TopicPartition
from ..util import WeakMethod
@@ -36,7 +38,8 @@ class ConsumerCoordinator(BaseCoordinator):
'api_version': (0, 9),
}
- def __init__(self, client, subscription, **configs):
+ def __init__(self, client, subscription, metrics, metric_group_prefix,
+ **configs):
"""Initialize the coordination manager.
Keyword Arguments:
@@ -97,10 +100,8 @@ class ConsumerCoordinator(BaseCoordinator):
interval = self.config['auto_commit_interval_ms'] / 1000.0
self._auto_commit_task = AutoCommitTask(weakref.proxy(self), interval)
- # metrics=None,
- # metric_group_prefix=None,
- # metric_tags=None,
- # self.sensors = ConsumerCoordinatorMetrics(metrics, metric_group_prefix, metric_tags)
+ self._sensors = ConsumerCoordinatorMetrics(metrics, metric_group_prefix,
+ self._subscription)
def __del__(self):
if hasattr(self, '_auto_commit_task') and self._auto_commit_task:
@@ -470,12 +471,13 @@ class ConsumerCoordinator(BaseCoordinator):
future = Future()
_f = self._client.send(node_id, request)
- _f.add_callback(self._handle_offset_commit_response, offsets, future)
+ _f.add_callback(self._handle_offset_commit_response, offsets, future, time.time())
_f.add_errback(self._failed_request, node_id, request, future)
return future
- def _handle_offset_commit_response(self, offsets, future, response):
- #self.sensors.commit_latency.record(response.requestLatencyMs())
+ def _handle_offset_commit_response(self, offsets, future, send_time, response):
+ # TODO look at adding request_latency_ms to response (like java kafka)
+ self._sensors.commit_latency.record((time.time() - send_time) * 1000)
unauthorized_topics = set()
for topic, partitions in response.topics:
@@ -720,38 +722,25 @@ class AutoCommitTask(object):
self._reschedule(next_at)
-# TODO
-"""
class ConsumerCoordinatorMetrics(object):
- def __init__(self, metrics, prefix, tags):
+ def __init__(self, metrics, metric_group_prefix, subscription):
self.metrics = metrics
- self.group_name = prefix + "-coordinator-metrics"
-
- self.commit_latency = metrics.sensor("commit-latency")
- self.commit_latency.add(metrics.MetricName(
- "commit-latency-avg", self.group_name,
- "The average time taken for a commit request",
- tags), metrics.Avg())
- self.commit_latency.add(metrics.MetricName(
- "commit-latency-max", self.group_name,
- "The max time taken for a commit request",
- tags), metrics.Max())
- self.commit_latency.add(metrics.MetricName(
- "commit-rate", self.group_name,
- "The number of commit calls per second",
- tags), metrics.Rate(metrics.Count()))
-
- '''
- def _num_partitions(config, now):
- new Measurable() {
- public double measure(MetricConfig config, long now) {
- return subscriptions.assignedPartitions().size();
- }
- };
- metrics.addMetric(new MetricName("assigned-partitions",
- this.metricGrpName,
- "The number of partitions currently assigned to this consumer",
- tags),
- numParts);
- '''
-"""
+ self.metric_group_name = '%s-coordinator-metrics' % metric_group_prefix
+
+ self.commit_latency = metrics.sensor('commit-latency')
+ self.commit_latency.add(metrics.metric_name(
+ 'commit-latency-avg', self.metric_group_name,
+ 'The average time taken for a commit request'), Avg())
+ self.commit_latency.add(metrics.metric_name(
+ 'commit-latency-max', self.metric_group_name,
+ 'The max time taken for a commit request'), Max())
+ self.commit_latency.add(metrics.metric_name(
+ 'commit-rate', self.metric_group_name,
+ 'The number of commit calls per second'), Rate(sampled_stat=Count()))
+
+ num_parts = AnonMeasurable(lambda config, now:
+ len(subscription.assigned_partitions()))
+ metrics.add_metric(metrics.metric_name(
+ 'assigned-partitions', self.metric_group_name,
+ 'The number of partitions currently assigned to this consumer'),
+ num_parts)