diff options
author | Mark Roberts <markroberts@kixeye.com> | 2014-02-26 21:36:22 -0800 |
---|---|---|
committer | Mark Roberts <markroberts@kixeye.com> | 2014-02-26 21:36:22 -0800 |
commit | 112158fa75879e2343c361c00a8aecb176dbb767 (patch) | |
tree | b09b03781394c553ad902ca4a941865b74a76125 /kafka | |
parent | 3b39d9d6589ee46ae4d31fb078f44019a2810983 (diff) | |
parent | ab89a44ecf9c93b116fcc8516cfc21749df74507 (diff) | |
download | kafka-python-112158fa75879e2343c361c00a8aecb176dbb767.tar.gz |
Merge branch 'master' into conn_refactor
Conflicts:
example.py
Diffstat (limited to 'kafka')
-rw-r--r-- | kafka/NOTES.md | 2 | ||||
-rw-r--r-- | kafka/client.py | 25 | ||||
-rw-r--r-- | kafka/conn.py | 28 |
3 files changed, 46 insertions, 9 deletions
diff --git a/kafka/NOTES.md b/kafka/NOTES.md index 540cdad..8fb0f47 100644 --- a/kafka/NOTES.md +++ b/kafka/NOTES.md @@ -18,7 +18,7 @@ There are a few levels of abstraction: # Possible API - client = KafkaClient("localhost", 9092) + client = KafkaClient("localhost:9092") producer = KafkaProducer(client, "topic") producer.send_string("hello") diff --git a/kafka/client.py b/kafka/client.py index 9578ee8..12452de 100644 --- a/kafka/client.py +++ b/kafka/client.py @@ -10,7 +10,7 @@ from kafka.common import (ErrorMapping, ErrorStrings, TopicAndPartition, BrokerResponseError, PartitionUnavailableError, KafkaUnavailableError, KafkaRequestError) -from kafka.conn import KafkaConnection, DEFAULT_SOCKET_TIMEOUT_SECONDS +from kafka.conn import collect_hosts, KafkaConnection, DEFAULT_SOCKET_TIMEOUT_SECONDS from kafka.protocol import KafkaProtocol log = logging.getLogger("kafka") @@ -24,14 +24,15 @@ class KafkaClient(object): # NOTE: The timeout given to the client should always be greater than the # one passed to SimpleConsumer.get_message(), otherwise you can get a # socket timeout. - def __init__(self, host, port, client_id=CLIENT_ID, + def __init__(self, hosts, client_id=CLIENT_ID, timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS): # We need one connection to bootstrap self.client_id = client_id self.timeout = timeout - self.conns = { # (host, port) -> KafkaConnection - (host, port): KafkaConnection(host, port, timeout=timeout) - } + self.hosts = collect_hosts(hosts) + + # create connections only when we need them + self.conns = {} self.brokers = {} # broker_id -> BrokerMetadata self.topics_to_brokers = {} # topic_id -> broker_id self.topic_partitions = {} # topic_id -> [0, 1, 2, ...] @@ -41,6 +42,15 @@ class KafkaClient(object): # Private API # ################## + def _get_conn(self, host, port): + "Get or create a connection to a broker using host and port" + + host_key = (host, port) + if host_key not in self.conns: + self.conns[host_key] = KafkaConnection(host, port) + + return self.conns[host_key] + def _get_conn_for_broker(self, broker): """ Get or create a connection to a broker @@ -49,7 +59,7 @@ class KafkaClient(object): self.conns[(broker.host, broker.port)] = \ KafkaConnection(broker.host, broker.port, timeout=self.timeout) - return self.conns[(broker.host, broker.port)] + return self._get_conn(broker.host, broker.port) def _get_leader_for_partition(self, topic, partition): key = TopicAndPartition(topic, partition) @@ -72,7 +82,8 @@ class KafkaClient(object): Attempt to send a broker-agnostic request to one of the available brokers. Keep trying until you succeed. """ - for conn in self.conns.values(): + for (host, port) in self.hosts: + conn = self._get_conn(host, port) try: conn.send(requestId, request) response = conn.recv(requestId) diff --git a/kafka/conn.py b/kafka/conn.py index cc946fc..7dcd726 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -2,6 +2,7 @@ import copy import logging import socket import struct +from random import shuffle from threading import local from kafka.common import ConnectionError @@ -9,6 +10,31 @@ from kafka.common import ConnectionError log = logging.getLogger("kafka") DEFAULT_SOCKET_TIMEOUT_SECONDS = 120 +DEFAULT_KAFKA_PORT = 9092 + + +def collect_hosts(hosts, randomize=True): + """ + Collects a comma-separated set of hosts (host:port) and optionnaly + randomize the returned list. + """ + + if isinstance(hosts, str): + hosts = hosts.strip().split(',') + + result = [] + for host_port in hosts: + + res = host_port.split(':') + host = res[0] + port = int(res[1]) if len(res) > 1 else DEFAULT_KAFKA_PORT + result.append((host.strip(), port)) + + if randomize: + shuffle(result) + + return result + class KafkaConnection(local): """ @@ -84,7 +110,7 @@ class KafkaConnection(local): sent = self._sock.sendall(payload) if sent is not None: self._raise_connection_error() - except socket.error, e: + except socket.error: log.exception('Unable to send payload to Kafka') self._raise_connection_error() |