| author | Dana Powers <dana.powers@gmail.com> | 2016-07-31 11:55:58 -0700 |
|---|---|---|
| committer | Dana Powers <dana.powers@gmail.com> | 2016-09-24 14:01:05 -0700 |
| commit | 26fe1f2e296aa78e0fe79c01f0b974dfc8741246 (patch) | |
| tree | f0ae7d24e3b341b550797000b2a80d84b52cefb8 | |
| parent | 2a7aca1630b81669595d753083239ec9fbf66ff5 (diff) | |
| download | kafka-python-26fe1f2e296aa78e0fe79c01f0b974dfc8741246.tar.gz | |
First scratch commit kafka.streams
27 files changed, 4356 insertions, 0 deletions
diff --git a/kafka/streams/__init__.py b/kafka/streams/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/kafka/streams/__init__.py diff --git a/kafka/streams/errors.py b/kafka/streams/errors.py new file mode 100644 index 0000000..e35007e --- /dev/null +++ b/kafka/streams/errors.py @@ -0,0 +1,23 @@ +from __future__ import absolute_import + +from kafka.errors import KafkaError, IllegalStateError + + +class StreamsError(KafkaError): + pass + + +class ProcessorStateError(StreamsError): + pass + + +class TopologyBuilderError(StreamsError): + pass + + +class NoSuchElementError(StreamsError): + pass + + +class TaskAssignmentError(StreamsError): + pass diff --git a/kafka/streams/kafka.py b/kafka/streams/kafka.py new file mode 100644 index 0000000..bc7dcec --- /dev/null +++ b/kafka/streams/kafka.py @@ -0,0 +1,185 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +""" +from __future__ import absolute_import + +import copy +import logging +import uuid + +import kafka.streams.errors as Errors + +from .processor.stream_thread import StreamThread +from .utils import AtomicInteger + +log = logging.getLogger(__name__) + + +# container states +CREATED = 0 +RUNNING = 1 +STOPPED = 2 + + +class KafkaStreams(object): + """ + Kafka Streams allows for performing continuous computation on input coming + from one or more input topics and sends output to zero or more output + topics. + + The computational logic can be specified either by using the TopologyBuilder + class to define the a DAG topology of Processors or by using the + KStreamBuilder class which provides the high-level KStream DSL to define + the transformation. + + The KafkaStreams class manages the lifecycle of a Kafka Streams instance. + One stream instance can contain one or more threads specified in the configs + for the processing work. + + A KafkaStreams instance can co-ordinate with any other instances with the + same application ID (whether in this same process, on other processes on + this machine, or on remote machines) as a single (possibly distributed) + stream processing client. These instances will divide up the work based on + the assignment of the input topic partitions so that all partitions are + being consumed. If instances are added or failed, all instances will + rebalance the partition assignment among themselves to balance processing + load. + + Internally the KafkaStreams instance contains a normal KafkaProducer and + KafkaConsumer instance that is used for reading input and writing output. 
+ + A simple example might look like this: + + builder = (KStreamBuilder().stream('my-input-topic') + .map_values(lambda value: str(len(value)) + .to('my-output-topic')) + + streams = KafkaStreams(builder, + application_id='my-stream-processing-application', + bootstrap_servers=['localhost:9092'], + key_serializer=json.dumps, + key_deserializer=json.loads, + value_serializer=json.dumps, + value_deserializer=json.loads) + streams.start() + """ + STREAM_CLIENT_ID_SEQUENCE = AtomicInteger(0) + METRICS_PREFIX = 'kafka.streams' + + DEFAULT_CONFIG = { + 'application_id': None, + 'bootstrap_servers': None, + 'num_stream_threads': 1, + } + + def __init__(self, builder, **configs): + """Construct the stream instance. + + Arguments: + builder (...): The processor topology builder specifying the computational logic + """ + self.config = copy.copy(self.DEFAULT_CONFIG) + for key in self.config: + if key in configs: + self.config[key] = configs.pop(key) + + # Only check for extra config keys in top-level class + log.warning('Unrecognized configs: %s', configs.keys()) + + self._state = CREATED + + # processId is expected to be unique across JVMs and to be used + # in userData of the subscription request to allow assignor be aware + # of the co-location of stream thread's consumers. It is for internal + # usage only and should not be exposed to users at all. + self.config['process_id'] = uuid.uuid4().hex + + # The application ID is a required config and hence should always have value + if 'application_id' not in self.config: + raise Errors.StreamsError('application_id is a required parameter') + + builder.set_application_id(self.config['application_id']) + + if 'client_id' not in self.config: + next_id = self.STREAM_CLIENT_ID_SEQUENCE.increment() + self.config['client_id'] = self.config['application_id'] + "-" + str(next_id) + + # reporters = self.config['metric_reporters'] + + #MetricConfig metricConfig = new MetricConfig().samples(config.getInt(StreamsConfig.METRICS_NUM_SAMPLES_CONFIG)) + # .timeWindow(config.getLong(StreamsConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), + # TimeUnit.MILLISECONDS); + + #self._metrics = new Metrics(metricConfig, reporters, time); + + self._threads = [StreamThread(builder, **self.config) + for _ in range(self.config['num_stream_threads'])] + + #synchronized + def start(self): + """Start the stream instance by starting all its threads. + + Raises: + IllegalStateException if process was already started + """ + log.debug('Starting Kafka Stream process') + + if self._state == CREATED: + for thread in self._threads: + thread.start() + + self._state = RUNNING + + log.info('Started Kafka Stream process') + elif self._state == RUNNING: + raise Errors.IllegalStateError('This process was already started.') + else: + raise Errors.IllegalStateError('Cannot restart after closing.') + + #synchronized + def close(self): + """Shutdown this stream instance. + + Signals all the threads to stop, and then waits for them to join. + + Raises: + IllegalStateException if process has not started yet + """ + log.debug('Stopping Kafka Stream process') + + if self._state == RUNNING: + # signal the threads to stop and wait + for thread in self._threads: + thread.close() + + for thread in self._threads: + thread.join() + + if self._state != STOPPED: + #metrics.close() + self._state = STOPPED + log.info('Stopped Kafka Stream process') + + def setUncaughtExceptionHandler(self, handler): + """Sets the handler invoked when a stream thread abruptly terminates + due to an uncaught exception. 
+ + Arguments: + handler: the object to use as this thread's uncaught exception handler. + If None then this thread has no explicit handler. + """ + for thread in self._threads: + thread.set_uncaught_exception_handler(handler) diff --git a/kafka/streams/processor/__init__.py b/kafka/streams/processor/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/kafka/streams/processor/__init__.py diff --git a/kafka/streams/processor/_partition_grouper.py b/kafka/streams/processor/_partition_grouper.py new file mode 100644 index 0000000..453438c --- /dev/null +++ b/kafka/streams/processor/_partition_grouper.py @@ -0,0 +1,62 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +""" + +""" + * Default implementation of the {@link PartitionGrouper} interface that groups partitions by the partition id. + * + * Join operations requires that topics of the joining entities are copartitoned, i.e., being partitioned by the same key and having the same + * number of partitions. Copartitioning is ensured by having the same number of partitions on + * joined topics, and by using the serialization and Producer's default partitioner. +""" +class DefaultPartitionGrouper(object): + + """ + * Generate tasks with the assigned topic partitions. 
* + * @param topicGroups group of topics that need to be joined together + * @param metadata metadata of the consuming cluster + * @return The map from generated task ids to the assigned partitions + """ + def partition_groups(self, topic_groups, metadata): + groups = {} + + for topic_group_id, topic_group in topic_groups.items(): + + max_num_partitions = self.max_num_partitions(metadata, topic_group) + + for partition_id in range(max_num_partitions): + group = set() + + for topic in topic_group: + if partition_id < len(metadata.partitions_for_topic(topic)): + group.add(TopicPartition(topic, partition_id)) + groups[TaskId(topic_group_id, partition_id)] = group + + return groups + + def max_num_partitions(self, metadata, topics): + max_num_partitions = 0 + for topic in topics: + partitions = metadata.partitions_for_topic(topic) + + if not partitions: + raise Errors.StreamsError("Topic not found during partition assignment: " + topic) + + num_partitions = len(partitions) + if num_partitions > max_num_partitions: + max_num_partitions = num_partitions + return max_num_partitions diff --git a/kafka/streams/processor/assignment/__init__.py b/kafka/streams/processor/assignment/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/kafka/streams/processor/assignment/__init__.py diff --git a/kafka/streams/processor/assignment/assignment_info.py b/kafka/streams/processor/assignment/assignment_info.py new file mode 100644 index 0000000..791aa19 --- /dev/null +++ b/kafka/streams/processor/assignment/assignment_info.py @@ -0,0 +1,75 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
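The default grouper above builds one task per partition id across the co-partitioned topics of a group. A minimal standalone sketch of that grouping logic, not importing the scratch module (ClusterStub and its partitions_for_topic method are stand-ins for the cluster metadata object the grouper expects):

```python
from collections import namedtuple

TopicPartition = namedtuple('TopicPartition', 'topic partition')
TaskId = namedtuple('TaskId', 'topic_group_id partition_id')


class ClusterStub(object):
    """Stand-in for cluster metadata: maps topic -> set of partition ids."""
    def __init__(self, topics):
        self._topics = topics

    def partitions_for_topic(self, topic):
        return self._topics.get(topic, set())


def default_partition_groups(topic_groups, metadata):
    """One task per partition id across the co-partitioned topics of each group."""
    groups = {}
    for topic_group_id, topic_group in topic_groups.items():
        max_partitions = max(len(metadata.partitions_for_topic(t)) for t in topic_group)
        for partition_id in range(max_partitions):
            group = set(TopicPartition(t, partition_id) for t in topic_group
                        if partition_id in metadata.partitions_for_topic(t))
            groups[TaskId(topic_group_id, partition_id)] = group
    return groups


metadata = ClusterStub({'orders': {0, 1, 2}, 'customers': {0, 1, 2}})
print(default_partition_groups({0: ['orders', 'customers']}, metadata))
# three tasks, each holding partition N of both topics
```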
+""" +from __future__ import absolute_import + +import json +import logging + +from kafka import TopicPartition +from kafka.streams.errors import TaskAssignmentError +from kafka.streams.processor.partition_group import TaskId + +log = logging.getLogger(__name__) + + +class AssignmentInfo(object): + CURRENT_VERSION = 1 + + def __init__(self, active_tasks, standby_tasks, version=None): + self.version = self.CURRENT_VERSION if version is None else version + self.active_tasks = active_tasks + self.standby_tasks = standby_tasks + + def encode(self): + + try: + if self.version == self.CURRENT_VERSION: + data = { + 'version': self.version, + 'active_tasks': [list(task) for task in self.active_tasks], + 'standby_tasks': [[list(task), [list(tp) for tp in partitions]] + for task, partitions in self.standby_tasks.items()] + } + return json.dumps(data).encode('utf-8') + + else: + raise TaskAssignmentError('Unable to encode assignment data: version=' + str(self.version)) + + except Exception as ex: + raise TaskAssignmentError('Failed to encode AssignmentInfo', ex) + + @classmethod + def decode(cls, data): + try: + decoded = json.loads(data.decode('utf-8')) + + if decoded['version'] == cls.CURRENT_VERSION: + decoded['active_tasks'] = [TaskId(*task) for task in decoded['active_tasks']] + decoded['standby_tasks'] = dict([ + (TaskId(*task), set([TopicPartition(*partition) for partition in partitions])) + for task, partitions in decoded['standby_tasks']]) + + return AssignmentInfo(decoded['active_tasks'], decoded['standby_tasks']) + + else: + raise TaskAssignmentError('Unknown assignment data version: ' + str(cls.version)) + except Exception as ex: + raise TaskAssignmentError('Failed to decode AssignmentInfo', ex) + + def __str__(self): + return "[version=%d, active_tasks=%d, standby_tasks=%d]" % ( + self.version, len(self.active_tasks), len(self.standby_tasks)) diff --git a/kafka/streams/processor/assignment/client_state.py b/kafka/streams/processor/assignment/client_state.py new file mode 100644 index 0000000..cec8d68 --- /dev/null +++ b/kafka/streams/processor/assignment/client_state.py @@ -0,0 +1,66 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+""" +from __future__ import absolute_import + +import copy + +class ClientState(object): + + COST_ACTIVE = 0.1 + COST_STANDBY = 0.2 + COST_LOAD = 0.5 + + def __init__(self, active_tasks=None, assigned_tasks=None, + prev_active_tasks=None, prev_assigned_tasks=None, + capacity=0.0): + self.active_tasks = active_tasks if active_tasks else set([]) + self.assigned_tasks = assigned_tasks if assigned_tasks else set([]) + self.prev_active_tasks = prev_active_tasks if prev_active_tasks else set([]) + self.prev_assigned_tasks = prev_assigned_tasks if prev_assigned_tasks else set([]) + self.capacity = capacity + self.cost = 0.0 + + def copy(self): + return ClientState(copy.deepcopy(self.active_tasks), + copy.deepcopy(self.assigned_tasks), + copy.deepcopy(self.prev_active_tasks), + copy.deepcopy(self.prev_assigned_tasks), + self.capacity) + + def assign(self, task_id, active): + if active: + self.active_tasks.add(task_id) + + self.assigned_tasks.add(task_id) + + cost = self.COST_LOAD + try: + self.prev_assigned_tasks.remove(task_id) + cost = self.COST_STANDBY + except KeyError: + pass + try: + self.prev_active_tasks.remove(task_id) + cost = self.COST_ACTIVE + except KeyError: + pass + + self.cost += cost + + def __str__(self): + return "[active_tasks: (%s) assigned_tasks: (%s) prev_active_tasks: (%s) prev_assigned_tasks: (%s) capacity: (%s) cost: (%s)]" % ( + self.active_tasks, self.assigned_tasks, self.prev_active_tasks, self.prev_assigned_tasks, self.capacity, self.cost) diff --git a/kafka/streams/processor/assignment/subscription_info.py b/kafka/streams/processor/assignment/subscription_info.py new file mode 100644 index 0000000..c7fce14 --- /dev/null +++ b/kafka/streams/processor/assignment/subscription_info.py @@ -0,0 +1,61 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+""" +from __future__ import absolute_import + +import json +import logging + +from kafka.streams.errors import TaskAssignmentError + +log = logging.getLogger(__name__) + + +class SubscriptionInfo(object): + CURRENT_VERSION = 1 + + def __init__(self, process_id, prev_tasks, standby_tasks, version=None): + self.version = self.CURRENT_VERSION if version is None else version + self.process_id = process_id + self.prev_tasks = prev_tasks + self.standby_tasks = standby_tasks + + def encode(self): + if self.version == self.CURRENT_VERSION: + data = { + 'version': self.version, + 'process_id': self.process_id, + 'prev_tasks': list(self.prev_tasks), + 'standby_tasks': list(self.standby_tasks) + } + return json.dumps(data).encode('utf-8') + + else: + raise TaskAssignmentError('unable to encode subscription data: version=' + str(self.version)) + + @classmethod + def decode(cls, data): + try: + decoded = json.loads(data.decode('utf-8')) + if decoded['version'] != cls.CURRENT_VERSION: + raise TaskAssignmentError('unable to decode subscription data: version=' + str(cls.version)) + + decoded['prev_tasks'] = set(decoded['prev_tasks']) + decoded['standby_tasks'] = set(decoded['standby_tasks']) + return cls(decoded['process_id'], decoded['prev_tasks'], decoded['standby_tasks'], decoded['version']) + + except Exception as e: + raise TaskAssignmentError('unable to decode subscription data', e) diff --git a/kafka/streams/processor/assignment/task_assignor.py b/kafka/streams/processor/assignment/task_assignor.py new file mode 100644 index 0000000..8cb6c4e --- /dev/null +++ b/kafka/streams/processor/assignment/task_assignor.py @@ -0,0 +1,166 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+""" +from __future__ import absolute_import + +import logging +import random + +from kafka.streams.errors import TaskAssignmentError +from kafka.streams.processor.assignment.client_state import ClientState + +log = logging.getLogger(__name__) + + +class TaskAssignor(object): + + @classmethod + def assign(cls, states, tasks, num_standby_replicas): + assignor = TaskAssignor(states, tasks) + log.info('Assigning tasks to clients: %s, prev_assignment_balanced: %s,' + ' prev_clients_unchangeed: %s, tasks: %s, replicas: %s', + states, assignor.prev_assignment_balanced, + assignor.prev_clients_unchanged, tasks, num_standby_replicas) + + assignor.assign_tasks() + if num_standby_replicas > 0: + assignor.assign_standby_tasks(num_standby_replicas) + + log.info('Assigned with: %s', assignor.states) + return assignor.states + + def __init__(self, states, tasks): + self.states = {} + self.task_pairs = set() + self.max_num_task_pairs = None + self.tasks = [] + self.prev_assignment_balanced = True + self.prev_clients_unchanged = True + + avg_num_tasks = len(tasks) // len(states) + existing_tasks = set() + for client, state in states.items(): + self.states[client] = state.copy() + old_tasks = state.prev_assigned_tasks + # make sure the previous assignment is balanced + self.prev_assignment_balanced &= len(old_tasks) < (2 * avg_num_tasks) + self.prev_assignment_balanced &= len(old_tasks) > (avg_num_tasks / 2) + + for task in old_tasks: + # Make sure there is no duplicates + self.prev_clients_unchanged &= task not in existing_tasks + existing_tasks.update(old_tasks) + + # Make sure the existing assignment didn't miss out any task + self.prev_clients_unchanged &= existing_tasks == tasks + + self.tasks = list(tasks) + + num_tasks = len(tasks) + self.max_num_task_pairs = num_tasks * (num_tasks - 1) / 2 + #self.taskPairs = set(range(self.max_num_task_pairs)) # XXX + + def assign_standby_tasks(self, num_standby_replicas): + num_replicas = min(num_standby_replicas, len(self.states) - 1) + for _ in range(num_replicas): + self.assign_tasks(active=False) + + def assign_tasks(self, active=True): + random.shuffle(self.tasks) + + for task in self.tasks: + state = self.find_client_for(task) + + if state: + state.assign(task, active) + else: + raise TaskAssignmentError('failed to find an assignable client') + + def find_client_for(self, task): + check_task_pairs = len(self.task_pairs) < self.max_num_task_pairs + + state = self.find_client_by_addition_cost(task, check_task_pairs) + + if state is None and check_task_pairs: + state = self.find_client_by_addition_cost(task, False) + + if state: + self.add_task_pairs(task, state) + + return state + + def find_client_by_addition_cost(self, task, check_task_pairs): + candidate = None + candidate_addition_cost = 0.0 + + for state in self.states.values(): + if (self.prev_assignment_balanced and self.prev_clients_unchanged + and task in state.prev_assigned_tasks): + return state; + if task not in state.assigned_tasks: + # if check_task_pairs is True, skip this client if this task doesn't introduce a new task combination + if (check_task_pairs and state.assigned_tasks + and not self.has_new_task_pair(task, state)): + continue + addition_cost = self.compute_addition_cost(task, state) + if (candidate is None or + (addition_cost < candidate_addition_cost or + (addition_cost == candidate_addition_cost + and state.cost < candidate.cost))): + candidate = state + candidate_addition_cost = addition_cost + return candidate + + def add_task_pairs(self, task, state): + for other in 
state.assigned_tasks: + self.task_pairs.add(self.pair(task, other)) + + def has_new_task_pair(self, task, state): + for other in state.assigned_tasks: + if self.pair(task, other) not in self.task_pairs: + return True + return False + + def compute_addition_cost(self, task, state): + cost = len(state.assignedTasks) // state.capacity + + if task in state.prev_assigned_tasks: + if task in state.prev_active_tasks: + cost += ClientState.COST_ACTIVE + else: + cost += ClientState.COST_STANDBY + else: + cost += ClientState.COST_LOAD + return cost + + def pair(self, task1, task2): + if task1 < task2: + return self.TaskPair(task1, task2) + else: + return self.TaskPair(task2, task1) + + class TaskPair(object): + def __init__(self, task1, task2): + self.task1 = task1 + self.task2 = task2 + + def __hash__(self): + return hash(self.task1) ^ hash(self.task2) + + def __eq__(self, other): + if isinstance(other, type(self)): + return self.task1 == other.task1 and self.task2 == other.task2 + return False diff --git a/kafka/streams/processor/context.py b/kafka/streams/processor/context.py new file mode 100644 index 0000000..14c9bf2 --- /dev/null +++ b/kafka/streams/processor/context.py @@ -0,0 +1,101 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+""" + +import kafka.errors as Errors + +NONEXIST_TOPIC = '__null_topic__' + + +class ProcessorContext(object): + + def __init__(self, task_id, task, record_collector, state_mgr, **config): + self.config = config + self._task = task + self.record_collector = record_collector + self.task_id = task_id + self.state_mgr = state_mgr + + #self.metrics = metrics + self.key_serializer = config['key_serializer'] + self.value_serializer = config['value_serializer'] + self.key_deserializer = config['key_deserializer'] + self.value_deserializer = config['value_deserializer'] + self._initialized = False + + def initialized(self): + self._initialized = True + + @property + def application_id(self): + return self._task.application_id + + def state_dir(self): + return self.state_mgr.base_dir() + + def register(self, state_store, logging_enabled, state_restore_callback): + if self._initialized: + raise Errors.IllegalStateError('Can only create state stores during initialization.') + + self.state_mgr.register(state_store, logging_enabled, state_restore_callback) + + def get_state_store(self, name): + """ + Raises TopologyBuilderError if an attempt is made to access this state store from an unknown node + """ + node = self._task.node() + if not node: + raise Errors.TopologyBuilderError('Accessing from an unknown node') + + # TODO: restore this once we fix the ValueGetter initialization issue + #if (!node.stateStores.contains(name)) + # throw new TopologyBuilderException("Processor " + node.name() + " has no access to StateStore " + name); + + return self.state_mgr.get_store(name) + + def topic(self): + if self._task.record() is None: + raise Errors.IllegalStateError('This should not happen as topic() should only be called while a record is processed') + + topic = self._task.record().topic() + if topic == NONEXIST_TOPIC: + return None + else: + return topic + + def partition(self): + if self._task.record() is None: + raise Errors.IllegalStateError('This should not happen as partition() should only be called while a record is processed') + return self._task.record().partition() + + def offset(self): + if self._task.record() is None: + raise Errors.IllegalStateError('This should not happen as offset() should only be called while a record is processed') + return self._task.record().offset() + + def timestamp(self): + if self._task.record() is None: + raise Errors.IllegalStateError('This should not happen as timestamp() should only be called while a record is processed') + return self._task.record().timestamp + + def forward(self, key, value, child_index=None, child_name=None): + self._task.forward(key, value, child_index=child_index, child_name=child_name) + + def commit(self): + self._task.need_commit() + + def schedule(self, interval): + self._task.schedule(interval) diff --git a/kafka/streams/processor/internal_topic_manager.py b/kafka/streams/processor/internal_topic_manager.py new file mode 100644 index 0000000..6d2aa4b --- /dev/null +++ b/kafka/streams/processor/internal_topic_manager.py @@ -0,0 +1,188 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +""" +from __future__ import absolute_import + +import copy +import json +import logging + +""" +import org.I0Itec.zkclient.ZkClient +import org.I0Itec.zkclient.exception.ZkNoNodeException +import org.I0Itec.zkclient.exception.ZkNodeExistsException +import org.I0Itec.zkclient.serialize.ZkSerializer +import org.apache.zookeeper.ZooDefs; +""" + +from kafka.streams.errors import StreamsError + +log = logging.getLogger(__name__) + + +class InternalTopicManager(object): + + # TODO: the following ZK dependency should be removed after KIP-4 + ZK_TOPIC_PATH = '/brokers/topics' + ZK_BROKER_PATH = '/brokers/ids' + ZK_DELETE_TOPIC_PATH = '/admin/delete_topics' + ZK_ENTITY_CONFIG_PATH = '/config/topics' + # TODO: the following LogConfig dependency should be removed after KIP-4 + CLEANUP_POLICY_PROP = 'cleanup.policy' + COMPACT = 'compact' + ZK_ENCODING = 'utf-8' + + def __init__(self, zk_connect=None, replication_factor=0): + if zk_connect: + #self.zk_client = ZkClient(zk_connect, 30 * 1000, 30 * 1000, self.ZK_ENCODING) + self.zk_client = None + else: + self.zk_client = None + self.replication_factor = replication_factor + + def make_ready(self, topic, num_partitions, compact_topic): + topic_not_ready = True + + while topic_not_ready: + topic_metadata = self.get_topic_metadata(topic) + + if not topic_metadata: + try: + self.create_topic(topic, num_partitions, self.replication_factor, compact_topic) + except Exception: #ZkNodeExistsError: + # ignore and continue + pass + else: + if len(topic_metadata) > num_partitions: + # else if topic exists with more #.partitions than needed, delete in order to re-create it + try: + self.delete_topic(topic) + except Exception: #ZkNodeExistsError: + # ignore and continue + pass + elif len(topic_metadata) < num_partitions: + # else if topic exists with less #.partitions than needed, add partitions + try: + self.add_partitions(topic, num_partitions - len(topic_metadata), self.replication_factor, topic_metadata) + except Exception: #ZkNoNodeError: + # ignore and continue + pass + else: + topic_not_ready = False + + def get_brokers(self): + brokers = [] + for broker in self.zk_client.get_children(self.ZK_BROKER_PATH): + brokers.append(int(broker)) + brokers.sort() + log.debug("Read brokers %s from ZK in partition assignor.", brokers) + return brokers + + def get_topic_metadata(self, topic): + data = self.zk_client.read_data(self.ZK_TOPIC_PATH + "/" + topic, True) + + if data is None: + return None + + try: + partitions = json.loads(data).get('partitions') + log.debug("Read partitions %s for topic %s from ZK in partition assignor.", partitions, topic) + return partitions + except Exception as e: + raise StreamsError("Error while reading topic metadata from ZK for internal topic " + topic, e) + + def create_topic(self, topic, num_partitions, replication_factor, compact_topic): + log.debug("Creating topic %s with %s partitions from ZK in partition assignor.", topic, num_partitions) + brokers = self.get_brokers() + num_brokers = len(brokers) + if num_brokers < replication_factor: + log.warn("Not enough brokers found. 
The replication factor is reduced from " + replication_factor + " to " + num_brokers) + replication_factor = num_brokers + + assignment = {} + + for i in range(num_partitions): + broker_list = [] + for r in range(replication_factor): + shift = r * num_brokers / replication_factor + broker_list.append(brokers[(i + shift) % num_brokers]) + assignment[i] = broker_list + + # write out config first just like in AdminUtils.scala createOrUpdateTopicPartitionAssignmentPathInZK() + if compact_topic: + try: + data_map = { + 'version': 1, + 'config': { + 'cleanup.policy': 'compact' + } + } + data = json.dumps(data_map) + #zk_client.create_persistent(self.ZK_ENTITY_CONFIG_PATH + "/" + topic, data, ZooDefs.Ids.OPEN_ACL_UNSAFE) + except Exception as e: + raise StreamsError('Error while creating topic config in ZK for internal topic ' + topic, e) + + # try to write to ZK with open ACL + try: + data_map = { + 'version': 1, + 'partitions': assignment + } + data = json.dumps(data_map) + + #zk_client.create_persistent(self.ZK_TOPIC_PATH + "/" + topic, data, ZooDefs.Ids.OPEN_ACL_UNSAFE) + except Exception as e: + raise StreamsError('Error while creating topic metadata in ZK for internal topic ' + topic, e) + + def delete_topic(self, topic): + log.debug('Deleting topic %s from ZK in partition assignor.', topic) + + #zk_client.create_persistent(self.ZK_DELETE_TOPIC_PATH + "/" + topic, "", ZooDefs.Ids.OPEN_ACL_UNSAFE) + + def add_partitions(self, topic, num_partitions, replication_factor, existing_assignment): + log.debug('Adding %s partitions topic %s from ZK with existing' + ' partitions assigned as %s in partition assignor.', + topic, num_partitions, existing_assignment) + + brokers = self.get_brokers() + num_brokers = len(brokers) + if (num_brokers < replication_factor): + log.warning('Not enough brokers found. The replication factor is' + ' reduced from %s to %s', replication_factor, num_brokers) + replication_factor = num_brokers + + start_index = len(existing_assignment) + + new_assignment = copy.deepcopy(existing_assignment) + + for i in range(num_partitions): + broker_list = [] + for r in range(replication_factor): + shift = r * num_brokers / replication_factor + broker_list.append(brokers[(i + shift) % num_brokers]) + new_assignment[i + start_index] = broker_list + + # try to write to ZK with open ACL + try: + data_map = { + 'version': 1, + 'partitions': new_assignment + } + data = json.dumps(data_map) + + #zk_client.write_data(ZK_TOPIC_PATH + "/" + topic, data) + except Exception as e: + raise StreamsError('Error while updating topic metadata in ZK for internal topic ' + topic, e) diff --git a/kafka/streams/processor/node.py b/kafka/streams/processor/node.py new file mode 100644 index 0000000..90b0a6a --- /dev/null +++ b/kafka/streams/processor/node.py @@ -0,0 +1,132 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
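Both create_topic and add_partitions place replicas with the same shifted round-robin over the sorted broker ids. A standalone sketch of that placement (integer division keeps the broker index an int; the broker ids and counts below are made up):

```python
def replica_assignment(num_partitions, brokers, replication_factor):
    """Shifted round-robin replica placement over the sorted broker ids."""
    brokers = sorted(brokers)
    num_brokers = len(brokers)
    replication_factor = min(replication_factor, num_brokers)
    assignment = {}
    for i in range(num_partitions):
        replicas = []
        for r in range(replication_factor):
            shift = r * num_brokers // replication_factor  # keep the index an int
            replicas.append(brokers[(i + shift) % num_brokers])
        assignment[i] = replicas
    return assignment


print(replica_assignment(num_partitions=4, brokers=[1003, 1001, 1002], replication_factor=2))
# {0: [1001, 1002], 1: [1002, 1003], 2: [1003, 1001], 3: [1001, 1002]}
```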
+ * See the License for the specific language governing permissions and + * limitations under the License. +""" +from __future__ import absolute_import + +import kafka.errors as Errors +from .processor import Processor + + +class ProcessorNode(object): + + def __init__(self, name, processor=None, state_stores=None): + self.name = name + + # Could we construct a Processor here if the processor is just a function? + assert isinstance(processor, Processor), 'processor must subclass Processor' + + self.processor = processor + self.children = [] + self.state_stores = state_stores + + def add_child(self, child): + self.children.append(child) + + def init(self, context): + self.processor.init(context) + + def process(self, key, value): + self.processor.process(key, value) + + def close(self): + self.processor.close() + + +class SourceNode(ProcessorNode): + + def __init__(self, name, key_deserializer, val_deserializer): + super(SourceNode, self).__init__(name) + + self.key_deserializer = key_deserializer + self.val_deserializer = val_deserializer + self.context = None + + def deserialize_key(self, topic, data): + if self.key_deserializer is None: + return data + return self.key_deserializer.deserialize(topic, data) + + def deserialize_value(self, topic, data): + if self.value_deserializer is None: + return data + return self.val_deserializer.deserialize(topic, data) + + def init(self, context): + self.context = context + + # if deserializers are null, get the default ones from the context + if self.key_deserializer is None: + self.key_deserializer = self.context.key_deserializer + if self.val_deserializer is None: + self.val_deserializer = self.context.value_deserializer + + """ + // if value deserializers are for {@code Change} values, set the inner deserializer when necessary + if (this.valDeserializer instanceof ChangedDeserializer && + ((ChangedDeserializer) this.valDeserializer).inner() == null) + ((ChangedDeserializer) this.valDeserializer).setInner(context.valueSerde().deserializer()); + """ + + def process(self, key, value): + self.context.forward(key, value) + + def close(self): + # do nothing + pass + + +class SinkNode(ProcessorNode): + + def __init__(self, name, topic, key_serializer, val_serializer, partitioner): + super(SinkNode, self).__init__(name) + + self.topic = topic + self.key_serializer = key_serializer + self.val_serializer = val_serializer + self.partitioner = partitioner + self.context = None + + def add_child(self, child): + raise Errors.UnsupportedOperationError("sink node does not allow addChild") + + def init(self, context): + self.context = context + + # if serializers are null, get the default ones from the context + if self.key_serializer is None: + self.key_serializer = self.context.key_serializer + if self.val_serializer is None: + self.val_serializer = self.context.value_serializer + + """ + // if value serializers are for {@code Change} values, set the inner serializer when necessary + if (this.valSerializer instanceof ChangedSerializer && + ((ChangedSerializer) this.valSerializer).inner() == null) + ((ChangedSerializer) this.valSerializer).setInner(context.valueSerde().serializer()); + """ + + def process(self, key, value): + # send to all the registered topics + collector = self.context.record_collector + collector.send(self.topic, key=key, value=value, + timestamp_ms=self.context.timestamp(), + key_serializer=self.key_serializer, + val_serializer=self.val_serializer, + partitioner=self.partitioner) + + def close(self): + # do nothing + pass diff --git 
a/kafka/streams/processor/partition_group.py b/kafka/streams/processor/partition_group.py new file mode 100644 index 0000000..6d5ea3f --- /dev/null +++ b/kafka/streams/processor/partition_group.py @@ -0,0 +1,158 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +""" +from __future__ import absolute_import + +import collections +import heapq + +import kafka.errors as Errors +from kafka.structs import TopicPartition +from .record_queue import RecordQueue + + +TaskId = collections.namedtuple('TaskId', 'topic_group_id partition_id') + + +class RecordInfo(object): + def __init__(self): + self.queue = RecordQueue() + + def node(self): + return self.queue.source() + + def partition(self): + return self.queue.partition() + + def queue(self): + return self.queue + + +class PartitionGroup(object): + """A PartitionGroup is composed from a set of partitions. It also maintains + the timestamp of this group, hence the associated task as the min timestamp + across all partitions in the group. + """ + def __init__(self, partition_queues, timestamp_extractor): + self._queues_by_time = [] # heapq + self._partition_queues = partition_queues + self._timestamp_extractor = timestamp_extractor + self._total_buffered = 0 + + def next_record(self, record_info): + """Get the next record and queue + + Returns: (timestamp, ConsumerRecord) + """ + record = None + + if self._queues_by_time: + _, queue = heapq.heappop(self._queues_by_time) + + # get the first record from this queue. 
+ record = queue.poll() + + if queue: + heapq.heappush(self._queues_by_time, (queue.timestamp(), queue)) + + record_info.queue = queue + + if record: + self._total_buffered -= 1 + + return record + + def add_raw_records(self, partition, raw_records): + """Adds raw records to this partition group + + Arguments: + partition (TopicPartition): the partition + raw_records (list of ConsumerRecord): the raw records + + Returns: the queue size for the partition + """ + record_queue = self._partition_queues[partition] + + old_size = record_queue.size() + new_size = record_queue.add_raw_records(raw_records, self._timestamp_extractor) + + # add this record queue to be considered for processing in the future + # if it was empty before + if old_size == 0 and new_size > 0: + heapq.heappush(self._queues_by_time, (record_queue.timestamp(), record_queue)) + + self._total_buffered += new_size - old_size + + return new_size + + def partitions(self): + return set(self._partition_queues.keys()) + + def timestamp(self): + """Return the timestamp of this partition group + as the smallest partition timestamp among all its partitions + """ + # we should always return the smallest timestamp of all partitions + # to avoid group partition time goes backward + timestamp = float('inf') + for queue in self._partition_queues.values(): + if timestamp > queue.timestamp(): + timestamp = queue.timestamp() + return timestamp + + def num_buffered(self, partition=None): + if partition is None: + return self._total_buffered + record_queue = self._partition_queues.get(partition) + if not record_queue: + raise Errors.IllegalStateError('Record partition does not belong to this partition-group.') + return record_queue.size() + + def top_queue_size(self): + if not self._queues_by_time: + return 0 + return self._queues_by_time[0].size() + + def close(self): + self._queues_by_time = [] + self._partition_queues.clear() + + +def partition_grouper(topic_groups, metadata): + """Assign partitions to task/topic groups + + Arguments: + topic_groups ({topic_group_id: [topics]}) + metadata (kafka.Cluster) + + Returns: {TaskId: set([TopicPartition])} + """ + groups = {} + for topic_group_id, topic_group in topic_groups.items(): + + partitions = set() + for topic in topic_group: + partitions.update(metadata.partitions_for_topic(topic)) + + for partition_id in partitions: + group = set() + + for topic in topic_group: + if partition_id in metadata.partitions_for_topic(topic): + group.add(TopicPartition(topic, partition_id)) + groups[TaskId(topic_group_id, partition_id)] = group + + return groups diff --git a/kafka/streams/processor/processor.py b/kafka/streams/processor/processor.py new file mode 100644 index 0000000..115230e --- /dev/null +++ b/kafka/streams/processor/processor.py @@ -0,0 +1,80 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
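The heap keyed on each queue's head timestamp is what defines the task's stream time: next_record always drains the partition whose oldest buffered record has the smallest timestamp. A standalone sketch of that ordering, with plain tuples standing in for RecordQueue instances:

```python
import heapq

# each entry: (timestamp of the queue head, partition, buffered (timestamp, value) records)
queues = [
    (15, 'topic-0', [(15, 'a'), (20, 'b')]),
    (12, 'topic-1', [(12, 'x'), (30, 'y')]),
]
heapq.heapify(queues)

order = []
while queues:
    _, partition, records = heapq.heappop(queues)
    record = records.pop(0)
    order.append((partition, record))
    if records:
        # re-insert keyed on the new head timestamp, as PartitionGroup does
        heapq.heappush(queues, (records[0][0], partition, records))

print(order)
# [('topic-1', (12, 'x')), ('topic-0', (15, 'a')), ('topic-0', (20, 'b')), ('topic-1', (30, 'y'))]
```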
+ * See the License for the specific language governing permissions and + * limitations under the License. +""" +from __future__ import absolute_import + +import abc + + +class Processor(object): + """A processor of key-value pair records.""" + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def init(self, context): + """Initialize this processor with the given context. + + The framework ensures this is called once per processor when the + topology that contains it is initialized. + + If this processor is to be called periodically by the framework, + (via punctuate) then this method should schedule itself with the + provided context. + + Arguments: + context (ProcessorContext): the context; may not be None + + Returns: None + """ + pass + + @abc.abstractmethod + def process(self, key, value): + """Process the record with the given key and value. + + Arguments: + key: the key for the record after deserialization + value: the value for the record after deserialization + + Returns: None + """ + pass + + @abc.abstractmethod + def punctuate(self, timestamp): + """Perform any periodic operations + + Requires that the processor scheduled itself with the context during + initialization + + Arguments: + timestamp (int): stream time in ms when this method is called + + Returns: None + """ + pass + + @abc.abstractmethod + def close(self): + """Close this processor and clean up any resources. + + Be aware that close() is called after an internal cleanup. + Thus, it is not possible to write anything to Kafka as underlying + clients are already closed. + + Returns: None + """ + pass diff --git a/kafka/streams/processor/processor_state_manager.py b/kafka/streams/processor/processor_state_manager.py new file mode 100644 index 0000000..0905c21 --- /dev/null +++ b/kafka/streams/processor/processor_state_manager.py @@ -0,0 +1,306 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
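A concrete processor implements the four hooks above and talks to the topology only through its context. A minimal sketch (assuming this commit's module layout) of a processor that counts keys and forwards the counts downstream whenever it is punctuated, using the schedule, forward and commit methods defined in context.py:

```python
from collections import defaultdict

from kafka.streams.processor.processor import Processor  # path from this commit


class KeyCountProcessor(Processor):
    """Counts records per key; forwards (key, count) downstream on each punctuation."""

    def init(self, context):
        self.context = context
        self.counts = defaultdict(int)
        self.context.schedule(1000)  # request punctuate() every 1000 ms of stream time

    def process(self, key, value):
        self.counts[key] += 1

    def punctuate(self, timestamp):
        for key, count in self.counts.items():
            self.context.forward(key, count)
        self.context.commit()

    def close(self):
        self.counts.clear()
```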
+""" +from __future__ import absolute_import + +import logging +import time + +import kafka.errors as Errors + +log = logging.getLogger(__name__) + +STATE_CHANGELOG_TOPIC_SUFFIX = "-changelog" +CHECKPOINT_FILE_NAME = ".checkpoint" + + +class ProcessorStateManager(object): + + def __init__(self, application_id, task_id, sources, restore_consumer, is_standby, state_directory): + + self.application_id = application_id + self.default_partition = task_id.partition + self.task_id = task_id + self.state_directory = state_directory + self.partition_for_topic = {} + for source in sources: + self.partition_for_topic[source.topic] = source + + self.stores = {} + self.logging_enabled = set() + self.restore_consumer = restore_consumer + self.restored_offsets = {} + self.is_standby = is_standby + if is_standby: + self.restore_callbacks = {} + else: + self.restore_callbacks = None + self.offset_limits = {} + self.base_dir = state_directory.directory_for_task(task_id) + + if not state_directory.lock(task_id, 5): + raise IOError("Failed to lock the state directory: " + self.base_dir) + + """ + # load the checkpoint information + checkpoint = OffsetCheckpoint(self.base_dir, CHECKPOINT_FILE_NAME) + self.checkpointed_offsets = checkpoint.read() + + # delete the checkpoint file after finish loading its stored offsets + checkpoint.delete() + """ + + + def store_changelog_topic(self, application_id, store_name): + return application_id + "-" + store_name + STATE_CHANGELOG_TOPIC_SUFFIX + + def base_dir(self): + return self.base_dir + + def register(self, store, logging_enabled, state_restore_callback): + """ + * @throws IllegalArgumentException if the store name has already been registered or if it is not a valid name + * (e.g., when it conflicts with the names of internal topics, like the checkpoint file name) + * @throws StreamsException if the store's change log does not contain the partition + """ + if (store.name == CHECKPOINT_FILE_NAME): + raise Errors.IllegalArgumentError("Illegal store name: " + CHECKPOINT_FILE_NAME) + + if store.name in self.stores: + raise Errors.IllegalArgumentError("Store " + store.name + " has already been registered.") + + if logging_enabled: + self.logging_enabled.add(store.name) + + # check that the underlying change log topic exist or not + if logging_enabled: + topic = self.store_changelog_topic(self.application_id, store.name) + else: + topic = store.name + + # block until the partition is ready for this state changelog topic or time has elapsed + partition = self.get_partition(topic) + partition_not_found = True + start_time = time.time() * 1000 + wait_time = 5000 # hard-code the value since we should not block after KIP-4 + + while True: + try: + time.sleep(50) + except KeyboardInterrupt: + # ignore + pass + + partitions = self.restore_consumer.partitions_for_topic(topic) + if partitions is None: + raise Errors.StreamsError("Could not find partition info for topic: " + topic) + + if partition in partitions: + partition_not_found = False + break + + if partition_not_found and (time.time() * 1000) < (start_time + wait_time): + continue + break + + if partition_not_found: + raise Errors.StreamsError("Store " + store.name + "'s change log (" + topic + ") does not contain partition " + partition) + + self.stores[store.name] = store + + if self.is_standby: + if store.persistent(): + self.restore_callbacks[topic] = state_restore_callback + else: + self.restore_active_state(topic, state_restore_callback) + + """ + def restore_active_state(self, topic_name, state_restore_callback): + 
# ---- try to restore the state from change-log ---- // + + # subscribe to the store's partition + if (!restoreConsumer.subscription().isEmpty()) { + throw new IllegalStateException("Restore consumer should have not subscribed to any partitions beforehand"); + } + TopicPartition storePartition = new TopicPartition(topicName, getPartition(topicName)); + restoreConsumer.assign(Collections.singletonList(storePartition)); + + try { + // calculate the end offset of the partition + // TODO: this is a bit hacky to first seek then position to get the end offset + restoreConsumer.seekToEnd(singleton(storePartition)); + long endOffset = restoreConsumer.position(storePartition); + + // restore from the checkpointed offset of the change log if it is persistent and the offset exists; + // restore the state from the beginning of the change log otherwise + if (checkpointedOffsets.containsKey(storePartition)) { + restoreConsumer.seek(storePartition, checkpointedOffsets.get(storePartition)); + } else { + restoreConsumer.seekToBeginning(singleton(storePartition)); + } + + // restore its state from changelog records + long limit = offsetLimit(storePartition); + while (true) { + long offset = 0L; + for (ConsumerRecord<byte[], byte[]> record : restoreConsumer.poll(100).records(storePartition)) { + offset = record.offset(); + if (offset >= limit) break; + stateRestoreCallback.restore(record.key(), record.value()); + } + + if (offset >= limit) { + break; + } else if (restoreConsumer.position(storePartition) == endOffset) { + break; + } else if (restoreConsumer.position(storePartition) > endOffset) { + // For a logging enabled changelog (no offset limit), + // the log end offset should not change while restoring since it is only written by this thread. + throw new IllegalStateException("Log end offset should not change while restoring"); + } + } + + // record the restored offset for its change log partition + long newOffset = Math.min(limit, restoreConsumer.position(storePartition)); + restoredOffsets.put(storePartition, newOffset); + } finally { + // un-assign the change log partition + restoreConsumer.assign(Collections.<TopicPartition>emptyList()); + } + } + + public Map<TopicPartition, Long> checkpointedOffsets() { + Map<TopicPartition, Long> partitionsAndOffsets = new HashMap<>(); + + for (Map.Entry<String, StateRestoreCallback> entry : restoreCallbacks.entrySet()) { + String topicName = entry.getKey(); + int partition = getPartition(topicName); + TopicPartition storePartition = new TopicPartition(topicName, partition); + + if (checkpointedOffsets.containsKey(storePartition)) { + partitionsAndOffsets.put(storePartition, checkpointedOffsets.get(storePartition)); + } else { + partitionsAndOffsets.put(storePartition, -1L); + } + } + return partitionsAndOffsets; + } + + public List<ConsumerRecord<byte[], byte[]>> updateStandbyStates(TopicPartition storePartition, List<ConsumerRecord<byte[], byte[]>> records) { + long limit = offsetLimit(storePartition); + List<ConsumerRecord<byte[], byte[]>> remainingRecords = null; + + // restore states from changelog records + + StateRestoreCallback restoreCallback = restoreCallbacks.get(storePartition.topic()); + + long lastOffset = -1L; + int count = 0; + for (ConsumerRecord<byte[], byte[]> record : records) { + if (record.offset() < limit) { + restoreCallback.restore(record.key(), record.value()); + lastOffset = record.offset(); + } else { + if (remainingRecords == null) + remainingRecords = new ArrayList<>(records.size() - count); + + remainingRecords.add(record); + } + 
count++; + } + // record the restored offset for its change log partition + restoredOffsets.put(storePartition, lastOffset + 1); + + return remainingRecords; + } + + public void putOffsetLimit(TopicPartition partition, long limit) { + offsetLimits.put(partition, limit); + } + + private long offsetLimit(TopicPartition partition) { + Long limit = offsetLimits.get(partition); + return limit != null ? limit : Long.MAX_VALUE; + } + + public StateStore getStore(String name) { + return stores.get(name); + } + + public void flush() { + if (!this.stores.isEmpty()) { + log.debug("Flushing stores."); + for (StateStore store : this.stores.values()) + store.flush(); + } + } + + /** + * @throws IOException if any error happens when flushing or closing the state stores + */ + public void close(Map<TopicPartition, Long> ackedOffsets) throws IOException { + try { + // attempting to flush and close the stores, just in case they + // are not closed by a ProcessorNode yet + if (!stores.isEmpty()) { + log.debug("Closing stores."); + for (Map.Entry<String, StateStore> entry : stores.entrySet()) { + log.debug("Closing storage engine {}", entry.getKey()); + entry.getValue().flush(); + entry.getValue().close(); + } + + Map<TopicPartition, Long> checkpointOffsets = new HashMap<>(); + for (String storeName : stores.keySet()) { + TopicPartition part; + if (loggingEnabled.contains(storeName)) + part = new TopicPartition(storeChangelogTopic(applicationId, storeName), getPartition(storeName)); + else + part = new TopicPartition(storeName, getPartition(storeName)); + + // only checkpoint the offset to the offsets file if it is persistent; + if (stores.get(storeName).persistent()) { + Long offset = ackedOffsets.get(part); + + if (offset != null) { + // store the last offset + 1 (the log position after restoration) + checkpointOffsets.put(part, offset + 1); + } else { + // if no record was produced. we need to check the restored offset. + offset = restoredOffsets.get(part); + if (offset != null) + checkpointOffsets.put(part, offset); + } + } + } + + // write the checkpoint file before closing, to indicate clean shutdown + OffsetCheckpoint checkpoint = new OffsetCheckpoint(new File(this.baseDir, CHECKPOINT_FILE_NAME)); + checkpoint.write(checkpointOffsets); + } + } finally { + // release the state directory directoryLock + stateDirectory.unlock(taskId); + } + } + + private int getPartition(String topic) { + TopicPartition partition = partitionForTopic.get(topic); + + return partition == null ? 
defaultPartition : partition.partition(); + } +} + """ diff --git a/kafka/streams/processor/punctuation.py b/kafka/streams/processor/punctuation.py new file mode 100644 index 0000000..cedd64c --- /dev/null +++ b/kafka/streams/processor/punctuation.py @@ -0,0 +1,32 @@ +from __future__ import absolute_import + +import heapq +import threading + + +class PunctuationQueue(object): + + def __init__(self): + self._pq = [] + self._lock = threading.Lock() + + def schedule(self, sched): + with self._lock: + heapq.heappush(self._pq, sched) + + def close(self): + with self._lock: + self._pq = [] + + def may_punctuate(self, timestamp, punctuator): + with self._lock: + punctuated = False + while (self._pq and self._pq[0][0] <= timestamp): + old_ts, node, interval_ms = heapq.heappop(self._pq) + if old_ts == 0: + old_ts = timestamp + punctuator.punctuate(node, timestamp) + sched = (old_ts + interval_ms, node, interval_ms) + heapq.heappush(self._pq, sched) + punctuated = True + return punctuated diff --git a/kafka/streams/processor/quick_union.py b/kafka/streams/processor/quick_union.py new file mode 100644 index 0000000..6a2e3f4 --- /dev/null +++ b/kafka/streams/processor/quick_union.py @@ -0,0 +1,62 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +""" +from __future__ import absolute_import + +import kafka.streams.errors as Errors + + +class QuickUnion(object): + + def __init__(self): + self.ids = {} + + def add(self, foo): + self.ids[foo] = foo + + def exists(self, foo): + return foo in self.ids + + def root(self, foo): + """ + @throws NoSuchElementException if the parent of this node is null + """ + current = foo + parent = self.ids.get(current) + + if not parent: + raise Errors.NoSuchElementError("id: " + str(foo)) + + while parent != current: + # do the path compression + grandparent = self.ids.get(parent) + self.ids[current] = grandparent + + current = parent + parent = grandparent + + return current + + def unite(self, foo, foobars): + for bar in foobars: + self.unite_pair(foo, bar) + + def unite_pair(self, foo, bar): + root1 = self.root(foo) + root2 = self.root(bar) + + if root1 != root2: + self.ids[root1] = root2 diff --git a/kafka/streams/processor/record_collector.py b/kafka/streams/processor/record_collector.py new file mode 100644 index 0000000..de59b10 --- /dev/null +++ b/kafka/streams/processor/record_collector.py @@ -0,0 +1,68 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +""" +from __future__ import absolute_import + +import logging + +from kafka.structs import TopicPartition + +log = logging.getLogger(__name__) + + +def _handle_send_success(offsets, metadata): + tp = TopicPartition(metadata.topic, metadata.partition) + offsets[tp] = metadata.offset + + +def _handle_send_failure(topic, exception): + log.error('Error sending record to topic %s: %s', topic, exception) + + +class RecordCollector(object): + def __init__(self, producer): + self.producer = producer + self.offsets = {} + + def send(self, topic, partition=None, key=None, value=None, timestamp_ms=None, + key_serializer=None, value_serializer=None, partitioner=None): + if key_serializer: + key_bytes = key_serializer(topic, key) + else: + key_bytes = key + + if value_serializer: + val_bytes = value_serializer(topic, value) + else: + val_bytes = value + + if partition is None and partitioner is not None: + partitions = self.producer.partitions_for(topic) + if partitions is not None: + partition = partitioner.partition(key, value, len(partitions)) + + future = self.producer.send(topic, partition=partition, + key=key_bytes, value=val_bytes, + timestamp_ms=timestamp_ms) + + future.add_callback(_handle_send_success, self.offsets) + future.add_errback(_handle_send_failure, topic) + + def flush(self): + self.producer.flush() + + def close(self): + self.producer.close() diff --git a/kafka/streams/processor/record_queue.py b/kafka/streams/processor/record_queue.py new file mode 100644 index 0000000..ba22b76 --- /dev/null +++ b/kafka/streams/processor/record_queue.py @@ -0,0 +1,166 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
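A note on the RecordCollector defined above: it is a thin wrapper around the producer that applies the optional serializers and partitioner, sends the record, and remembers the last acked offset per TopicPartition in its offsets dict. A minimal usage sketch follows; the broker address and the utf-8 serializer are illustrative assumptions, not part of this commit:

    from kafka import KafkaProducer
    from kafka.streams.processor.record_collector import RecordCollector

    def utf8_serializer(topic, obj):
        # RecordCollector invokes serializers as fn(topic, obj)
        return None if obj is None else obj.encode('utf-8')

    producer = KafkaProducer(bootstrap_servers='localhost:9092')  # assumed broker
    collector = RecordCollector(producer)
    collector.send('output-topic', key='k1', value='v1',
                   key_serializer=utf8_serializer,
                   value_serializer=utf8_serializer)
    collector.flush()
    print(collector.offsets)  # {TopicPartition(...): last acked offset}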
+""" +from __future__ import absolute_import + +from collections import deque + +from kafka.consumer.fetcher import ConsumerRecord +import kafka.errors as Errors + + +class MinTimestampTracker(object): + """MinTimestampTracker maintains the min timestamp of timestamped elements.""" + def __init__(self): + self.descending_subsequence = deque() + + # in the case that incoming traffic is very small, the records maybe + # put and polled within a single iteration, in this case we need to + # remember the last polled record's timestamp + self.last_known_time = -1 + + def add_element(self, elem): + if elem is None: + raise ValueError('elem must not be None') + + while self.descending_subsequence: + min_elem = self.descending_subsequence[-1] + if min_elem[0] < elem[0]: + break + self.descending_subsequence.pop() + self.descending_subsequence.append(elem) + + def remove_element(self, elem): + if elem is not None: + if self.descending_subsequence: + if self.descending_subsequence[0] == elem: + self.descending_subsequence.popleft() + + if not self.descending_subsequence: + self.last_known_time = elem[0] + + def size(self): + return len(self.descending_subsequence) + + def get(self): + if not self.descending_subsequence: + return self.last_known_time + return self.descending_subsequence[0][0] + + +class RecordQueue(object): + """ + RecordQueue is a FIFO queue of (timestamp, ConsumerRecord). + It also keeps track of the partition timestamp defined as the minimum + timestamp of records in its queue; in addition, its partition timestamp + is monotonically increasing such that once it is advanced, it will not be + decremented. + """ + + def __init__(self, partition, source): + self.partition = partition + self.source = source + + self.fifo_queue = deque() + self.time_tracker = MinTimestampTracker() + + self._partition_time = -1 + + def add_raw_records(self, raw_records, timestamp_extractor): + """Add a batch of ConsumerRecord into the queue + + Arguments: + raw_records (list of ConsumerRecord): the raw records + timestamp_extractor (TimestampExtractor) + + Returns: the size of this queue + """ + for raw_record in raw_records: + # deserialize the raw record, extract the timestamp and put into the queue + key = self.source.deserialize_key(raw_record.topic, raw_record.key) + value = self.source.deserialize_value(raw_record.topic, raw_record.value) + + record = ConsumerRecord(raw_record.topic, + raw_record.partition, + raw_record.offset, + raw_record.timestamp, + 0, # TimestampType.CREATE_TIME, + raw_record.checksum, + raw_record.serialized_key_size, + raw_record.serialized_value_size, + key, value) + + timestamp = timestamp_extractor.extract(record) + + # validate that timestamp must be non-negative + if timestamp < 0: + raise Errors.StreamsError('Extracted timestamp value is negative, which is not allowed.') + + stamped_record = (timestamp, record) + + self.fifo_queue.append(stamped_record) + self.time_tracker.add_element(stamped_record) + + # update the partition timestamp if its currently + # tracked min timestamp has exceeded its value; this will + # usually only take effect for the first added batch + timestamp = self.time_tracker.get() + + if timestamp > self._partition_time: + self._partition_time = timestamp + + return self.size() + + def poll(self): + """Get the next StampedRecord from the queue + + Returns: StampedRecord + """ + if not self.fifo_queue: + return None + + elem = self.fifo_queue.popleft() + self.time_tracker.remove_element(elem) + + # only advance the partition timestamp if its 
currently + # tracked min timestamp has exceeded its value + timestamp = self.time_tracker.get() + + if timestamp > self._partition_time: + self._partition_time = timestamp + + return elem + + def size(self): + """Returns the number of records in the queue + + Returns: the number of records + """ + return len(self.fifo_queue) + + def is_empty(self): + """Tests if the queue is empty + + Returns: True if the queue is empty, otherwise False + """ + return not bool(self.fifo_queue) + + def timestamp(self): + """Returns the tracked partition timestamp + + Returns: timestamp + """ + return self._partition_time diff --git a/kafka/streams/processor/stream_partition_assignor.py b/kafka/streams/processor/stream_partition_assignor.py new file mode 100644 index 0000000..dfe7e18 --- /dev/null +++ b/kafka/streams/processor/stream_partition_assignor.py @@ -0,0 +1,435 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +""" +from __future__ import absolute_import + +import collections +import logging +import weakref + +from kafka import TopicPartition +from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor +from kafka.coordinator.protocol import ( + ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment) +import kafka.streams.errors as Errors +from .internal_topic_manager import InternalTopicManager +from .partition_group import TaskId +from .assignment.assignment_info import AssignmentInfo +from .assignment.client_state import ClientState +from .assignment.subscription_info import SubscriptionInfo +from .assignment.task_assignor import TaskAssignor + +log = logging.getLogger(__name__) + + +class AssignedPartition(object): + def __init__(self, task_id, partition): + self.task_id = task_id + self.partition = partition + + def __cmp__(self, that): + return cmp(self.partition, that.partition) + + +class SubscriptionUpdates(object): + """ + * Used to capture subscribed topic via Patterns discovered during the + * partition assignment process. + """ + def __init__(self): + self.updated_topic_subscriptions = set() + + def update_topics(self, topic_names): + self.updatedTopicSubscriptions.clear() + self.updated_topic_subscriptions.update(topic_names) + + def get_updates(self): + return self.updated_topic_subscriptions + + def has_updates(self): + return bool(self.updated_topic_subscriptions) + + +class StreamPartitionAssignor(AbstractPartitionAssignor): + name = 'stream' + version = 0 + + def __init__(self, **configs): + """ + We need to have the PartitionAssignor and its StreamThread to be mutually accessible + since the former needs later's cached metadata while sending subscriptions, + and the latter needs former's returned assignment when adding tasks. 
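A brief illustration of the MinTimestampTracker defined in record_queue.py above, since its invariant is easy to miss: adding an element drops any tracked candidates whose timestamps are not smaller, so get() always reports the minimum timestamp still buffered and RecordQueue's partition time only ever moves forward. A standalone sketch (timestamps and payloads are made up):

    from kafka.streams.processor.record_queue import MinTimestampTracker

    tracker = MinTimestampTracker()
    tracker.add_element((5, 'a'))
    tracker.add_element((3, 'b'))     # displaces (5, 'a') from the tracked subsequence
    tracker.add_element((7, 'c'))
    assert tracker.get() == 3         # minimum buffered timestamp
    tracker.remove_element((3, 'b'))  # removal only takes effect for the current head
    assert tracker.get() == 7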
+ """ + self.stream_thread = None + self.num_standby_replicas = None + self.topic_groups = {} + self.partition_to_task_ids = {} + self.state_changelog_topic_to_task_ids = {} + self.internal_source_topic_to_task_ids = {} + self.standby_tasks = {} + self.internal_topic_manager = None + + self.num_standby_replicas = configs.get('num_standby_replicas', 0) + + o = configs.get('stream_thread_instance') + if o is None: + raise Errors.KafkaError("StreamThread is not specified") + + #if not isinstance(o, StreamThread): + # raise Errors.KafkaError(o.__class__.__name__ + " is not an instance of StreamThread") + + self.stream_thread = weakref.proxy(o) + self.stream_thread.partition_assignor = self + + if 'zookeeper_connect_config' in configs: + self.internal_topic_manager = InternalTopicManager(configs['zookeeper_connect_config'], configs.get('replication_factor', 1)) + else: + log.info("Config 'zookeeper_connect_config' isn't supplied and hence no internal topics will be created.") + + def metadata(self, topics): + """Adds the following information to subscription + 1. Client UUID (a unique id assigned to an instance of KafkaStreams) + 2. Task ids of previously running tasks + 3. Task ids of valid local states on the client's state directory. + + Returns: ConsumerProtocolMemberMetadata + """ + + prev_tasks = self.stream_thread.prev_tasks() + standby_tasks = self.stream_thread.cached_tasks() + standby_tasks.difference_update(prev_tasks) + data = SubscriptionInfo(self.stream_thread.process_id, prev_tasks, standby_tasks) + + return ConsumerProtocolMemberMetadata(self.version, list(topics), data.encode()) + + def prepare_topic(self, topic_to_task_ids, compact_topic, post_partition_phase): + """Internal helper function that creates a Kafka topic + + Arguments: + topic_to_task_ids (dict): that contains the topic names to be created + compact_topic (bool): If True, the topic should be a compacted topic. + This is used for change log topics usually. + post_partition_phase (bool): If True, the computation for calculating + the number of partitions is slightly different. Set to True after + the initial topic-to-partition assignment. 
+ + Returns: + set([TopicPartition]) + """ + partitions = set() + # if ZK is specified, prepare the internal source topic before calling partition grouper + if self.internal_topic_manager is not None: + log.debug("Starting to validate internal topics in partition assignor.") + + for topic, tasks in topic_to_task_ids.items(): + num_partitions = 0 + if post_partition_phase: + # the expected number of partitions is the max value of + # TaskId.partition + 1 + for task in tasks: + if num_partitions < task.partition + 1: + num_partitions = task.partition + 1 + else: + # should have size 1 only + num_partitions = -1 + for task in tasks: + num_partitions = task.partition + + self.internal_topic_manager.make_ready(topic, num_partitions, compact_topic) + + # wait until the topic metadata has been propagated to all brokers + partition_ints = [] + while True: + partition_ints = self.stream_thread.restore_consumer.partitions_for_topic(topic) + if partition_ints and len(partition_ints) == num_partitions: + break + + for partition in partition_ints: + partitions.add(TopicPartition(topic, partition)) + + log.info("Completed validating internal topics in partition assignor.") + else: + missing_topics = [] + for topic in topic_to_task_ids: + partition_ints = self.stream_thread.restore_consumer.partitions_for_topic(topic) + if partition_ints is None: + missing_topics.append(topic) + + if missing_topics: + log.warn("Topic {} do not exists but couldn't created as the config '{}' isn't supplied", + missing_topics, 'zookeeper_connect_config') + + return partitions + + def assign(self, metadata, subscriptions): + """Assigns tasks to consumer clients in two steps. + + 1. using TaskAssignor to assign tasks to consumer clients. + - Assign a task to a client which was running it previously. + If there is no such client, assign a task to a client which has + its valid local state. + - A client may have more than one stream threads. + The assignor tries to assign tasks to a client proportionally to + the number of threads. + - We try not to assign the same set of tasks to two different clients + We do the assignment in one-pass. The result may not satisfy above all. + 2. within each client, tasks are assigned to consumer clients in + round-robin manner. 
+ + Returns: + {member_id: ConsumerProtocolMemberAssignment} + """ + import pdb; pdb.set_trace() + consumers_by_client = {} + states = {} + subscription_updates = SubscriptionUpdates() + # decode subscription info + for consumer_id, subscription in subscriptions.items(): + + if self.stream_thread.builder.source_topic_pattern() is not None: + # update the topic groups with the returned subscription list for regex pattern subscriptions + subscription_updates.update_topics(subscription.topics()) + + info = SubscriptionInfo.decode(subscription.user_data) + + consumers = consumers_by_client.get(info.process_id) + if consumers is None: + consumers = set() + consumers_by_client[info.process_id] = consumers + consumers.add(consumer_id) + + state = states.get(info.process_id) + if state is None: + state = ClientState() + states[info.process_id] = state + + state.prev_active_tasks.update(info.prev_tasks) + state.prev_assigned_tasks.update(info.prev_tasks) + state.prev_assigned_tasks.update(info.standby_tasks) + state.capacity = state.capacity + 1 + + self.stream_thread.builder.subscription_updates = subscription_updates + self.topic_groups = self.stream_thread.builder.topic_groups() + + # ensure the co-partitioning topics within the group have the same + # number of partitions, and enforce the number of partitions for those + # internal topics. + source_topic_groups = {} + internal_source_topic_groups = {} + for key, value in self.topic_groups.items(): + source_topic_groups[key] = value.source_topics + internal_source_topic_groups[key] = value.inter_source_topics + + # for all internal source topics + # set the number of partitions to the maximum of the depending + # sub-topologies source topics + internal_partitions = set() + all_internal_topic_names = set() + for topic_group_id, topics_info in self.topic_groups.items(): + internal_topics = topics_info.inter_source_topics + all_internal_topic_names.update(internal_topics) + for internal_topic in internal_topics: + tasks = self.internal_source_topic_to_task_ids.get(internal_topic) + + if tasks is None: + num_partitions = -1 + for other in self.topic_groups.values(): + other_sink_topics = other.sink_topics + + if internal_topic in other_sink_topics: + for topic in other.source_topics: + partitions = None + # It is possible the source_topic is another internal topic, i.e, + # map().join().join(map()) + if topic in all_internal_topic_names: + task_ids = self.internal_source_topic_to_task_ids.get(topic) + if task_ids is not None: + for task_id in task_ids: + partitions = task_id.partition + else: + partitions = len(metadata.partitions_for_topic(topic)) + + if partitions is not None and partitions > num_partitions: + num_partitions = partitions + + self.internal_source_topic_to_task_ids[internal_topic] = [TaskId(topic_group_id, num_partitions)] + for partition in range(num_partitions): + internal_partitions.add(TopicPartition(internal_topic, partition)) + + copartition_topic_groups = self.stream_thread.builder.copartition_groups() + self.ensure_copartitioning(copartition_topic_groups, internal_source_topic_groups, + metadata.with_partitions(internal_partitions)) + + + internal_partitions = self.prepare_topic(self.internal_source_topic_to_task_ids, False, False); + self.internal_source_topic_to_task_ids.clear() + + metadata_with_internal_topics = metadata + if self.internal_topic_manager: + metadata_with_internal_topics = metadata.with_partitions(internal_partitions) + + # get the tasks as partition groups from the partition grouper + partitions_for_task 
= self.stream_thread.partition_grouper(source_topic_groups, metadata_with_internal_topics) + + # add tasks to state change log topic subscribers + self.state_changelog_topic_to_task_ids = {} + for task in partitions_for_task: + for topic_name in self.topic_groups[task.topic_group_id].state_changelog_topics: + tasks = self.state_changelog_topic_to_task_ids.get(topic_name) + if tasks is None: + tasks = set() + self.state_changelog_topic_to_task_ids[topic_name] = tasks + + tasks.add(task) + + for topic_name in self.topic_groups[task.topic_group_id].inter_source_topics: + tasks = self.internal_source_topic_to_task_ids.get(topic_name) + if tasks is None: + tasks = set() + self.internal_source_topic_to_task_ids[topic_name] = tasks + + tasks.add(task) + + # assign tasks to clients + states = TaskAssignor.assign(states, set(partitions_for_task), self.num_standby_replicas) + assignment = {} + + for process_id, consumers in consumers_by_client.items(): + state = states[process_id] + + task_ids = [] + num_active_tasks = len(state.active_tasks) + for task_id in state.active_tasks: + task_ids.append(task_id) + + for task_id in state.assigned_tasks: + if task_id not in state.active_tasks: + task_ids.append(task_id) + + num_consumers = len(consumers) + standby = {} + + i = 0 + for consumer in consumers: + assigned_partitions = [] + + num_task_ids = len(task_ids) + j = i + while j < num_task_ids: + task_id = task_ids[j] + if j < num_active_tasks: + for partition in partitions_for_task[task_id]: + assigned_partitions.append(AssignedPartition(task_id, partition)) + else: + standby_partitions = standby.get(task_id) + if standby_partitions is None: + standby_partitions = set() + standby[task_id] = standby_partitions + standby_partitions.update(partitions_for_task[task_id]) + j += num_consumers + + assigned_partitions.sort() + active = [] + active_partitions = collections.defaultdict(list) + for partition in assigned_partitions: + active.append(partition.task_id) + active_partitions[partition.topic].append(partition.partition) + + data = AssignmentInfo(active, standby) + assignment[consumer] = ConsumerProtocolMemberAssignment( + self.version, + sorted(active_partitions.items()), + data.encode()) + i += 1 + + active.clear() + standby.clear() + + # if ZK is specified, validate the internal topics again + self.prepare_topic(self.internal_source_topic_to_task_ids, False, True) + # change log topics should be compacted + self.prepare_topic(self.state_changelog_topic_to_task_ids, True, True) + + return assignment + + def on_assignment(self, assignment): + partitions = [TopicPartition(topic, partition) + for topic, topic_partitions in assignment.partition_assignment + for partition in topic_partitions] + + partitions.sort() + + info = AssignmentInfo.decode(assignment.user_data) + self.standby_tasks = info.standby_tasks + + partition_to_task_ids = {} + task_iter = iter(info.active_tasks) + for partition in partitions: + task_ids = self.partition_to_task_ids.get(partition) + if task_ids is None: + task_ids = set() + self.partition_to_task_ids[partition] = task_ids + + try: + task_ids.add(next(task_iter)) + except StopIteration: + raise Errors.TaskAssignmentError( + "failed to find a task id for the partition=%s" + ", partitions=%d, assignment_info=%s" + % (partition, len(partitions), info)) + self.partition_to_task_ids = partition_to_task_ids + + def ensure_copartitioning(self, copartition_groups, internal_topic_groups, metadata): + internal_topics = set() + for topics in internal_topic_groups.values(): + 
internal_topics.update(topics) + + for copartition_group in copartition_groups: + num_partitions = -1 + + for topic in copartition_group: + if topic not in internal_topics: + infos = metadata.partitions_for_topic(topic) + + if infos is None: + raise Errors.TopologyBuilderError("External source topic not found: " + topic) + + if num_partitions == -1: + num_partitions = len(infos) + elif num_partitions != len(infos): + raise Errors.TopologyBuilderError("Topics not copartitioned: [%s]" % copartition_group) + + if num_partitions == -1: + for topic in internal_topics: + if topic in copartition_group: + partitions = len(metadata.partitions_for_topic(topic)) + if partitions is not None and partitions > num_partitions: + num_partitions = partitions + + # enforce co-partitioning restrictions to internal topics reusing + # internalSourceTopicToTaskIds + for topic in internal_topics: + if topic in copartition_group: + self.internal_source_topic_to_task_ids[topic] = [TaskId(-1, num_partitions)] + + def tasks_for_partition(self, partition): + return self.partition_to_task_ids.get(partition) + + def standby_tasks(self): + return self.standby_tasks + + def set_internal_topic_manager(self, internal_topic_manager): + self.internal_topic_manager = internal_topic_manager diff --git a/kafka/streams/processor/stream_task.py b/kafka/streams/processor/stream_task.py new file mode 100644 index 0000000..5c073d6 --- /dev/null +++ b/kafka/streams/processor/stream_task.py @@ -0,0 +1,277 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
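Before the task classes below, it is worth noting how the assignor above rides on the consumer group protocol: each member encodes a SubscriptionInfo (its process id plus previously active and standby task ids) into its join-group metadata, and the group leader decodes it again in assign(). A rough sketch of that round-trip, assuming SubscriptionInfo and TaskId behave the way the calls above imply (constructor arguments and equality are inferred, not verified):

    import uuid
    from kafka.streams.processor.partition_group import TaskId
    from kafka.streams.processor.assignment.subscription_info import SubscriptionInfo

    process_id = uuid.uuid4()
    prev_tasks = set([TaskId(0, 0), TaskId(0, 1)])   # (topic_group_id, partition)
    standby_tasks = set([TaskId(1, 0)])

    # what metadata() sends ...
    encoded = SubscriptionInfo(process_id, prev_tasks, standby_tasks).encode()
    # ... and what assign() recovers on the group leader
    info = SubscriptionInfo.decode(encoded)
    assert info.process_id == process_id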
+""" +from __future__ import absolute_import + +import logging +import threading + +from kafka.consumer.fetcher import ConsumerRecord +import kafka.errors as Errors +from kafka.structs import OffsetAndMetadata +from .context import ProcessorContext +from .partition_group import PartitionGroup, RecordInfo +from .punctuation import PunctuationQueue +from .record_collector import RecordCollector +from .record_queue import RecordQueue +from .task import AbstractTask + +NONEXIST_TOPIC = '__null_topic__' +DUMMY_RECORD = ConsumerRecord(NONEXIST_TOPIC, -1, -1, -1, -1, None, None, -1, -1, -1) + +log = logging.getLogger(__name__) + + +class StreamTask(AbstractTask): + """A StreamTask is associated with a PartitionGroup, + and is assigned to a StreamThread for processing.""" + + def __init__(self, task_id, partitions, topology, consumer, producer, restore_consumer, **config): + """Create StreamTask with its assigned partitions + + Arguments: + task_id (str): the ID of this task + partitions (list of TopicPartition): the assigned partitions + topology (ProcessorTopology): the instance of ProcessorTopology + consumer (Consumer): the instance of Consumer + producer (Producer): the instance of Producer + restore_consumer (Consumer): the instance of Consumer used when + restoring state + """ + super(StreamTask, self).__init__(task_id, partitions, topology, + consumer, restore_consumer, False, **config) + self._punctuation_queue = PunctuationQueue() + self._record_info = RecordInfo() + + self.max_buffered_size = config['buffered_records_per_partition'] + self._process_lock = threading.Lock() + + self._commit_requested = False + self._commit_offset_needed = False + self._curr_record = None + self._curr_node = None + self.requires_poll = True + + # create queues for each assigned partition and associate them + # to corresponding source nodes in the processor topology + partition_queues = {} + + for partition in partitions: + source = self.topology.source(partition.topic()) + queue = self.create_record_queue(partition, source) + partition_queues[partition] = queue + + self.partition_group = PartitionGroup(partition_queues, self.config['timestamp_extractor_class']) + + # initialize the consumed offset cache + self.consumed_offsets = {} + + # create the record recordCollector that maintains the produced offsets + self.record_collector = RecordCollector(self.producer) + + log.info('Creating restoration consumer client for stream task #%s', self.id) + + # initialize the topology with its own context + self.processor_context = ProcessorContext(self.id, self, self.record_collector, self.state_mgr, **config) + + # initialize the state stores + self.initialize_state_stores() + + # initialize the task by initializing all its processor nodes in the topology + for node in self.topology.processors(): + self._curr_node = node + try: + node.init(self.processor_context) + finally: + self._curr_node = None + + self.processor_context.initialized() + + + def add_records(self, partition, records): + """Adds records to queues""" + queue_size = self.partition_group.add_raw_records(partition, records) + + # if after adding these records, its partition queue's buffered size has + # been increased beyond the threshold, we can then pause the consumption + # for this partition + if queue_size > self.max_buffered_size: + self.consumer.pause(partition) + + def process(self): + """Process one record + + Returns: + number of records left in the buffer of this task's partition group after the processing is done + """ + with 
self._process_lock: + # get the next record to process + record = self.partition_group.next_record(self._record_info) + + # if there is no record to process, return immediately + if record is None: + self.requires_poll = True + return 0 + + self.requires_poll = False + + try: + # process the record by passing to the source node of the topology + self._curr_record = record + self._curr_node = self._record_info.node() + partition = self._record_info.partition() + + log.debug('Start processing one record [%s]', self._curr_record) + + self._curr_node.process(self._curr_record.key, self._curr_record.value) + + log.debug('Completed processing one record [%s]', self._curr_record) + + # update the consumed offset map after processing is done + self.consumed_offsets[partition] = self._curr_record.offset + self._commit_offset_needed = True + + # after processing this record, if its partition queue's + # buffered size has been decreased to the threshold, we can then + # resume the consumption on this partition + if self._record_info.queue().size() == self.max_buffered_size: + self.consumer.resume(partition) + self.requires_poll = True + + if self.partition_group.top_queue_size() <= self.max_buffered_size: + self.requires_poll = True + + finally: + self._curr_record = None + self._curr_node = None + + return self.partition_group.num_buffered() + + def maybe_punctuate(self): + """Possibly trigger registered punctuation functions if + current partition group timestamp has reached the defined stamp + """ + timestamp = self.partition_group.timestamp() + + # if the timestamp is not known yet, meaning there is not enough data + # accumulated to reason stream partition time, then skip. + if timestamp == -1: + return False + else: + return self._punctuation_queue.may_punctuate(timestamp, self) + + def punctuate(self, node, timestamp): + if self._curr_node is not None: + raise Errors.IllegalStateError('Current node is not null') + + self._curr_node = node + self._curr_record = (timestamp, DUMMY_RECORD) + + try: + node.processor().punctuate(timestamp) + finally: + self._curr_node = None + self._curr_record = None + + def record(self): + return self._curr_record + + def node(self): + return self._curr_node + + def commit(self): + """Commit the current task state""" + # 1) flush local state + self.state_mgr.flush() + + # 2) flush produced records in the downstream and change logs of local states + self.record_collector.flush() + + # 3) commit consumed offsets if it is dirty already + if self._commit_offset_needed: + consumed_offsets_and_metadata = {} + for partition, offset in self.consumed_offsets.items(): + consumed_offsets_and_metadata[partition] = OffsetAndMetadata(offset + 1) + self.state_mgr.put_offset_limit(partition, offset + 1) + self.consumer.commit_sync(consumed_offsets_and_metadata) + self._commit_offset_needed = False + + self._commit_requested = False + + def commit_needed(self): + """Whether or not a request has been made to commit the current state""" + return self._commit_requested + + def need_commit(self): + """Request committing the current task's state""" + self._commit_requested = True + + def schedule(self, interval_ms): + """Schedules a punctuation for the processor + + Arguments: + interval_ms (int): the interval in milliseconds + + Raises: IllegalStateError if the current node is not None + """ + if self._curr_node is None: + raise Errors.IllegalStateError('Current node is null') + + schedule = (0, self._curr_node, interval_ms) + self._punctuation_queue.schedule(schedule) + + def 
close(self): + self.partition_group.close() + self.consumed_offsets.clear() + + # close the processors + # make sure close() is called for each node even when there is a RuntimeException + exception = None + for node in self.topology.processors(): + self._curr_node = node + try: + node.close() + except RuntimeError as e: + exception = e + finally: + self._curr_node = None + + super(StreamTask, self).close() + + if exception is not None: + raise exception + + def record_collector_offsets(self): + return self.record_collector.offsets() + + def _create_record_queue(self, partition, source): + return RecordQueue(partition, source) + + def forward(self, key, value, child_index=None, child_name=None): + this_node = self._curr_node + try: + children = this_node.children() + + if child_index is not None: + children = [children[child_index]] + elif child_name is not None: + children = [child for child in children if child.name == child_name] + + for child_node in children: + self._curr_node = child_node + child_node.process(key, value) + finally: + self._curr_node = this_node diff --git a/kafka/streams/processor/stream_thread.py b/kafka/streams/processor/stream_thread.py new file mode 100644 index 0000000..021fb0a --- /dev/null +++ b/kafka/streams/processor/stream_thread.py @@ -0,0 +1,697 @@ +""" + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
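One detail of stream_task.py above that is easy to miss is how punctuation is driven: schedule() pushes a (0, node, interval_ms) tuple onto a PunctuationQueue, and maybe_punctuate() fires every entry whose deadline is at or before the partition group's current timestamp, re-scheduling it one interval later. The sketch below exercises only the queue; the node and punctuator classes are stand-ins invented for the example:

    from kafka.streams.processor.punctuation import PunctuationQueue

    class FakeProcessorNode(object):
        def processor(self):
            return self
        def punctuate(self, timestamp):
            print('punctuated at', timestamp)

    class FakePunctuator(object):
        # mirrors StreamTask.punctuate(node, timestamp)
        def punctuate(self, node, timestamp):
            node.processor().punctuate(timestamp)

    queue = PunctuationQueue()
    queue.schedule((0, FakeProcessorNode(), 1000))  # same tuple shape StreamTask.schedule() builds
    queue.may_punctuate(1500, FakePunctuator())     # fires; next deadline becomes 2500
    queue.may_punctuate(2000, FakePunctuator())     # below the next deadline, nothing fires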
+""" +from __future__ import absolute_import + +import collections +import copy +import logging +from multiprocessing import Process +import os +import time + +import six + +from kafka import KafkaConsumer, KafkaProducer +from kafka.consumer.subscription_state import ConsumerRebalanceListener +import kafka.errors as Errors +from kafka.streams.errors import StreamsError +from kafka.streams.utils import AtomicInteger +from .partition_group import partition_grouper +from .stream_partition_assignor import StreamPartitionAssignor +from .task import StreamTask + +log = logging.getLogger(__name__) + +STREAM_THREAD_ID_SEQUENCE = AtomicInteger(0) + + +class StreamThread(object):#Process): + DEFAULT_CONFIG = { + 'application_id': None, # required + 'bootstrap_servers': None, # required + 'process_id': None, # required + 'client_id': 'kafka-python-streams', + 'poll_ms': 100, + 'num_stream_threads': 1, + 'commit_interval_ms': 30000, + 'partition_grouper': partition_grouper, + 'key_serializer': None, + 'value_serializer': None, + 'key_deserializer': None, + 'value_deserializer': None, + 'state_dir': '/tmp/kafka-streams', + #'client_supplier': ...., + 'state_cleanup_delay_ms': 60000, + 'linger_ms': 100, + 'auto_offset_reset': 'earliest', + 'enable_auto_commit': False, + 'max_poll_records': 1000, + } + + def __init__(self, builder, **configs): + stream_id = STREAM_THREAD_ID_SEQUENCE.increment() + #super(StreamThread, self).__init__(name='StreamThread-' + str(stream_id)) + self.name = 'StreamThread-' + str(stream_id) + + self.config = copy.copy(self.DEFAULT_CONFIG) + for key in self.config: + if key in configs: + self.config[key] = configs.pop(key) + + for key in ('application_id', 'process_id', 'bootstrap_servers'): + assert self.config[key], 'Required configuration: ' + key + + # Only check for extra config keys in top-level class + log.warning('Unrecognized configs: %s', configs.keys()) + + self.builder = builder + self._partition_grouper = self.config.get('partition_grouper', partition_grouper) + self.source_topics = builder.source_topics() + self.topic_pattern = builder.source_topic_pattern() + self.partition_assignor = None + + self._rebalance_exception = None + self._process_standby_records = False + self._rebalance_listener = KStreamsConsumerRebalanceListener(self) + self.config['thread_client_id'] = self.config['client_id'] + "-" + self.name + self._running = False + + @property + def application_id(self): + return self.config['application_id'] + + @property + def process_id(self): + return self.config['process_id'] + + @property + def client_id(self): + return self.config['client_id'] + + def initialize(self): + assert not self._running + log.info('Creating producer client for stream thread [%s]', self.name) + #client_supplier = self.config['client_supplier'] + self.producer = KafkaProducer(**self.config) + log.info('Creating consumer client for stream thread [%s]', self.name) + assignor = StreamPartitionAssignor( + stream_thread_instance=self, **self.config) + self.consumer = KafkaConsumer( + partition_assignment_strategy=[assignor], **self.config) + log.info('Creating restore consumer client for stream thread [%s]', self.name) + restore_assignor = StreamPartitionAssignor( + stream_thread_instance=self, **self.config) + self.restore_consumer = KafkaConsumer( + partition_assignment_strategy=[restore_assignor], **self.config) + + # initialize the task list + self._active_tasks = {} + self._standby_tasks = {} + self._active_tasks_by_partition = {} + self._standby_tasks_by_partition = {} + 
self._prev_tasks = set() + + # standby ktables + self._standby_records = {} + + # read in task specific config values + """ + self._state_dir = os.path.join(self.config['state_dir'], self.config['application_id']) + if not os.path.isdir(self._state_dir): + os.makedirs(self._state_dir) + """ + + # the cleaning cycle won't start until partition assignment + self._last_clean_ms = float('inf') + self._last_commit_ms = time.time() * 1000 + + #self._sensors = StreamsMetricsImpl(metrics) + + self._running = True #new AtomicBoolean(true); + + def run(self): + """Execute the stream processors. + + Raises: + KafkaError for any Kafka-related exceptions + Exception for any other non-Kafka exceptions + """ + self.initialize() + log.info('Starting stream thread [%s]', self.name) + + try: + self._run_loop() + except Errors.KafkaError: + # just re-throw the exception as it should be logged already + raise + except Exception: + # we have caught all Kafka related exceptions, and other runtime exceptions + # should be due to user application errors + log.exception('Streams application error during processing in thread [%s]', self.name) + raise + finally: + self.shutdown() + + def close(self): + """Shutdown this stream thread.""" + self._running = False #.set(False) + + def tasks(self): + return self._active_tasks + + def shutdown(self): + log.info('Shutting down stream thread [%s]', self.name) + + # Exceptions should not prevent this call from going through all shutdown steps + try: + self._commit_all() + except Exception: + # already logged in commitAll() + pass + + # Close standby tasks before closing the restore consumer since closing + # standby tasks uses the restore consumer. + self._remove_standby_tasks() + + # We need to first close the underlying clients before closing the state + # manager, for example we need to make sure producer's record sends + # have all been acked before the state manager records + # changelog sent offsets + try: + self.producer.close() + except Exception: + log.exception('Failed to close producer in thread [%s]', self.name) + try: + self.consumer.close() + except Exception: + log.exception('Failed to close consumer in thread [%s]', self.name) + try: + self.restore_consumer.close() + except Exception: + log.exception('Failed to close restore consumer in thread [%s]', self.name) + + self._remove_stream_tasks() + log.info('Stream thread shutdown complete [%s]', self.name) + + def _run_loop(self): + total_num_buffered = 0 + last_poll = 0 + requires_poll = True + + if self.topic_pattern is not None: + self.consumer.subscribe(pattern=self.topic_pattern, + listener=self._rebalance_listener) + else: + self.consumer.subscribe(topics=self.source_topics, + listener=self._rebalance_listener) + + while self._still_running(): + # try to fetch some records if necessary + if requires_poll: + requires_poll = False + + start_poll = time.time() * 1000 + + if total_num_buffered == 0: + poll_ms = self.config['poll_ms'] + else: + poll_ms = 0 + records = self.consumer.poll(poll_ms) + last_poll = time.time() * 1000 + + if self._rebalance_exception is not None: + raise StreamsError('Failed to rebalance', + self._rebalance_exception) + + if records: + for partition in records: + task = self._active_tasks_by_partition[partition] + task.add_records(partition, records[partition]) + + end_poll = time.time() + #self._sensors.poll_time_sensor.record(end_poll - start_poll) + + total_num_buffered = 0 + + # try to process one fetch record from each task via the topology, + # and also trigger punctuate 
functions if necessary, which may + # result in more records going through the topology in this loop + if self._active_tasks: + for task in six.itervalues(self._active_tasks): + start_process = time.time() + + total_num_buffered += task.process() + requires_poll = requires_poll or task.requires_poll() + + latency_ms = (time.time() - start_process) * 1000 + #self._sensors.process_time_sensor.record(latency_ms) + + self._maybe_punctuate(task) + + if task.commit_needed(): + self._commit_one(task) + + # if poll_ms has passed since the last poll, we poll to respond + # to a possible rebalance even when we paused all partitions. + if (last_poll + self.config['poll_ms'] < time.time() * 1000): + requires_poll = True + + else: + # even when no task is assigned, we must poll to get a task. + requires_poll = True + + self._maybe_commit() + self._maybe_update_standby_tasks() + self._maybe_clean() + + def _maybe_update_standby_tasks(self): + if self._standby_tasks: + if self._process_standby_records: + if self._standby_records: + remaining_standby_records = {} + for partition in self._standby_records: + remaining = self._standby_records[partition] + if remaining: + task = self._standby_tasks_by_partition[partition] + remaining = task.update(partition, remaining) + if remaining: + remaining_standby_records[partition] = remaining + else: + self.restore_consumer.resume(partition) + self._standby_records = remaining_standby_records; + self._process_standby_records = False + + records = self.restore_consumer.poll(0) + + if records: + for partition in records: + task = self._standby_tasks_by_partition.get(partition) + + if task is None: + log.error('missing standby task for partition %s', partition) + raise StreamsError('missing standby task for partition %s' % partition) + + remaining = task.update(partition, records[partition]) + if remaining: + self.restore_consumer.pause(partition) + self._standby_records[partition] = remaining + + def _still_running(self): + if not self._running: + log.debug('Shutting down at user request.') + return False + return True + + def _maybe_punctuate(self, task): + try: + now = time.time() + + # check whether we should punctuate based on the task's partition + # group timestamp which are essentially based on record timestamp. + if task.maybe_punctuate(): + latency_ms = (time.time() - now) * 1000 + #self._sensors.punctuate_time_sensor.record(latency_ms) + + except Errors.KafkaError: + log.exception('Failed to punctuate active task #%s in thread [%s]', + task.id, self.name) + raise + + def _maybe_commit(self): + now_ms = time.time() * 1000 + + if (self.config['commit_interval_ms'] >= 0 and + self._last_commit_ms + self.config['commit_interval_ms'] < now_ms): + log.log(0, 'Committing processor instances because the commit interval has elapsed.') + + self._commit_all() + self._last_commit_ms = now_ms + + self._proces_standby_records = True + + def _commit_all(self): + """Commit the states of all its tasks""" + for task in six.itervalues(self._active_tasks): + self._commit_one(task) + for task in six.itervalues(self._standby_tasks): + self._commit_one(task) + + def _commit_one(self, task): + """Commit the state of a task""" + start = time.time() + try: + task.commit() + except Errors.CommitFailedError: + # commit failed. Just log it. + log.warning('Failed to commit %s #%s in thread [%s]', + task.__class__.__name__, task.id, self.name, + exc_info=True) + except Errors.KafkaError: + # commit failed due to an unexpected exception. + # Log it and rethrow the exception. 
+ log.exception('Failed to commit %s #%s in thread [%s]', + task.__class__.__name__, task.id, self.name) + raise + + timer_ms = (time.time() - start) * 1000 + #self._sensors.commit_time_sensor.record(timer_ms) + + def _maybe_clean(self): + """Cleanup any states of the tasks that have been removed from this thread""" + now_ms = time.time() * 1000 + + clean_time_ms = self.config['state_cleanup_delay_ms'] + if now_ms > self._last_clean_ms + clean_time_ms: + """ + File[] stateDirs = stateDir.listFiles(); + if (stateDirs != null) { + for (File dir : stateDirs) { + try { + String dirName = dir.getName(); + TaskId id = TaskId.parse(dirName.substring(dirName.lastIndexOf("-") + 1)); # task_id as (topic_group_id, partition_id) + + // try to acquire the exclusive lock on the state directory + if (dir.exists()) { + FileLock directoryLock = null; + try { + directoryLock = ProcessorStateManager.lockStateDirectory(dir); + if (directoryLock != null) { + log.info("Deleting obsolete state directory {} for task {} after delayed {} ms.", dir.getAbsolutePath(), id, cleanTimeMs); + Utils.delete(dir); + } + } catch (FileNotFoundException e) { + // the state directory may be deleted by another thread + } catch (IOException e) { + log.error("Failed to lock the state directory due to an unexpected exception", e); + } finally { + if (directoryLock != null) { + try { + directoryLock.release(); + directoryLock.channel().close(); + } catch (IOException e) { + log.error("Failed to release the state directory lock"); + } + } + } + } + } catch (TaskIdFormatException e) { + // there may be some unknown files that sits in the same directory, + // we should ignore these files instead trying to delete them as well + } + } + } + """ + self._last_clean_ms = now_ms + + def prev_tasks(self): + """Returns ids of tasks that were being executed before the rebalance.""" + return self._prev_tasks + + def cached_tasks(self): + """Returns ids of tasks whose states are kept on the local storage.""" + # A client could contain some inactive tasks whose states are still + # kept on the local storage in the following scenarios: + # 1) the client is actively maintaining standby tasks by maintaining + # their states from the change log. + # 2) the client has just got some tasks migrated out of itself to other + # clients while these task states have not been cleaned up yet (this + # can happen in a rolling bounce upgrade, for example). + + tasks = set() + """ + File[] stateDirs = stateDir.listFiles(); + if (stateDirs != null) { + for (File dir : stateDirs) { + try { + TaskId id = TaskId.parse(dir.getName()); + // if the checkpoint file exists, the state is valid. 
+ if (new File(dir, ProcessorStateManager.CHECKPOINT_FILE_NAME).exists()) + tasks.add(id); + + } catch (TaskIdFormatException e) { + // there may be some unknown files that sits in the same directory, + // we should ignore these files instead trying to delete them as well + } + } + } + """ + return tasks + + def _create_stream_task(self, task_id, partitions): + #self._sensors.task_creation_sensor.record() + + topology = self.builder.build(self.config['application_id'], + task_id.topic_group_id) + + return StreamTask(task_id, self.config['application_id'], + partitions, topology, + self.consumer, self.producer, self.restore_consumer, + **self.config) # self._sensors + + def _add_stream_tasks(self, assignment): + if self.partition_assignor is None: + raise Errors.IllegalStateError( + 'Partition assignor has not been initialized while adding' + ' stream tasks: this should not happen.') + + partitions_for_task = collections.defaultdict(set) + + for partition in assignment: + task_ids = self.partition_assignor.tasks_for_partition(partition) + for task_id in task_ids: + partitions = partitions_for_task[task_id].add(partition) + + # create the active tasks + for task_id, partitions in partitions_for_task.items(): + try: + task = self._create_stream_task(task_id, partitions) + self._active_tasks[task_id] = task + + for partition in partitions: + self._active_tasks_by_partition[partition] = task + except StreamsError: + log.exception('Failed to create an active task #%s in thread [%s]', + task_id, self.name) + raise + + def _remove_stream_tasks(self): + try: + for task in self._active_tasks.values(): + self._close_one(task) + self._prev_tasks.clear() + self._prev_tasks.update(set(self._active_tasks.keys())) + + self._active_tasks.clear() + self._active_tasks_by_partition.clear() + + except Exception: + log.exception('Failed to remove stream tasks in thread [%s]', self.name) + + def _close_one(self, task): + log.info('Removing a task %s', task.id) + try: + task.close() + except StreamsError: + log.exception('Failed to close a %s #%s in thread [%s]', + task.__class__.__name__, task.id, self.name) + #self._sensors.task_destruction_sensor.record() + + def _create_standby_task(self, task_id, partitions): + #self._sensors.task_creation_sensor.record() + raise NotImplementedError('no standby tasks yet') + + topology = self.builder.build(self.config['application_id'], + task_id.topic_group_id) + + """ + if topology.state_store_suppliers(): + return StandbyTask(task_id, partitions, topology, + self.consumer, self.restore_consumer, + **self.config) # self._sensors + else: + return None + """ + + def _add_standby_tasks(self): + if self.partition_assignor is None: + raise Errors.IllegalStateError( + 'Partition assignor has not been initialized while adding' + ' standby tasks: this should not happen.') + + checkpointed_offsets = {} + + # create the standby tasks + for task_id, partitions in self.partition_assignor.standby_tasks().items(): + task = self._create_standby_task(task_id, partitions) + if task: + self._standby_tasks[task_id] = task + for partition in partitions: + self._standby_tasks_by_partition[partition] = task + + # collect checkpointed offsets to position the restore consumer + # this includes all partitions from which we restore states + for partition in task.checkpointed_offsets(): + self._standby_tasks_by_partition[partition] = task + + checkpointed_offsets.update(task.checkpointed_offsets()) + + self.restore_consumer.assign(checkpointed_offsets.keys()) + + for partition, offset in 
checkpointed_offsets.items(): + if offset >= 0: + self.restore_consumer.seek(partition, offset) + else: + self.restore_consumer.seek_to_beginning(partition) + + def _remove_standby_tasks(self): + try: + for task in self._standby_tasks.values(): + self._close_one(task) + self._standby_tasks.clear() + self._standby_tasks_by_partition.clear() + self._standby_records.clear() + + # un-assign the change log partitions + self.restore_consumer.assign([]) + + except Exception: + log.exception('Failed to remove standby tasks in thread [%s]', self.name) + +""" + private class StreamsMetricsImpl implements StreamsMetrics { + final Metrics metrics; + final String metricGrpName; + final Map<String, String> metricTags; + + final Sensor commitTimeSensor; + final Sensor pollTimeSensor; + final Sensor processTimeSensor; + final Sensor punctuateTimeSensor; + final Sensor taskCreationSensor; + final Sensor taskDestructionSensor; + + public StreamsMetricsImpl(Metrics metrics) { + + this.metrics = metrics; + this.metricGrpName = "stream-metrics"; + this.metricTags = new LinkedHashMap<>(); + this.metricTags.put("client-id", clientId + "-" + getName()); + + this.commitTimeSensor = metrics.sensor("commit-time"); + this.commitTimeSensor.add(metrics.metricName("commit-time-avg", metricGrpName, "The average commit time in ms", metricTags), new Avg()); + this.commitTimeSensor.add(metrics.metricName("commit-time-max", metricGrpName, "The maximum commit time in ms", metricTags), new Max()); + this.commitTimeSensor.add(metrics.metricName("commit-calls-rate", metricGrpName, "The average per-second number of commit calls", metricTags), new Rate(new Count())); + + this.pollTimeSensor = metrics.sensor("poll-time"); + this.pollTimeSensor.add(metrics.metricName("poll-time-avg", metricGrpName, "The average poll time in ms", metricTags), new Avg()); + this.pollTimeSensor.add(metrics.metricName("poll-time-max", metricGrpName, "The maximum poll time in ms", metricTags), new Max()); + this.pollTimeSensor.add(metrics.metricName("poll-calls-rate", metricGrpName, "The average per-second number of record-poll calls", metricTags), new Rate(new Count())); + + this.processTimeSensor = metrics.sensor("process-time"); + this.processTimeSensor.add(metrics.metricName("process-time-avg-ms", metricGrpName, "The average process time in ms", metricTags), new Avg()); + this.processTimeSensor.add(metrics.metricName("process-time-max-ms", metricGrpName, "The maximum process time in ms", metricTags), new Max()); + this.processTimeSensor.add(metrics.metricName("process-calls-rate", metricGrpName, "The average per-second number of process calls", metricTags), new Rate(new Count())); + + this.punctuateTimeSensor = metrics.sensor("punctuate-time"); + this.punctuateTimeSensor.add(metrics.metricName("punctuate-time-avg", metricGrpName, "The average punctuate time in ms", metricTags), new Avg()); + this.punctuateTimeSensor.add(metrics.metricName("punctuate-time-max", metricGrpName, "The maximum punctuate time in ms", metricTags), new Max()); + this.punctuateTimeSensor.add(metrics.metricName("punctuate-calls-rate", metricGrpName, "The average per-second number of punctuate calls", metricTags), new Rate(new Count())); + + this.taskCreationSensor = metrics.sensor("task-creation"); + this.taskCreationSensor.add(metrics.metricName("task-creation-rate", metricGrpName, "The average per-second number of newly created tasks", metricTags), new Rate(new Count())); + + this.taskDestructionSensor = metrics.sensor("task-destruction"); + 
this.taskDestructionSensor.add(metrics.metricName("task-destruction-rate", metricGrpName, "The average per-second number of destructed tasks", metricTags), new Rate(new Count())); + } + + @Override + public void recordLatency(Sensor sensor, long startNs, long endNs) { + sensor.record((endNs - startNs) / 1000000, endNs); + } + + /** + * @throws IllegalArgumentException if tags is not constructed in key-value pairs + */ + @Override + public Sensor addLatencySensor(String scopeName, String entityName, String operationName, String... tags) { + // extract the additional tags if there are any + Map<String, String> tagMap = new HashMap<>(this.metricTags); + if ((tags.length % 2) != 0) + throw new IllegalArgumentException("Tags needs to be specified in key-value pairs"); + + for (int i = 0; i < tags.length; i += 2) + tagMap.put(tags[i], tags[i + 1]); + + String metricGroupName = "stream-" + scopeName + "-metrics"; + + // first add the global operation metrics if not yet, with the global tags only + Sensor parent = metrics.sensor(scopeName + "-" + operationName); + addLatencyMetrics(metricGroupName, parent, "all", operationName, this.metricTags); + + // add the store operation metrics with additional tags + Sensor sensor = metrics.sensor(scopeName + "-" + entityName + "-" + operationName, parent); + addLatencyMetrics(metricGroupName, sensor, entityName, operationName, tagMap); + + return sensor; + } + + private void addLatencyMetrics(String metricGrpName, Sensor sensor, String entityName, String opName, Map<String, String> tags) { + maybeAddMetric(sensor, metrics.metricName(entityName + "-" + opName + "-avg-latency-ms", metricGrpName, + "The average latency in milliseconds of " + entityName + " " + opName + " operation.", tags), new Avg()); + maybeAddMetric(sensor, metrics.metricName(entityName + "-" + opName + "-max-latency-ms", metricGrpName, + "The max latency in milliseconds of " + entityName + " " + opName + " operation.", tags), new Max()); + maybeAddMetric(sensor, metrics.metricName(entityName + "-" + opName + "-qps", metricGrpName, + "The average number of occurrence of " + entityName + " " + opName + " operation per second.", tags), new Rate(new Count())); + } + + private void maybeAddMetric(Sensor sensor, MetricName name, MeasurableStat stat) { + if (!metrics.metrics().containsKey(name)) + sensor.add(name, stat); + } + } +} + """ + + +class KStreamsConsumerRebalanceListener(ConsumerRebalanceListener): + def __init__(self, stream_thread): + self.stream_thread = stream_thread + + def on_partitions_assigned(self, assignment): + try: + self.stream_thread._add_stream_tasks(assignment) + self.stream_thread._add_standby_tasks() + # start the cleaning cycle + self.stream_thread._last_clean_ms = time.time() * 1000 + except Exception as e: + self.stream_thread._rebalance_exception = e + raise + + def on_partitions_revoked(self, assignment): + try: + self.stream_thread._commit_all() + # stop the cleaning cycle until partitions are assigned + self.stream_thread._last_clean_ms = float('inf') + except Exception as e: + self.stream_thread._rebalance_exception = e + raise + finally: + # TODO: right now upon partition revocation, we always remove all + # the tasks; this behavior can be optimized to only remove affected + # tasks in the future + self.stream_thread._remove_stream_tasks() + self.stream_thread._remove_standby_tasks() diff --git a/kafka/streams/processor/task.py b/kafka/streams/processor/task.py new file mode 100644 index 0000000..67a276b --- /dev/null +++ b/kafka/streams/processor/task.py @@ 
-0,0 +1,333 @@
+"""
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+"""
+from __future__ import absolute_import
+
+import abc
+import logging
+import threading
+
+from kafka.consumer.fetcher import ConsumerRecord
+import kafka.errors as Errors
+from kafka.streams.errors import ProcessorStateError
+from kafka.structs import OffsetAndMetadata
+from .context import ProcessorContext
+from .partition_group import PartitionGroup, RecordInfo
+from .punctuation import PunctuationQueue
+from .record_collector import RecordCollector
+from .record_queue import RecordQueue
+
+log = logging.getLogger(__name__)
+
+NONEXIST_TOPIC = '__null_topic__'
+DUMMY_RECORD = ConsumerRecord(NONEXIST_TOPIC, -1, -1, -1, -1, None, None, -1, -1, -1)
+
+
+class AbstractTask(object):
+    __metaclass__ = abc.ABCMeta
+
+    def __init__(self, task_id, partitions, topology,
+                 consumer, restore_consumer, is_standby, **config):
+        """Raises ProcessorStateError if the state manager cannot be created"""
+        self.id = task_id
+        self.config = config
+        self.application_id = config['application_id']
+        self.partitions = set(partitions)
+        self.topology = topology
+        self.consumer = consumer
+        self.processor_context = None
+
+        # create the processor state manager
+        """
+        try:
+            File applicationStateDir = StreamThread.makeStateDir(applicationId, config.getString(StreamsConfig.STATE_DIR_CONFIG));
+            File stateFile = new File(applicationStateDir.getCanonicalPath(), id.toString());
+            # if partitions is null, this is a standby task
+            self.state_mgr = ProcessorStateManager(applicationId, id.partition, partitions, stateFile, restoreConsumer, isStandby);
+        except Exception as e:
+            raise ProcessorStateError('Error while creating the state manager', e)
+        """
+
+    def initialize_state_stores(self):
+        # set initial offset limits
+        self.initialize_offset_limits()
+
+        for state_store_supplier in self.topology.state_store_suppliers():
+            store = state_store_supplier.get()
+            store.init(self.processor_context, store)
+
+    @abc.abstractmethod
+    def commit(self):
+        pass
+
+    def close(self):
+        """Close the task.
+
+        Raises ProcessorStateError if there is an error while closing the state manager
+        """
+        try:
+            self.state_mgr.close(self.record_collector_offsets())
+        except Exception as e:
+            raise ProcessorStateError('Error while closing the state manager', e)
+
+    def record_collector_offsets(self):
+        return {}
+
+    def initialize_offset_limits(self):
+        for partition in self.partitions:
+            metadata = self.consumer.committed(partition)  # TODO: batch API?
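+            # The committed offset for each partition (when present) becomes
+            # that partition's "offset limit": state restoration and standby
+            # replay must not read past what the consumer group has already
+            # committed, so restored state never runs ahead of the consumed
+            # offsets.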
+ self.state_mgr.put_offset_limit(partition, metadata.offset if metadata else 0) + + +class StreamTask(AbstractTask): + """A StreamTask is associated with a PartitionGroup, + and is assigned to a StreamThread for processing.""" + + def __init__(self, task_id, partitions, topology, consumer, producer, restore_consumer, **config): + """Create StreamTask with its assigned partitions + + Arguments: + task_id (str): the ID of this task + partitions (list of TopicPartition): the assigned partitions + topology (ProcessorTopology): the instance of ProcessorTopology + consumer (Consumer): the instance of Consumer + producer (Producer): the instance of Producer + restore_consumer (Consumer): the instance of Consumer used when + restoring state + """ + super(StreamTask, self).__init__(task_id, partitions, topology, + consumer, restore_consumer, False, **config) + self._punctuation_queue = PunctuationQueue() + self._record_info = RecordInfo() + + self.max_buffered_size = config['buffered_records_per_partition'] + self._process_lock = threading.Lock() + + self._commit_requested = False + self._commit_offset_needed = False + self._curr_record = None + self._curr_node = None + self.requires_poll = True + + # create queues for each assigned partition and associate them + # to corresponding source nodes in the processor topology + partition_queues = {} + + for partition in partitions: + source = self.topology.source(partition.topic()) + queue = self._create_record_queue(partition, source) + partition_queues[partition] = queue + + self.partition_group = PartitionGroup(partition_queues, self.config['timestamp_extractor_class']) + + # initialize the consumed offset cache + self.consumed_offsets = {} + + # create the RecordCollector that maintains the produced offsets + self.record_collector = RecordCollector(self.producer) + + log.info('Creating restoration consumer client for stream task #%s', self.id) + + # initialize the topology with its own context + self.processor_context = ProcessorContext(self.id, self, self.record_collector, self.state_mgr, **config) + + # initialize the state stores + self.initialize_state_stores() + + # initialize the task by initializing all its processor nodes in the topology + for node in self.topology.processors(): + self._curr_node = node + try: + node.init(self.processor_context) + finally: + self._curr_node = None + + self.processor_context.initialized() + + + def add_records(self, partition, records): + """Adds records to queues""" + queue_size = self.partition_group.add_raw_records(partition, records) + + # if after adding these records, its partition queue's buffered size has + # been increased beyond the threshold, we can then pause the consumption + # for this partition + if queue_size > self.max_buffered_size: + self.consumer.pause(partition) + + def process(self): + """Process one record + + Returns: + number of records left in the buffer of this task's partition group after the processing is done + """ + with self._process_lock: + # get the next record to process + record = self.partition_group.next_record(self._record_info) + + # if there is no record to process, return immediately + if record is None: + self.requires_poll = True + return 0 + + self.requires_poll = False + + try: + # process the record by passing to the source node of the topology + self._curr_record = record + self._curr_node = self._record_info.node() + partition = self._record_info.partition() + + log.debug('Start processing one record [%s]', self._curr_record) + + 
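+                # Hand the raw key/value to the source node for this record's
+                # partition; the source node forwards it through its child
+                # processors, typically via the context's forward() hook (see
+                # forward() below), so this single call drives the record
+                # through the whole topology sub-graph.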
self._curr_node.process(self._curr_record.key, self._curr_record.value) + + log.debug('Completed processing one record [%s]', self._curr_record) + + # update the consumed offset map after processing is done + self.consumed_offsets[partition] = self._curr_record.offset + self._commit_offset_needed = True + + # after processing this record, if its partition queue's + # buffered size has been decreased to the threshold, we can then + # resume the consumption on this partition + if self._record_info.queue().size() == self.max_buffered_size: + self.consumer.resume(partition) + self.requires_poll = True + + if self.partition_group.top_queue_size() <= self.max_buffered_size: + self.requires_poll = True + + finally: + self._curr_record = None + self._curr_node = None + + return self.partition_group.num_buffered() + + def maybe_punctuate(self): + """Possibly trigger registered punctuation functions if + current partition group timestamp has reached the defined stamp + """ + timestamp = self.partition_group.timestamp() + + # if the timestamp is not known yet, meaning there is not enough data + # accumulated to reason stream partition time, then skip. + if timestamp == -1: + return False + else: + return self._punctuation_queue.may_punctuate(timestamp, self) + + def punctuate(self, node, timestamp): + if self._curr_node is not None: + raise Errors.IllegalStateError('Current node is not null') + + self._curr_node = node + self._curr_record = (timestamp, DUMMY_RECORD) + + try: + node.processor().punctuate(timestamp) + finally: + self._curr_node = None + self._curr_record = None + + def record(self): + return self._curr_record + + def node(self): + return self._curr_node + + def commit(self): + """Commit the current task state""" + # 1) flush local state + self.state_mgr.flush() + + # 2) flush produced records in the downstream and change logs of local states + self.record_collector.flush() + + # 3) commit consumed offsets if it is dirty already + if self._commit_offset_needed: + consumed_offsets_and_metadata = {} + for partition, offset in self.consumed_offsets.items(): + consumed_offsets_and_metadata[partition] = OffsetAndMetadata(offset + 1) + self.state_mgr.put_offset_limit(partition, offset + 1) + self.consumer.commit_sync(consumed_offsets_and_metadata) + self._commit_offset_needed = False + + self._commit_requested = False + + def commit_needed(self): + """Whether or not a request has been made to commit the current state""" + return self._commit_requested + + def need_commit(self): + """Request committing the current task's state""" + self._commit_requested = True + + def schedule(self, interval_ms): + """Schedules a punctuation for the processor + + Arguments: + interval_ms (int): the interval in milliseconds + + Raises: IllegalStateError if the current node is not None + """ + if self._curr_node is None: + raise Errors.IllegalStateError('Current node is null') + + schedule = (0, self._curr_node, interval_ms) + self._punctuation_queue.schedule(schedule) + + def close(self): + self.partition_group.close() + self.consumed_offsets.clear() + + # close the processors + # make sure close() is called for each node even when there is a RuntimeException + exception = None + for node in self.topology.processors(): + self._curr_node = node + try: + node.close() + except RuntimeError as e: + exception = e + finally: + self._curr_node = None + + super(StreamTask, self).close() + + if exception is not None: + raise exception + + def record_collector_offsets(self): + return self.record_collector.offsets() + + 
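+    # A minimal sketch (not part of this commit) of how a stream thread is
+    # expected to drive a StreamTask: buffer fetched records into the task,
+    # drain and process them, fire punctuations, and commit when requested.
+    # The name `records_by_partition` is illustrative only.
+    #
+    #     def drive_task_once(task, records_by_partition):
+    #         for partition, records in records_by_partition.items():
+    #             task.add_records(partition, records)
+    #         while task.process() > 0:
+    #             task.maybe_punctuate()
+    #             if task.commit_needed():
+    #                 task.commit()
+    #
+    # process() returns the number of records still buffered in the partition
+    # group, so the loop drains the buffer before the caller polls again.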
+    def _create_record_queue(self, partition, source):
+        return RecordQueue(partition, source)
+
+    def forward(self, key, value, child_index=None, child_name=None):
+        this_node = self._curr_node
+        try:
+            children = this_node.children()
+
+            if child_index is not None:
+                children = [children[child_index]]
+            elif child_name is not None:
+                children = [child for child in children if child.name == child_name]
+
+            for child_node in children:
+                self._curr_node = child_node
+                child_node.process(key, value)
+        finally:
+            self._curr_node = this_node
diff --git a/kafka/streams/processor/topology.py b/kafka/streams/processor/topology.py
new file mode 100644
index 0000000..a2fa441
--- /dev/null
+++ b/kafka/streams/processor/topology.py
@@ -0,0 +1,21 @@
+class ProcessorTopology(object):
+
+    def __init__(self, processor_nodes, source_by_topics, state_store_suppliers):
+        self.processor_nodes = processor_nodes
+        self.source_by_topics = source_by_topics
+        self._state_store_suppliers = state_store_suppliers
+
+    def sourceTopics(self):
+        return set(self.source_by_topics)
+
+    def source(self, topic):
+        return self.source_by_topics.get(topic)
+
+    def sources(self):
+        return set(self.source_by_topics.values())
+
+    def processors(self):
+        return self.processor_nodes
+
+    def state_store_suppliers(self):
+        return self._state_store_suppliers
diff --git a/kafka/streams/processor/topology_builder.py b/kafka/streams/processor/topology_builder.py
new file mode 100644
index 0000000..223cda9
--- /dev/null
+++ b/kafka/streams/processor/topology_builder.py
@@ -0,0 +1,642 @@
+"""
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+""" +from __future__ import absolute_import + +import abc +import re + +import kafka.streams.errors as Errors +from .node import ProcessorNode, SourceNode, SinkNode +from .processor_state_manager import STATE_CHANGELOG_TOPIC_SUFFIX +from .quick_union import QuickUnion +from .topology import ProcessorTopology + + +class StateStoreFactory(object): + def __init__(self, is_internal, supplier): + self.users = set() + self.is_internal = is_internal + self.supplier = supplier + +class NodeFactory(object): + __metaclass__ = abc.ABCMeta + + def __init__(self, builder, name): + self.builder = builder + self.name = name + + @abc.abstractmethod + def build(self, application_id): + pass + +class ProcessorNodeFactory(NodeFactory): + def __init__(self, builder, name, parents, supplier): + self.builder = builder + self.name = name + self.parents = list(parents) + self.supplier = supplier + self.state_store_names = set() + + def add_state_store(self, state_store_name): + self.state_store_names.add(state_store_name) + + def build(self, application_id): + return ProcessorNode(self.name, self.supplier(), self.state_store_names) + +class SourceNodeFactory(NodeFactory): + def __init__(self, builder, name, topics, pattern, key_deserializer, val_deserializer): + self.builder = builder + self.name = name + self.topics = list(topics) if topics else None + self.pattern = pattern + self.key_deserializer = key_deserializer + self.val_deserializer = val_deserializer + + def get_topics(self): + return self.topics + + """ + def get_topics(self, subscribed_topics=None): + if not subscribed_topics: + return self.topics + matched_topics = [] + for update in subscribed_topics: + if self.pattern == topicToPatterns.get(update)) { + matchedTopics.add(update); + //not same pattern instance,but still matches not allowed + } else if (topicToPatterns.containsKey(update) && isMatch(update)) { + throw new TopologyBuilderException("Topic " + update + " already matched check for overlapping regex patterns"); + } else if (isMatch(update)) { + topicToPatterns.put(update, this.pattern); + matchedTopics.add(update); + } + } + return matchedTopics.toArray(new String[matchedTopics.size()]); + } + """ + + def build(self, application_id): + return SourceNode(self.name, self.key_deserializer, self.val_deserializer) + + """ + private boolean isMatch(String topic) { + return this.pattern.matcher(topic).matches(); + """ + +class SinkNodeFactory(NodeFactory): + def __init__(self, builder, name, parents, topic, key_serializer, val_serializer, partitioner): + self.builder = builder + self.name = name + self.parents = list(parents) + self.topic = topic + self.key_serializer = key_serializer + self.val_serializer = val_serializer + self.partitioner = partitioner + + def build(self, application_id): + if self.topic in self.builder.internal_topics: + sink_name = application_id + '-' + self.topic + else: + sink_name = self.topic + return SinkNode(self.name, sink_name, self.key_serializer, self.val_serializer, self.partitioner) + +class TopicsInfo(object): + def __init__(self, builder, sink_topics, source_topics, inter_source_topics, state_changelog_topics): + self.sink_topics = set(sink_topics) + self.source_topics = set(source_topics) + self.inter_source_topics = set(inter_source_topics) + self.state_changelog_topics = set(state_changelog_topics) + + def __eq__(self, other): + if isinstance(other, TopicsInfo): + return (other.source_topics == self.source_topics and + other.state_changelog_topics == self.state_changelog_topics) + else: + return False + + 
""" + @Override + public int hashCode() { + long n = ((long) sourceTopics.hashCode() << 32) | (long) stateChangelogTopics.hashCode(); + return (int) (n % 0xFFFFFFFFL); + """ + + +class TopologyBuilder(object): + """TopologyBuilder is used to build a ProcessorTopology. + + A topology contains an acyclic graph of sources, processors, and sinks. + + A source is a node in the graph that consumes one or more Kafka topics + and forwards them to its child nodes. + + A processor is a node in the graph that receives input records from + upstream nodes, processes the records, and optionally forwarding new + records to one or all of its children. + + A sink is a node in the graph that receives records from upstream nodes + and writes them to a Kafka topic. + + This builder allows you to construct an acyclic graph of these nodes, + and the builder is then passed into a new KafkaStreams instance that will + then begin consuming, processing, and producing records. + """ + + def __init__(self): + """Create a new builder.""" + # node factories in a topological order + self.node_factories = {} + + # state factories + self.state_factories = {} + + self.source_topic_names = set() + self.internal_topic_names = set() + self.node_grouper = QuickUnion() + self.copartition_source_groups = [] + self.node_to_source_topics = {} + self.node_to_source_patterns = {} + self.topic_to_patterns = {} + self.node_to_sink_topic = {} + self.subscription_updates = set() + self.application_id = None + + self._node_groups = None + self.topic_pattern = None + + def add_source(self, name, *topics, **kwargs): + """Add a named source node that consumes records from kafka. + + Source consumes named topics or topics that match a pattern and + forwards the records to child processor and/or sink nodes. + The source will use the specified key and value deserializers. + + Arguments: + name (str): unique name of the source used to reference this node + when adding processor children + topics (*str): one or more Kafka topics to consume with this source + + Keyword Arguments: + topic_pattern (str): pattern to match source topics + key_deserializer (callable): used when consuming records, if None, + uses the default key deserializer specified in the stream + configuration + val_deserializer (callable): the value deserializer used when + consuming records; if None, uses the default value + deserializer specified in the stream configuration. 
+ + Raises: TopologyBuilderError if processor is already added or if topics + have already been registered by another source + + Returns: self, so methods can be chained together + """ + topic_pattern = kwargs.get('topic_pattern', None) + key_deserializer = kwargs.get('key_deserializer', None) + val_deserializer = kwargs.get('val_deserializer', None) + if name in self.node_factories: + raise Errors.TopologyBuilderError("Processor " + name + " is already added.") + + if topic_pattern: + if topics: + raise Errors.TopologyBuilderError('Cannot supply both topics and a topic_pattern') + + for source_topic_name in self.source_topic_names: + if re.match(topic_pattern, source_topic_name): + raise Errors.TopologyBuilderError("Pattern " + topic_pattern + " will match a topic that has already been registered by another source.") + + self.node_to_source_patterns[name] = topic_pattern + self.node_factories[name] = SourceNodeFactory(self, name, None, topic_pattern, key_deserializer, val_deserializer) + self.node_grouper.add(name) + + return self + + for topic in topics: + if topic in self.source_topic_names: + raise Errors.TopologyBuilderError("Topic " + topic + " has already been registered by another source.") + + for pattern in self.node_to_source_patterns.values(): + if re.match(pattern, topic): + raise Errors.TopologyBuilderError("Topic " + topic + " matches a Pattern already registered by another source.") + + self.source_topic_names.add(topic) + + self.node_factories[name] = SourceNodeFactory(self, name, topics, None, key_deserializer, val_deserializer) + self.node_to_source_topics[name] = list(topics) + self.node_grouper.add(name) + + return self + + def add_sink(self, name, topic, *parent_names, **kwargs): + """Add a named sink node that writes records to a named kafka topic. + + The sink node forwards records from upstream parent processor and/or + source nodes to the named Kafka topic. The sink will use the specified + key and value serializers, and the supplied partitioner. + + Arguments; + name (str): unique name of the sink node + topic (str): name of the output topic for the sink + parent_names (*str): one or more source or processor nodes whose + output records should consumed by this sink and written to + the output topic + + Keyword Arguments: + key_serializer (callable): the key serializer used when consuming + records; if None, uses the default key serializer specified in + the stream configuration. + val_serializer (callable): the value serializer used when consuming + records; if None, uses the default value serializer specified + in the stream configuration. 
+ partitioner (callable): function used to determine the partition + for each record processed by the sink + + Raises: TopologyBuilderError if parent processor is not added yet, or + if this processor's name is equal to the parent's name + + Returns: self, so methods can be chained together + """ + key_serializer = kwargs.get('key_serializer', None) + val_serializer = kwargs.get('val_serializer', None) + partitioner = kwargs.get('partitioner', None) + if name in self.node_factories: + raise Errors.TopologyBuilderError("Processor " + name + " is already added.") + + for parent in parent_names: + if parent == name: + raise Errors.TopologyBuilderError("Processor " + name + " cannot be a parent of itself.") + if parent not in self.node_factories: + raise Errors.TopologyBuilderError("Parent processor " + parent + " is not added yet.") + + self.node_factories[name] = SinkNodeFactory(self, name, parent_names, topic, key_serializer, val_serializer, partitioner) + self.node_to_sink_topic[name] = topic + self.node_grouper.add(name) + self.node_grouper.unite(name, parent_names) + return self + + def add_processor(self, name, supplier, *parent_names): + """Add a node to process consumed messages from parent nodes. + + A processor node receives and processes records output by one or more + parent source or processor nodes. Any new record output by this + processor will be forwarded to its child processor or sink nodes. + + Arguments: + name (str): unique name of the processor node + supplier (callable): factory function that returns a Processor + parent_names (*str): the name of one or more source or processor + nodes whose output records this processor should receive + and process + + Returns: self (so methods can be chained together) + + Raises: TopologyBuilderError if parent processor is not added yet, + or if this processor's name is equal to the parent's name + """ + if name in self.node_factories: + raise Errors.TopologyBuilderError("Processor " + name + " is already added.") + + for parent in parent_names: + if parent == name: + raise Errors.TopologyBuilderError("Processor " + name + " cannot be a parent of itself.") + if not parent in self.node_factories: + raise Errors.TopologyBuilderError("Parent processor " + parent + " is not added yet.") + + self.node_factories[name] = ProcessorNodeFactory(self, name, parent_names, supplier) + self.node_grouper.add(name) + self.node_grouper.unite(name, parent_names) + return self + + def add_state_store(self, supplier, *processor_names, **kwargs): + """Adds a state store + + @param supplier the supplier used to obtain this state store {@link StateStore} instance + @return this builder instance so methods can be chained together; never null + @throws TopologyBuilderException if state store supplier is already added + """ + is_internal = kwargs.get('is_internal', True) + if supplier.name in self.state_factories: + raise Errors.TopologyBuilderError("StateStore " + supplier.name + " is already added.") + + self.state_factories[supplier.name] = StateStoreFactory(is_internal, supplier) + + for processor_name in processor_names: + self.connect_processor_and_state_store(processor_name, supplier.name) + + return self + + def connect_processor_and_state_stores(self, processor_name, *state_store_names): + """ + Connects the processor and the state stores + + @param processorName the name of the processor + @param stateStoreNames the names of state stores that the processor uses + @return this builder instance so methods can be chained together; never null + """ + for 
state_store_name in state_store_names: + self.connect_processor_and_state_store(processor_name, state_store_name) + + return self + + def connect_processors(self, *processor_names): + """ + Connects a list of processors. + + NOTE this function would not needed by developers working with the processor APIs, but only used + for the high-level DSL parsing functionalities. + + @param processorNames the name of the processors + @return this builder instance so methods can be chained together; never null + @throws TopologyBuilderException if less than two processors are specified, or if one of the processors is not added yet + """ + if len(processor_names) < 2: + raise Errors.TopologyBuilderError("At least two processors need to participate in the connection.") + + for processor_name in processor_names: + if processor_name not in self.node_factories: + raise Errors.TopologyBuilderError("Processor " + processor_name + " is not added yet.") + + self.node_grouper.unite(processor_names[0], processor_names[1:]) + + return self + + def add_internal_topic(self, topic_name): + """ + Adds an internal topic + + @param topicName the name of the topic + @return this builder instance so methods can be chained together; never null + """ + self.internal_topic_names.add(topic_name) + + return self + + def connect_processor_and_state_store(self, processor_name, state_store_name): + if state_store_name not in self.state_factories: + raise Errors.TopologyBuilderError("StateStore " + state_store_name + " is not added yet.") + if processor_name not in self.node_factories: + raise Errors.TopologyBuilderError("Processor " + processor_name + " is not added yet.") + + state_store_factory = self.state_factories.get(state_store_name) + for user in state_store_factory.users: + self.node_grouper.unite(user, [processor_name]) + state_store_factory.users.add(processor_name) + + node_factory = self.node_factories.get(processor_name) + if isinstance(node_factory, ProcessorNodeFactory): + node_factory.add_state_store(state_store_name) + else: + raise Errors.TopologyBuilderError("cannot connect a state store " + state_store_name + " to a source node or a sink node.") + + def topic_groups(self): + """ + Returns the map of topic groups keyed by the group id. + A topic group is a group of topics in the same task. + + @return groups of topic names + """ + topic_groups = {} + + if self.subscription_updates: + for name in self.node_to_source_patterns: + source_node = self.node_factories[name] + # need to update node_to_source_topics with topics matched from given regex + self.node_to_source_topics[name] = source_node.get_topics(self.subscription_updates) + + if self._node_groups is None: + self._node_groups = self.make_node_groups() + + for group_id, nodes in self._node_groups.items(): + sink_topics = set() + source_topics = set() + internal_source_topics = set() + state_changelog_topics = set() + for node in nodes: + # if the node is a source node, add to the source topics + topics = self.node_to_source_topics.get(node) + if topics: + # if some of the topics are internal, add them to the internal topics + for topic in topics: + if topic in self.internal_topic_names: + if self.application_id is None: + raise Errors.TopologyBuilderError("There are internal topics and" + " applicationId hasn't been " + "set. 
Call setApplicationId " + "first") + # prefix the internal topic name with the application id + internal_topic = self.application_id + "-" + topic + internal_source_topics.add(internal_topic) + source_topics.add(internal_topic) + else: + source_topics.add(topic) + + # if the node is a sink node, add to the sink topics + topic = self.node_to_sink_topic.get(node) + if topic: + if topic in self.internal_topic_names: + # prefix the change log topic name with the application id + sink_topics.add(self.application_id + "-" + topic) + else: + sink_topics.add(topic) + + # if the node is connected to a state, add to the state topics + for state_factory in self.state_factories.values(): + if state_factory.is_internal and node in state_factory.users: + # prefix the change log topic name with the application id + state_changelog_topics.add(self.application_id + "-" + state_factory.supplier.name + STATE_CHANGELOG_TOPIC_SUFFIX) + + topic_groups[group_id] = TopicsInfo( + self, + sink_topics, + source_topics, + internal_source_topics, + state_changelog_topics) + + return topic_groups + + def node_groups(self): + """ + Returns the map of node groups keyed by the topic group id. + + @return groups of node names + """ + if self._node_groups is None: + self._node_groups = self.make_node_groups() + + return self._node_groups + + def make_node_groups(self): + node_groups = {} + root_to_node_group = {} + + node_group_id = 0 + + # Go through source nodes first. This makes the group id assignment easy to predict in tests + for node_name in sorted(self.node_to_source_topics): + root = self.node_grouper.root(node_name) + node_group = root_to_node_group.get(root) + if node_group is None: + node_group = set() + root_to_node_group[root] = node_group + node_group_id += 1 + node_groups[node_group_id] = node_group + node_group.add(node_name) + + # Go through non-source nodes + for node_name in sorted(self.node_factories): + if node_name not in self.node_to_source_topics: + root = self.node_grouper.root(node_name) + node_group = root_to_node_group.get(root) + if node_group is None: + node_group = set() + root_to_node_group[root] = node_group + node_group_id += 1 + node_groups[node_group_id] = node_group + node_group.add(node_name) + + return node_groups + + def copartition_sources(self, source_nodes): + """ + Asserts that the streams of the specified source nodes must be copartitioned. + + @param sourceNodes a set of source node names + @return this builder instance so methods can be chained together; never null + """ + self.copartition_source_groups.append(source_nodes) + return self + + def copartition_groups(self): + """ + Returns the copartition groups. + A copartition group is a group of source topics that are required to be copartitioned. + + @return groups of topic names + """ + groups = [] + for node_names in self.copartition_source_groups: + copartition_group = set() + for node in node_names: + topics = self.node_to_source_topics.get(node) + if topics: + copartition_group.update(self.convert_internal_topic_names(topics)) + groups.append(copartition_group) + return groups + + def convert_internal_topic_names(self, *topics): + topic_names = [] + for topic in topics: + if topic in self.internal_topic_names: + if self.application_id is None: + raise Errors.TopologyBuilderError("there are internal topics " + "and applicationId hasn't been set. 
Call " + "setApplicationId first") + topic_names.append(self.application_id + "-" + topic) + else: + topic_names.append(topic) + return topic_names + + def build(self, application_id, topic_group_id=None, node_group=None): + """ + Build the topology for the specified topic group. This is called automatically when passing this builder into the + {@link org.apache.kafka.streams.KafkaStreams#KafkaStreams(TopologyBuilder, org.apache.kafka.streams.StreamsConfig)} constructor. + + @see org.apache.kafka.streams.KafkaStreams#KafkaStreams(TopologyBuilder, org.apache.kafka.streams.StreamsConfig) + """ + if topic_group_id is not None: + node_group = None + if topic_group_id is not None: + node_group = self.node_groups().get(topic_group_id) + else: + # when nodeGroup is null, we build the full topology. this is used in some tests. + node_group = None + + processor_nodes = [] + processor_map = {} + topic_source_map = {} + state_store_map = {} + + # create processor nodes in a topological order ("nodeFactories" is already topologically sorted) + for factory in self.node_factories.values(): + if node_group is None or factory.name in node_group: + node = factory.build(application_id) + processor_nodes.append(node) + processor_map[node.name] = node + + if isinstance(factory, ProcessorNodeFactory): + for parent in factory.parents: + processor_map[parent].add_child(node) + for state_store_name in factory.state_store_names: + if state_store_name not in state_store_map: + state_store_map[state_store_name] = self.state_factories[state_store_name].supplier + elif isinstance(factory, SourceNodeFactory): + if factory.pattern is not None: + topics = factory.get_topics(self.subscription_updates) + else: + topics = factory.get_topics() + for topic in topics: + if topic in self.internal_topic_names: + # prefix the internal topic name with the application id + topic_source_map[application_id + "-" + topic] = node + else: + topic_source_map[topic] = node + elif isinstance(factory, SinkNodeFactory): + for parent in factory.parents: + processor_map[parent].add_child(node) + else: + raise Errors.TopologyBuilderError("Unknown definition class: " + factory.__class__.__name__) + + return ProcessorTopology(processor_nodes, topic_source_map, state_store_map.values()) + + def source_topics(self): + """ + Get the names of topics that are to be consumed by the source nodes created by this builder. + @return the unmodifiable set of topic names used by source nodes, which changes as new sources are added; never null + """ + topics = set() + for topic in self.source_topic_names: + if topic in self.internal_topic_names: + if self.application_id is None: + raise Errors.TopologyBuilderError("there are internal topics and " + "applicationId is null. Call " + "setApplicationId before sourceTopics") + topics.add(self.application_id + "-" + topic) + else: + topics.add(topic) + return topics + + def source_topic_pattern(self): + if self.topic_pattern is None and self.node_to_source_patterns: + topic_pattern = '' + for pattern in self.node_to_source_patterns.values(): + topic_pattern += pattern + topic_pattern += "|" + if self.node_to_source_topics: + for topics in self.node_to_source_topics.values(): + for topic in topics: + topic_pattern += topic + topic_pattern += "|" + self.topic_pattern = topic_pattern[:-1] + return self.topic_pattern; + + def set_application_id(self, application_id): + """ + Set the applicationId. 
This is required before calling
+        source_topics(), topic_groups() and copartition_sources().
+
+        Arguments:
+            application_id (str): the streams application id; should be the
+                same as the 'application_id' configured for the streams
+                instance.
+        """
+        self.application_id = application_id
diff --git a/kafka/streams/utils.py b/kafka/streams/utils.py
new file mode 100644
index 0000000..b0161dc
--- /dev/null
+++ b/kafka/streams/utils.py
@@ -0,0 +1,20 @@
+import threading
+
+
+class AtomicInteger(object):
+    def __init__(self, val=0):
+        self._lock = threading.Lock()
+        self._val = val
+
+    def increment(self):
+        with self._lock:
+            self._val += 1
+            return self._val
+
+    def decrement(self):
+        with self._lock:
+            self._val -= 1
+            return self._val
+
+    def get(self):
+        return self._val
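For orientation, here is a minimal sketch of how the TopologyBuilder API above is intended to be wired together. It is not part of this commit; the UpperCaseProcessor class, topic names, and application id are hypothetical, and it assumes the ProcessorContext passed to init() exposes a forward() hook as in the Java API.

    from kafka.streams.processor.topology_builder import TopologyBuilder


    class UpperCaseProcessor(object):
        """Hypothetical processor: upper-cases string values and forwards them."""

        def init(self, context):
            self.context = context

        def process(self, key, value):
            # forward() is assumed to hand the result to downstream nodes
            self.context.forward(key, value.upper() if value is not None else None)

        def punctuate(self, timestamp):
            pass

        def close(self):
            pass


    builder = TopologyBuilder()
    builder.add_source('words-source', 'words')
    builder.add_processor('upper-case', UpperCaseProcessor, 'words-source')
    builder.add_sink('words-sink', 'uppercased-words', 'upper-case')
    builder.set_application_id('example-app')   # required before source_topics()/topic_groups()
    topology = builder.build('example-app')     # no group filter: build the full ProcessorTopology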