diff options
| author | Matthew Sackman <matthew@rabbitmq.com> | 2011-04-08 18:13:35 +0100 |
|---|---|---|
| committer | Matthew Sackman <matthew@rabbitmq.com> | 2011-04-08 18:13:35 +0100 |
| commit | 1864b5b876d5d5d60c0ee6ab0f9696fd736d34f6 (patch) | |
| tree | 224751eb83c88db1edc7c473106da2946d69cb69 | |
| parent | d363b6cf58759d8457799d1917e1bc97b85f5a0f (diff) | |
| download | rabbitmq-server-git-1864b5b876d5d5d60c0ee6ab0f9696fd736d34f6.tar.gz | |
Some initial HA design documentation
| -rw-r--r-- | src/rabbit_mirror_queue_coordinator.erl | 75 | ||||
| -rw-r--r-- | src/rabbit_mirror_queue_master.erl | 3 | ||||
| -rw-r--r-- | src/rabbit_mirror_queue_slave.erl | 3 |
3 files changed, 81 insertions, 0 deletions
diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 5fd07e6015..f780f6b589 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -35,6 +35,81 @@ -define(ONE_SECOND, 1000). +%%---------------------------------------------------------------------------- +%% +%% Mirror Queues +%% +%% A queue with mirrors consists of the following: +%% +%% #amqqueue{ pid, mirror_pids } +%% | | +%% +----------+ +-------+--------------+-----------...etc... +%% | | | +%% V V V +%% amqqueue_process---+ slave-----+ slave-----+ ...etc... +%% | BQ = master----+ | | BQ = vq | | BQ = vq | +%% | | BQ = vq | | +-+-------+ +-+-------+ +%% | +-+-------+ | | | +%% +-++-----|---------+ | | +%% || | | | +%% || coordinator-+ | | +%% || +-+---------+ | | +%% || | | | +%% || gm-+ -- -- -- -- gm-+- -- -- -- gm-+- -- --...etc... +%% || +--+ +--+ +--+ +%% || +%% consumers +%% +%% The master is merely an implementation of BQ, and thus is invoked +%% through the normal BQ interface by the amqqueue_process. The slaves +%% meanwhile are processes in their own right (as is the +%% coordinator). The coordinator and all slaves belong to the same gm +%% group. Every member of a gm group receives messages sent to the gm +%% group. Because the master is the BQ of amqqueue_process, it doesn't +%% have sole control over its mailbox, and as a result, the master +%% itself cannot be passed messages directly, yet it needs to react to +%% gm events, such as the death of slaves. Thus the master creates the +%% coordinator, and it is the coordinator that is the gm callback +%% module and event handler for the master. +%% +%% Consumers are only attached to the master. Thus the master is +%% responsible for informing all slaves when messages are fetched from +%% the BQ, when they're acked, and when they're requeued. +%% +%% The basic goal is to ensure that all slaves perform actions on +%% their BQ in the same order as the master. 
Thus the master +%% intercepts all events going to its BQ, and suitably broadcasts +%% these events on the gm. The slaves thus receive two streams of +%% events: one stream is via the gm, and one stream is from channels +%% directly. Note that whilst the stream via gm is guaranteed to be +%% consistently seen by all slaves, the same is not true of the stream +%% via channels. For example, in the event of an unexpected death of a +%% channel during a publish, only some of the mirrors may receive that +%% publish. As a result of this problem, the messages broadcast over +%% the gm contain published content, and thus slaves can operate +%% successfully on messages that they only receive via the gm. The key +%% purpose of also sending messages directly from the channels to the +%% slaves is that without this, in the event of the death of the +%% master, messages can be lost until a suitable slave is promoted. +%% +%% However, there are other reasons as well. For example, if confirms +%% are in use, then there is no guarantee that every slave will see +%% the delivery with the same msg_seq_no. As a result, the slaves have +%% to wait until they've seen both the publish via gm, and the publish +%% via the channel before they have enough information to be able to +%% issue the confirm, if necessary. Either form of publish can arrive +%% first, and a slave can be upgraded to the master at any point +%% during this process. Confirms continue to be issued correctly, +%% however. +%% +%% Because the slave is a full process, it impersonates parts of the +%% amqqueue API. However, it does not need to implement all parts: for +%% example, no ack or consumer-related message can arrive directly at +%% a slave from a channel: it is only publishes that pass both +%% directly to the slaves and go via gm. +%% +%%---------------------------------------------------------------------------- + start_link(Queue, GM) -> gen_server2:start_link(?MODULE, [Queue, GM], []). 
diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 664c706d33..e6a71370ae 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -42,6 +42,9 @@ ack_msg_id }). +%% For general documentation of HA design, see +%% rabbit_mirror_queue_coordinator +%% %% Some notes on transactions %% %% We don't support transactions on mirror queues. To do so is diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 70b5c43da6..89b8971cf1 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -16,6 +16,9 @@ -module(rabbit_mirror_queue_slave). +%% For general documentation of HA design, see +%% rabbit_mirror_queue_coordinator +%% %% We join the GM group before we add ourselves to the amqqueue %% record. As a result: %% 1. We can receive msgs from GM that correspond to messages we will |
