summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Simon MacMullen <simon@rabbitmq.com> 2015-02-23 15:18:06 +0000
committer Simon MacMullen <simon@rabbitmq.com> 2015-02-23 15:18:06 +0000
commit bdf92102a7094a2c80e2bb52ea2cc69226795f36 (patch)
tree 88a24ea441415247990de876cdb69e604077e8d5
parent bcb88a2e53ac7e2c63e5a5cbf7346c193992cf1b (diff)
parent be037e00261e6b15f9236c24f9fe675b7158a981 (diff)
download rabbitmq-server-git-bdf92102a7094a2c80e2bb52ea2cc69226795f36.tar.gz
Merge branch 'bug26465'
-rw-r--r-- src/rabbit_autoheal.erl 9
-rw-r--r-- src/rabbit_channel.erl 4
-rw-r--r-- src/rabbit_node_monitor.erl 93
3 files changed, 77 insertions, 29 deletions
diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl
index 09e9aa6ae9..af1795f953 100644
--- a/src/rabbit_autoheal.erl
+++ b/src/rabbit_autoheal.erl
@@ -16,7 +16,8 @@
-module(rabbit_autoheal).
--export([init/0, maybe_start/1, rabbit_down/2, node_down/2, handle_msg/3]).
+-export([init/0, enabled/0, maybe_start/1, rabbit_down/2, node_down/2,
+ handle_msg/3]).
%% The named process we are running in.
-define(SERVER, rabbit_node_monitor).
@@ -90,7 +91,11 @@ maybe_start(State) ->
State.
enabled() ->
- {ok, autoheal} =:= application:get_env(rabbit, cluster_partition_handling).
+ case application:get_env(rabbit, cluster_partition_handling) of
+ {ok, autoheal} -> true;
+ {ok, {pause_if_all_down, _, autoheal}} -> true;
+ _ -> false
+ end.
%% This is the winner receiving its last notification that a node has
diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl
index 83b3c53d0c..63a5eb7e79 100644
--- a/src/rabbit_channel.erl
+++ b/src/rabbit_channel.erl
@@ -1736,7 +1736,7 @@ send_nacks(_, State) ->
send_confirms(State = #ch{tx = none, confirmed = []}) ->
State;
send_confirms(State = #ch{tx = none, confirmed = C}) ->
- case rabbit_node_monitor:pause_minority_guard() of
+ case rabbit_node_monitor:pause_partition_guard() of
ok -> MsgSeqNos =
lists:foldl(
fun ({MsgSeqNo, XName}, MSNs) ->
@@ -1748,7 +1748,7 @@ send_confirms(State = #ch{tx = none, confirmed = C}) ->
pausing -> State
end;
send_confirms(State) ->
- case rabbit_node_monitor:pause_minority_guard() of
+ case rabbit_node_monitor:pause_partition_guard() of
ok -> maybe_complete_tx(State);
pausing -> State
end.
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index 82a7a89be9..12ac08120a 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -25,7 +25,7 @@
update_cluster_status/0, reset_cluster_status/0]).
-export([notify_node_up/0, notify_joined_cluster/0, notify_left_cluster/1]).
-export([partitions/0, partitions/1, status/1, subscribe/1]).
--export([pause_minority_guard/0]).
+-export([pause_partition_guard/0]).
%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
@@ -64,7 +64,7 @@
-spec(partitions/1 :: ([node()]) -> [{node(), [node()]}]).
-spec(status/1 :: ([node()]) -> {[{node(), [node()]}], [node()]}).
-spec(subscribe/1 :: (pid()) -> 'ok').
--spec(pause_minority_guard/0 :: () -> 'ok' | 'pausing').
+-spec(pause_partition_guard/0 :: () -> 'ok' | 'pausing').
-spec(all_rabbit_nodes_up/0 :: () -> boolean()).
-spec(run_outside_applications/1 :: (fun (() -> any())) -> pid()).
@@ -194,46 +194,66 @@ subscribe(Pid) ->
gen_server:cast(?SERVER, {subscribe, Pid}).
%%----------------------------------------------------------------------------
-%% pause_minority safety
+%% pause_minority/pause_if_all_down safety
%%----------------------------------------------------------------------------
%% If we are in a minority and pause_minority mode then a) we are
%% going to shut down imminently and b) we should not confirm anything
%% until then, since anything we confirm is likely to be lost.
%%
-%% We could confirm something by having an HA queue see the minority
+%% The same principles apply to a node which isn't part of the preferred
+%% partition when we are in pause_if_all_down mode.
+%%
+%% We could confirm something by having an HA queue see the pausing
%% state (and fail over into it) before the node monitor stops us, or
%% by using unmirrored queues and just having them vanish (and
%% confirming messages as thrown away).
%%
%% So we have channels call in here before issuing confirms, to do a
-%% lightweight check that we have not entered a minority state.
+%% lightweight check that we have not entered a pausing state.
-pause_minority_guard() ->
- case get(pause_minority_guard) of
- not_minority_mode ->
+pause_partition_guard() ->
+ case get(pause_partition_guard) of
+ not_pause_mode ->
ok;
undefined ->
{ok, M} = application:get_env(rabbit, cluster_partition_handling),
case M of
- pause_minority -> pause_minority_guard([]);
- _ -> put(pause_minority_guard, not_minority_mode),
- ok
+ pause_minority ->
+ pause_minority_guard([]);
+ {pause_if_all_down, PreferredNodes, _} ->
+ pause_if_all_down_guard(PreferredNodes, []);
+ _ ->
+ put(pause_partition_guard, not_pause_mode),
+ ok
end;
{minority_mode, Nodes} ->
- pause_minority_guard(Nodes)
+ pause_minority_guard(Nodes);
+ {pause_if_all_down_mode, PreferredNodes, Nodes} ->
+ pause_if_all_down_guard(PreferredNodes, Nodes)
end.
pause_minority_guard(LastNodes) ->
case nodes() of
LastNodes -> ok;
- _ -> put(pause_minority_guard, {minority_mode, nodes()}),
+ _ -> put(pause_partition_guard, {minority_mode, nodes()}),
case majority() of
false -> pausing;
true -> ok
end
end.
+pause_if_all_down_guard(PreferredNodes, LastNodes) ->
+ case nodes() of
+ LastNodes -> ok;
+ _ -> put(pause_partition_guard,
+ {pause_if_all_down_mode, PreferredNodes, nodes()}),
+ case in_preferred_partition(PreferredNodes) of
+ false -> pausing;
+ true -> ok
+ end
+ end.
+
%%----------------------------------------------------------------------------
%% gen_server callbacks
%%----------------------------------------------------------------------------
@@ -289,8 +309,9 @@ handle_cast(notify_node_up, State = #state{guid = GUID}) ->
%% 'check_partial_partition' to all the nodes it still thinks are
%% alive. If any of those (intermediate) nodes still see the "down"
%% node as up, they inform it that this has happened. The original
-%% node (in 'ignore' or 'autoheal' mode) will then disconnect from the
-%% intermediate node to "upgrade" to a full partition.
+%% node (in 'ignore', 'pause_if_all_down' or 'autoheal' mode) will then
+%% disconnect from the intermediate node to "upgrade" to a full
+%% partition.
%%
%% In pause_minority mode it will instead immediately pause until all
%% nodes come back. This is because the contract for pause_minority is
@@ -525,10 +546,11 @@ handle_dead_node(Node, State = #state{autoheal = Autoheal}) ->
%% that we can respond in the same way to "rabbitmqctl stop_app"
%% and "rabbitmqctl stop" as much as possible.
%%
- %% However, for pause_minority mode we can't do this, since we
- %% depend on looking at whether other nodes are up to decide
- %% whether to come back up ourselves - if we decide that based on
- %% the rabbit application we would go down and never come back.
+ %% However, for pause_minority and pause_if_all_down modes we can't do
+ %% this, since we depend on looking at whether other nodes are up
+ %% to decide whether to come back up ourselves - if we decide that
+ %% based on the rabbit application we would go down and never come
+ %% back.
case application:get_env(rabbit, cluster_partition_handling) of
{ok, pause_minority} ->
case majority() of
@@ -536,6 +558,17 @@ handle_dead_node(Node, State = #state{autoheal = Autoheal}) ->
false -> await_cluster_recovery(fun majority/0)
end,
State;
+ {ok, {pause_if_all_down, PreferredNodes, HowToRecover}} ->
+ case in_preferred_partition(PreferredNodes) of
+ true -> ok;
+ false -> await_cluster_recovery(
+ fun in_preferred_partition/0)
+ end,
+ case HowToRecover of
+ autoheal -> State#state{autoheal =
+ rabbit_autoheal:node_down(Node, Autoheal)};
+ _ -> State
+ end;
{ok, ignore} ->
State;
{ok, autoheal} ->
@@ -547,8 +580,8 @@ handle_dead_node(Node, State = #state{autoheal = Autoheal}) ->
end.
await_cluster_recovery(Condition) ->
- rabbit_log:warning("Cluster minority status detected - awaiting recovery~n",
- []),
+ rabbit_log:warning("Cluster minority/secondary status detected - "
+ "awaiting recovery~n", []),
run_outside_applications(fun () ->
rabbit:stop(),
wait_for_cluster_recovery(Condition)
@@ -681,15 +714,25 @@ disconnect(Node) ->
%% here. "rabbit" in a function's name implies we test if the rabbit
%% application is up, not just the node.
-%% As we use these functions to decide what to do in pause_minority
-%% state, they *must* be fast, even in the case where TCP connections
-%% are timing out. So that means we should be careful about whether we
-%% connect to nodes which are currently disconnected.
+%% As we use these functions to decide what to do in pause_minority or
+%% pause_if_all_down states, they *must* be fast, even in the case where
+%% TCP connections are timing out. So that means we should be careful
+%% about whether we connect to nodes which are currently disconnected.
majority() ->
Nodes = rabbit_mnesia:cluster_nodes(all),
length(alive_nodes(Nodes)) / length(Nodes) > 0.5.
+in_preferred_partition() ->
+ {ok, {pause_if_all_down, PreferredNodes, _}} =
+ application:get_env(rabbit, cluster_partition_handling),
+ in_preferred_partition(PreferredNodes).
+
+in_preferred_partition(PreferredNodes) ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ RealPreferredNodes = [N || N <- PreferredNodes, lists:member(N, Nodes)],
+ RealPreferredNodes =:= [] orelse alive_nodes(RealPreferredNodes) =/= [].
+
all_nodes_up() ->
Nodes = rabbit_mnesia:cluster_nodes(all),
length(alive_nodes(Nodes)) =:= length(Nodes).