summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Simon MacMullen <simon@rabbitmq.com> 2014-02-26 10:18:24 +0000
committer: Simon MacMullen <simon@rabbitmq.com> 2014-02-26 10:18:24 +0000
commit: 354a4927cf2ba39b01804c7b0430b7571b2088b5 (patch)
tree: e10756bdbd6e8f236458da9fe2ef9267fde413c7 /src
parent: 05b62c27143e980529da39c552c90aae0f810539 (diff)
download: rabbitmq-server-git-354a4927cf2ba39b01804c7b0430b7571b2088b5.tar.gz
Eliminate the node_stopped message, since a badly-timed stop_app could cause us to miss it. Instead, go solely by whether the rabbit stops: if it stops for any reason other than autoheal, we just send it a message that it will ignore, and continue.
Diffstat (limited to 'src')
-rw-r--r-- src/rabbit_autoheal.erl 36
-rw-r--r-- src/rabbit_node_monitor.erl 8
2 files changed, 22 insertions, 22 deletions
diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl
index 43d35fb5e4..7dc5e55387 100644
--- a/src/rabbit_autoheal.erl
+++ b/src/rabbit_autoheal.erl
@@ -16,7 +16,7 @@
-module(rabbit_autoheal).
--export([init/0, maybe_start/1, node_down/2, handle_msg/3]).
+-export([init/0, maybe_start/1, rabbit_down/2, node_down/2, handle_msg/3]).
%% The named process we are running in.
-define(SERVER, rabbit_node_monitor).
@@ -75,6 +75,21 @@ maybe_start(State) ->
enabled() ->
{ok, autoheal} =:= application:get_env(rabbit, cluster_partition_handling).
+
+%% This is the winner receiving its last notification that a node has
+%% stopped - all nodes can now start again
+rabbit_down(Node, {winner_waiting, [Node], Notify}) ->
+ rabbit_log:info("Autoheal: final node has stopped, starting...~n",[]),
+ [{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify],
+ not_healing;
+
+rabbit_down(Node, {winner_waiting, WaitFor, Notify}) ->
+ {winner_waiting, WaitFor -- [Node], Notify};
+
+rabbit_down(_Node, State) ->
+ %% ignore, we already cancelled the autoheal process
+ State.
+
node_down(_Node, not_healing) ->
not_healing;
node_down(Node, _State) ->
@@ -127,7 +142,6 @@ handle_msg({winner_is, Winner},
fun () ->
MRef = erlang:monitor(process, {?SERVER, Winner}),
rabbit:stop(),
- send(Winner, {node_stopped, node()}),
receive
{'DOWN', MRef, process, {?SERVER, Winner}, _Reason} -> ok;
autoheal_safe_to_start -> ok
@@ -137,25 +151,9 @@ handle_msg({winner_is, Winner},
end),
restarting;
-%% This is the winner receiving its last notification that a node has
-%% stopped - all nodes can now start again
-handle_msg({node_stopped, Node},
- {winner_waiting, [Node], Notify}, _Partitions) ->
- rabbit_log:info("Autoheal: final node has stopped, starting...~n",[]),
- [{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify],
- not_healing;
-
-handle_msg({node_stopped, Node},
- {winner_waiting, WaitFor, Notify}, _Partitions) ->
- {winner_waiting, WaitFor -- [Node], Notify};
-
handle_msg(_, restarting, _Partitions) ->
%% ignore, we can contribute no further
- restarting;
-
-handle_msg({node_stopped, _Node}, State, _Partitions) ->
- %% ignore, we already cancelled the autoheal process
- State.
+ restarting.
%%----------------------------------------------------------------------------
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index c47e9b24d2..46dbd7b7a1 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -387,7 +387,8 @@ wait_for_cluster_recovery(Nodes) ->
wait_for_cluster_recovery(Nodes)
end.
-handle_dead_rabbit(Node, State = #state{partitions = Partitions}) ->
+handle_dead_rabbit(Node, State = #state{partitions = Partitions,
+ autoheal = Autoheal}) ->
%% TODO: This may turn out to be a performance hog when there are
%% lots of nodes. We really only need to execute some of these
%% statements on *one* node, rather than all of them.
@@ -404,8 +405,9 @@ handle_dead_rabbit(Node, State = #state{partitions = Partitions}) ->
[] -> [];
_ -> Partitions
end,
- ensure_ping_timer(State#state{partitions = Partitions1}).
-
+ ensure_ping_timer(
+ State#state{partitions = Partitions1,
+ autoheal = rabbit_autoheal:rabbit_down(Node, Autoheal)}).
ensure_ping_timer(State) ->
rabbit_misc:ensure_timer(