diff options
| author | Simon MacMullen <simon@rabbitmq.com> | 2015-03-05 12:01:45 +0000 |
|---|---|---|
| committer | Simon MacMullen <simon@rabbitmq.com> | 2015-03-05 12:01:45 +0000 |
| commit | 3efe9390419c3a842b74423387a621897fc0b820 (patch) | |
| tree | 5e816d144b343615b176e2da6d82dd326f5044a9 | |
| parent | 3625fbeb85b7903c30ca5790e7cc10de2310a9d3 (diff) | |
| parent | da57a42a26f82bce2205996d5b3870da55ae5368 (diff) | |
| download | rabbitmq-server-git-3efe9390419c3a842b74423387a621897fc0b820.tar.gz | |
Merge branch 'bug26628'
| -rw-r--r-- | src/rabbit_autoheal.erl | 190 |
1 file changed, 147 insertions, 43 deletions
diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl index af1795f953..6a87186364 100644 --- a/src/rabbit_autoheal.erl +++ b/src/rabbit_autoheal.erl @@ -24,6 +24,8 @@ -define(MNESIA_STOPPED_PING_INTERNAL, 200). +-define(AUTOHEAL_STATE_AFTER_RESTART, rabbit_autoheal_state_after_restart). + %%---------------------------------------------------------------------------- %% In order to autoheal we want to: @@ -46,9 +48,20 @@ %% stops - if a node stops for any other reason it just gets a message %% it will ignore, and otherwise we carry on. %% +%% Meanwhile, the leader may continue to receive new autoheal requests: +%% all of them are ignored. The winner notifies the leader when the +%% current autoheal process is finished (ie. when all losers stopped and +%% were asked to start again) or was aborted. When the leader receives +%% the notification or if it looses contact with the winner, it can +%% accept new autoheal requests. +%% %% The winner and the leader are not necessarily the same node. %% -%% Possible states: +%% The leader can be a loser and will restart in this case. It remembers +%% there is an autoheal in progress by temporarily saving the autoheal +%% state to the application environment. +%% +%% == Possible states == %% %% not_healing %% - the default @@ -57,31 +70,73 @@ %% - we are the winner and are waiting for all losing nodes to stop %% before telling them they can restart %% -%% about_to_heal -%% - we are the leader, and have already assigned the winner and -%% losers. We are part of the losers and we wait for the winner_is -%% announcement. This leader-specific state differs from not_healing -%% (the state other losers are in), because the leader could still -%% receive request_start messages: those subsequent requests must be -%% ignored. -%% -%% {leader_waiting, OutstandingStops} +%% {leader_waiting, Winner, Notify} %% - we are the leader, and have already assigned the winner and losers. 
-%% We are neither but need to ignore further requests to autoheal. +%% We are waiting for a confirmation from the winner that the autoheal +%% process has ended. Meanwhile we can ignore autoheal requests. +%% Because we may be a loser too, this state is saved to the application +%% environment and restored on startup. %% %% restarting %% - we are restarting. Of course the node monitor immediately dies %% then so this state does not last long. We therefore send the %% autoheal_safe_to_start message to the rabbit_outside_app_process %% instead. +%% +%% == Message flow == +%% +%% 1. Any node (leader included) >> {request_start, node()} >> Leader +%% When Mnesia detects it is running partitioned or +%% when a remote node starts, rabbit_node_monitor calls +%% rabbit_autoheal:maybe_start/1. The message above is sent to the +%% leader so the leader can take a decision. +%% +%% 2. Leader >> {become_winner, Losers} >> Winner +%% The leader notifies the winner so the latter can proceed with +%% the autoheal. +%% +%% 3. Winner >> {winner_is, Winner} >> All losers +%% The winner notifies losers they must stop. +%% +%% 4. Winner >> autoheal_safe_to_start >> All losers +%% When either all losers stopped or the autoheal process was +%% aborted, the winner notifies losers they can start again. +%% +%% 5. Leader >> report_autoheal_status >> Winner +%% The leader asks the autoheal status to the winner. This only +%% happens when the leader is a loser too. If this is not the case, +%% this message is never sent. +%% +%% 6. Winner >> {autoheal_finished, Winner} >> Leader +%% The winner notifies the leader that the autoheal process was +%% either finished or aborted (ie. autoheal_safe_to_start was sent +%% to losers). %%---------------------------------------------------------------------------- -init() -> not_healing. +init() -> + %% We check the application environment for a saved autoheal state + %% saved during a restart. 
If this node is a leader, it is used + %% to determine if it needs to ask the winner to report about the + %% autoheal progress. + State = case application:get_env(rabbit, ?AUTOHEAL_STATE_AFTER_RESTART) of + {ok, S} -> S; + undefined -> not_healing + end, + ok = application:unset_env(rabbit, ?AUTOHEAL_STATE_AFTER_RESTART), + case State of + {leader_waiting, Winner, _} -> + rabbit_log:info( + "Autoheal: in progress, requesting report from ~p~n", [Winner]), + send(Winner, report_autoheal_status); + _ -> + ok + end, + State. maybe_start(not_healing) -> case enabled() of - true -> [Leader | _] = lists:usort(rabbit_mnesia:cluster_nodes(all)), + true -> Leader = leader(), send(Leader, {request_start, node()}), rabbit_log:info("Autoheal request sent to ~p~n", [Leader]), not_healing; @@ -97,6 +152,9 @@ enabled() -> _ -> false end. +leader() -> + [Leader | _] = lists:usort(rabbit_mnesia:cluster_nodes(all)), + Leader. %% This is the winner receiving its last notification that a node has %% stopped - all nodes can now start again @@ -107,14 +165,13 @@ rabbit_down(Node, {winner_waiting, [Node], Notify}) -> rabbit_down(Node, {winner_waiting, WaitFor, Notify}) -> {winner_waiting, WaitFor -- [Node], Notify}; -rabbit_down(Node, {leader_waiting, [Node]}) -> - not_healing; - -rabbit_down(Node, {leader_waiting, WaitFor}) -> - {leader_waiting, WaitFor -- [Node]}; +rabbit_down(Winner, {leader_waiting, Winner, Losers}) -> + abort([Winner], Losers); rabbit_down(_Node, State) -> - %% ignore, we already cancelled the autoheal process + %% Ignore. Either: + %% o we already cancelled the autoheal process; + %% o we are still waiting the winner's report. State. 
node_down(_Node, not_healing) -> @@ -146,15 +203,10 @@ handle_msg({request_start, Node}, case node() =:= Winner of true -> handle_msg({become_winner, Losers}, not_healing, Partitions); - false -> send(Winner, {become_winner, Losers}), %% [0] - case lists:member(node(), Losers) of - true -> about_to_heal; - false -> {leader_waiting, Losers} - end + false -> send(Winner, {become_winner, Losers}), + {leader_waiting, Winner, Losers} end end; -%% [0] If we are a loser we will never receive this message - but it -%% won't stick in the mailbox as we are restarting anyway handle_msg({request_start, Node}, State, _Partitions) -> @@ -175,27 +227,49 @@ handle_msg({become_winner, Losers}, _ -> abort(Down, Losers) end; -handle_msg({winner_is, Winner}, - State, _Partitions) - when State =:= not_healing orelse State =:= about_to_heal -> - rabbit_log:warning( - "Autoheal: we were selected to restart; winner is ~p~n", [Winner]), - rabbit_node_monitor:run_outside_applications( - fun () -> - MRef = erlang:monitor(process, {?SERVER, Winner}), - rabbit:stop(), - receive - {'DOWN', MRef, process, {?SERVER, Winner}, _Reason} -> ok; - autoheal_safe_to_start -> ok - end, - erlang:demonitor(MRef, [flush]), - rabbit:start() - end), +handle_msg({winner_is, Winner}, State = not_healing, + _Partitions) -> + %% This node is a loser, nothing else. + restart_loser(State, Winner), + restarting; +handle_msg({winner_is, Winner}, State = {leader_waiting, Winner, _}, + _Partitions) -> + %% This node is the leader and a loser at the same time. + restart_loser(State, Winner), restarting; handle_msg(_, restarting, _Partitions) -> %% ignore, we can contribute no further - restarting. + restarting; + +handle_msg(report_autoheal_status, not_healing, _Partitions) -> + %% The leader is asking about the autoheal status to us (the + %% winner). This happens when the leader is a loser and it just + %% restarted. 
We are in the "not_healing" state, so the previous + %% autoheal process ended: let's tell this to the leader. + send(leader(), {autoheal_finished, node()}), + not_healing; + +handle_msg(report_autoheal_status, State, _Partitions) -> + %% Like above, the leader is asking about the autoheal status. We + %% are not finished with it. There is no need to send anything yet + %% to the leader: we will send the notification when it is over. + State; + +handle_msg({autoheal_finished, Winner}, + {leader_waiting, Winner, _}, _Partitions) -> + %% The winner is finished with the autoheal process and notified us + %% (the leader). We can transition to the "not_healing" state and + %% accept new requests. + rabbit_log:info("Autoheal finished according to winner ~p~n", [Winner]), + not_healing; + +handle_msg({autoheal_finished, Winner}, not_healing, _Partitions) + when Winner =:= node() -> + %% We are the leader and the winner. The state already transitioned + %% to "not_healing" at the end of the autoheal process. + rabbit_log:info("Autoheal finished according to winner ~p~n", [node()]), + not_healing. %%---------------------------------------------------------------------------- @@ -220,6 +294,7 @@ winner_finish(Notify) -> %% losing nodes before sending the "autoheal_safe_to_start" signal. wait_for_mnesia_shutdown(Notify), [{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify], + send(leader(), {autoheal_finished, node()}), not_healing. wait_for_mnesia_shutdown([Node | Rest] = AllNodes) -> @@ -238,6 +313,35 @@ wait_for_mnesia_shutdown([Node | Rest] = AllNodes) -> wait_for_mnesia_shutdown([]) -> ok. 
+restart_loser(State, Winner) -> + rabbit_log:warning( + "Autoheal: we were selected to restart; winner is ~p~n", [Winner]), + rabbit_node_monitor:run_outside_applications( + fun () -> + MRef = erlang:monitor(process, {?SERVER, Winner}), + rabbit:stop(), + NextState = receive + {'DOWN', MRef, process, {?SERVER, Winner}, _Reason} -> + not_healing; + autoheal_safe_to_start -> + State + end, + erlang:demonitor(MRef, [flush]), + %% During the restart, the autoheal state is lost so we + %% store it in the application environment temporarily so + %% init/0 can pick it up. + %% + %% This is useful to the leader which is a loser at the + %% same time: because the leader is restarting, there + %% is a great chance it misses the "autoheal finished!" + %% notification from the winner. Thanks to the saved + %% state, it knows it needs to ask the winner if the + %% autoheal process is finished or not. + application:set_env(rabbit, + ?AUTOHEAL_STATE_AFTER_RESTART, NextState), + rabbit:start() + end). + make_decision(AllPartitions) -> Sorted = lists:sort([{partition_value(P), P} || P <- AllPartitions]), [[Winner | _] | Rest] = lists:reverse([P || {_, P} <- Sorted]), |
