summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/rabbit_error_logger_file_h.erl3
-rw-r--r--src/rabbit_node_monitor.erl50
2 files changed, 49 insertions, 4 deletions
diff --git a/src/rabbit_error_logger_file_h.erl b/src/rabbit_error_logger_file_h.erl
index 3efc9c0ccb..eb6247e0c2 100644
--- a/src/rabbit_error_logger_file_h.erl
+++ b/src/rabbit_error_logger_file_h.erl
@@ -76,6 +76,9 @@ init_file(File, PrevHandler) ->
Error -> Error
end.
+%% filter out "application: foo; exited: stopped; type: temporary"
+handle_event({info_report, _, {_, std_info, _}}, State) ->
+ {ok, State};
handle_event(Event, State) ->
error_logger_file_h:handle_event(Event, State).
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index 7411b3d6dc..47c753e30e 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -208,9 +208,10 @@ handle_call(_Request, _From, State) ->
%% mnesia information since the message can (and will) overtake the
%% mnesia propagation.
handle_cast({node_up, Node, NodeType},
- State = #state{monitors = Monitors}) ->
+ State = #state{monitors = Monitors, partitions = Partitions}) ->
+ State1 = State#state{partitions = Partitions -- [Node]},
case pmon:is_monitored({rabbit, Node}, Monitors) of
- true -> {noreply, State};
+ true -> {noreply, State1};
false -> rabbit_log:info("rabbit on node ~p up~n", [Node]),
{AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
write_cluster_status({add_node(Node, AllNodes),
@@ -220,7 +221,7 @@ handle_cast({node_up, Node, NodeType},
end,
add_node(Node, RunningNodes)}),
ok = handle_live_rabbit(Node),
- {noreply, State#state{
+ {noreply, State1#state{
monitors = pmon:monitor({rabbit, Node}, Monitors)}}
end;
handle_cast({joined_cluster, Node, NodeType}, State) ->
@@ -282,7 +283,48 @@ handle_dead_rabbit(Node) ->
ok = rabbit_networking:on_node_down(Node),
ok = rabbit_amqqueue:on_node_down(Node),
ok = rabbit_alarm:on_node_down(Node),
- ok = rabbit_mnesia:on_node_down(Node).
+ ok = rabbit_mnesia:on_node_down(Node),
+ case application:get_env(rabbit, cluster_partition_handling) of
+ {ok, pause_minority} ->
+ case majority() of
+ true -> ok;
+ false -> await_cluster_recovery()
+ end;
+ {ok, ignore} ->
+ ok;
+ {ok, Term} ->
+ rabbit_log:warning("cluster_partition_handling ~p unrecognised, "
+ "assuming 'ignore'~n", [Term]),
+ ok
+ end,
+ ok.
+
+majority() ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ Alive = [N || N <- Nodes, pong =:= net_adm:ping(N)],
+ length(Alive) / length(Nodes) > 0.5.
+
+await_cluster_recovery() ->
+ rabbit_log:warning("Cluster minority status detected - awaiting recovery~n",
+ []),
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ spawn(fun () ->
+ %% If our group leader is inside an application we are about
+ %% to stop, application:stop/1 does not return.
+ group_leader(whereis(init), self()),
+ %% Ensure only one restarting process at a time, will
+ %% exit(badarg) (harmlessly) if one is already running
+ register(rabbit_restarting_process, self()),
+ rabbit:stop(),
+ wait_for_cluster_recovery(Nodes)
+ end).
+
+wait_for_cluster_recovery(Nodes) ->
+ case majority() of
+ true -> rabbit:start();
+ false -> timer:sleep(1000),
+ wait_for_cluster_recovery(Nodes)
+ end.
handle_live_rabbit(Node) ->
ok = rabbit_alarm:on_node_up(Node),