diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/rabbit_error_logger_file_h.erl | 2 | ||||
| -rw-r--r-- | src/rabbit_node_monitor.erl | 41 |
2 files changed, 42 insertions, 1 deletions
diff --git a/src/rabbit_error_logger_file_h.erl b/src/rabbit_error_logger_file_h.erl index 3efc9c0ccb..c00c1df95d 100644 --- a/src/rabbit_error_logger_file_h.erl +++ b/src/rabbit_error_logger_file_h.erl @@ -76,6 +76,8 @@ init_file(File, PrevHandler) -> Error -> Error end. +handle_event({info_report, _, {_, std_info, _}}, State) -> + ok; %% filter out "application: foo; exited: stopped; type: temporary" handle_event(Event, State) -> error_logger_file_h:handle_event(Event, State). diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 71c2c80ac7..5d5879774f 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -270,7 +270,46 @@ handle_dead_rabbit(Node) -> ok = rabbit_networking:on_node_down(Node), ok = rabbit_amqqueue:on_node_down(Node), ok = rabbit_alarm:on_node_down(Node), - ok = rabbit_mnesia:on_node_down(Node). + ok = rabbit_mnesia:on_node_down(Node), + case application:get_env(rabbit, cluster_partition_handling) of + {ok, pause_minority} -> + case majority() of + true -> ok; + false -> await_cluster_recovery() + end; + {ok, ignore} -> + ok; + {ok, Term} -> + rabbit_log:warning("cluster_partition_handling ~p unrecognised, " + "assuming 'ignore'~n", [Term]), + ok + end, + ok. + +majority() -> + Nodes = rabbit_mnesia:cluster_nodes(all), + Alive = [N || N <- Nodes, pong =:= net_adm:ping(N)], + length(Alive) / length(Nodes) > 0.5. + +await_cluster_recovery() -> + rabbit_log:warning("Cluster minority status detected - awaiting recovery~n", + []), + Nodes = rabbit_mnesia:cluster_nodes(all), + spawn(fun () -> + %% If our group leader is inside an application we are about + %% to stop, application:stop/1 does not return. + group_leader(whereis(init), self()), + rabbit:stop(), + wait_for_cluster_recovery(Nodes) + end). + +wait_for_cluster_recovery(Nodes) -> + [erlang:disconnect_node(Node) || Node <- Nodes], + case majority() of + true -> rabbit:start(); + false -> timer:sleep(1000), + wait_for_cluster_recovery(Nodes) + end. handle_live_rabbit(Node) -> ok = rabbit_alarm:on_node_up(Node), |
