diff options
| author | Emile Joubert <emile@rabbitmq.com> | 2013-03-18 16:20:24 +0000 |
|---|---|---|
| committer | Emile Joubert <emile@rabbitmq.com> | 2013-03-18 16:20:24 +0000 |
| commit | ace35ee0dd04aa2f0d3dbba811c2ed8cbb3da19d (patch) | |
| tree | 69b18c93795a925bd24f055c3df094fadf653f4d /src | |
| parent | 7c25a669cc01dd7c51941ef0b48b8a4b4a3cfd8b (diff) | |
| parent | c50a3bb4b0bf39c609543a698584d3dcee0c05d6 (diff) | |
| download | rabbitmq-server-git-ace35ee0dd04aa2f0d3dbba811c2ed8cbb3da19d.tar.gz | |
Merged bug25474 and bug25486
Diffstat (limited to 'src')
| -rw-r--r-- | src/rabbit_node_monitor.erl | 37 |
1 file changed, 32 insertions, 5 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 98e26a6a6d..de53b7f0b3 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -249,7 +249,8 @@ handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), ok = handle_dead_rabbit(Node), [P ! {node_down, Node} || P <- pmon:monitored(Subscribers)], - {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}}; + {noreply, handle_dead_rabbit_state( + State#state{monitors = pmon:erase({rabbit, Node}, Monitors)})}; handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #state{subscribers = Subscribers}) -> @@ -257,10 +258,19 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason}, handle_info({mnesia_system_event, {inconsistent_database, running_partitioned_network, Node}}, - State = #state{partitions = Partitions}) -> + State = #state{partitions = Partitions, + monitors = Monitors}) -> + %% We will not get a node_up from this node - yet we should treat it as + %% up (mostly). + State1 = case pmon:is_monitored({rabbit, Node}, Monitors) of + true -> State; + false -> State#state{ + monitors = pmon:monitor({rabbit, Node}, Monitors)} + end, + ok = handle_live_rabbit(Node), Partitions1 = ordsets:to_list( ordsets:add_element(Node, ordsets:from_list(Partitions))), - {noreply, State#state{partitions = Partitions1}}; + {noreply, State1#state{partitions = Partitions1}}; handle_info(_Info, State) -> {noreply, State}. @@ -299,9 +309,14 @@ handle_dead_rabbit(Node) -> ok. majority() -> + length(alive_nodes()) / length(rabbit_mnesia:cluster_nodes(all)) > 0.5. + +%% mnesia:system_info(db_nodes) (and hence +%% rabbit_mnesia:cluster_nodes(running)) does not give reliable results +%% when partitioned. +alive_nodes() -> Nodes = rabbit_mnesia:cluster_nodes(all), - Alive = [N || N <- Nodes, pong =:= net_adm:ping(N)], - length(Alive) / length(Nodes) > 0.5. + [N || N <- Nodes, pong =:= net_adm:ping(N)]. 
await_cluster_recovery() -> rabbit_log:warning("Cluster minority status detected - awaiting recovery~n", @@ -325,6 +340,18 @@ wait_for_cluster_recovery(Nodes) -> wait_for_cluster_recovery(Nodes) end. +handle_dead_rabbit_state(State = #state{partitions = Partitions}) -> + %% If we have been partitioned, and we are now in the only remaining + %% partition, we no longer care about partitions - forget them. Note + %% that we do not attempt to deal with individual (other) partitions + %% going away. It's only safe to forget anything about partitions when + %% there are no partitions. + Partitions1 = case Partitions -- (Partitions -- alive_nodes()) of + [] -> []; + _ -> Partitions + end, + State#state{partitions = Partitions1}. + handle_live_rabbit(Node) -> ok = rabbit_alarm:on_node_up(Node), ok = rabbit_mnesia:on_node_up(Node). |
