diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/rabbit_control_main.erl | 10 | ||||
| -rw-r--r-- | src/rabbit_mnesia.erl | 108 |
2 files changed, 81 insertions, 37 deletions
diff --git a/src/rabbit_control_main.erl b/src/rabbit_control_main.erl index d927206bdb..10bb0f1bd7 100644 --- a/src/rabbit_control_main.erl +++ b/src/rabbit_control_main.erl @@ -26,11 +26,13 @@ -define(NODE_OPT, "-n"). -define(VHOST_OPT, "-p"). -define(RAM_OPT, "--ram"). +-define(OFFLINE_OPT, "--offline"). -define(QUIET_DEF, {?QUIET_OPT, flag}). -define(NODE_DEF(Node), {?NODE_OPT, {option, Node}}). -define(VHOST_DEF, {?VHOST_OPT, {option, "/"}}). -define(RAM_DEF, {?RAM_OPT, flag}). +-define(OFFLINE_DEF, {?OFFLINE_OPT, flag}). -define(GLOBAL_DEFS(Node), [?QUIET_DEF, ?NODE_DEF(Node)]). @@ -46,7 +48,7 @@ {join_cluster, [?RAM_DEF]}, change_node_type, recluster, - remove_node, + {remove_node, [?OFFLINE_OPT]}, cluster_status, add_user, @@ -256,10 +258,12 @@ action(recluster, Node, [ClusterNodeS], _Opts, Inform) -> Inform("Re-clustering ~p with ~p", [Node, ClusterNode]), rpc_call(Node, rabbit_mnesia, recluster, [ClusterNode]); -action(remove_node, Node, [ClusterNodeS], _Opts, Inform) -> +action(remove_node, Node, [ClusterNodeS], Opts, Inform) -> ClusterNode = list_to_atom(ClusterNodeS), + RemoveWhenOffline = proplists:get_bool(?OFFLINE_OPT, Opts), Inform("Removing node ~p from cluster", [ClusterNode]), - rpc_call(Node, rabbit_mnesia, remove_node, [ClusterNode]); + rpc_call(Node, rabbit_mnesia, remove_node, + [ClusterNode, RemoveWhenOffline]); action(wait, Node, [PidFile], _Opts, Inform) -> Inform("Waiting for ~p", [Node]), diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index d285123168..400c3d7d0f 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -24,7 +24,7 @@ force_reset/0, recluster/1, change_node_type/1, - remove_node/1, + remove_node/2, status/0, is_db_empty/0, @@ -77,7 +77,7 @@ -spec(force_reset/0 :: () -> 'ok'). -spec(recluster/1 :: (node()) -> 'ok'). -spec(change_node_type/1 :: (node_type()) -> 'ok'). --spec(remove_node/1 :: (node()) -> 'ok'). +-spec(remove_node/2 :: (node(), boolean()) -> 'ok'). %% Various queries to get the status of the db -spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | @@ -294,44 +294,82 @@ recluster(DiscoveryNode) -> ok. -%% We proceed like this: try to remove the node locally. If mnesia is offline -%% then we try to remove it remotely on some other node. If there are no other -%% nodes running, then *if the current node is a disc node* we force-load mnesia -%% and remove the node. -remove_node(Node) -> - case ordsets:is_element(Node, all_clustered_nodes()) of +%% We proceed like this: try to remove the node locally. If the node if offline, +%% we remove the node if: +%% * This node is a disc node +%% * All other nodes are offline +%% * This node was, at the best of our knowledge (see comment below) the last +%% or second to last after the node we're removing to go down +remove_node(Node, RemoveWhenOffline) -> + case is_clustered() of true -> ok; false -> throw({error, {not_a_cluster_node, "The node selected is not in the cluster."}}) end, + case {mnesia:system_info(is_running), RemoveWhenOffline} of + {yes, true} -> throw({error, {online_node_offline_flag, + "You set the --offline flag, which is " + "used to remove nodes remotely from " + "offline nodes, but this node is " + "online. "}}); + _ -> ok + end, case remove_node_if_mnesia_running(Node) of ok -> ok; {error, mnesia_not_running} -> - case remove_node_remotely(Node) of - ok -> - ok; - {error, no_running_cluster_nodes} -> - case is_disc_node() of - false -> - throw({error, - {removing_node_from_ram_node, - "There are no nodes running and this is a " - "RAM node"}}); - true -> + case {ordsets:del_element(Node, + running_nodes(all_clustered_nodes())), + is_disc_node(), RemoveWhenOffline} + of + {[], true, true} -> + %% Note that while we check if the nodes was the last to go + %% down, apart from the node we're removing from, this is + %% still unsafe. + %% Consider the situation in which A and B are clustered. A + %% goes down, and records B as the running node. Then B gets + %% clustered with C, C goes down and B goes down. In this + %% case, C is the second-to-last, but we don't know that and + %% we'll remove B from A anyway, even if that will lead to + %% bad things. + case ordsets:subtract(running_clustered_nodes(), + ordsets:from_list([node(), Node])) + of + [] -> start_mnesia(), try [mnesia:force_load_table(T) || T <- rabbit_mnesia:table_names()], - remove_node(Node), + remove_node(Node, false), ensure_mnesia_running() after stop_mnesia() - end - end + end; + _ -> + throw({error, + {not_last_node_to_go_down, + "The node you're trying to remove was not " + "the last to go down (excluding the node " + "you are removing). Please use the the " + "last node to go down to remove nodes when " + "the cluster is offline."}}) + end; + {_, _, false} -> + throw({error, + {offline_node_no_offline_flag, + "You are trying to remove a node from an offline " + "node. That's dangerous, but can be done with the " + "--offline flag. Please consult the manual for " + "rabbitmqctl for more informations."}}); + {_, _, _} -> + throw({error, + {removing_node_from_offline_node, + "To remove a node remotely from an offline node, " + "the node you're removing from must be a disc node " + "and all the other nodes must be offline."}}) end; - {error, Reason} -> - throw({error, Reason}) + Err = {error, _} -> + throw(Err) end. %%---------------------------------------------------------------------------- @@ -1007,18 +1045,17 @@ remove_node_if_mnesia_running(Node) -> end. leave_cluster() -> - remove_node_remotely(node()). - -remove_node_remotely(Removee) -> - case running_clustered_nodes() -- [Removee] of - [] -> - {error, no_running_cluster_nodes}; - RunningNodes -> + case {is_clustered(), + running_nodes(ordsets:del_element(node(), all_clustered_nodes()))} + of + {false, []} -> + ok; + {_, AllNodes} -> case lists:any( fun (Node) -> case rpc:call(Node, rabbit_mnesia, remove_node_if_mnesia_running, - [Removee]) + [node()]) of ok -> true; @@ -1030,10 +1067,13 @@ remove_node_remotely(Removee) -> false end end, - RunningNodes) + AllNodes) of true -> ok; - false -> {error, no_running_cluster_nodes} + false -> throw({error, + {no_running_cluster_nodes, + "You cannot leave a cluster if no online " + "nodes are present"}}) end end. |
