summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/rabbit_control_main.erl10
-rw-r--r--src/rabbit_mnesia.erl108
2 files changed, 81 insertions, 37 deletions
diff --git a/src/rabbit_control_main.erl b/src/rabbit_control_main.erl
index d927206bdb..10bb0f1bd7 100644
--- a/src/rabbit_control_main.erl
+++ b/src/rabbit_control_main.erl
@@ -26,11 +26,13 @@
-define(NODE_OPT, "-n").
-define(VHOST_OPT, "-p").
-define(RAM_OPT, "--ram").
+-define(OFFLINE_OPT, "--offline").
-define(QUIET_DEF, {?QUIET_OPT, flag}).
-define(NODE_DEF(Node), {?NODE_OPT, {option, Node}}).
-define(VHOST_DEF, {?VHOST_OPT, {option, "/"}}).
-define(RAM_DEF, {?RAM_OPT, flag}).
+-define(OFFLINE_DEF, {?OFFLINE_OPT, flag}).
-define(GLOBAL_DEFS(Node), [?QUIET_DEF, ?NODE_DEF(Node)]).
@@ -46,7 +48,7 @@
{join_cluster, [?RAM_DEF]},
change_node_type,
recluster,
- remove_node,
+ {remove_node, [?OFFLINE_OPT]},
cluster_status,
add_user,
@@ -256,10 +258,12 @@ action(recluster, Node, [ClusterNodeS], _Opts, Inform) ->
Inform("Re-clustering ~p with ~p", [Node, ClusterNode]),
rpc_call(Node, rabbit_mnesia, recluster, [ClusterNode]);
-action(remove_node, Node, [ClusterNodeS], _Opts, Inform) ->
+action(remove_node, Node, [ClusterNodeS], Opts, Inform) ->
ClusterNode = list_to_atom(ClusterNodeS),
+ RemoveWhenOffline = proplists:get_bool(?OFFLINE_OPT, Opts),
Inform("Removing node ~p from cluster", [ClusterNode]),
- rpc_call(Node, rabbit_mnesia, remove_node, [ClusterNode]);
+ rpc_call(Node, rabbit_mnesia, remove_node,
+ [ClusterNode, RemoveWhenOffline]);
action(wait, Node, [PidFile], _Opts, Inform) ->
Inform("Waiting for ~p", [Node]),
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index d285123168..400c3d7d0f 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -24,7 +24,7 @@
force_reset/0,
recluster/1,
change_node_type/1,
- remove_node/1,
+ remove_node/2,
status/0,
is_db_empty/0,
@@ -77,7 +77,7 @@
-spec(force_reset/0 :: () -> 'ok').
-spec(recluster/1 :: (node()) -> 'ok').
-spec(change_node_type/1 :: (node_type()) -> 'ok').
--spec(remove_node/1 :: (node()) -> 'ok').
+-spec(remove_node/2 :: (node(), boolean()) -> 'ok').
%% Various queries to get the status of the db
-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} |
@@ -294,44 +294,82 @@ recluster(DiscoveryNode) ->
ok.
-%% We proceed like this: try to remove the node locally. If mnesia is offline
-%% then we try to remove it remotely on some other node. If there are no other
-%% nodes running, then *if the current node is a disc node* we force-load mnesia
-%% and remove the node.
-remove_node(Node) ->
- case ordsets:is_element(Node, all_clustered_nodes()) of
+%% We proceed like this: try to remove the node locally. If the node if offline,
+%% we remove the node if:
+%% * This node is a disc node
+%% * All other nodes are offline
+%% * This node was, at the best of our knowledge (see comment below) the last
+%% or second to last after the node we're removing to go down
+remove_node(Node, RemoveWhenOffline) ->
+ case is_clustered() of
true -> ok;
false -> throw({error, {not_a_cluster_node,
"The node selected is not in the cluster."}})
end,
+ case {mnesia:system_info(is_running), RemoveWhenOffline} of
+ {yes, true} -> throw({error, {online_node_offline_flag,
+ "You set the --offline flag, which is "
+ "used to remove nodes remotely from "
+ "offline nodes, but this node is "
+ "online. "}});
+ _ -> ok
+ end,
case remove_node_if_mnesia_running(Node) of
ok ->
ok;
{error, mnesia_not_running} ->
- case remove_node_remotely(Node) of
- ok ->
- ok;
- {error, no_running_cluster_nodes} ->
- case is_disc_node() of
- false ->
- throw({error,
- {removing_node_from_ram_node,
- "There are no nodes running and this is a "
- "RAM node"}});
- true ->
+ case {ordsets:del_element(Node,
+ running_nodes(all_clustered_nodes())),
+ is_disc_node(), RemoveWhenOffline}
+ of
+ {[], true, true} ->
+ %% Note that while we check if the nodes was the last to go
+ %% down, apart from the node we're removing from, this is
+ %% still unsafe.
+ %% Consider the situation in which A and B are clustered. A
+ %% goes down, and records B as the running node. Then B gets
+ %% clustered with C, C goes down and B goes down. In this
+ %% case, C is the second-to-last, but we don't know that and
+ %% we'll remove B from A anyway, even if that will lead to
+ %% bad things.
+ case ordsets:subtract(running_clustered_nodes(),
+ ordsets:from_list([node(), Node]))
+ of
+ [] ->
start_mnesia(),
try
[mnesia:force_load_table(T) ||
T <- rabbit_mnesia:table_names()],
- remove_node(Node),
+ remove_node(Node, false),
ensure_mnesia_running()
after
stop_mnesia()
- end
- end
+ end;
+ _ ->
+ throw({error,
+ {not_last_node_to_go_down,
+ "The node you're trying to remove was not "
+ "the last to go down (excluding the node "
+ "you are removing). Please use the the "
+ "last node to go down to remove nodes when "
+ "the cluster is offline."}})
+ end;
+ {_, _, false} ->
+ throw({error,
+ {offline_node_no_offline_flag,
+ "You are trying to remove a node from an offline "
+ "node. That's dangerous, but can be done with the "
+ "--offline flag. Please consult the manual for "
+ "rabbitmqctl for more informations."}});
+ {_, _, _} ->
+ throw({error,
+ {removing_node_from_offline_node,
+ "To remove a node remotely from an offline node, "
+ "the node you're removing from must be a disc node "
+ "and all the other nodes must be offline."}})
end;
- {error, Reason} ->
- throw({error, Reason})
+ Err = {error, _} ->
+ throw(Err)
end.
%%----------------------------------------------------------------------------
@@ -1007,18 +1045,17 @@ remove_node_if_mnesia_running(Node) ->
end.
leave_cluster() ->
- remove_node_remotely(node()).
-
-remove_node_remotely(Removee) ->
- case running_clustered_nodes() -- [Removee] of
- [] ->
- {error, no_running_cluster_nodes};
- RunningNodes ->
+ case {is_clustered(),
+ running_nodes(ordsets:del_element(node(), all_clustered_nodes()))}
+ of
+ {false, []} ->
+ ok;
+ {_, AllNodes} ->
case lists:any(
fun (Node) ->
case rpc:call(Node, rabbit_mnesia,
remove_node_if_mnesia_running,
- [Removee])
+ [node()])
of
ok ->
true;
@@ -1030,10 +1067,13 @@ remove_node_remotely(Removee) ->
false
end
end,
- RunningNodes)
+ AllNodes)
of
true -> ok;
- false -> {error, no_running_cluster_nodes}
+ false -> throw({error,
+ {no_running_cluster_nodes,
+ "You cannot leave a cluster if no online "
+ "nodes are present"}})
end
end.