summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/rabbitmqctl.1.xml7
-rw-r--r--src/rabbit_mnesia.erl52
2 files changed, 34 insertions, 25 deletions
diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml
index 0f3c0faf20..1d6411440a 100644
--- a/docs/rabbitmqctl.1.xml
+++ b/docs/rabbitmqctl.1.xml
@@ -405,6 +405,13 @@
must be offline, while the node we are removing from must be
online, except when using the <command>--offline</command> flag.
</para>
+ <para>
+ When using the <command>--offline</command> flag, the node you
+ connect to will become the canonical source for cluster metadata
+ (e.g. which queues exist), even if it was not before. Therefore
+ you should run this command on the last node to shut down if
+ at all possible.
+ </para>
<para role="example-prefix">For example:</para>
<screen role="example">rabbitmqctl -n hare@mcnulty forget_cluster_node rabbit@stringer</screen>
<para role="example">
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index a0bad8c0a4..738b36cfd1 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -296,27 +296,18 @@ remove_node_offline_node(Node) ->
%% this operation from disc nodes.
case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of
{[], disc} ->
- %% Note that while we check if the nodes was the last to go down,
- %% apart from the node we're removing from, this is still unsafe.
- %% Consider the situation in which A and B are clustered. A goes
- %% down, and records B as the running node. Then B gets clustered
- %% with C, C goes down and B goes down. In this case, C is the
- %% second-to-last, but we don't know that and we'll remove B from A
- %% anyway, even if that will lead to bad things.
- case cluster_nodes(running) -- [node(), Node] of
- [] -> start_mnesia(),
- try
- %% What we want to do here is replace the last node to
- %% go down with the current node. The way we do this
- %% is by force loading the table, and making sure that
- %% they are loaded.
- rabbit_table:force_load(),
- rabbit_table:wait_for_replicated(),
- forget_cluster_node(Node, false)
- after
- stop_mnesia()
- end;
- _ -> e(not_last_node_to_go_down)
+ start_mnesia(),
+ try
+ %% What we want to do here is replace the last node to
+ %% go down with the current node. The way we do this
+ %% is by force loading the table, and making sure that
+ %% they are loaded.
+ rabbit_table:force_load(),
+ rabbit_table:wait_for_replicated(),
+ forget_cluster_node(Node, false),
+ force_load_next_boot()
+ after
+ stop_mnesia()
end;
{_, _} ->
e(removing_node_from_offline_node)
@@ -441,11 +432,13 @@ init_db(ClusterNodes, NodeType, CheckOtherNodes) ->
ok = create_schema();
{[], true, disc} ->
%% First disc node up
+ maybe_force_load(),
ok;
{[AnotherNode | _], _, _} ->
%% Subsequent node in cluster, catch up
ensure_version_ok(
rpc:call(AnotherNode, rabbit_version, recorded, [])),
+ maybe_force_load(),
ok = rabbit_table:wait_for_replicated(),
ok = rabbit_table:create_local_copy(NodeType)
end,
@@ -526,6 +519,19 @@ copy_db(Destination) ->
ok = ensure_mnesia_not_running(),
rabbit_file:recursive_copy(dir(), Destination).
+%% Path of the "force_load" marker file, located directly inside the
+%% directory returned by rabbit_mnesia:dir().
+force_load_filename() ->
+    MnesiaDir = rabbit_mnesia:dir(),
+    filename:join(MnesiaDir, "force_load").
+
+%% Writes an empty marker file at force_load_filename(). maybe_force_load/0
+%% checks for this file and force loads the tables when it is present.
+%% Returns whatever rabbit_file:write_file/2 returns.
+force_load_next_boot() ->
+    Marker = force_load_filename(),
+    rabbit_file:write_file(Marker, <<"">>).
+
+%% If the marker file written by force_load_next_boot/0 exists, force load
+%% the tables and then delete the marker; otherwise do nothing and return ok.
+%% When the marker exists, the result is that of rabbit_file:delete/1.
+maybe_force_load() ->
+    MarkerPresent = rabbit_file:is_file(force_load_filename()),
+    case MarkerPresent of
+        false ->
+            ok;
+        true ->
+            rabbit_table:force_load(),
+            rabbit_file:delete(force_load_filename())
+    end.
+
%% This does not guarantee us much, but it avoids some situations that
%% will definitely end up badly
check_cluster_consistency() ->
@@ -873,10 +879,6 @@ error_description(offline_node_no_offline_flag) ->
"You are trying to remove a node from an offline node. That is dangerous, "
"but can be done with the --offline flag. Please consult the manual "
"for rabbitmqctl for more information.";
-error_description(not_last_node_to_go_down) ->
- "The node you are trying to remove from was not the last to go down "
- "(excluding the node you are removing). Please use the the last node "
- "to go down to remove nodes when the cluster is offline.";
error_description(removing_node_from_offline_node) ->
"To remove a node remotely from an offline node, the node you are removing "
"from must be a disc node and all the other nodes must be offline.";