diff options
| -rw-r--r-- | docs/rabbitmqctl.1.xml | 7 | ||||
| -rw-r--r-- | src/rabbit_mnesia.erl | 52 |
2 files changed, 34 insertions, 25 deletions
diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index 0f3c0faf20..1d6411440a 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -405,6 +405,13 @@ must be offline, while the node we are removing from must be online, except when using the <command>--offline</command> flag. </para> + <para> + When using the <command>--offline</command> flag the node you + connect to will become the canonical source for cluster metadata + (e.g. which queues exist), even if it was not before. Therefore + you should use this command on the latest node to shut down if + at all possible. + </para> <para role="example-prefix">For example:</para> <screen role="example">rabbitmqctl -n hare@mcnulty forget_cluster_node rabbit@stringer</screen> <para role="example"> diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index a0bad8c0a4..738b36cfd1 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -296,27 +296,18 @@ remove_node_offline_node(Node) -> %% this operation from disc nodes. case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of {[], disc} -> - %% Note that while we check if the nodes was the last to go down, - %% apart from the node we're removing from, this is still unsafe. - %% Consider the situation in which A and B are clustered. A goes - %% down, and records B as the running node. Then B gets clustered - %% with C, C goes down and B goes down. In this case, C is the - %% second-to-last, but we don't know that and we'll remove B from A - %% anyway, even if that will lead to bad things. - case cluster_nodes(running) -- [node(), Node] of - [] -> start_mnesia(), - try - %% What we want to do here is replace the last node to - %% go down with the current node. The way we do this - %% is by force loading the table, and making sure that - %% they are loaded. - rabbit_table:force_load(), - rabbit_table:wait_for_replicated(), - forget_cluster_node(Node, false) - after - stop_mnesia() - end; - _ -> e(not_last_node_to_go_down) + start_mnesia(), + try + %% What we want to do here is replace the last node to + %% go down with the current node. The way we do this + %% is by force loading the table, and making sure that + %% they are loaded. + rabbit_table:force_load(), + rabbit_table:wait_for_replicated(), + forget_cluster_node(Node, false), + force_load_next_boot() + after + stop_mnesia() end; {_, _} -> e(removing_node_from_offline_node) @@ -441,11 +432,13 @@ init_db(ClusterNodes, NodeType, CheckOtherNodes) -> ok = create_schema(); {[], true, disc} -> %% First disc node up + maybe_force_load(), ok; {[AnotherNode | _], _, _} -> %% Subsequent node in cluster, catch up ensure_version_ok( rpc:call(AnotherNode, rabbit_version, recorded, [])), + maybe_force_load(), ok = rabbit_table:wait_for_replicated(), ok = rabbit_table:create_local_copy(NodeType) end, @@ -526,6 +519,19 @@ copy_db(Destination) -> ok = ensure_mnesia_not_running(), rabbit_file:recursive_copy(dir(), Destination). +force_load_filename() -> + filename:join(rabbit_mnesia:dir(), "force_load"). + +force_load_next_boot() -> + rabbit_file:write_file(force_load_filename(), <<"">>). + +maybe_force_load() -> + case rabbit_file:is_file(force_load_filename()) of + true -> rabbit_table:force_load(), + rabbit_file:delete(force_load_filename()); + false -> ok + end. + %% This does not guarantee us much, but it avoids some situations that %% will definitely end up badly check_cluster_consistency() -> @@ -873,10 +879,6 @@ error_description(offline_node_no_offline_flag) -> "You are trying to remove a node from an offline node. That is dangerous, " "but can be done with the --offline flag. Please consult the manual " "for rabbitmqctl for more information."; -error_description(not_last_node_to_go_down) -> - "The node you are trying to remove from was not the last to go down " - "(excluding the node you are removing). Please use the the last node " - "to go down to remove nodes when the cluster is offline."; error_description(removing_node_from_offline_node) -> "To remove a node remotely from an offline node, the node you are removing " "from must be a disc node and all the other nodes must be offline."; |
