diff options
| author | Simon MacMullen <simon@rabbitmq.com> | 2011-02-16 17:58:58 +0000 |
|---|---|---|
| committer | Simon MacMullen <simon@rabbitmq.com> | 2011-02-16 17:58:58 +0000 |
| commit | 5766f2a1067b899dda0836eee7523651acc5e040 (patch) | |
| tree | 88aff5ba5192ea93fb02b591d171785ca9a91e0e /src | |
| parent | 30db2e72cb0f3b92f4ae16b383c6cb8375a2305b (diff) | |
| download | rabbitmq-server-git-5766f2a1067b899dda0836eee7523651acc5e040.tar.gz | |
(Untested) Record the nodes that were up when we shut down.
Diffstat (limited to 'src')
| -rw-r--r-- | src/rabbit.erl | 1 | ||||
| -rw-r--r-- | src/rabbit_mnesia.erl | 36 | ||||
| -rw-r--r-- | src/rabbit_upgrade.erl | 46 |
3 files changed, 73 insertions, 10 deletions
diff --git a/src/rabbit.erl b/src/rabbit.erl index 1beed5c1a7..ffb6610d5d 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -203,6 +203,7 @@ start() -> end. stop() -> + rabbit_mnesia:record_running_disc_nodes(), ok = rabbit_misc:stop_applications(?APPS). stop_and_halt() -> diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index e7da6a43d1..3f7fc0d8c3 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -21,7 +21,9 @@ cluster/1, force_cluster/1, reset/0, force_reset/0, is_clustered/0, running_clustered_nodes/0, all_clustered_nodes/0, empty_ram_only_tables/0, copy_db/1, - create_cluster_nodes_config/1, read_cluster_nodes_config/0]). + create_cluster_nodes_config/1, read_cluster_nodes_config/0, + record_running_disc_nodes/0, read_previous_run_disc_nodes/0, + delete_previous_run_disc_nodes/0, running_nodes_filename/0]). -export([table_names/0]). @@ -57,6 +59,10 @@ -spec(copy_db/1 :: (file:filename()) -> rabbit_types:ok_or_error(any())). -spec(create_cluster_nodes_config/1 :: ([node()]) -> 'ok'). -spec(read_cluster_nodes_config/0 :: () -> [node()]). +-spec(record_running_disc_nodes/0 :: () -> 'ok'). +-spec(read_previous_run_disc_nodes/0 :: () -> [node()]). +-spec(delete_previous_run_disc_nodes/0 :: () -> 'ok'). +-spec(running_nodes_filename/0 :: () -> file:filename()). -endif. @@ -349,6 +355,34 @@ delete_cluster_nodes_config() -> FileName, Reason}}) end. +running_nodes_filename() -> + dir() ++ "/nodes_running_at_shutdown". + +record_running_disc_nodes() -> + FileName = running_nodes_filename(), + Nodes = rabbit_mnesia:nodes_of_type(disc_copies) -- [node()], + %% Don't check the result: we're shutting down anyway and this is + %% a best-effort-basis. + rabbit_misc:write_term_file(FileName, [Nodes]). 
+ +read_previous_run_disc_nodes() -> + FileName = running_nodes_filename(), + case rabbit_misc:read_term_file(FileName) of + {ok, [Nodes]} -> Nodes; + {error, enoent} -> []; + {error, Reason} -> throw({error, {cannot_read_previous_nodes_file, + FileName, Reason}}) + end. + +delete_previous_run_disc_nodes() -> + FileName = running_nodes_filename(), + case file:delete(FileName) of + ok -> ok; + {error, enoent} -> ok; + {error, Reason} -> throw({error, {cannot_delete_previous_nodes_file, + FileName, Reason}}) + end. + %% Take a cluster node config and create the right kind of node - a %% standalone disk node, or disk or ram node connected to the %% specified cluster nodes. If Force is false, don't allow diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index 0fdb973b30..2377068675 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -49,8 +49,8 @@ %% clusters. %% %% Firstly, we have two different types of upgrades to do: Mnesia and -%% everythinq else. Mnesia upgrades need to only be done by one node -%% in the cluster (we treat a non-clustered node as a single-node +%% everything else. Mnesia upgrades must only be done by one node in +%% the cluster (we treat a non-clustered node as a single-node %% cluster). This is the primary upgrader. The other upgrades need to %% be done by all nodes. %% @@ -75,7 +75,7 @@ %% into the boot process by prelaunch before the mnesia application is %% started. By the time Mnesia is started the upgrades have happened %% (on the primary), or Mnesia has been reset (on the secondary) and -%% rabbit_mnesia:init_db/2 can then make the node rejoin the clister +%% rabbit_mnesia:init_db/2 can then make the node rejoin the cluster %% in the normal way. %% %% The non-mnesia upgrades are then triggered by @@ -83,6 +83,22 @@ %% upgrade process to only require Mnesia upgrades, or only require %% non-Mnesia upgrades. In the latter case no Mnesia resets and %% reclusterings occur. 
+%% +%% The primary upgrader needs to be a disc node. Ideally we would like +%% it to be the last disc node to shut down (since otherwise there's a +%% risk of data loss). On each node we therefore record the disc nodes +%% that were still running when we shut down. A disc node that knows +%% other nodes were up when it shut down, or a ram node, will refuse +%% to be the primary upgrader, and will thus not start when upgrades +%% are needed. +%% +%% However, this is racy if several nodes are shut down at once. Since +%% rabbit records the running nodes, and shuts down before mnesia, the +%% race manifests as all disc nodes thinking they are not the primary +%% upgrader. Therefore the user can remove the record of the last disc +%% node to shut down to get things going again. This may lose any +%% mnesia changes that happened after the node chosen as the primary +%% upgrader was shut down. %% ------------------------------------------------------------------- @@ -103,16 +119,28 @@ maybe_upgrade_mnesia() -> primary -> primary_upgrade(Upgrades, Nodes); secondary -> non_primary_upgrade(Nodes) end - end. + end, + ok = rabbit_mnesia:delete_previous_run_disc_nodes(). upgrade_mode(Nodes) -> case nodes_running(Nodes) of [] -> - case am_i_disc_node() of - true -> primary; - false -> die("Cluster upgrade needed but this is a ram " - "node.~n Please start any of the disc nodes " - "first.", []) + AfterUs = rabbit_mnesia:read_previous_run_disc_nodes(), + case {am_i_disc_node(), AfterUs} of + {true, []} -> + primary; + {true, _} -> + Filename = rabbit_mnesia:running_nodes_filename(), + die("Cluster upgrade needed but other disc nodes shut " + "down after this one.~n Please start one of the " + "disc nodes: ~p first.~n~n Note: if several disc " + "nodes were shut down simultaneously they may all " + "show this message. 
In which case, remove ~s on one " + "of them and start that.", [AfterUs, Filename]); + {false, _} -> + die("Cluster upgrade needed but this is a ram " + "node.~n Please start one of the disc nodes: " + "~p first.", [AfterUs]) end; [Another|_] -> ClusterVersion = |
