summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSimon MacMullen <simon@rabbitmq.com>2011-02-16 17:58:58 +0000
committerSimon MacMullen <simon@rabbitmq.com>2011-02-16 17:58:58 +0000
commit5766f2a1067b899dda0836eee7523651acc5e040 (patch)
tree88aff5ba5192ea93fb02b591d171785ca9a91e0e /src
parent30db2e72cb0f3b92f4ae16b383c6cb8375a2305b (diff)
downloadrabbitmq-server-git-5766f2a1067b899dda0836eee7523651acc5e040.tar.gz
(Untested) Record the nodes that were up when we shut down.
Diffstat (limited to 'src')
-rw-r--r--src/rabbit.erl1
-rw-r--r--src/rabbit_mnesia.erl36
-rw-r--r--src/rabbit_upgrade.erl46
3 files changed, 73 insertions, 10 deletions
diff --git a/src/rabbit.erl b/src/rabbit.erl
index 1beed5c1a7..ffb6610d5d 100644
--- a/src/rabbit.erl
+++ b/src/rabbit.erl
@@ -203,6 +203,7 @@ start() ->
end.
stop() -> % Records the running disc nodes, then stops ?APPS via rabbit_misc.
+ rabbit_mnesia:record_running_disc_nodes(), % best effort: result deliberately not matched
ok = rabbit_misc:stop_applications(?APPS).
stop_and_halt() ->
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index e7da6a43d1..3f7fc0d8c3 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -21,7 +21,9 @@
cluster/1, force_cluster/1, reset/0, force_reset/0,
is_clustered/0, running_clustered_nodes/0, all_clustered_nodes/0,
empty_ram_only_tables/0, copy_db/1,
- create_cluster_nodes_config/1, read_cluster_nodes_config/0]).
+ create_cluster_nodes_config/1, read_cluster_nodes_config/0,
+ record_running_disc_nodes/0, read_previous_run_disc_nodes/0,
+ delete_previous_run_disc_nodes/0, running_nodes_filename/0]).
-export([table_names/0]).
@@ -57,6 +59,10 @@
-spec(copy_db/1 :: (file:filename()) -> rabbit_types:ok_or_error(any())).
-spec(create_cluster_nodes_config/1 :: ([node()]) -> 'ok').
-spec(read_cluster_nodes_config/0 :: () -> [node()]).
+-spec(record_running_disc_nodes/0 :: () -> 'ok').
+-spec(read_previous_run_disc_nodes/0 :: () -> [node()]).
+-spec(delete_previous_run_disc_nodes/0 :: () -> 'ok').
+-spec(running_nodes_filename/0 :: () -> file:filename()).
-endif.
@@ -349,6 +355,34 @@ delete_cluster_nodes_config() ->
FileName, Reason}})
end.
+%% Path of the file in which the disc nodes recorded at shutdown are
+%% kept. It lives directly under the directory returned by dir().
+running_nodes_filename() ->
+    Dir = dir(),
+    Dir ++ "/nodes_running_at_shutdown".
+
+%% Write to running_nodes_filename() the list of *other* disc nodes
+%% that are still running while this node shuts down. Always returns
+%% ok, as the -spec promises, even if the file cannot be written.
+record_running_disc_nodes() ->
+    FileName = running_nodes_filename(),
+    %% Only disc nodes that are still up can shut down after us, so
+    %% restrict the disc node list to the running ones.
+    %% NOTE(review): assumes running_clustered_nodes/0 returns the
+    %% cluster nodes that are currently up - confirm.
+    Running = running_clustered_nodes(),
+    %% nodes_of_type/1 is called locally so that this works whether or
+    %% not the function is exported from this module.
+    Nodes = [N || N <- nodes_of_type(disc_copies),
+                  N =/= node(),
+                  lists:member(N, Running)],
+    %% Don't check the result: we're shutting down anyway and this is
+    %% done on a best-effort basis.
+    _ = rabbit_misc:write_term_file(FileName, [Nodes]),
+    ok.
+
+%% Return the node list stored in running_nodes_filename(), or [] when
+%% that file does not exist. Any other read error is thrown as
+%% {error, {cannot_read_previous_nodes_file, FileName, Reason}}.
+read_previous_run_disc_nodes() ->
+    Path = running_nodes_filename(),
+    case rabbit_misc:read_term_file(Path) of
+        {error, enoent} ->
+            [];
+        {error, Why} ->
+            throw({error, {cannot_read_previous_nodes_file, Path, Why}});
+        {ok, [DiscNodes]} ->
+            DiscNodes
+    end.
+
+%% Delete the file named by running_nodes_filename(). A missing file
+%% counts as success; any other failure is thrown as
+%% {error, {cannot_delete_previous_nodes_file, FileName, Reason}}.
+delete_previous_run_disc_nodes() ->
+    Path = running_nodes_filename(),
+    case file:delete(Path) of
+        {error, enoent} ->
+            ok;
+        {error, Why} ->
+            throw({error, {cannot_delete_previous_nodes_file, Path, Why}});
+        ok ->
+            ok
+    end.
+
%% Take a cluster node config and create the right kind of node - a
%% standalone disk node, or disk or ram node connected to the
%% specified cluster nodes. If Force is false, don't allow
diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl
index 0fdb973b30..2377068675 100644
--- a/src/rabbit_upgrade.erl
+++ b/src/rabbit_upgrade.erl
@@ -49,8 +49,8 @@
%% clusters.
%%
%% Firstly, we have two different types of upgrades to do: Mnesia and
-%% everythinq else. Mnesia upgrades need to only be done by one node
-%% in the cluster (we treat a non-clustered node as a single-node
+%% everything else. Mnesia upgrades must only be done by one node in
+%% the cluster (we treat a non-clustered node as a single-node
%% cluster). This is the primary upgrader. The other upgrades need to
%% be done by all nodes.
%%
@@ -75,7 +75,7 @@
%% into the boot process by prelaunch before the mnesia application is
%% started. By the time Mnesia is started the upgrades have happened
%% (on the primary), or Mnesia has been reset (on the secondary) and
-%% rabbit_mnesia:init_db/2 can then make the node rejoin the clister
+%% rabbit_mnesia:init_db/2 can then make the node rejoin the cluster
%% in the normal way.
%%
%% The non-mnesia upgrades are then triggered by
@@ -83,6 +83,22 @@
%% upgrade process to only require Mnesia upgrades, or only require
%% non-Mnesia upgrades. In the latter case no Mnesia resets and
%% reclusterings occur.
+%%
+%% The primary upgrader needs to be a disc node. Ideally we would like
+%% it to be the last disc node to shut down (since otherwise there's a
+%% risk of data loss). On each node we therefore record the disc nodes
+%% that were still running when we shut down. A disc node that knows
+%% other nodes were up when it shut down, or a ram node, will refuse
+%% to be the primary upgrader, and will thus not start when upgrades
+%% are needed.
+%%
+%% However, this is racy if several nodes are shut down at once. Since
+%% rabbit records the running nodes, and shuts down before mnesia, the
+%% race manifests as all disc nodes thinking they are not the primary
+%% upgrader. In that case the user can delete the shutdown record (the
+%% file named by rabbit_mnesia:running_nodes_filename/0) on one disc
+%% node and start that node to get things going again. This may lose any
+%% mnesia changes that happened after the node chosen as the primary
+%% upgrader was shut down.
%% -------------------------------------------------------------------
@@ -103,16 +119,28 @@ maybe_upgrade_mnesia() ->
primary -> primary_upgrade(Upgrades, Nodes);
secondary -> non_primary_upgrade(Nodes)
end
- end.
+ end,
+ ok = rabbit_mnesia:delete_previous_run_disc_nodes().
upgrade_mode(Nodes) ->
case nodes_running(Nodes) of
[] ->
- case am_i_disc_node() of
- true -> primary;
- false -> die("Cluster upgrade needed but this is a ram "
- "node.~n Please start any of the disc nodes "
- "first.", [])
+ AfterUs = rabbit_mnesia:read_previous_run_disc_nodes(),
+ case {am_i_disc_node(), AfterUs} of
+ {true, []} ->
+ primary;
+ {true, _} ->
+ Filename = rabbit_mnesia:running_nodes_filename(),
+ die("Cluster upgrade needed but other disc nodes shut "
+ "down after this one.~n Please start one of the "
+ "disc nodes: ~p first.~n~n Note: if several disc "
+ "nodes were shut down simultaneously they may all "
+ "show this message. In which case, remove ~s on one "
+ "of them and start that.", [AfterUs, Filename]);
+ {false, _} ->
+ die("Cluster upgrade needed but this is a ram "
+ "node.~n Please start one of the disc nodes: "
+ "~p first.", [AfterUs])
end;
[Another|_] ->
ClusterVersion =