summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorFrancesco Mazzoli <francesco@rabbitmq.com>2012-07-03 16:23:28 +0100
committerFrancesco Mazzoli <francesco@rabbitmq.com>2012-07-03 16:23:28 +0100
commit659dd59d9eede0cd8aedb46f452a002910b5f7e8 (patch)
tree0fc2a1284f4cd957b70de712eb38b459a1ea478c /src
parent4071897cad67b90bf1efc18d1b37eb95c633e1f9 (diff)
downloadrabbitmq-server-git-659dd59d9eede0cd8aedb46f452a002910b5f7e8.tar.gz
wipe the mnesia data before starting if the cluster nodes are inconsistent
Diffstat (limited to 'src')
-rw-r--r--src/rabbit_misc.erl7
-rw-r--r--src/rabbit_mnesia.erl102
-rw-r--r--src/rabbit_node_monitor.erl3
3 files changed, 74 insertions, 38 deletions
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl
index 5bfb7de013..9a3325caa0 100644
--- a/src/rabbit_misc.erl
+++ b/src/rabbit_misc.erl
@@ -61,6 +61,7 @@
-export([os_cmd/1]).
-export([gb_sets_difference/2]).
-export([rabbit_version/0]).
+-export([sequence_error/1]).
%%----------------------------------------------------------------------------
@@ -214,6 +215,8 @@
-spec(os_cmd/1 :: (string()) -> string()).
-spec(gb_sets_difference/2 :: (gb_set(), gb_set()) -> gb_set()).
-spec(rabbit_version/0 :: () -> string()).
+-spec(sequence_error/1 :: ([({'error', any()} | any())])
+ -> {'error', any()} | any()).
-endif.
@@ -945,3 +948,7 @@ gb_sets_difference(S1, S2) ->
%% Returns the value stored under the `vsn' key of the `rabbit' application.
%% Fails with a badmatch if application:get_key/2 does not return {ok, _}.
rabbit_version() ->
    {ok, Version} = application:get_key(rabbit, vsn),
    Version.
+
+%% Walks a list of results in order and returns the first `{error, _}' term
+%% it meets; when no element before the last one is an error, the last
+%% element is returned as is. An empty list is not handled and fails with
+%% function_clause.
+sequence_error([Last]) ->
+    Last;
+sequence_error([Result | Remaining]) ->
+    case Result of
+        {error, _} -> Result;
+        _          -> sequence_error(Remaining)
+    end.
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index 04d0d292a0..d3eac8aee9 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -608,44 +608,51 @@ wait_for_tables(TableNames) ->
%% This does not guarantee us much, but it avoids some situations that will
%% definitely end up badly
%%
%% The added (+) version folds over the nodes returned by
%% all_clustered_nodes(), starting from {error, no_nodes}. While the
%% accumulator is an error, each node is asked for rabbit_mnesia:node_info/0
%% over rpc; a badrpc reply leaves the accumulator untouched. Otherwise the
%% OTP version, the Rabbit version and the cluster membership are checked via
%% rabbit_misc:sequence_error/1. Once a node yields {ok, Status}, that value
%% is carried through the rest of the fold without further rpc calls.
%%
%% Outcome:
%%   {ok, Status}      - the local mnesia schema is deleted when the local
%%                       node list is not a subset of the remote one, then
%%                       Status is written to the cluster status file;
%%   {error, no_nodes} - returns ok;
%%   {error, Error}    - throws {error, Error}.
check_cluster_consistency() ->
- CheckVsn = fun (This, This, _) ->
- ok;
- (This, Remote, Name) ->
- throw({error,
- {inconsistent_cluster,
- rabbit_misc:format(
- "~s version mismatch: local node is ~s, "
- "remote node ~s", [Name, This, Remote])}})
- end,
-
- lists:foreach(
- fun(Node) ->
- case rpc:call(Node, rabbit_mnesia, node_info, []) of
- {badrpc, _Reason} ->
- ok;
- {OTP, Rabbit, Res} ->
- CheckVsn(erlang:system_info(otp_release), OTP, "OTP"),
- CheckVsn(rabbit_misc:rabbit_version(), Rabbit, "Rabbit"),
- case Res of
- {ok, {AllNodes, _, _}} ->
- ThisNode = node(),
- case lists:member(ThisNode, AllNodes) of
- true ->
- ok;
- false ->
- throw(
- {error,
- {inconsistent_cluster,
- rabbit_misc:format(
- "Node ~p thinks it's clustered "
- "with node ~p, but ~p disagrees",
- [ThisNode, Node, Node])}})
- end;
- {error, _Reason} ->
- ok
- end
- end
- end, all_clustered_nodes()).
+ AllNodes = all_clustered_nodes(),
+ %% We want to find 0 or 1 consistent nodes: an {ok, Status} accumulator is never replaced.
+ case
+ lists:foldl(
+ fun(Node, {error, Error}) ->
+ case rpc:call(Node, rabbit_mnesia, node_info, []) of
+ {badrpc, _Reason} -> %% rpc failed: keep the error accumulated so far
+ {error, Error};
+ {OTP, Rabbit, Res} ->
+ rabbit_misc:sequence_error( %% first error wins, otherwise the last element
+ [check_version_consistency(
+ erlang:system_info(otp_release), OTP, "OTP"),
+ check_version_consistency(
+ rabbit_misc:rabbit_version(), Rabbit, "Rabbit"),
+ case Res of
+ {ok, Status} ->
+ check_nodes_consistency(Node, Status);
+ {error, _Reason} -> %% remote status unavailable: keep the previous error
+ {error, Error}
+ end])
+ end;
+ (_Node, {ok, Status}) -> %% a consistent node was already found: no further rpc
+ {ok, Status}
+ end, {error, no_nodes}, AllNodes)
+ of
+ {ok, Status = {RemoteAllNodes, _, _}} ->
+ case ordsets:is_subset(all_clustered_nodes(), RemoteAllNodes) of %% NOTE(review): calls all_clustered_nodes() again instead of reusing AllNodes; assumes both lists are ordsets - confirm
+ true -> ok;
+ false -> %% We delete the schema here since we have more nodes
+ %% than the actually clustered ones, and there is no
+ %% way to remove those nodes from our schema
+ %% otherwise. On the other hand, we are sure that there
+ %% is another online node that we can use to sync the
+ %% tables with. There is a race here: if between this
+ %% check and the `init_db' invocation the cluster gets
+ %% disbanded, we're left with a node with no mnesia
+ %% data that will try to connect to offline nodes.
+ mnesia:delete_schema([node()]) %% NOTE(review): the return value is not checked here
+ end,
+ rabbit_node_monitor:write_cluster_status_file(Status);
+ {error, no_nodes} -> %% the fold never left its initial state
+ ok;
+ {error, Error} ->
+ throw({error, Error})
+ end.
%%--------------------------------------------------------------------
%% Hooks for `rabbit_node_monitor'
@@ -1031,3 +1038,22 @@ running_nodes(Nodes) ->
%% Returns {IsRunning, Node}: IsRunning tells whether `rabbit' is listed by
%% application:which_applications(), Node is the name of the local node.
is_running_remote() ->
    RunningApps = application:which_applications(),
    IsRunning = proplists:is_defined(rabbit, RunningApps),
    {IsRunning, node()}.
+
+%% Checks that the local node appears in the node list reported by `Node'.
+%% Returns {ok, RemoteStatus} when it does, and an
+%% {error, {inconsistent_cluster, Message}} tuple otherwise.
+check_nodes_consistency(Node, {RemoteAllNodes, _, _} = RemoteStatus) ->
+    Self = node(),
+    case ordsets:is_element(Self, RemoteAllNodes) of
+        false ->
+            Msg = rabbit_misc:format("Node ~p thinks it's clustered "
+                                     "with node ~p, but ~p disagrees",
+                                     [Self, Node, Node]),
+            {error, {inconsistent_cluster, Msg}};
+        true ->
+            {ok, RemoteStatus}
+    end.
+
+%% Compares the local and remote value of a version named by `Name'.
+%% Returns ok when the two terms are exactly equal, and an
+%% {error, {inconsistent_cluster, Message}} tuple otherwise.
+check_version_consistency(Version, Version, _Name) ->
+    ok;
+check_version_consistency(This, Remote, Name) ->
+    Msg = rabbit_misc:format("~s version mismatch: local node is ~s, "
+                             "remote node ~s", [Name, This, Remote]),
+    {error, {inconsistent_cluster, Msg}}.
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index 10ea7a14fc..788a06c72a 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -19,6 +19,7 @@
-behaviour(gen_server).
-export([prepare_cluster_status_file/0,
+ write_cluster_status_file/1,
read_cluster_status_file/0,
update_cluster_status_file/0,
reset_cluster_status_file/0,
@@ -47,6 +48,8 @@
-ifdef(use_specs).
-spec(prepare_cluster_status_file/0 :: () -> 'ok').
+-spec(write_cluster_status_file/1 :: (rabbit_mnesia:cluster_status())
+ -> 'ok').
-spec(read_cluster_status_file/0 :: () -> rabbit_mnesia:cluster_status()).
-spec(update_cluster_status_file/0 :: () -> 'ok').
-spec(reset_cluster_status_file/0 :: () -> 'ok').