diff options
| author | Francesco Mazzoli <francesco@rabbitmq.com> | 2012-07-03 16:23:28 +0100 |
|---|---|---|
| committer | Francesco Mazzoli <francesco@rabbitmq.com> | 2012-07-03 16:23:28 +0100 |
| commit | 659dd59d9eede0cd8aedb46f452a002910b5f7e8 (patch) | |
| tree | 0fc2a1284f4cd957b70de712eb38b459a1ea478c /src | |
| parent | 4071897cad67b90bf1efc18d1b37eb95c633e1f9 (diff) | |
| download | rabbitmq-server-git-659dd59d9eede0cd8aedb46f452a002910b5f7e8.tar.gz | |
wipe the mnesia data before starting if the cluster nodes are inconsistent
Diffstat (limited to 'src')
| -rw-r--r-- | src/rabbit_misc.erl | 7 | ||||
| -rw-r--r-- | src/rabbit_mnesia.erl | 102 | ||||
| -rw-r--r-- | src/rabbit_node_monitor.erl | 3 |
3 files changed, 74 insertions, 38 deletions
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 5bfb7de013..9a3325caa0 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -61,6 +61,7 @@ -export([os_cmd/1]). -export([gb_sets_difference/2]). -export([rabbit_version/0]). +-export([sequence_error/1]). %%---------------------------------------------------------------------------- @@ -214,6 +215,8 @@ -spec(os_cmd/1 :: (string()) -> string()). -spec(gb_sets_difference/2 :: (gb_set(), gb_set()) -> gb_set()). -spec(rabbit_version/0 :: () -> string()). +-spec(sequence_error/1 :: ([({'error', any()} | any())]) + -> {'error', any()} | any()). -endif. @@ -945,3 +948,7 @@ gb_sets_difference(S1, S2) -> rabbit_version() -> {ok, VSN} = application:get_key(rabbit, vsn), VSN. + +sequence_error([T]) -> T; +sequence_error([{error, _} = Error | _]) -> Error; +sequence_error([_ | Rest]) -> sequence_error(Rest). diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 04d0d292a0..d3eac8aee9 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -608,44 +608,51 @@ wait_for_tables(TableNames) -> %% This does not guarantee us much, but it avoids some situations that will %% definitely end up badly check_cluster_consistency() -> - CheckVsn = fun (This, This, _) -> - ok; - (This, Remote, Name) -> - throw({error, - {inconsistent_cluster, - rabbit_misc:format( - "~s version mismatch: local node is ~s, " - "remote node ~s", [Name, This, Remote])}}) - end, - - lists:foreach( - fun(Node) -> - case rpc:call(Node, rabbit_mnesia, node_info, []) of - {badrpc, _Reason} -> - ok; - {OTP, Rabbit, Res} -> - CheckVsn(erlang:system_info(otp_release), OTP, "OTP"), - CheckVsn(rabbit_misc:rabbit_version(), Rabbit, "Rabbit"), - case Res of - {ok, {AllNodes, _, _}} -> - ThisNode = node(), - case lists:member(ThisNode, AllNodes) of - true -> - ok; - false -> - throw( - {error, - {inconsistent_cluster, - rabbit_misc:format( - "Node ~p thinks it's clustered " - "with node ~p, but ~p disagrees", - [ThisNode, Node, Node])}}) - end; - {error, _Reason} -> - ok - end - end - end, all_clustered_nodes()). + AllNodes = all_clustered_nodes(), + %% We want to find 0 or 1 consistent nodes. + case + lists:foldl( + fun(Node, {error, Error}) -> + case rpc:call(Node, rabbit_mnesia, node_info, []) of + {badrpc, _Reason} -> + {error, Error}; + {OTP, Rabbit, Res} -> + rabbit_misc:sequence_error( + [check_version_consistency( + erlang:system_info(otp_release), OTP, "OTP"), + check_version_consistency( + rabbit_misc:rabbit_version(), Rabbit, "Rabbit"), + case Res of + {ok, Status} -> + check_nodes_consistency(Node, Status); + {error, _Reason} -> + {error, Error} + end]) + end; + (_Node, {ok, Status}) -> + {ok, Status} + end, {error, no_nodes}, AllNodes) + of + {ok, Status = {RemoteAllNodes, _, _}} -> + case ordsets:is_subset(all_clustered_nodes(), RemoteAllNodes) of + true -> ok; + false -> %% We delete the schema here since we have more nodes + %% than the actually clustered ones, and there is no + %% way to remove those nodes from our schema + %% otherwise. On the other hand, we are sure that there + %% is another online node that we can use to sync the + %% tables with. There is a race here: if between this + %% check and the `init_db' invocation the cluster gets + %% disbanded, we're left with a node with no mnesia + %% data that will try to connect to offline nodes. + mnesia:delete_schema([node()]) + end, + rabbit_node_monitor:write_cluster_status_file(Status); + {error, no_nodes} -> + ok; + {error, Error} -> + throw({error, Error}) + end. %%-------------------------------------------------------------------- %% Hooks for `rabbit_node_monitor' @@ -1031,3 +1038,22 @@ running_nodes(Nodes) -> is_running_remote() -> {proplists:is_defined(rabbit, application:which_applications()), node()}. + +check_nodes_consistency(Node, RemoteStatus = {RemoteAllNodes, _, _}) -> + ThisNode = node(), + case ordsets:is_element(ThisNode, RemoteAllNodes) of + true -> + {ok, RemoteStatus}; + false -> + {error, {inconsistent_cluster, + rabbit_misc:format("Node ~p thinks it's clustered " + "with node ~p, but ~p disagrees", + [ThisNode, Node, Node])}} + end. + +check_version_consistency(This, Remote, _) when This =:= Remote -> + ok; +check_version_consistency(This, Remote, Name) -> + {error, {inconsistent_cluster, + rabbit_misc:format("~s version mismatch: local node is ~s, " + "remote node ~s", [Name, This, Remote])}}. diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 10ea7a14fc..788a06c72a 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -19,6 +19,7 @@ -behaviour(gen_server). -export([prepare_cluster_status_file/0, + write_cluster_status_file/1, read_cluster_status_file/0, update_cluster_status_file/0, reset_cluster_status_file/0, @@ -47,6 +48,8 @@ -ifdef(use_specs). -spec(prepare_cluster_status_file/0 :: () -> 'ok'). +-spec(write_cluster_status_file/1 :: (rabbit_mnesia:cluster_status()) + -> 'ok'). -spec(read_cluster_status_file/0 :: () -> rabbit_mnesia:cluster_status()). -spec(update_cluster_status_file/0 :: () -> 'ok'). -spec(reset_cluster_status_file/0 :: () -> 'ok'). |
