summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/rabbit_mnesia.erl55
-rw-r--r--src/rabbit_peer_discovery.erl21
-rw-r--r--src/rabbit_peer_discovery_classic_config.erl3
3 files changed, 58 insertions, 21 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index b627225785..d0ef4d5dc8 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -97,48 +97,61 @@ init() ->
ok.
init_with_lock() ->
- {Retries, Timeout} = rabbit_peer_discovery:retry_timeout(),
- init_with_lock(Retries, Timeout, fun init_from_config/0).
+ {Retries, Timeout} = rabbit_peer_discovery:locking_retry_timeout(),
+ init_with_lock(Retries, Timeout, fun run_peer_discovery/0).
-init_with_lock(0, _, InitFromConfig) ->
+init_with_lock(0, _, RunPeerDiscovery) ->
case rabbit_peer_discovery:lock_acquisition_failure_mode() of
ignore ->
rabbit_log:warning("Cannot acquire a lock during clustering", []),
- InitFromConfig(),
+ RunPeerDiscovery(),
rabbit_peer_discovery:maybe_register();
fail ->
exit(cannot_acquire_startup_lock)
end;
-init_with_lock(Retries, Timeout, InitFromConfig) ->
+init_with_lock(Retries, Timeout, RunPeerDiscovery) ->
case rabbit_peer_discovery:lock() of
not_supported ->
rabbit_log:info("Peer discovery backend does not support locking, falling back to randomized delay"),
%% See rabbitmq/rabbitmq-server#1202 for details.
rabbit_peer_discovery:maybe_inject_randomized_delay(),
- InitFromConfig(),
+ RunPeerDiscovery(),
rabbit_peer_discovery:maybe_register();
{error, _Reason} ->
timer:sleep(Timeout),
- init_with_lock(Retries - 1, Timeout, InitFromConfig);
+ init_with_lock(Retries - 1, Timeout, RunPeerDiscovery);
{ok, Data} ->
try
- InitFromConfig(),
+ RunPeerDiscovery(),
rabbit_peer_discovery:maybe_register()
after
rabbit_peer_discovery:unlock(Data)
end
end.
-init_from_config() ->
+-spec run_peer_discovery() -> ok | {[node()], node_type()}.
+run_peer_discovery() ->
+ {RetriesLeft, DelayInterval} = rabbit_peer_discovery:discovery_retries(),
+ run_peer_discovery_with_retries(RetriesLeft, DelayInterval).
+
+-spec run_peer_discovery_with_retries(non_neg_integer(), non_neg_integer()) -> ok | {[node()], node_type()}.
+run_peer_discovery_with_retries(0, _DelayInterval) ->
+ ok;
+run_peer_discovery_with_retries(RetriesLeft, DelayInterval) ->
FindBadNodeNames = fun
(Name, BadNames) when is_atom(Name) -> BadNames;
(Name, BadNames) -> [Name | BadNames]
end,
{DiscoveredNodes, NodeType} =
case rabbit_peer_discovery:discover_cluster_nodes() of
+ {error, Reason} ->
+ RetriesLeft1 = RetriesLeft - 1,
+ rabbit_log:error("Peer discovery returned an error: ~p. Will retry after a delay of ~b, ~b retries left...",
+ [Reason, DelayInterval, RetriesLeft1]),
+ timer:sleep(DelayInterval),
+ run_peer_discovery_with_retries(RetriesLeft1, DelayInterval);
{ok, {Nodes, Type} = Config}
- when is_list(Nodes) andalso
- (Type == disc orelse Type == disk orelse Type == ram) ->
+ when is_list(Nodes) andalso (Type == disc orelse Type == disk orelse Type == ram) ->
case lists:foldr(FindBadNodeNames, [], Nodes) of
[] -> Config;
BadNames -> e({invalid_cluster_node_names, BadNames})
@@ -167,6 +180,16 @@ init_from_config() ->
%% reachable and compatible (in terms of Mnesia internal protocol version and such)
%% cluster peers in order.
join_discovered_peers(TryNodes, NodeType) ->
+ {RetriesLeft, DelayInterval} = rabbit_peer_discovery:discovery_retries(),
+ join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft, DelayInterval).
+
+join_discovered_peers_with_retries(TryNodes, _NodeType, 0, _DelayInterval) ->
+ rabbit_log:warning(
+ "Could not successfully contact any node of: ~s (as in Erlang distribution). "
+ "Starting as a blank standalone node...~n",
+ [string:join(lists:map(fun atom_to_list/1, TryNodes), ",")]),
+ init_db_and_upgrade([node()], disc, false, _Retry = true);
+join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft, DelayInterval) ->
case find_reachable_peer_to_cluster_with(nodes_excl_me(TryNodes)) of
{ok, Node} ->
rabbit_log:info("Node '~s' selected for auto-clustering~n", [Node]),
@@ -175,11 +198,11 @@ join_discovered_peers(TryNodes, NodeType) ->
rabbit_connection_tracking:boot(),
rabbit_node_monitor:notify_joined_cluster();
none ->
- rabbit_log:warning(
- "Could not successfully contact any node of: ~s (as in Erlang distribution). "
- "Starting as a blank standalone node...~n",
- [string:join(lists:map(fun atom_to_list/1, TryNodes), ",")]),
- init_db_and_upgrade([node()], disc, false, _Retry = true)
+ RetriesLeft1 = RetriesLeft - 1,
+ rabbit_log:error("Trying to join discovered peers failed. Will retry after a delay of ~b, ~b retries left...",
+ [DelayInterval, RetriesLeft1]),
+ timer:sleep(DelayInterval),
+ join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft1, DelayInterval)
end.
%% Make the node join a cluster. The node will be reset automatically
diff --git a/src/rabbit_peer_discovery.erl b/src/rabbit_peer_discovery.erl
index 44c36e06d2..d2646bf76f 100644
--- a/src/rabbit_peer_discovery.erl
+++ b/src/rabbit_peer_discovery.erl
@@ -23,8 +23,9 @@
-export([maybe_init/0, discover_cluster_nodes/0, backend/0, node_type/0,
normalize/1, format_discovered_nodes/1, log_configured_backend/0,
register/0, unregister/0, maybe_register/0, maybe_unregister/0,
- maybe_inject_randomized_delay/0, lock/0, unlock/1]).
--export([append_node_prefix/1, node_prefix/0, retry_timeout/0,
+ maybe_inject_randomized_delay/0, lock/0, unlock/1,
+ discovery_retries/0]).
+-export([append_node_prefix/1, node_prefix/0, locking_retry_timeout/0,
lock_acquisition_failure_mode/0]).
-define(DEFAULT_BACKEND, rabbit_peer_discovery_classic_config).
@@ -61,9 +62,9 @@ node_type() ->
?DEFAULT_NODE_TYPE
end.
--spec retry_timeout() -> {Retries :: integer(), Timeout :: integer()}.
+-spec locking_retry_timeout() -> {Retries :: integer(), Timeout :: integer()}.
-retry_timeout() ->
+locking_retry_timeout() ->
case application:get_env(rabbit, cluster_formation) of
{ok, Proplist} ->
Retries = proplists:get_value(lock_retry_limit, Proplist, 10),
@@ -146,6 +147,18 @@ maybe_unregister() ->
ok
end.
+-spec discovery_retries() -> {Retries :: integer(), Interval :: integer()}.
+
+discovery_retries() ->
+ case application:get_env(rabbit, cluster_formation) of
+ {ok, Proplist} ->
+ Retries = proplists:get_value(discovery_retry_limit, Proplist, 10),
+ Interval = proplists:get_value(discovery_retry_interval, Proplist, 500),
+ {Retries, Interval};
+ undefined ->
+ {50, 100}
+ end.
+
-spec maybe_inject_randomized_delay() -> ok.
maybe_inject_randomized_delay() ->
diff --git a/src/rabbit_peer_discovery_classic_config.erl b/src/rabbit_peer_discovery_classic_config.erl
index 2183cda04d..42d9db1c61 100644
--- a/src/rabbit_peer_discovery_classic_config.erl
+++ b/src/rabbit_peer_discovery_classic_config.erl
@@ -26,7 +26,8 @@
%% API
%%
--spec list_nodes() -> {ok, {Nodes :: [node()], rabbit_types:node_type()}}.
+-spec list_nodes() -> {ok, {Nodes :: [node()], rabbit_types:node_type()}} |
+ {error, Reason :: string()}.
list_nodes() ->
case application:get_env(rabbit, cluster_nodes, {[], disc}) of