summary refs log tree commit diff
diff options
context:
space:
mode:
author Michael Klishin <michael@novemberain.com> 2019-01-28 21:05:25 +0300
committer GitHub <noreply@github.com> 2019-01-28 21:05:25 +0300
commit bffd562001b9e2657eb69e2688dc503f241b2df7 (patch)
tree 0d7eefbdd4a1f0c18b23930ff479506ce222178c
parent dd947c6081e5497e74aa2ba8ff4386971bac69dc (diff)
parent b7065eac8c6e0240c2f78ac37d952e7a4d5301e6 (diff)
download rabbitmq-server-git-bffd562001b9e2657eb69e2688dc503f241b2df7.tar.gz
Merge pull request #1848 from rabbitmq/await_startup_with_a_timeout
Introduce a function that awaits startup with a timeout
-rw-r--r-- src/rabbit.erl 96
1 files changed, 78 insertions, 18 deletions
diff --git a/src/rabbit.erl b/src/rabbit.erl
index 44a044a4dc..0fdd0326cf 100644
--- a/src/rabbit.erl
+++ b/src/rabbit.erl
@@ -23,7 +23,7 @@
-behaviour(application).
-export([start/0, boot/0, stop/0,
- stop_and_halt/0, await_startup/0, await_startup/1,
+ stop_and_halt/0, await_startup/0, await_startup/1, await_startup/3,
status/0, is_running/0, alarms/0,
is_running/1, environment/0, rotate_logs/0,
start_fhc/0]).
@@ -236,6 +236,13 @@
-define(ASYNC_THREADS_WARNING_THRESHOLD, 8).
+%% 1 minute
+-define(BOOT_START_TIMEOUT, 1 * 60 * 1000).
+%% 12 hours
+-define(BOOT_FINISH_TIMEOUT, 12 * 60 * 60 * 1000).
+%% 100 ms
+-define(BOOT_STATUS_CHECK_INTERVAL, 100).
+
%%----------------------------------------------------------------------------
-type restart_type() :: 'permanent' | 'transient' | 'temporary'.
@@ -248,7 +255,7 @@
-spec boot() -> 'ok'.
-spec stop() -> 'ok'.
-spec stop_and_halt() -> no_return().
--spec await_startup() -> 'ok'.
+
-spec status
() -> [{pid, integer()} |
{running_applications, [{atom(), string(), string()}]} |
@@ -682,34 +689,64 @@ handle_app_error(Term) ->
throw({Term, App, Reason})
end.
+is_booting() -> is_booting(node()).
+
+is_booting(Node) ->
+ case rpc:call(Node, erlang, whereis, [rabbit_boot]) of
+ {badrpc, _} = Err -> Err;
+ undefined -> false;
+ P when is_pid(P) -> true
+ end.
+
+
+-spec await_startup() -> 'ok' | {'error', 'timeout'}.
await_startup() ->
- await_startup(node()).
+ await_startup(node(), false).
-await_startup(Node) ->
+-spec await_startup(node() | non_neg_integer()) -> 'ok' | {'error', 'timeout'}.
+await_startup(Node) when is_atom(Node) ->
+ await_startup(Node, false);
+ await_startup(Timeout) when is_integer(Timeout) ->
+ await_startup(node(), false, Timeout).
+
+-spec await_startup(node(), boolean()) -> 'ok' | {'error', 'timeout'}.
+await_startup(Node, PrintProgressReports) ->
case is_booting(Node) of
- true -> wait_for_boot_to_finish(Node);
+ true -> wait_for_boot_to_finish(Node, PrintProgressReports);
false ->
case is_running(Node) of
- true -> ok;
+ true -> ok;
false -> wait_for_boot_to_start(Node),
- wait_for_boot_to_finish(Node)
+ wait_for_boot_to_finish(Node, PrintProgressReports)
end
end.
-is_booting() -> is_booting(node()).
-
-is_booting(Node) ->
- case rpc:call(Node, erlang, whereis, [rabbit_boot]) of
- {badrpc, _} = Err -> Err;
- undefined -> false;
- P when is_pid(P) -> true
+-spec await_startup(node(), boolean(), non_neg_integer()) -> 'ok' | {'error', 'timeout'}.
+await_startup(Node, PrintProgressReports, Timeout) ->
+ case is_booting(Node) of
+ true -> wait_for_boot_to_finish(Node, PrintProgressReports, Timeout);
+ false ->
+ case is_running(Node) of
+ true -> ok;
+ false -> wait_for_boot_to_start(Node, Timeout),
+ wait_for_boot_to_finish(Node, PrintProgressReports, Timeout)
+ end
end.
wait_for_boot_to_start(Node) ->
+ wait_for_boot_to_start(Node, ?BOOT_START_TIMEOUT).
+
+wait_for_boot_to_start(Node, Timeout) ->
+ Iterations = Timeout div ?BOOT_STATUS_CHECK_INTERVAL,
+ do_wait_for_boot_to_start(Node, Iterations).
+
+do_wait_for_boot_to_start(_Node, IterationsLeft) when IterationsLeft =< 0 ->
+ {error, timeout};
+do_wait_for_boot_to_start(Node, IterationsLeft) ->
case is_booting(Node) of
false ->
- timer:sleep(100),
- wait_for_boot_to_start(Node);
+ timer:sleep(?BOOT_STATUS_CHECK_INTERVAL),
+ do_wait_for_boot_to_start(Node, IterationsLeft - 1);
{badrpc, _} = Err ->
Err;
true ->
@@ -717,6 +754,18 @@ wait_for_boot_to_start(Node) ->
end.
wait_for_boot_to_finish(Node) ->
+ wait_for_boot_to_finish(Node, false, ?BOOT_FINISH_TIMEOUT).
+
+wait_for_boot_to_finish(Node, PrintProgressReports) ->
+ wait_for_boot_to_finish(Node, PrintProgressReports, ?BOOT_FINISH_TIMEOUT).
+
+wait_for_boot_to_finish(Node, PrintProgressReports, Timeout) ->
+ Iterations = Timeout div ?BOOT_STATUS_CHECK_INTERVAL,
+ do_wait_for_boot_to_finish(Node, PrintProgressReports, Iterations).
+
+do_wait_for_boot_to_finish(_Node, _PrintProgressReports, IterationsLeft) when IterationsLeft =< 0 ->
+ {error, timeout};
+do_wait_for_boot_to_finish(Node, PrintProgressReports, IterationsLeft) ->
case is_booting(Node) of
false ->
%% We don't want badrpc error to be interpreted as false,
@@ -729,10 +778,21 @@ wait_for_boot_to_finish(Node) ->
{badrpc, _} = Err ->
Err;
true ->
- timer:sleep(100),
- wait_for_boot_to_finish(Node)
+ maybe_print_boot_progress(PrintProgressReports, IterationsLeft),
+ timer:sleep(?BOOT_STATUS_CHECK_INTERVAL),
+ do_wait_for_boot_to_finish(Node, PrintProgressReports, IterationsLeft - 1)
end.
+maybe_print_boot_progress(false = _PrintProgressReports, _IterationsLeft) ->
+ ok;
+maybe_print_boot_progress(true, IterationsLeft) ->
+ case IterationsLeft rem 100 of
+ %% This will be printed on the CLI command end to illustrate some
+ %% progress.
+ 0 -> io:format("Still booting, will check again in 10 seconds...~n");
+ _ -> ok
+ end.
+
status() ->
S1 = [{pid, list_to_integer(os:getpid())},
%% The timeout value used is twice that of gen_server:call/2.