diff options
| author | Michael Klishin <michael@novemberain.com> | 2019-01-28 21:05:25 +0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-01-28 21:05:25 +0300 |
| commit | bffd562001b9e2657eb69e2688dc503f241b2df7 (patch) | |
| tree | 0d7eefbdd4a1f0c18b23930ff479506ce222178c | |
| parent | dd947c6081e5497e74aa2ba8ff4386971bac69dc (diff) | |
| parent | b7065eac8c6e0240c2f78ac37d952e7a4d5301e6 (diff) | |
| download | rabbitmq-server-git-bffd562001b9e2657eb69e2688dc503f241b2df7.tar.gz | |
Merge pull request #1848 from rabbitmq/await_startup_with_a_timeout
Introduce a function that awaits startup with a timeout
| -rw-r--r-- | src/rabbit.erl | 96 |
1 files changed, 78 insertions, 18 deletions
diff --git a/src/rabbit.erl b/src/rabbit.erl index 44a044a4dc..0fdd0326cf 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -23,7 +23,7 @@ -behaviour(application). -export([start/0, boot/0, stop/0, - stop_and_halt/0, await_startup/0, await_startup/1, + stop_and_halt/0, await_startup/0, await_startup/1, await_startup/3, status/0, is_running/0, alarms/0, is_running/1, environment/0, rotate_logs/0, start_fhc/0]). @@ -236,6 +236,13 @@ -define(ASYNC_THREADS_WARNING_THRESHOLD, 8). +%% 1 minute +-define(BOOT_START_TIMEOUT, 1 * 60 * 1000). +%% 12 hours +-define(BOOT_FINISH_TIMEOUT, 12 * 60 * 60 * 1000). +%% 100 ms +-define(BOOT_STATUS_CHECK_INTERVAL, 100). + %%---------------------------------------------------------------------------- -type restart_type() :: 'permanent' | 'transient' | 'temporary'. @@ -248,7 +255,7 @@ -spec boot() -> 'ok'. -spec stop() -> 'ok'. -spec stop_and_halt() -> no_return(). --spec await_startup() -> 'ok'. + -spec status () -> [{pid, integer()} | {running_applications, [{atom(), string(), string()}]} | @@ -682,34 +689,64 @@ handle_app_error(Term) -> throw({Term, App, Reason}) end. +is_booting() -> is_booting(node()). + +is_booting(Node) -> + case rpc:call(Node, erlang, whereis, [rabbit_boot]) of + {badrpc, _} = Err -> Err; + undefined -> false; + P when is_pid(P) -> true + end. + + +-spec await_startup() -> 'ok' | {'error', 'timeout'}. await_startup() -> - await_startup(node()). + await_startup(node(), false). -await_startup(Node) -> +-spec await_startup(node() | non_neg_integer()) -> 'ok' | {'error', 'timeout'}. +await_startup(Node) when is_atom(Node) -> + await_startup(Node, false); + await_startup(Timeout) when is_integer(Timeout) -> + await_startup(node(), false, Timeout). + +-spec await_startup(node(), boolean()) -> 'ok' | {'error', 'timeout'}. +await_startup(Node, PrintProgressReports) -> case is_booting(Node) of - true -> wait_for_boot_to_finish(Node); + true -> wait_for_boot_to_finish(Node, PrintProgressReports); false -> case is_running(Node) of - true -> ok; + true -> ok; false -> wait_for_boot_to_start(Node), - wait_for_boot_to_finish(Node) + wait_for_boot_to_finish(Node, PrintProgressReports) end end. -is_booting() -> is_booting(node()). - -is_booting(Node) -> - case rpc:call(Node, erlang, whereis, [rabbit_boot]) of - {badrpc, _} = Err -> Err; - undefined -> false; - P when is_pid(P) -> true +-spec await_startup(node(), boolean(), non_neg_integer()) -> 'ok' | {'error', 'timeout'}. +await_startup(Node, PrintProgressReports, Timeout) -> + case is_booting(Node) of + true -> wait_for_boot_to_finish(Node, PrintProgressReports, Timeout); + false -> + case is_running(Node) of + true -> ok; + false -> wait_for_boot_to_start(Node, Timeout), + wait_for_boot_to_finish(Node, PrintProgressReports, Timeout) + end end. wait_for_boot_to_start(Node) -> + wait_for_boot_to_start(Node, ?BOOT_START_TIMEOUT). + +wait_for_boot_to_start(Node, Timeout) -> + Iterations = Timeout div ?BOOT_STATUS_CHECK_INTERVAL, + do_wait_for_boot_to_start(Node, Iterations). + +do_wait_for_boot_to_start(_Node, IterationsLeft) when IterationsLeft =< 0 -> + {error, timeout}; +do_wait_for_boot_to_start(Node, IterationsLeft) -> case is_booting(Node) of false -> - timer:sleep(100), - wait_for_boot_to_start(Node); + timer:sleep(?BOOT_STATUS_CHECK_INTERVAL), + do_wait_for_boot_to_start(Node, IterationsLeft - 1); {badrpc, _} = Err -> Err; true -> @@ -717,6 +754,18 @@ wait_for_boot_to_start(Node) -> end. wait_for_boot_to_finish(Node) -> + wait_for_boot_to_finish(Node, false, ?BOOT_FINISH_TIMEOUT). + +wait_for_boot_to_finish(Node, PrintProgressReports) -> + wait_for_boot_to_finish(Node, PrintProgressReports, ?BOOT_FINISH_TIMEOUT). + +wait_for_boot_to_finish(Node, PrintProgressReports, Timeout) -> + Iterations = Timeout div ?BOOT_STATUS_CHECK_INTERVAL, + do_wait_for_boot_to_finish(Node, PrintProgressReports, Iterations). + +do_wait_for_boot_to_finish(_Node, _PrintProgressReports, IterationsLeft) when IterationsLeft =< 0 -> + {error, timeout}; +do_wait_for_boot_to_finish(Node, PrintProgressReports, IterationsLeft) -> case is_booting(Node) of false -> %% We don't want badrpc error to be interpreted as false, @@ -729,10 +778,21 @@ wait_for_boot_to_finish(Node) -> {badrpc, _} = Err -> Err; true -> - timer:sleep(100), - wait_for_boot_to_finish(Node) + maybe_print_boot_progress(PrintProgressReports, IterationsLeft), + timer:sleep(?BOOT_STATUS_CHECK_INTERVAL), + do_wait_for_boot_to_finish(Node, PrintProgressReports, IterationsLeft - 1) end. +maybe_print_boot_progress(false = _PrintProgressReports, _IterationsLeft) -> + ok; +maybe_print_boot_progress(true, IterationsLeft) -> + case IterationsLeft rem 100 of + %% This will be printed on the CLI command end to illustrate some + %% progress. + 0 -> io:format("Still booting, will check again in 10 seconds...~n"); + _ -> ok + end. + status() -> S1 = [{pid, list_to_integer(os:getpid())}, %% The timeout value used is twice that of gen_server:call/2. |
