summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/rabbit.erl 35
-rw-r--r-- test/clustering_management_SUITE.erl 50
2 files changed, 71 insertions, 14 deletions
diff --git a/src/rabbit.erl b/src/rabbit.erl
index 8e6c9ead26..bc71994169 100644
--- a/src/rabbit.erl
+++ b/src/rabbit.erl
@@ -471,7 +471,7 @@ stop() ->
undefined -> ok;
_ ->
rabbit_log:info("RabbitMQ hasn't finished starting yet. Waiting for startup to finish before stopping..."),
- await_startup(true)
+ wait_to_finish_booting()
end,
rabbit_log:info("RabbitMQ is asked to stop...~n", []),
Apps = ?APPS ++ rabbit_plugins:active(),
@@ -641,19 +641,32 @@ handle_app_error(Term) ->
end.
await_startup() ->
- await_startup(false).
+ case is_booting() of
+ true -> wait_to_finish_booting();
+ false ->
+ case is_running() of
+ true -> ok;
+ false -> wait_to_start_booting(),
+ wait_to_finish_booting()
+ end
+ end.
+
+is_booting() ->
+ whereis(rabbit_boot) /= undefined.
+
+wait_to_start_booting() ->
+ case whereis(rabbit_boot) of
+ undefined -> timer:sleep(100),
+ wait_to_start_booting();
+ _ -> ok
+ end.
-await_startup(HaveSeenRabbitBoot) ->
- %% We don't take absence of rabbit_boot as evidence we've started,
- %% since there's a small window before it is registered.
+wait_to_finish_booting() ->
case whereis(rabbit_boot) of
- undefined -> case HaveSeenRabbitBoot orelse is_running() of
- true -> ok;
- false -> timer:sleep(100),
- await_startup(false)
- end;
+ undefined -> true = is_running(),
+ ok;
_ -> timer:sleep(100),
- await_startup(true)
+ wait_to_finish_booting()
end.
status() ->
diff --git a/test/clustering_management_SUITE.erl b/test/clustering_management_SUITE.erl
index 2484f8c910..5418827faf 100644
--- a/test/clustering_management_SUITE.erl
+++ b/test/clustering_management_SUITE.erl
@@ -52,7 +52,8 @@ groups() ->
reset_removes_things,
forget_offline_removes_things,
force_boot,
- status_with_alarm
+ status_with_alarm,
+ wait_fails_when_cluster_fails
]},
{cluster_size_4, [], [
forget_promotes_offline_slave
@@ -73,8 +74,10 @@ suite() ->
init_per_suite(Config) ->
rabbit_ct_helpers:log_environment(),
Config1 = rabbit_ct_helpers:merge_app_env(
- Config,
- {rabbit, [{mnesia_table_loading_retry_limit, 1}]}),
+ Config, {rabbit, [
+ {mnesia_table_loading_retry_limit, 2},
+ {mnesia_table_loading_retry_timeout,1000}
+ ]}),
rabbit_ct_helpers:run_setup_steps(Config1).
end_per_suite(Config) ->
@@ -595,8 +598,49 @@ status_with_alarm(Config) ->
ok = alarm_information_on_each_node(R, Rabbit, Hare).
+wait_fails_when_cluster_fails(Config) ->
+ [Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config,
+ nodename),
+ RabbitConfig = rabbit_ct_broker_helpers:get_node_config(Config,Rabbit),
+ RabbitPidFile = ?config(pid_file, RabbitConfig),
+ %% ensure pid file is readable
+ {ok, _} = file:read_file(RabbitPidFile),
+ %% ensure wait works on running node
+ {ok, _} = rabbit_ct_broker_helpers:rabbitmqctl(Config, Rabbit,
+ ["wait", RabbitPidFile]),
+ %% stop both nodes
+ ok = rabbit_ct_broker_helpers:stop_node(Config, Rabbit),
+ ok = rabbit_ct_broker_helpers:stop_node(Config, Hare),
+ %% starting first node fails - it was not the last node to stop
+ {error, _} = rabbit_ct_broker_helpers:start_node(Config, Rabbit),
+ %% start first node in the background
+ spawn_link(fun() ->
+ rabbit_ct_broker_helpers:start_node(Config, Rabbit)
+ end),
+ Attempts = 10,
+ Timeout = 500,
+ wait_for_pid_file_to_contain_running_process_pid(RabbitPidFile, Attempts, Timeout),
+ {error, _, _} = rabbit_ct_broker_helpers:rabbitmqctl(Config, Rabbit,
+ ["wait", RabbitPidFile]).
+
%% ----------------------------------------------------------------------------
%% Internal utils
+%% ----------------------------------------------------------------------------
+
+wait_for_pid_file_to_contain_running_process_pid(_, 0, _) ->
+ error(timeout_waiting_for_pid_file_to_have_running_pid);
+wait_for_pid_file_to_contain_running_process_pid(PidFile, Attempts, Timeout) ->
+ Pid = pid_from_file(PidFile),
+ case rabbit_misc:is_os_process_alive(Pid) of
+ true -> ok;
+ false ->
+ ct:sleep(Timeout),
+ wait_for_pid_file_to_contain_running_process_pid(PidFile, Attempts - 1, Timeout)
+ end.
+
+pid_from_file(PidFile) ->
+ {ok, Content} = file:read_file(PidFile),
+ string:strip(binary_to_list(Content), both, $\n).
cluster_members(Config) ->
rabbit_ct_broker_helpers:get_node_configs(Config, nodename).