diff options
| -rw-r--r-- | src/rabbit.erl | 35 | ||||
| -rw-r--r-- | test/clustering_management_SUITE.erl | 50 |
2 files changed, 71 insertions, 14 deletions
diff --git a/src/rabbit.erl b/src/rabbit.erl index 8e6c9ead26..bc71994169 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -471,7 +471,7 @@ stop() -> undefined -> ok; _ -> rabbit_log:info("RabbitMQ hasn't finished starting yet. Waiting for startup to finish before stopping..."), - await_startup(true) + wait_to_finish_booting() end, rabbit_log:info("RabbitMQ is asked to stop...~n", []), Apps = ?APPS ++ rabbit_plugins:active(), @@ -641,19 +641,32 @@ handle_app_error(Term) -> end. await_startup() -> - await_startup(false). + case is_booting() of + true -> wait_to_finish_booting(); + false -> + case is_running() of + true -> ok; + false -> wait_to_start_booting(), + wait_to_finish_booting() + end + end. + +is_booting() -> + whereis(rabbit_boot) /= undefined. + +wait_to_start_booting() -> + case whereis(rabbit_boot) of + undefined -> timer:sleep(100), + wait_to_start_booting(); + _ -> ok + end. -await_startup(HaveSeenRabbitBoot) -> - %% We don't take absence of rabbit_boot as evidence we've started, - %% since there's a small window before it is registered. +wait_to_finish_booting() -> case whereis(rabbit_boot) of - undefined -> case HaveSeenRabbitBoot orelse is_running() of - true -> ok; - false -> timer:sleep(100), - await_startup(false) - end; + undefined -> true = is_running(), + ok; _ -> timer:sleep(100), - await_startup(true) + wait_to_finish_booting() end. status() -> diff --git a/test/clustering_management_SUITE.erl b/test/clustering_management_SUITE.erl index 2484f8c910..5418827faf 100644 --- a/test/clustering_management_SUITE.erl +++ b/test/clustering_management_SUITE.erl @@ -52,7 +52,8 @@ groups() -> reset_removes_things, forget_offline_removes_things, force_boot, - status_with_alarm + status_with_alarm, + wait_fails_when_cluster_fails ]}, {cluster_size_4, [], [ forget_promotes_offline_slave @@ -73,8 +74,10 @@ suite() -> init_per_suite(Config) -> rabbit_ct_helpers:log_environment(), Config1 = rabbit_ct_helpers:merge_app_env( - Config, - {rabbit, [{mnesia_table_loading_retry_limit, 1}]}), + Config, {rabbit, [ + {mnesia_table_loading_retry_limit, 2}, + {mnesia_table_loading_retry_timeout,1000} + ]}), rabbit_ct_helpers:run_setup_steps(Config1). end_per_suite(Config) -> @@ -595,8 +598,49 @@ status_with_alarm(Config) -> ok = alarm_information_on_each_node(R, Rabbit, Hare). +wait_fails_when_cluster_fails(Config) -> + [Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config, + nodename), + RabbitConfig = rabbit_ct_broker_helpers:get_node_config(Config,Rabbit), + RabbitPidFile = ?config(pid_file, RabbitConfig), + %% ensure pid file is readable + {ok, _} = file:read_file(RabbitPidFile), + %% ensure wait works on running node + {ok, _} = rabbit_ct_broker_helpers:rabbitmqctl(Config, Rabbit, + ["wait", RabbitPidFile]), + %% stop both nodes + ok = rabbit_ct_broker_helpers:stop_node(Config, Rabbit), + ok = rabbit_ct_broker_helpers:stop_node(Config, Hare), + %% starting first node fails - it was not the last node to stop + {error, _} = rabbit_ct_broker_helpers:start_node(Config, Rabbit), + %% start first node in the background + spawn_link(fun() -> + rabbit_ct_broker_helpers:start_node(Config, Rabbit) + end), + Attempts = 10, + Timeout = 500, + wait_for_pid_file_to_contain_running_process_pid(RabbitPidFile, Attempts, Timeout), + {error, _, _} = rabbit_ct_broker_helpers:rabbitmqctl(Config, Rabbit, + ["wait", RabbitPidFile]). + %% ---------------------------------------------------------------------------- %% Internal utils +%% ---------------------------------------------------------------------------- + +wait_for_pid_file_to_contain_running_process_pid(_, 0, _) -> + error(timeout_waiting_for_pid_file_to_have_running_pid); +wait_for_pid_file_to_contain_running_process_pid(PidFile, Attempts, Timeout) -> + Pid = pid_from_file(PidFile), + case rabbit_misc:is_os_process_alive(Pid) of + true -> ok; + false -> + ct:sleep(Timeout), + wait_for_pid_file_to_contain_running_process_pid(PidFile, Attempts - 1, Timeout) + end. + +pid_from_file(PidFile) -> + {ok, Content} = file:read_file(PidFile), + string:strip(binary_to_list(Content), both, $\n). cluster_members(Config) -> rabbit_ct_broker_helpers:get_node_configs(Config, nodename). |
