summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Klishin <michael@novemberain.com>2016-07-14 17:37:05 +0400
committerGitHub <noreply@github.com>2016-07-14 17:37:05 +0400
commitdf28c63f6bb703e953bcda6ffe3cb62915b9d28c (patch)
tree6fc3208a525c0a3628e2008f915e89f368849c0a
parent78a9220e077f79055c225f0f319cbabe13eb3c50 (diff)
parentdb33a5669e3b48f216dd7003a4d076bc3bd992c8 (diff)
downloadrabbitmq-server-git-df28c63f6bb703e953bcda6ffe3cb62915b9d28c.tar.gz
Merge pull request #884 from binarin/rabbitmq-server-818-master
Merge 883 into master
-rw-r--r--include/rabbit_cli.hrl2
-rwxr-xr-xscripts/rabbitmq-env5
-rw-r--r--scripts/rabbitmq-env.bat19
-rw-r--r--scripts/rabbitmq-server.bat3
-rw-r--r--scripts/rabbitmq-service.bat3
-rw-r--r--src/rabbit_control_main.erl56
-rw-r--r--test/health_check_SUITE.erl167
7 files changed, 222 insertions, 33 deletions
diff --git a/include/rabbit_cli.hrl b/include/rabbit_cli.hrl
index 7f5db6053b..a0d1ecfdd5 100644
--- a/include/rabbit_cli.hrl
+++ b/include/rabbit_cli.hrl
@@ -34,7 +34,7 @@
-define(NODE_DEF(Node), {?NODE_OPT, {option, Node}}).
-define(QUIET_DEF, {?QUIET_OPT, flag}).
-define(VHOST_DEF, {?VHOST_OPT, {option, "/"}}).
--define(TIMEOUT_DEF, {?TIMEOUT_OPT, {option, "infinity"}}).
+-define(TIMEOUT_DEF, {?TIMEOUT_OPT, {option, use_default}}).
-define(VERBOSE_DEF, {?VERBOSE_OPT, flag}).
-define(MINIMAL_DEF, {?MINIMAL_OPT, flag}).
diff --git a/scripts/rabbitmq-env b/scripts/rabbitmq-env
index 62cff0b248..6a7b2f91f0 100755
--- a/scripts/rabbitmq-env
+++ b/scripts/rabbitmq-env
@@ -62,8 +62,11 @@ RABBITMQ_HOME="$(rmq_realpath "${RABBITMQ_SCRIPTS_DIR}/..")"
## Set defaults
. ${RABBITMQ_SCRIPTS_DIR}/rabbitmq-defaults
+DEFAULT_SCHEDULER_BIND_TYPE="db"
+[ "x" = "x$RABBITMQ_SCHEDULER_BIND_TYPE" ] && RABBITMQ_SCHEDULER_BIND_TYPE=${DEFAULT_SCHEDULER_BIND_TYPE}
+
## Common defaults
-SERVER_ERL_ARGS="+P 1048576"
+SERVER_ERL_ARGS="+P 1048576 +stbt $RABBITMQ_SCHEDULER_BIND_TYPE "
# We save the current value of $RABBITMQ_PID_FILE in case it was set by
# an init script. If $CONF_ENV_FILE overrides it again, we must ignore
diff --git a/scripts/rabbitmq-env.bat b/scripts/rabbitmq-env.bat
index 4c5691bedb..a4a5c9dc19 100644
--- a/scripts/rabbitmq-env.bat
+++ b/scripts/rabbitmq-env.bat
@@ -30,10 +30,14 @@ REM ## Set defaults
REM . ${SCRIPT_DIR}/rabbitmq-defaults
call "%SCRIPT_DIR%\rabbitmq-defaults.bat"
-REM These common defaults aren't referenced in the batch scripts
-REM ## Common defaults
-REM SERVER_ERL_ARGS="+P 1048576"
-REM
+set DEFAULT_SCHEDULER_BIND_TYPE=db
+
+REM [ "x" = "x$RABBITMQ_SCHEDULER_BIND_TYPE" ] && RABBITMQ_SCHEDULER_BIND_TYPE=${DEFAULT_SCHEDULER_BIND_TYPE}
+REM set the default scheduling bind type
+if "!RABBITMQ_SCHEDULER_BIND_TYPE!"=="" (
+ set RABBITMQ_SCHEDULER_BIND_TYPE=!DEFAULT_SCHEDULER_BIND_TYPE!
+)
+
REM # warn about old rabbitmq.conf file, if no new one
REM if [ -f /etc/rabbitmq/rabbitmq.conf ] && \
REM [ ! -f ${CONF_ENV_FILE} ] ; then
@@ -41,9 +45,8 @@ REM echo -n "WARNING: ignoring /etc/rabbitmq/rabbitmq.conf -- "
REM echo "location has moved to ${CONF_ENV_FILE}"
REM fi
-REM ERL_ARGS aren't referenced in the batch scripts
REM Common defaults
-REM set SERVER_ERL_ARGS=+P 1048576
+set SERVER_ERL_ARGS=+P 1048576 +stbt !RABBITMQ_SCHEDULER_BIND_TYPE!
REM ## Get configuration variables from the configure environment file
REM [ -f ${CONF_ENV_FILE} ] && . ${CONF_ENV_FILE} || true
@@ -151,7 +154,9 @@ if "!RABBITMQ_DIST_PORT!"=="" (
)
REM [ "x" = "x$RABBITMQ_SERVER_ERL_ARGS" ] && RABBITMQ_SERVER_ERL_ARGS=${SERVER_ERL_ARGS}
-REM No Windows equivalent
+if "!RABBITMQ_SERVER_ERL_ARGS!"=="" (
+ set RABBITMQ_SERVER_ERL_ARGS=!SERVER_ERL_ARGS!
+)
REM [ "x" = "x$RABBITMQ_CONFIG_FILE" ] && RABBITMQ_CONFIG_FILE=${CONFIG_FILE}
diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat
index 20a0bd8823..a15f24e586 100644
--- a/scripts/rabbitmq-server.bat
+++ b/scripts/rabbitmq-server.bat
@@ -156,9 +156,8 @@ if "!ENV_OK!"=="false" (
!RABBITMQ_NAME_TYPE! !RABBITMQ_NODENAME! ^
+W w ^
+A "!RABBITMQ_IO_THREAD_POOL_SIZE!" ^
-+P 1048576 ^
-!RABBITMQ_LISTEN_ARG! ^
!RABBITMQ_SERVER_ERL_ARGS! ^
+!RABBITMQ_LISTEN_ARG! ^
-kernel inet_default_connect_options "[{nodelay, true}]" ^
!RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS! ^
-sasl errlog_type error ^
diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat
index 2fb34ddb28..c9e404db46 100644
--- a/scripts/rabbitmq-service.bat
+++ b/scripts/rabbitmq-service.bat
@@ -232,9 +232,8 @@ set ERLANG_SERVICE_ARGUMENTS= ^
!RABBITMQ_CONFIG_ARG! ^
+W w ^
+A "!RABBITMQ_IO_THREAD_POOL_SIZE!" ^
-+P 1048576 ^
-!RABBITMQ_LISTEN_ARG! ^
!RABBITMQ_SERVER_ERL_ARGS! ^
+!RABBITMQ_LISTEN_ARG! ^
-kernel inet_default_connect_options "[{nodelay,true}]" ^
!RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS! ^
-sasl errlog_type error ^
diff --git a/src/rabbit_control_main.erl b/src/rabbit_control_main.erl
index b9b21352ae..7f410ac752 100644
--- a/src/rabbit_control_main.erl
+++ b/src/rabbit_control_main.erl
@@ -114,13 +114,15 @@
[stop, stop_app, start_app, wait, reset, force_reset, rotate_logs,
join_cluster, change_cluster_node_type, update_cluster_nodes,
forget_cluster_node, rename_cluster_node, cluster_status, status,
- environment, eval, force_boot, help, node_health_check, hipe_compile]).
+ environment, eval, force_boot, help, hipe_compile]).
+%% [Command | {Command, DefaultTimeoutInMilliSeconds}]
-define(COMMANDS_WITH_TIMEOUT,
[list_user_permissions, list_policies, list_queues, list_exchanges,
list_bindings, list_connections, list_channels, list_consumers,
list_vhosts, list_parameters,
- purge_queue]).
+ purge_queue,
+ {node_health_check, 70000}]).
%%----------------------------------------------------------------------------
@@ -152,7 +154,7 @@ start() ->
end
end,
try
- T = case get_timeout(Opts) of
+ T = case get_timeout(Command, Opts) of
{ok, Timeout} ->
Timeout;
{error, _} ->
@@ -187,8 +189,23 @@ print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg) ->
end,
io:nl().
-get_timeout(Opts) ->
- parse_timeout(proplists:get_value(?TIMEOUT_OPT, Opts, ?RPC_TIMEOUT)).
+%% Work out the timeout to use for Command.
+%%
+%% The per-command default comes from ?COMMANDS_WITH_TIMEOUT, whose
+%% entries are either a bare command atom (looked up as {Command, true},
+%% which selects ?RPC_TIMEOUT) or {Command, DefaultTimeout}. Commands
+%% that are not listed there default to `infinity'.
+%%
+%% A ?TIMEOUT_OPT value found in Opts takes precedence, except for the
+%% `use_default' placeholder, which selects the per-command default.
+%% The chosen value is handed to parse_timeout/1 and its result returned.
+get_timeout(Command, Opts) ->
+    CommandDefault =
+        case proplists:lookup(Command, ?COMMANDS_WITH_TIMEOUT) of
+            {Command, true}   -> ?RPC_TIMEOUT;
+            {Command, Custom} -> Custom;
+            none              -> infinity
+        end,
+    Requested =
+        case proplists:get_value(?TIMEOUT_OPT, Opts, CommandDefault) of
+            use_default -> CommandDefault;
+            Explicit    -> Explicit
+        end,
+    parse_timeout(Requested).
+
parse_number(N) when is_list(N) ->
try list_to_integer(N) of
@@ -234,11 +251,11 @@ do_action(Command, Node, Args, Opts, Inform, Timeout) ->
false ->
case ensure_app_running(Node) of
ok ->
- case lists:member(Command, ?COMMANDS_WITH_TIMEOUT) of
- true ->
+ case proplists:lookup(Command, ?COMMANDS_WITH_TIMEOUT) of
+ {Command, _} ->
announce_timeout(Timeout, Inform),
action(Command, Node, Args, Opts, Inform, Timeout);
- false ->
+ none ->
action(Command, Node, Args, Opts, Inform)
end;
E -> E
@@ -559,17 +576,6 @@ action(eval, Node, [Expr], _Opts, _Inform) ->
action(help, _Node, _Args, _Opts, _Inform) ->
io:format("~s", [rabbit_ctl_usage:usage()]);
-action(node_health_check, Node, _Args, _Opts, Inform) ->
- Inform("Checking health of node ~p", [Node]),
- try
- rabbit_health_check:node(Node),
- io:format("Health check passed~n")
- catch
- {node_is_ko, ErrorMsg, ErrorCode} ->
- io:format("Heath check failed:~n~s~n", [ErrorMsg]),
- halt(ErrorCode)
- end;
-
action(Command, Node, Args, Opts, Inform) ->
%% For backward compatibility, run commands accepting a timeout with
%% the default timeout.
@@ -685,7 +691,17 @@ action(list_consumers, Node, _Args, Opts, Inform, Timeout) ->
Nodes = nodes_in_cluster(Node, Timeout),
call_emitter(Node, {rabbit_amqqueue, emit_consumers_all, [Nodes, VHostArg]},
rabbit_amqqueue:consumer_info_keys(),
- [{timeout, Timeout}, {chunks, length(Nodes)}]).
+ [{timeout, Timeout}, {chunks, length(Nodes)}]);
+
+action(node_health_check, Node, _Args, _Opts, Inform, Timeout) ->
+ Inform("Checking health of node ~p", [Node]),
+ case rabbit_health_check:node(Node, Timeout) of
+ ok ->
+ io:format("Health check passed~n"),
+ ok;
+ Other ->
+ Other
+ end.
format_parse_error({_Line, Mod, Err}) -> lists:flatten(Mod:format_error(Err)).
diff --git a/test/health_check_SUITE.erl b/test/health_check_SUITE.erl
new file mode 100644
index 0000000000..4d8f56e9d3
--- /dev/null
+++ b/test/health_check_SUITE.erl
@@ -0,0 +1,167 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is GoPivotal, Inc.
+%% Copyright (c) 2016 Pivotal Software, Inc. All rights reserved.
+%%
+-module(health_check_SUITE).
+
+-include_lib("common_test/include/ct.hrl").
+-include_lib("amqp_client/include/amqp_client.hrl").
+
+-export([all/0
+ ,groups/0
+ ,init_per_suite/1
+ ,end_per_suite/1
+ ,init_per_testcase/2
+ ,end_per_testcase/2
+ ]).
+
+-export([ignores_remote_dead_channel/1
+ ,detects_local_dead_channel/1
+ ,ignores_remote_dead_queue/1
+ ,detects_local_dead_queue/1
+ ,ignores_remote_alarms/1
+ ,detects_local_alarm/1
+ ,honors_timeout_argument/1
+ ]).
+
+%% Common Test callback: the suite consists of the single `all_cases' group.
+all() -> [{group, all_cases}].
+
+%% Common Test callback: defines the `all_cases' group, with no group
+%% properties, containing every test case of this suite.
+groups() ->
+    Cases = [ignores_remote_dead_queue,
+             detects_local_dead_queue,
+             ignores_remote_dead_channel,
+             detects_local_dead_channel,
+             ignores_remote_alarms,
+             detects_local_alarm,
+             honors_timeout_argument],
+    [{all_cases, [], Cases}].
+
+%% Common Test callback: logs the test environment, then returns the
+%% result of running the suite-level setup steps on the given config.
+init_per_suite(SuiteConfig) ->
+    rabbit_ct_helpers:log_environment(),
+    rabbit_ct_helpers:run_setup_steps(SuiteConfig).
+
+%% Common Test callback: returns the result of running the suite-level
+%% teardown steps on the given config.
+end_per_suite(SuiteConfig) ->
+    rabbit_ct_helpers:run_teardown_steps(SuiteConfig).
+
+%% Common Test callback: every test case gets its own two-node,
+%% clustered broker setup. Records the test case start, adds the node
+%% count and clustering settings to the config, then runs the broker
+%% setup steps followed by the client setup steps.
+init_per_testcase(Testcase, Config) ->
+    rabbit_ct_helpers:testcase_started(Config, Testcase),
+    ClusterSettings = [{rmq_nodes_count, 2},
+                       {rmq_nodes_clustered, true}],
+    ClusterConfig = rabbit_ct_helpers:set_config(Config, ClusterSettings),
+    SetupSteps = rabbit_ct_broker_helpers:setup_steps() ++
+                 rabbit_ct_client_helpers:setup_steps(),
+    rabbit_ct_helpers:run_steps(ClusterConfig, SetupSteps).
+
+%% Common Test callback: runs the client teardown steps followed by the
+%% broker teardown steps, then records the test case as finished.
+%% If the incoming config carries a `save_config' value, that value is
+%% used as the config for teardown instead of the incoming one.
+end_per_testcase(Testcase, Config) ->
+    TeardownConfig =
+        case rabbit_ct_helpers:get_config(Config, save_config) of
+            undefined -> Config;
+            Saved     -> Saved
+        end,
+    TeardownSteps = rabbit_ct_client_helpers:teardown_steps() ++
+                    rabbit_ct_broker_helpers:teardown_steps(),
+    FinalConfig = rabbit_ct_helpers:run_steps(TeardownConfig, TeardownSteps),
+    rabbit_ct_helpers:testcase_finished(FinalConfig, Testcase).
+
+%%----------------------------------------------------------------------------
+%% Test cases
+%%----------------------------------------------------------------------------
+%% A channel process suspended on the second node must not make the
+%% health check of the first node fail.
+ignores_remote_dead_channel(Config) ->
+    [Checked, Other] = open_channel_and_declare_queue_everywhere(Config),
+    Suspended = suspend_single_channel(Config, Other),
+    CtlArgs = ["-t", "5", "node_health_check"],
+    {ok, _} = rabbit_ct_broker_helpers:rabbitmqctl(Config, Checked, CtlArgs),
+    resume_sys_process(Config, Other, Suspended),
+    ok.
+
+%% A channel process suspended on the checked node itself must make the
+%% health check fail with exit code 75 and a "timed out" message.
+detects_local_dead_channel(Config) ->
+    [Checked|_] = open_channel_and_declare_queue_everywhere(Config),
+    Suspended = suspend_single_channel(Config, Checked),
+    CtlArgs = ["-t", "5", "node_health_check"],
+    {error, 75, Output} =
+        rabbit_ct_broker_helpers:rabbitmqctl(Config, Checked, CtlArgs),
+    {match, _} = re:run(Output, "operation node_health_check.*timed out"),
+    resume_sys_process(Config, Checked, Suspended),
+    ok.
+
+%% A queue process suspended on the second node must not make the
+%% health check of the first node fail.
+ignores_remote_dead_queue(Config) ->
+    [Checked, Other] = open_channel_and_declare_queue_everywhere(Config),
+    Suspended = suspend_single_queue(Config, Other),
+    CtlArgs = ["-t", "5", "node_health_check"],
+    {ok, _} = rabbit_ct_broker_helpers:rabbitmqctl(Config, Checked, CtlArgs),
+    resume_sys_process(Config, Other, Suspended),
+    ok.
+
+%% A queue process suspended on the checked node itself must make the
+%% health check fail with exit code 75 and a "timed out" message.
+detects_local_dead_queue(Config) ->
+    [Checked|_] = open_channel_and_declare_queue_everywhere(Config),
+    Suspended = suspend_single_queue(Config, Checked),
+    CtlArgs = ["-t", "5", "node_health_check"],
+    {error, 75, Output} =
+        rabbit_ct_broker_helpers:rabbitmqctl(Config, Checked, CtlArgs),
+    {match, _} = re:run(Output, "operation node_health_check.*timed out"),
+    resume_sys_process(Config, Checked, Suspended),
+    ok.
+
+%% Lowering the memory watermark on the second node must not make the
+%% health check of the first node fail.
+ignores_remote_alarms(Config) ->
+    [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
+    %% Assert that the set-up command succeeded: if the watermark could
+    %% not be lowered on B, the health check on A would pass trivially
+    %% and this test would prove nothing.
+    {ok, _} = rabbit_ct_broker_helpers:rabbitmqctl(
+                Config, B, ["set_vm_memory_high_watermark", "0.000000001"]),
+    {ok, _} = rabbit_ct_broker_helpers:rabbitmqctl(
+                Config, A, ["-t", "5", "node_health_check"]),
+    ok.
+
+%% Lowering the memory watermark on the checked node itself must make
+%% the health check fail with exit code 70 and a message reporting a
+%% resource alarm in effect.
+detects_local_alarm(Config) ->
+    [A|_] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
+    %% Assert that the set-up command succeeded, so that a failure to
+    %% lower the watermark is reported here rather than surfacing as a
+    %% confusing mismatch on the health check result below.
+    {ok, _} = rabbit_ct_broker_helpers:rabbitmqctl(
+                Config, A, ["set_vm_memory_high_watermark", "0.000000001"]),
+    {error, 70, Str} = rabbit_ct_broker_helpers:rabbitmqctl(
+                         Config, A, ["-t", "5", "node_health_check"]),
+    {match, _} = re:run(Str, "resource alarm.*in effect"),
+    ok.
+
+%% With a queue process suspended on the checked node, a health check
+%% invoked with "-t 5" must fail with exit code 75 after no less than
+%% 5 seconds and no more than 7 seconds, as measured by timer:tc/3
+%% (which reports microseconds).
+honors_timeout_argument(Config) ->
+    [Checked|_] = open_channel_and_declare_queue_everywhere(Config),
+    Suspended = suspend_single_queue(Config, Checked),
+
+    CallArgs = [Config, Checked, ["-t", "5", "node_health_check"]],
+    case timer:tc(rabbit_ct_broker_helpers, rabbitmqctl, CallArgs) of
+        {Micros, {error, 75, _}} when Micros < 5000000 ->
+            exit({too_fast, Micros});
+        %% +2 seconds for rabbitmqctl overhead
+        {Micros, {error, 75, _}} when Micros > 7000000 ->
+            exit({too_slow, Micros});
+        {_, {error, 75, _}} ->
+            ok;
+        {_, Unexpected} ->
+            exit({unexpected, Unexpected})
+    end,
+    resume_sys_process(Config, Checked, Suspended),
+    ok.
+
+%%----------------------------------------------------------------------------
+%% Helpers
+%%----------------------------------------------------------------------------
+%% Opens a channel on every node of the test cluster and declares a
+%% queue (default 'queue.declare' record) through each of them.
+%% Returns the list of node names.
+open_channel_and_declare_queue_everywhere(Config) ->
+    NodeNames = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
+    DeclareOn =
+        fun(NodeName) ->
+                Channel = rabbit_ct_client_helpers:open_channel(Config, NodeName),
+                #'queue.declare_ok'{} =
+                    amqp_channel:call(Channel, #'queue.declare'{})
+        end,
+    lists:foreach(DeclareOn, NodeNames),
+    NodeNames.
+
+%% Suspends, via sys:suspend/1 executed on Node, the first queue process
+%% that lives on Node among the queues listed by that node.
+%% Returns the pid of the suspended process; fails with a badmatch if
+%% Node hosts no queue process.
+suspend_single_queue(Config, Node) ->
+    Queues = rabbit_ct_broker_helpers:rpc(Config, Node, rabbit_amqqueue, list, []),
+    OnNode = [Pid || Q <- Queues,
+                     Pid <- [rabbit_amqqueue:pid_of(Q)],
+                     Node == node(Pid)],
+    [Target|_] = OnNode,
+    rabbit_ct_broker_helpers:rpc(Config, Node, sys, suspend, [Target]),
+    Target.
+
+%% Suspends, via sys:suspend/1 executed on Node, the first channel
+%% process that lives on Node among those returned by
+%% rabbit_channel:list_local/0 on that node.
+%% Returns the pid of the suspended process; fails with a badmatch if
+%% Node hosts no channel process.
+suspend_single_channel(Config, Node) ->
+    Channels = rabbit_ct_broker_helpers:rpc(Config, Node, rabbit_channel, list_local, []),
+    OnNode = [Pid || Pid <- Channels, Node == node(Pid)],
+    [Target|_] = OnNode,
+    rabbit_ct_broker_helpers:rpc(Config, Node, sys, suspend, [Target]),
+    Target.
+
+%% Resumes a previously suspended process by calling sys:resume/1 on
+%% Node; returns whatever the RPC helper returns.
+resume_sys_process(Config, Node, SuspendedPid) ->
+    ResumeArgs = [SuspendedPid],
+    rabbit_ct_broker_helpers:rpc(Config, Node, sys, resume, ResumeArgs).