diff options
| author | Michael Klishin <mklishin@pivotal.io> | 2017-04-12 14:29:25 +0300 |
|---|---|---|
| committer | Michael Klishin <mklishin@pivotal.io> | 2017-04-12 14:29:25 +0300 |
| commit | c4519ddf4baf8af772b8b1a79945eef02cda17f4 (patch) | |
| tree | 50d9a76e254c61f6b13eb8afe3e20a0fb4dd8677 | |
| parent | 536674232015587befed5be325bc89aeca3c17ff (diff) | |
| parent | bafb80126c539e8d3c745622fbc5bebe8197439c (diff) | |
| download | rabbitmq-server-git-c4519ddf4baf8af772b8b1a79945eef02cda17f4.tar.gz | |
Merge branch 'stable'
Conflicts:
Makefile
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Makefile | 4 |
| -rw-r--r-- | src/rabbit_disk_monitor.erl | 45 |
| -rw-r--r-- | test/unit_inbroker_non_parallel_SUITE.erl | 32 |
3 files changed, 68 insertions, 13 deletions
@@ -115,7 +115,9 @@ define PROJECT_ENV {background_gc_enabled, false}, {background_gc_target_interval, 60000}, %% rabbitmq-server-589 - {proxy_protocol, false} + {proxy_protocol, false}, + {disk_monitor_failure_retries, 10}, + {disk_monitor_failure_retry_interval, 120000} ] endef diff --git a/src/rabbit_disk_monitor.erl b/src/rabbit_disk_monitor.erl index b2548cb61a..629228a1be 100644 --- a/src/rabbit_disk_monitor.erl +++ b/src/rabbit_disk_monitor.erl @@ -65,7 +65,12 @@ alarmed, %% is monitoring enabled? false on unsupported %% platforms - enabled + enabled, + %% number of retries to enable monitoring if it fails + %% on start-up + retries, + %% Interval between retries + interval }). %%---------------------------------------------------------------------------- @@ -114,20 +119,17 @@ start_link(Args) -> init([Limit]) -> Dir = dir(), + {ok, Retries} = application:get_env(rabbit, disk_monitor_failure_retries), + {ok, Interval} = application:get_env(rabbit, disk_monitor_failure_retry_interval), State = #state{dir = Dir, min_interval = ?DEFAULT_MIN_DISK_CHECK_INTERVAL, max_interval = ?DEFAULT_MAX_DISK_CHECK_INTERVAL, alarmed = false, - enabled = true}, - case {catch get_disk_free(Dir), - vm_memory_monitor:get_total_memory()} of - {N1, N2} when is_integer(N1), is_integer(N2) -> - {ok, start_timer(set_disk_limits(State, Limit))}; - Err -> - rabbit_log:info("Disabling disk free space monitoring " - "on unsupported platform:~n~p~n", [Err]), - {ok, State#state{enabled = false}} - end. + enabled = true, + limit = Limit, + retries = Retries, + interval = Interval}, + {ok, enable(State)}. handle_call(get_disk_free_limit, _From, State = #state{limit = Limit}) -> {reply, Limit, State}; @@ -161,6 +163,8 @@ handle_call(_Request, _From, State) -> handle_cast(_Request, State) -> {noreply, State}. 
+handle_info(try_enable, #state{retries = Retries} = State) -> + {noreply, enable(State#state{retries = Retries - 1})}; handle_info(update, State) -> {noreply, start_timer(internal_update(State))}; @@ -246,7 +250,7 @@ interpret_limit(Absolute) -> emit_update_info(StateStr, CurrentFree, Limit) -> rabbit_log:info( - "Disk free space ~s. Free bytes:~p Limit:~p~n", + "Free disk space is ~s. Free bytes: ~p. Limit: ~p~n", [StateStr, CurrentFree, Limit]). start_timer(State) -> @@ -261,3 +265,20 @@ interval(#state{limit = Limit, max_interval = MaxInterval}) -> IdealInterval = 2 * (Actual - Limit) / ?FAST_RATE, trunc(erlang:max(MinInterval, erlang:min(MaxInterval, IdealInterval))). + +enable(#state{retries = 0} = State) -> + State; +enable(#state{dir = Dir, interval = Interval, limit = Limit, retries = Retries} + = State) -> + case {catch get_disk_free(Dir), + vm_memory_monitor:get_total_memory()} of + {N1, N2} when is_integer(N1), is_integer(N2) -> + rabbit_log:info("Enabling free disk space monitoring~n", []), + start_timer(set_disk_limits(State, Limit)); + Err -> + rabbit_log:info("Free disk space monitor encountered an error " + "(e.g. failed to parse output from OS tools): ~p, retries left: ~s~n", + [Err, Retries]), + timer:send_after(Interval, self(), try_enable), + State#state{enabled = false} + end. diff --git a/test/unit_inbroker_non_parallel_SUITE.erl b/test/unit_inbroker_non_parallel_SUITE.erl index 93f107de6b..2af6368f34 100644 --- a/test/unit_inbroker_non_parallel_SUITE.erl +++ b/test/unit_inbroker_non_parallel_SUITE.erl @@ -35,6 +35,7 @@ groups() -> app_management, %% Restart RabbitMQ. channel_statistics, %% Expect specific statistics. disk_monitor, %% Replace rabbit_misc module. + disk_monitor_enable, file_handle_cache, %% Change FHC limit. head_message_timestamp_statistics, %% Expect specific statistics. log_management, %% Check log files. @@ -631,6 +632,37 @@ disk_monitor1(_Config) -> meck:unload(rabbit_misc), passed. 
+disk_monitor_enable(Config) -> + passed = rabbit_ct_broker_helpers:rpc(Config, 0, + ?MODULE, disk_monitor_enable1, [Config]). + +disk_monitor_enable1(_Config) -> + case os:type() of + {unix, _} -> + disk_monitor_enable1(); + _ -> + %% skip windows testing + skipped + end. + +disk_monitor_enable1() -> + ok = meck:new(rabbit_misc, [passthrough]), + ok = meck:expect(rabbit_misc, os_cmd, fun(_) -> "\n" end), + application:set_env(rabbit, disk_monitor_failure_retries, 20000), + application:set_env(rabbit, disk_monitor_failure_retry_interval, 100), + ok = rabbit_sup:stop_child(rabbit_disk_monitor_sup), + ok = rabbit_sup:start_delayed_restartable_child(rabbit_disk_monitor, [1000]), + undefined = rabbit_disk_monitor:get_disk_free(), + Cmd = "Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on\n/dev/disk1 975798272 234783364 740758908 25% 58759839 185189727 24% /\n", + ok = meck:expect(rabbit_misc, os_cmd, fun(_) -> Cmd end), + timer:sleep(1000), + Bytes = 740758908 * 1024, + Bytes = rabbit_disk_monitor:get_disk_free(), + meck:unload(rabbit_misc), + application:set_env(rabbit, disk_monitor_failure_retries, 10), + application:set_env(rabbit, disk_monitor_failure_retry_interval, 120000), + passed. + %% --------------------------------------------------------------------------- %% rabbitmqctl helpers. %% --------------------------------------------------------------------------- |
