diff options
| author | Diana Corbacho <diana@rabbitmq.com> | 2017-04-12 08:27:29 +0100 |
|---|---|---|
| committer | Diana Corbacho <diana@rabbitmq.com> | 2017-04-12 08:27:29 +0100 |
| commit | 186c32700b52de68cb3ba71e445844d236519603 (patch) | |
| tree | d78a17a4c39ba64fdaec653261a15064a19fa5ef | |
| parent | fe56987d5b8ccf6b8ad073f240c9bf8a330e532d (diff) | |
| download | rabbitmq-server-git-186c32700b52de68cb3ba71e445844d236519603.tar.gz | |
Re-enable disk_monitor in case of parser failures
Parser failures could be transient on start-up, so retry a few times
before giving up.
rabbitmq-server#1178
[#143558437]
| -rw-r--r-- | Makefile | 4 |
| -rw-r--r-- | src/rabbit_disk_monitor.erl | 43 |
| -rw-r--r-- | test/unit_inbroker_non_parallel_SUITE.erl | 32 |
3 files changed, 67 insertions, 12 deletions
@@ -107,7 +107,9 @@ define PROJECT_ENV {queue_explicit_gc_run_operation_threshold, 1000}, {lazy_queue_explicit_gc_run_operation_threshold, 1000}, {background_gc_enabled, false}, - {background_gc_target_interval, 60000} + {background_gc_target_interval, 60000}, + {disk_monitor_enable_retries, 10}, + {disk_monitor_enable_interval, 120000} ] endef diff --git a/src/rabbit_disk_monitor.erl b/src/rabbit_disk_monitor.erl index b2548cb61a..86f9b4016e 100644 --- a/src/rabbit_disk_monitor.erl +++ b/src/rabbit_disk_monitor.erl @@ -65,7 +65,12 @@ alarmed, %% is monitoring enabled? false on unsupported %% platforms - enabled + enabled, + %% number of retries to enable monitoring if it fails + %% on start-up + retries, + %% Interval between retries + interval }). %%---------------------------------------------------------------------------- @@ -114,20 +119,17 @@ start_link(Args) -> init([Limit]) -> Dir = dir(), + {ok, Retries} = application:get_env(rabbit, disk_monitor_enable_retries), + {ok, Interval} = application:get_env(rabbit, disk_monitor_enable_interval), State = #state{dir = Dir, min_interval = ?DEFAULT_MIN_DISK_CHECK_INTERVAL, max_interval = ?DEFAULT_MAX_DISK_CHECK_INTERVAL, alarmed = false, - enabled = true}, - case {catch get_disk_free(Dir), - vm_memory_monitor:get_total_memory()} of - {N1, N2} when is_integer(N1), is_integer(N2) -> - {ok, start_timer(set_disk_limits(State, Limit))}; - Err -> - rabbit_log:info("Disabling disk free space monitoring " - "on unsupported platform:~n~p~n", [Err]), - {ok, State#state{enabled = false}} - end. + enabled = true, + limit = Limit, + retries = Retries, + interval = Interval}, + {ok, enable(State)}. handle_call(get_disk_free_limit, _From, State = #state{limit = Limit}) -> {reply, Limit, State}; @@ -161,6 +163,8 @@ handle_call(_Request, _From, State) -> handle_cast(_Request, State) -> {noreply, State}. 
+handle_info(try_enable, #state{retries = Retries} = State) -> + {noreply, enable(State#state{retries = Retries - 1})}; handle_info(update, State) -> {noreply, start_timer(internal_update(State))}; @@ -261,3 +265,20 @@ interval(#state{limit = Limit, max_interval = MaxInterval}) -> IdealInterval = 2 * (Actual - Limit) / ?FAST_RATE, trunc(erlang:max(MinInterval, erlang:min(MaxInterval, IdealInterval))). + +enable(#state{retries = 0} = State) -> + State; +enable(#state{dir = Dir, interval = Interval, limit = Limit, retries = Retries} + = State) -> + case {catch get_disk_free(Dir), + vm_memory_monitor:get_total_memory()} of + {N1, N2} when is_integer(N1), is_integer(N2) -> + rabbit_log:info("Enabling disk free space monitoring~n", []), + start_timer(set_disk_limits(State, Limit)); + Err -> + rabbit_log:info("Disabling disk free space monitoring " + "on unsupported platform, ~p retries left:~n~p~n", + [Retries, Err]), + timer:send_after(Interval, self(), try_enable), + State#state{enabled = false} + end. diff --git a/test/unit_inbroker_non_parallel_SUITE.erl b/test/unit_inbroker_non_parallel_SUITE.erl index 266f0d30c0..68b7a15a8a 100644 --- a/test/unit_inbroker_non_parallel_SUITE.erl +++ b/test/unit_inbroker_non_parallel_SUITE.erl @@ -35,6 +35,7 @@ groups() -> app_management, %% Restart RabbitMQ. channel_statistics, %% Expect specific statistics. disk_monitor, %% Replace rabbit_misc module. + disk_monitor_enable, file_handle_cache, %% Change FHC limit. head_message_timestamp_statistics, %% Expect specific statistics. log_management, %% Check log files. @@ -744,6 +745,37 @@ disk_monitor1(_Config) -> meck:unload(rabbit_misc), passed. +disk_monitor_enable(Config) -> + passed = rabbit_ct_broker_helpers:rpc(Config, 0, + ?MODULE, disk_monitor_enable1, [Config]). + +disk_monitor_enable1(_Config) -> + case os:type() of + {unix, _} -> + disk_monitor_enable1(); + _ -> + %% skip windows testing + skipped + end. 
+ +disk_monitor_enable1() -> + ok = meck:new(rabbit_misc, [passthrough]), + ok = meck:expect(rabbit_misc, os_cmd, fun(_) -> "\n" end), + application:set_env(rabbit, disk_monitor_enable_retries, 20000), + application:set_env(rabbit, disk_monitor_enable_interval, 100), + ok = rabbit_sup:stop_child(rabbit_disk_monitor_sup), + ok = rabbit_sup:start_delayed_restartable_child(rabbit_disk_monitor, [1000]), + undefined = rabbit_disk_monitor:get_disk_free(), + Cmd = "Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on\n/dev/disk1 975798272 234783364 740758908 25% 58759839 185189727 24% /\n", + ok = meck:expect(rabbit_misc, os_cmd, fun(_) -> Cmd end), + timer:sleep(1000), + Bytes = 740758908 * 1024, + Bytes = rabbit_disk_monitor:get_disk_free(), + meck:unload(rabbit_misc), + application:set_env(rabbit, disk_monitor_enable_retries, 10), + application:set_env(rabbit, disk_monitor_enable_interval, 120000), + passed. + %% --------------------------------------------------------------------------- %% rabbitmqctl helpers. %% --------------------------------------------------------------------------- |
