summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Diana Corbacho <diana@rabbitmq.com> 2017-04-12 08:27:29 +0100
committer: Diana Corbacho <diana@rabbitmq.com> 2017-04-12 08:27:29 +0100
commit: 186c32700b52de68cb3ba71e445844d236519603 (patch)
tree: d78a17a4c39ba64fdaec653261a15064a19fa5ef
parent: fe56987d5b8ccf6b8ad073f240c9bf8a330e532d (diff)
download: rabbitmq-server-git-186c32700b52de68cb3ba71e445844d236519603.tar.gz
Re-enable disk_monitor in case of parser failures
Parser failures could be transient on start-up, so retry a few times before giving up. rabbitmq-server#1178 [#143558437]
-rw-r--r-- Makefile 4
-rw-r--r-- src/rabbit_disk_monitor.erl 43
-rw-r--r-- test/unit_inbroker_non_parallel_SUITE.erl 32
3 files changed, 67 insertions, 12 deletions
diff --git a/Makefile b/Makefile
index c824002b64..e40145dd1c 100644
--- a/Makefile
+++ b/Makefile
@@ -107,7 +107,9 @@ define PROJECT_ENV
{queue_explicit_gc_run_operation_threshold, 1000},
{lazy_queue_explicit_gc_run_operation_threshold, 1000},
{background_gc_enabled, false},
- {background_gc_target_interval, 60000}
+ {background_gc_target_interval, 60000},
+ {disk_monitor_enable_retries, 10},
+ {disk_monitor_enable_interval, 120000}
]
endef
diff --git a/src/rabbit_disk_monitor.erl b/src/rabbit_disk_monitor.erl
index b2548cb61a..86f9b4016e 100644
--- a/src/rabbit_disk_monitor.erl
+++ b/src/rabbit_disk_monitor.erl
@@ -65,7 +65,12 @@
alarmed,
%% is monitoring enabled? false on unsupported
%% platforms
- enabled
+ enabled,
+ %% number of retries to enable monitoring if it fails
+ %% on start-up
+ retries,
+ %% Interval between retries
+ interval
}).
%%----------------------------------------------------------------------------
@@ -114,20 +119,17 @@ start_link(Args) ->
init([Limit]) ->
Dir = dir(),
+ {ok, Retries} = application:get_env(rabbit, disk_monitor_enable_retries),
+ {ok, Interval} = application:get_env(rabbit, disk_monitor_enable_interval),
State = #state{dir = Dir,
min_interval = ?DEFAULT_MIN_DISK_CHECK_INTERVAL,
max_interval = ?DEFAULT_MAX_DISK_CHECK_INTERVAL,
alarmed = false,
- enabled = true},
- case {catch get_disk_free(Dir),
- vm_memory_monitor:get_total_memory()} of
- {N1, N2} when is_integer(N1), is_integer(N2) ->
- {ok, start_timer(set_disk_limits(State, Limit))};
- Err ->
- rabbit_log:info("Disabling disk free space monitoring "
- "on unsupported platform:~n~p~n", [Err]),
- {ok, State#state{enabled = false}}
- end.
+ enabled = true,
+ limit = Limit,
+ retries = Retries,
+ interval = Interval},
+ {ok, enable(State)}.
handle_call(get_disk_free_limit, _From, State = #state{limit = Limit}) ->
{reply, Limit, State};
@@ -161,6 +163,8 @@ handle_call(_Request, _From, State) ->
handle_cast(_Request, State) ->
{noreply, State}.
+handle_info(try_enable, #state{retries = Retries} = State) ->
+ {noreply, enable(State#state{retries = Retries - 1})};
handle_info(update, State) ->
{noreply, start_timer(internal_update(State))};
@@ -261,3 +265,20 @@ interval(#state{limit = Limit,
max_interval = MaxInterval}) ->
IdealInterval = 2 * (Actual - Limit) / ?FAST_RATE,
trunc(erlang:max(MinInterval, erlang:min(MaxInterval, IdealInterval))).
+
+%% Tries to switch disk free space monitoring on.
+%%
+%% Probes get_disk_free/1 and vm_memory_monitor:get_total_memory/0. When both
+%% yield integers, the disk limits are set and the periodic check timer is
+%% started. Otherwise monitoring is flagged as disabled and a `try_enable'
+%% message is scheduled to this process after `interval' milliseconds; the
+%% handle_info clause for that message decrements `retries' and calls
+%% enable/1 again. Once `retries' reaches 0 the state is returned unchanged
+%% and no further attempt is scheduled.
+%%
+%% Returns the updated #state{} record.
+enable(#state{retries = 0} = State) ->
+    State;
+enable(#state{dir = Dir, interval = Interval, limit = Limit, retries = Retries}
+       = State) ->
+    case {catch get_disk_free(Dir),
+          vm_memory_monitor:get_total_memory()} of
+        {N1, N2} when is_integer(N1), is_integer(N2) ->
+            rabbit_log:info("Enabling disk free space monitoring~n", []),
+            %% A previous failed attempt stored enabled = false in the state;
+            %% reset the flag so a successful retry really re-enables
+            %% monitoring.
+            start_timer(set_disk_limits(State#state{enabled = true}, Limit));
+        Err ->
+            %% The current attempt has consumed one retry, so report what is
+            %% left after it.
+            rabbit_log:info("Disabling disk free space monitoring "
+                            "on unsupported platform, ~p retries left:~n~p~n",
+                            [Retries - 1, Err]),
+            erlang:send_after(Interval, self(), try_enable),
+            State#state{enabled = false}
+    end.
diff --git a/test/unit_inbroker_non_parallel_SUITE.erl b/test/unit_inbroker_non_parallel_SUITE.erl
index 266f0d30c0..68b7a15a8a 100644
--- a/test/unit_inbroker_non_parallel_SUITE.erl
+++ b/test/unit_inbroker_non_parallel_SUITE.erl
@@ -35,6 +35,7 @@ groups() ->
app_management, %% Restart RabbitMQ.
channel_statistics, %% Expect specific statistics.
disk_monitor, %% Replace rabbit_misc module.
+ disk_monitor_enable,
file_handle_cache, %% Change FHC limit.
head_message_timestamp_statistics, %% Expect specific statistics.
log_management, %% Check log files.
@@ -744,6 +745,37 @@ disk_monitor1(_Config) ->
meck:unload(rabbit_misc),
passed.
+%% Runs disk_monitor_enable1/1 on broker node 0 via RPC and asserts that it
+%% reports `passed'.
+disk_monitor_enable(Config) ->
+    Outcome = rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE,
+                                           disk_monitor_enable1, [Config]),
+    passed = Outcome.
+
+%% Broker-side entry point of the disk_monitor_enable test case.
+%%
+%% The scenario is only run on unix; on any other OS family there is nothing
+%% to check. Always returns `passed', because the caller asserts
+%% `passed = rpc(...)' and any other atom would crash it with badmatch
+%% instead of skipping.
+disk_monitor_enable1(_Config) ->
+    case os:type() of
+        {unix, _} ->
+            disk_monitor_enable1();
+        _ ->
+            %% skip windows testing
+            passed
+    end.
+
+%% Checks that rabbit_disk_monitor retries enabling itself after a failed
+%% start-up probe.
+%%
+%% rabbit_misc:os_cmd/1 is first mocked to return unusable output, so the
+%% restarted monitor reports `undefined' free space. The mock is then switched
+%% to well-formed output and, after waiting for several retry intervals
+%% (100 ms each), the monitor must report the parsed value.
+%%
+%% Cleanup (unloading the mock and restoring the retry settings) runs in an
+%% `after' section so that a failed assertion does not leave rabbit_misc
+%% mocked or the overridden settings in place for later test cases.
+%%
+%% Returns `passed'; any failed match raises badmatch.
+disk_monitor_enable1() ->
+    ok = meck:new(rabbit_misc, [passthrough]),
+    try
+        ok = meck:expect(rabbit_misc, os_cmd, fun(_) -> "\n" end),
+        application:set_env(rabbit, disk_monitor_enable_retries, 20000),
+        application:set_env(rabbit, disk_monitor_enable_interval, 100),
+        ok = rabbit_sup:stop_child(rabbit_disk_monitor_sup),
+        ok = rabbit_sup:start_delayed_restartable_child(rabbit_disk_monitor, [1000]),
+        undefined = rabbit_disk_monitor:get_disk_free(),
+        Cmd = "Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on\n/dev/disk1 975798272 234783364 740758908 25% 58759839 185189727 24% /\n",
+        ok = meck:expect(rabbit_misc, os_cmd, fun(_) -> Cmd end),
+        timer:sleep(1000),
+        Bytes = 740758908 * 1024,
+        Bytes = rabbit_disk_monitor:get_disk_free(),
+        passed
+    after
+        meck:unload(rabbit_misc),
+        application:set_env(rabbit, disk_monitor_enable_retries, 10),
+        application:set_env(rabbit, disk_monitor_enable_interval, 120000)
+    end.
+
%% ---------------------------------------------------------------------------
%% rabbitmqctl helpers.
%% ---------------------------------------------------------------------------