diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/rabbit_alarm.erl | 70 | ||||
| -rw-r--r-- | src/rabbit_memsup_linux.erl | 150 |
2 files changed, 195 insertions, 25 deletions
diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl index c2d6aaffc1..6d65b3a407 100644 --- a/src/rabbit_alarm.erl +++ b/src/rabbit_alarm.erl @@ -50,22 +50,57 @@ %%---------------------------------------------------------------------------- start() -> + ok = alarm_handler:add_alarm_handler(?MODULE), + case whereis(memsup) of + undefined -> + Mod = case os:type() of + %% memsup doesn't take account of buffers or + %% cache when considering "free" memory - + %% therefore on Linux we can get memory alarms + %% very easily without any pressure existing on + %% memory at all. Therefore we need to use our + %% own simple memory monitor. + %% + {unix, linux} -> rabbit_memsup_linux; + + %% Start memsup programmatically rather than via + %% the rabbitmq-server script. This is not quite + %% the right thing to do as os_mon checks to see + %% if memsup is available before starting it, + %% but as memsup is available everywhere (even + %% on VXWorks) it should be ok. + %% + %% One benefit of the programmatic startup is + %% that we can add our alarm_handler before + %% memsup is running, thus ensuring that we + %% notice memory alarms that go off on startup. + %% + _ -> memsup + end, + %% This is based on os_mon:childspec(memsup, true) + {ok, _} = supervisor:start_child( + os_mon_sup, + {memsup, {Mod, start_link, []}, + permanent, 2000, worker, [Mod]}), + ok; + _ -> + ok + end, %% The default memsup check interval is 1 minute, which is way too - %% long - rabbit can gobble up all memory in a matter of - %% seconds. Unfortunately the memory_check_interval configuration - %% parameter and memsup:set_check_interval/1 function only provide - %% a granularity of minutes. So we have to peel off one layer of - %% the API to get to the underlying layer which operates at the + %% long - rabbit can gobble up all memory in a matter of seconds. + %% Unfortunately the memory_check_interval configuration parameter + %% and memsup:set_check_interval/1 function only provide a + %% granularity of minutes. So we have to peel off one layer of the + %% API to get to the underlying layer which operates at the %% granularity of milliseconds. %% %% Note that the new setting will only take effect after the first %% check has completed, i.e. after one minute. So if rabbit eats %% all the memory within the first minute after startup then we %% are out of luck. - ok = os_mon:call(memsup, {set_check_interval, ?MEMSUP_CHECK_INTERVAL}, - infinity), - - ok = alarm_handler:add_alarm_handler(?MODULE). + ok = os_mon:call(memsup, + {set_check_interval, ?MEMSUP_CHECK_INTERVAL}, + infinity). stop() -> ok = alarm_handler:delete_alarm_handler(?MODULE). @@ -77,9 +112,7 @@ register(Pid, HighMemMFA) -> %%---------------------------------------------------------------------------- init([]) -> - HWM = system_memory_high_watermark(), - {ok, #alarms{alertees = dict:new(), - system_memory_high_watermark = HWM}}. + {ok, #alarms{alertees = dict:new()}}. handle_call({register, Pid, HighMemMFA}, State = #alarms{alertees = Alertess}) -> @@ -121,19 +154,6 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- -system_memory_high_watermark() -> - %% When we register our alarm_handler, the - %% system_memory_high_watermark alarm may already have gone - %% off. How do we find out about that? Calling - %% alarm_handler:get_alarms() would deadlock. So instead we ask - %% memsup. Unfortunately that doesn't expose a suitable API, so we - %% have to reach quite deeply into its internals. - {dictionary, D} = process_info(whereis(memsup), dictionary), - case lists:keysearch(system_memory_high_watermark, 1, D) of - {value, {_, set}} -> true; - _Other -> false - end. - alert(Alert, Alertees) -> dict:fold(fun (Pid, {M, F, A}, Acc) -> ok = erlang:apply(M, F, A ++ [Pid, Alert]), diff --git a/src/rabbit_memsup_linux.erl b/src/rabbit_memsup_linux.erl new file mode 100644 index 0000000000..b77ffcabb4 --- /dev/null +++ b/src/rabbit_memsup_linux.erl @@ -0,0 +1,150 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd., +%% Cohesive Financial Technologies LLC., and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd., Cohesive Financial Technologies +%% LLC., and Rabbit Technologies Ltd. are Copyright (C) 2007-2008 +%% LShift Ltd., Cohesive Financial Technologies LLC., and Rabbit +%% Technologies Ltd.; +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_memsup_linux). + +-behaviour(gen_server). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([update/0]). + +-define(SERVER, memsup). %% must be the same as the standard memsup + +-define(DEFAULT_MEMORY_CHECK_INTERVAL, 1000). + +-record(state, {memory_fraction, alarmed, timeout, timer}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(update/0 :: () -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + + +update() -> + gen_server:cast(?SERVER, update). + +%%---------------------------------------------------------------------------- + +init(_Args) -> + Fraction = os_mon:get_env(memsup, system_memory_high_watermark), + TRef = start_timer(?DEFAULT_MEMORY_CHECK_INTERVAL), + {ok, #state{alarmed = false, + memory_fraction = Fraction, + timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, + timer = TRef}}. + +start_timer(Timeout) -> + {ok, TRef} = timer:apply_interval(Timeout, ?MODULE, update, []), + TRef. + +%% Export the same API as the real memsup. Note that +%% get_sysmem_high_watermark gives an int in the range 0 - 100, while +%% set_sysmem_high_watermark takes a float in the range 0.0 - 1.0. +handle_call(get_sysmem_high_watermark, _From, State) -> + {reply, trunc(100 * State#state.memory_fraction), State}; + +handle_call({set_sysmem_high_watermark, Float}, _From, State) -> + {reply, ok, State#state{memory_fraction = Float}}; + +handle_call(get_check_interval, _From, State) -> + {reply, State#state.timeout, State}; + +handle_call({set_check_interval, Timeout}, _From, State) -> + {ok, cancel} = timer:cancel(State#state.timer), + {reply, ok, State#state{timeout = Timeout, timer = start_timer(Timeout)}}; + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast(update, State = #state{alarmed = Alarmed, + memory_fraction = MemoryFraction}) -> + File = read_proc_file("/proc/meminfo"), + Lines = string:tokens(File, "\n"), + Dict = dict:from_list(lists:map(fun parse_line/1, Lines)), + MemTotal = dict:fetch('MemTotal', Dict), + MemUsed = MemTotal + - dict:fetch('MemFree', Dict) + - dict:fetch('Buffers', Dict) + - dict:fetch('Cached', Dict), + NewAlarmed = MemUsed / MemTotal > MemoryFraction, + case {Alarmed, NewAlarmed} of + {false, true} -> + alarm_handler:set_alarm({system_memory_high_watermark, []}); + {true, false} -> + alarm_handler:clear_alarm(system_memory_high_watermark); + _ -> + ok + end, + {noreply, State#state{alarmed = NewAlarmed}}; + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------------- + +-define(BUFFER_SIZE, 1024). + +%% file:read_file does not work on files in /proc as it seems to get +%% the size of the file first and then read that many bytes. But files +%% in /proc always have length 0, we just have to read until we get +%% eof. +read_proc_file(File) -> + {ok, IoDevice} = file:open(File, [read, raw]), + Res = read_proc_file(IoDevice, []), + file:close(IoDevice), + lists:flatten(lists:reverse(Res)). + +read_proc_file(IoDevice, Acc) -> + case file:read(IoDevice, ?BUFFER_SIZE) of + {ok, Res} -> read_proc_file(IoDevice, [Res | Acc]); + eof -> Acc + end. + +%% A line looks like "FooBar: 123456 kB" +parse_line(Line) -> + [Name, Value | _] = string:tokens(Line, ": "), + {list_to_atom(Name), list_to_integer(Value)}. |
