|   |   |   |
|---|---|---|
| author | Alexandru Scvortov <alexandru@rabbitmq.com> | 2010-09-03 18:31:58 +0100 |
| committer | Alexandru Scvortov <alexandru@rabbitmq.com> | 2010-09-03 18:31:58 +0100 |
| commit | a2a0397e0d3c5243cf4fc210da391a4b9cedd02b | |
| tree | 46e0a7b4b7f1d473fca3ad9e05dea77d6710b248 /src | |
| parent | bb892b5585a67016282ce3e9627d7bdf106ae13e | |
| parent | 4fec65f1e195e2a647f89e8eefc66104c928aa4b | |
merge default into bug22902
Diffstat (limited to 'src')
61 files changed, 8583 insertions, 1535 deletions
diff --git a/src/bpqueue.erl b/src/bpqueue.erl new file mode 100644 index 0000000000..49874aa60c --- /dev/null +++ b/src/bpqueue.erl @@ -0,0 +1,286 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(bpqueue). + +%% Block-prefixed queue. From the perspective of the queue interface +%% the datastructure acts like a regular queue where each value is +%% paired with the prefix. +%% +%% This is implemented as a queue of queues, which is more space and +%% time efficient, whilst supporting the normal queue interface. Each +%% inner queue has a prefix, which does not need to be unique, and it +%% is guaranteed that no two consecutive blocks have the same +%% prefix. len/1 returns the flattened length of the queue and is +%% O(1). + +-export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2, + foldl/3, foldr/3, from_list/1, to_list/1, map_fold_filter_l/4, + map_fold_filter_r/4]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([bpqueue/0]). + +-type(bpqueue() :: {non_neg_integer(), queue()}). +-type(prefix() :: any()). +-type(value() :: any()). +-type(result() :: ({'empty', bpqueue()} | + {{'value', prefix(), value()}, bpqueue()})). + +-spec(new/0 :: () -> bpqueue()). +-spec(is_empty/1 :: (bpqueue()) -> boolean()). +-spec(len/1 :: (bpqueue()) -> non_neg_integer()). +-spec(in/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(in_r/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(out/1 :: (bpqueue()) -> result()). +-spec(out_r/1 :: (bpqueue()) -> result()). +-spec(join/2 :: (bpqueue(), bpqueue()) -> bpqueue()). +-spec(foldl/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(foldr/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()). +-spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]). +-spec(map_fold_filter_l/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). +-spec(map_fold_filter_r/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). + +-endif. 
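The module comment above describes the block-prefixed queue only in the abstract. The short sketch below is illustrative and not part of this commit; it exercises the exported API to show how consecutive insertions under the same prefix coalesce into a single block while `len/1` still reports the flattened length.

```erlang
%% Illustrative sketch only (not part of this patch): using the bpqueue API.
example() ->
    Q0 = bpqueue:new(),
    true = bpqueue:is_empty(Q0),
    %% Three values, two distinct prefixes; the two consecutive 'a'
    %% values end up in a single block.
    Q1 = bpqueue:in(a, 1, Q0),
    Q2 = bpqueue:in(a, 2, Q1),
    Q3 = bpqueue:in(b, 3, Q2),
    3 = bpqueue:len(Q3),                     %% flattened length, O(1)
    [{a, [1, 2]}, {b, [3]}] = bpqueue:to_list(Q3),
    {{value, a, 1}, Q4} = bpqueue:out(Q3),   %% values come out one at a time
    {{value, b, 3}, _Q5} = bpqueue:out_r(Q4),
    ok.
```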
+ +%%---------------------------------------------------------------------------- + +new() -> {0, queue:new()}. + +is_empty({0, _Q}) -> true; +is_empty(_BPQ) -> false. + +len({N, _Q}) -> N. + +in(Prefix, Value, {0, Q}) -> + {1, queue:in({Prefix, queue:from_list([Value])}, Q)}; +in(Prefix, Value, BPQ) -> + in1({fun queue:in/2, fun queue:out_r/1}, Prefix, Value, BPQ). + +in_r(Prefix, Value, BPQ = {0, _Q}) -> + in(Prefix, Value, BPQ); +in_r(Prefix, Value, BPQ) -> + in1({fun queue:in_r/2, fun queue:out/1}, Prefix, Value, BPQ). + +in1({In, Out}, Prefix, Value, {N, Q}) -> + {N+1, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, In(Value, InnerQ)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, queue:in(Value, queue:new())}, Q) + end}. + +in_q(Prefix, Queue, BPQ = {0, Q}) -> + case queue:len(Queue) of + 0 -> BPQ; + N -> {N, queue:in({Prefix, Queue}, Q)} + end; +in_q(Prefix, Queue, BPQ) -> + in_q1({fun queue:in/2, fun queue:out_r/1, + fun queue:join/2}, + Prefix, Queue, BPQ). + +in_q_r(Prefix, Queue, BPQ = {0, _Q}) -> + in_q(Prefix, Queue, BPQ); +in_q_r(Prefix, Queue, BPQ) -> + in_q1({fun queue:in_r/2, fun queue:out/1, + fun (T, H) -> queue:join(H, T) end}, + Prefix, Queue, BPQ). + +in_q1({In, Out, Join}, Prefix, Queue, BPQ = {N, Q}) -> + case queue:len(Queue) of + 0 -> BPQ; + M -> {N + M, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, Join(InnerQ, Queue)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, Queue}, Q) + end} + end. + +out({0, _Q} = BPQ) -> {empty, BPQ}; +out(BPQ) -> out1({fun queue:in_r/2, fun queue:out/1}, BPQ). + +out_r({0, _Q} = BPQ) -> {empty, BPQ}; +out_r(BPQ) -> out1({fun queue:in/2, fun queue:out_r/1}, BPQ). + +out1({In, Out}, {N, Q}) -> + {{value, {Prefix, InnerQ}}, Q1} = Out(Q), + {{value, Value}, InnerQ1} = Out(InnerQ), + Q2 = case queue:is_empty(InnerQ1) of + true -> Q1; + false -> In({Prefix, InnerQ1}, Q1) + end, + {{value, Prefix, Value}, {N-1, Q2}}. + +join({0, _Q}, BPQ) -> + BPQ; +join(BPQ, {0, _Q}) -> + BPQ; +join({NHead, QHead}, {NTail, QTail}) -> + {{value, {Prefix, InnerQHead}}, QHead1} = queue:out_r(QHead), + {NHead + NTail, + case queue:out(QTail) of + {{value, {Prefix, InnerQTail}}, QTail1} -> + queue:join( + queue:in({Prefix, queue:join(InnerQHead, InnerQTail)}, QHead1), + QTail1); + {{value, {_Prefix, _InnerQTail}}, _QTail1} -> + queue:join(QHead, QTail) + end}. + +foldl(_Fun, Init, {0, _Q}) -> Init; +foldl( Fun, Init, {_N, Q}) -> fold1(fun queue:out/1, Fun, Init, Q). + +foldr(_Fun, Init, {0, _Q}) -> Init; +foldr( Fun, Init, {_N, Q}) -> fold1(fun queue:out_r/1, Fun, Init, Q). + +fold1(Out, Fun, Init, Q) -> + case Out(Q) of + {empty, _Q} -> + Init; + {{value, {Prefix, InnerQ}}, Q1} -> + fold1(Out, Fun, fold1(Out, Fun, Prefix, Init, InnerQ), Q1) + end. + +fold1(Out, Fun, Prefix, Init, InnerQ) -> + case Out(InnerQ) of + {empty, _Q} -> + Init; + {{value, Value}, InnerQ1} -> + fold1(Out, Fun, Prefix, Fun(Prefix, Value, Init), InnerQ1) + end. 
+ +from_list(List) -> + {FinalPrefix, FinalInnerQ, ListOfPQs1, Len} = + lists:foldl( + fun ({_Prefix, []}, Acc) -> + Acc; + ({Prefix, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix, queue:join(InnerQ, queue:from_list(InnerList)), + ListOfPQs, LenAcc + length(InnerList)}; + ({Prefix1, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix1, queue:from_list(InnerList), + [{Prefix, InnerQ} | ListOfPQs], LenAcc + length(InnerList)} + end, {undefined, queue:new(), [], 0}, List), + ListOfPQs2 = [{FinalPrefix, FinalInnerQ} | ListOfPQs1], + [{undefined, InnerQ1} | Rest] = All = lists:reverse(ListOfPQs2), + {Len, queue:from_list(case queue:is_empty(InnerQ1) of + true -> Rest; + false -> All + end)}. + +to_list({0, _Q}) -> []; +to_list({_N, Q}) -> [{Prefix, queue:to_list(InnerQ)} || + {Prefix, InnerQ} <- queue:to_list(Q)]. + +%% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init} +%% where FilterFun(Prefix) -> boolean() +%% Fun(Value, Init) -> {Prefix, Value, Init} | stop +%% +%% The filter fun allows you to skip very quickly over blocks that +%% you're not interested in. Such blocks appear in the resulting bpq +%% without modification. The Fun is then used both to map the value, +%% which also allows you to change the prefix (and thus block) of the +%% value, and also to modify the Init/Acc (just like a fold). If the +%% Fun returns 'stop' then it is not applied to any further items. +map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter1({fun queue:out/1, fun queue:in/2, + fun in_q/3, fun join/2}, + N, PFilter, Fun, Init, Q, new()). + +map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_r(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter1({fun queue:out_r/1, fun queue:in_r/2, + fun in_q_r/3, fun (T, H) -> join(H, T) end}, + N, PFilter, Fun, Init, Q, new()). + +map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, + Init, Q, QNew) -> + case Out(Q) of + {empty, _Q} -> + {QNew, Init}; + {{value, {Prefix, InnerQ}}, Q1} -> + case PFilter(Prefix) of + true -> + {Init1, QNew1, Cont} = + map_fold_filter2(Funs, Fun, Prefix, Prefix, + Init, InnerQ, QNew, queue:new()), + case Cont of + false -> {Join(QNew1, {Len - len(QNew1), Q1}), Init1}; + true -> map_fold_filter1(Funs, Len, PFilter, Fun, + Init1, Q1, QNew1) + end; + false -> + map_fold_filter1(Funs, Len, PFilter, Fun, + Init, Q1, InQ(Prefix, InnerQ, QNew)) + end + end. + +map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, + Init, InnerQ, QNew, InnerQNew) -> + case Out(InnerQ) of + {empty, _Q} -> + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), true}; + {{value, Value}, InnerQ1} -> + case Fun(Value, Init) of + stop -> + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), false}; + {Prefix1, Value1, Init1} -> + {Prefix2, QNew1, InnerQNew1} = + case Prefix1 =:= Prefix of + true -> {Prefix, QNew, In(Value1, InnerQNew)}; + false -> {Prefix1, InQ(Prefix, InnerQNew, QNew), + In(Value1, queue:new())} + end, + map_fold_filter2(Funs, Fun, OrigPrefix, Prefix2, + Init1, InnerQ1, QNew1, InnerQNew1) + end + end. diff --git a/src/delegate.erl b/src/delegate.erl index 3f57953bf7..c8aa3092c7 100644 --- a/src/delegate.erl +++ b/src/delegate.erl @@ -44,7 +44,7 @@ -ifdef(use_specs). --spec(start_link/1 :: (non_neg_integer()) -> rabbit_types:ok(pid())). +-spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()} | {'error', any()}). 
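For the `map_fold_filter_[lr]/4` functions documented in the bpqueue comment above, a small illustrative example (not part of this commit) may help: it re-prefixes the values of blocks that pass the filter while counting them, and leaves filtered-out blocks in the result unchanged.

```erlang
%% Illustrative sketch only (not part of this patch): move every value from
%% blocks with prefix 'a' into prefix 'b', counting how many values were
%% touched; the 'c' block fails the filter and is kept as-is.
example() ->
    BPQ = bpqueue:from_list([{a, [1, 2]}, {c, [3]}]),
    {BPQ1, Touched} =
        bpqueue:map_fold_filter_l(
          fun (Prefix)       -> Prefix =:= a end,         %% skip non-'a' blocks
          fun (Value, Count) -> {b, Value, Count + 1} end, %% never returns 'stop' here
          0, BPQ),
    2 = Touched,
    [{b, [1, 2]}, {c, [3]}] = bpqueue:to_list(BPQ1),
    ok.
```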
-spec(invoke_no_result/2 :: (pid() | [pid()], fun ((pid()) -> any())) -> 'ok'). -spec(invoke/2 :: (pid() | [pid()], fun ((pid()) -> A)) -> A). diff --git a/src/delegate_sup.erl b/src/delegate_sup.erl index 39ef3f85b8..ff303ee28c 100644 --- a/src/delegate_sup.erl +++ b/src/delegate_sup.erl @@ -43,7 +43,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), any()) | 'ignore'). +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). -endif. diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index e209ee6be4..aecfb09694 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -116,13 +116,13 @@ %% do not need to worry about their handles being closed by the server %% - reopening them when necessary is handled transparently. %% -%% The server also supports obtain and release_on_death. obtain/0 -%% blocks until a file descriptor is available. release_on_death/1 -%% takes a pid and monitors the pid, reducing the count by 1 when the -%% pid dies. Thus the assumption is that obtain/0 is called first, and -%% when that returns, release_on_death/1 is called with the pid who -%% "owns" the file descriptor. This is, for example, used to track the -%% use of file descriptors through network sockets. +%% The server also supports obtain and transfer. obtain/0 blocks until +%% a file descriptor is available. transfer/1 is transfers ownership +%% of a file descriptor between processes. It is non-blocking. +%% +%% The callers of register_callback/3, obtain/0, and the argument of +%% transfer/1 are monitored, reducing the count of handles in use +%% appropriately when the processes terminate. -behaviour(gen_server). @@ -130,17 +130,28 @@ -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). --export([release_on_death/1, obtain/0]). +-export([obtain/0, transfer/1, set_limit/1, get_limit/0]). +-export([ulimit/0]). -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -define(SERVER, ?MODULE). -define(RESERVED_FOR_OTHERS, 100). --define(FILE_HANDLES_LIMIT_WINDOWS, 10000000). + +%% Googling around suggests that Windows has a limit somewhere around +%% 16M, eg +%% http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx +%% however, it turns out that's only available through the win32 +%% API. Via the C Runtime, we have just 512: +%% http://msdn.microsoft.com/en-us/library/6e3b887c%28VS.80%29.aspx +-define(FILE_HANDLES_LIMIT_WINDOWS, 512). -define(FILE_HANDLES_LIMIT_OTHER, 1024). -define(FILE_HANDLES_CHECK_INTERVAL, 2000). +-define(OBTAIN_LIMIT(LIMIT), trunc((LIMIT * 0.9) - 2)). +-define(CLIENT_ETS_TABLE, ?MODULE). + %%---------------------------------------------------------------------------- -record(file, @@ -168,13 +179,31 @@ -record(fhc_state, { elders, limit, - count, - obtains, - callbacks, - client_mrefs, + open_count, + open_pending, + obtain_limit, + obtain_count, + obtain_pending, + clients, timer_ref }). +-record(cstate, + { pid, + callback, + opened, + obtained, + blocked, + pending_closes + }). + +-record(pending, + { kind, + pid, + requested, + from + }). + %%---------------------------------------------------------------------------- %% Specs %%---------------------------------------------------------------------------- @@ -182,8 +211,8 @@ -ifdef(use_specs). -type(ref() :: any()). 
--type(ok_or_error() :: rabbit_types:ok_or_error(any())). --type(val_or_error(T) :: rabbit_types:ok_or_error2(T, any())). +-type(ok_or_error() :: 'ok' | {'error', any()}). +-type(val_or_error(T) :: {'ok', T} | {'error', any()}). -type(position() :: ('bof' | 'eof' | non_neg_integer() | {('bof' |'eof'), non_neg_integer()} | {'cur', integer()})). @@ -210,8 +239,11 @@ -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(delete/1 :: (ref()) -> ok_or_error()). -spec(clear/1 :: (ref()) -> ok_or_error()). --spec(release_on_death/1 :: (pid()) -> 'ok'). -spec(obtain/0 :: () -> 'ok'). +-spec(transfer/1 :: (pid()) -> 'ok'). +-spec(set_limit/1 :: (non_neg_integer()) -> 'ok'). +-spec(get_limit/0 :: () -> non_neg_integer()). +-spec(ulimit/0 :: () -> 'infinity' | 'unknown' | non_neg_integer()). -endif. @@ -238,9 +270,9 @@ open(Path, Mode, Options) -> IsWriter = is_writer(Mode1), case IsWriter andalso HasWriter of true -> {error, writer_exists}; - false -> Ref = make_ref(), - case open1(Path1, Mode1, Options, Ref, bof, new) of - {ok, _Handle} -> + false -> {ok, Ref} = new_closed_handle(Path1, Mode1, Options), + case get_or_reopen([{Ref, new}]) of + {ok, [_Handle1]} -> RCount1 = case is_reader(Mode1) of true -> RCount + 1; false -> RCount @@ -251,6 +283,7 @@ open(Path, Mode, Options) -> has_writer = HasWriter1 }), {ok, Ref}; Error -> + erase({Ref, fhc_handle}), Error end end. @@ -301,7 +334,7 @@ append(Ref, Data) -> Size1 = Size + iolist_size(Data), Handle2 = Handle1 #handle { write_buffer = WriteBuffer1, write_buffer_size = Size1 }, - case Limit /= infinity andalso Size1 > Limit of + case Limit =/= infinity andalso Size1 > Limit of true -> {Result, Handle3} = write_buffer(Handle2), {Result, [Handle3]}; false -> {ok, [Handle2]} @@ -375,7 +408,8 @@ copy(Src, Dest, Count) -> {ok, Count1} = Result1 -> {Result1, [SHandle #handle { offset = SOffset + Count1 }, - DHandle #handle { offset = DOffset + Count1 }]}; + DHandle #handle { offset = DOffset + Count1, + is_dirty = true }]}; Error -> {Error, [SHandle, DHandle]} end; @@ -420,29 +454,29 @@ set_maximum_since_use(MaximumAge) -> case lists:foldl( fun ({{Ref, fhc_handle}, Handle = #handle { hdl = Hdl, last_used_at = Then }}, Rep) -> - Age = timer:now_diff(Now, Then), - case Hdl /= closed andalso Age >= MaximumAge of - true -> {Res, Handle1} = soft_close(Handle), - case Res of - ok -> put({Ref, fhc_handle}, Handle1), - false; - _ -> put_handle(Ref, Handle1), - Rep - end; + case Hdl =/= closed andalso + timer:now_diff(Now, Then) >= MaximumAge of + true -> soft_close(Ref, Handle) orelse Rep; false -> Rep end; (_KeyValuePair, Rep) -> Rep - end, true, get()) of - true -> age_tree_change(), ok; - false -> ok + end, false, get()) of + false -> age_tree_change(), ok; + true -> ok end. -release_on_death(Pid) when is_pid(Pid) -> - gen_server:cast(?SERVER, {release_on_death, Pid}). - obtain() -> - gen_server:call(?SERVER, obtain, infinity). + gen_server:call(?SERVER, {obtain, self()}, infinity). + +transfer(Pid) -> + gen_server:cast(?SERVER, {transfer, self(), Pid}). + +set_limit(Limit) -> + gen_server:call(?SERVER, {set_limit, Limit}, infinity). + +get_limit() -> + gen_server:call(?SERVER, get_limit, infinity). %%---------------------------------------------------------------------------- %% Internal functions @@ -459,18 +493,9 @@ append_to_write(Mode) -> end. 
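The new obtain/transfer protocol described in the file_handle_cache module comment is easiest to see from the client side. The sketch below is illustrative only and not part of the patch; the acceptor/reader roles and the gen_tcp calls are assumptions about a typical caller, not code from this repository.

```erlang
%% Illustrative sketch only: an acceptor claims a descriptor slot before
%% accepting a socket, then hands accounting for it over to the reader
%% process that will own the connection.
accept_and_hand_over(LSock, ReaderPid) ->
    ok = file_handle_cache:obtain(),             %% blocks until a slot is free
    {ok, Sock} = gen_tcp:accept(LSock),
    ok = gen_tcp:controlling_process(Sock, ReaderPid),
    ok = file_handle_cache:transfer(ReaderPid),  %% non-blocking; ReaderPid is now tracked
    {ok, Sock}.
```

The newly exported `set_limit/1` and `get_limit/0` let the descriptor budget be inspected or changed at runtime without restarting the server.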
with_handles(Refs, Fun) -> - ResHandles = lists:foldl( - fun (Ref, {ok, HandlesAcc}) -> - case get_or_reopen(Ref) of - {ok, Handle} -> {ok, [Handle | HandlesAcc]}; - Error -> Error - end; - (_Ref, Error) -> - Error - end, {ok, []}, Refs), - case ResHandles of + case get_or_reopen([{Ref, reopen} || Ref <- Refs]) of {ok, Handles} -> - case Fun(lists:reverse(Handles)) of + case Fun(Handles) of {Result, Handles1} when is_list(Handles1) -> lists:zipwith(fun put_handle/2, Refs, Handles1), Result; @@ -499,36 +524,94 @@ with_flushed_handles(Refs, Fun) -> end end). -get_or_reopen(Ref) -> - case get({Ref, fhc_handle}) of - undefined -> - {error, not_open, Ref}; - #handle { hdl = closed, offset = Offset, - path = Path, mode = Mode, options = Options } -> - open1(Path, Mode, Options, Ref, Offset, reopen); - Handle -> - {ok, Handle} +get_or_reopen(RefNewOrReopens) -> + case partition_handles(RefNewOrReopens) of + {OpenHdls, []} -> + {ok, [Handle || {_Ref, Handle} <- OpenHdls]}; + {OpenHdls, ClosedHdls} -> + Oldest = oldest(get_age_tree(), fun () -> now() end), + case gen_server:call(?SERVER, {open, self(), length(ClosedHdls), + Oldest}, infinity) of + ok -> + case reopen(ClosedHdls) of + {ok, RefHdls} -> sort_handles(RefNewOrReopens, + OpenHdls, RefHdls, []); + Error -> Error + end; + close -> + [soft_close(Ref, Handle) || + {{Ref, fhc_handle}, Handle = #handle { hdl = Hdl }} <- + get(), + Hdl =/= closed], + get_or_reopen(RefNewOrReopens) + end + end. + +reopen(ClosedHdls) -> reopen(ClosedHdls, get_age_tree(), []). + +reopen([], Tree, RefHdls) -> + put_age_tree(Tree), + {ok, lists:reverse(RefHdls)}; +reopen([{Ref, NewOrReopen, Handle = #handle { hdl = closed, + path = Path, + mode = Mode, + offset = Offset, + last_used_at = undefined }} | + RefNewOrReopenHdls] = ToOpen, Tree, RefHdls) -> + case file:open(Path, case NewOrReopen of + new -> Mode; + reopen -> [read | Mode] + end) of + {ok, Hdl} -> + Now = now(), + {{ok, Offset1}, Handle1} = + maybe_seek(Offset, Handle #handle { hdl = Hdl, + offset = 0, + last_used_at = Now }), + Handle2 = Handle1 #handle { trusted_offset = Offset1 }, + put({Ref, fhc_handle}, Handle2), + reopen(RefNewOrReopenHdls, gb_trees:insert(Now, Ref, Tree), + [{Ref, Handle2} | RefHdls]); + Error -> + %% NB: none of the handles in ToOpen are in the age tree + Oldest = oldest(Tree, fun () -> undefined end), + [gen_server:cast(?SERVER, {close, self(), Oldest}) || _ <- ToOpen], + put_age_tree(Tree), + Error end. +partition_handles(RefNewOrReopens) -> + lists:foldr( + fun ({Ref, NewOrReopen}, {Open, Closed}) -> + case get({Ref, fhc_handle}) of + #handle { hdl = closed } = Handle -> + {Open, [{Ref, NewOrReopen, Handle} | Closed]}; + #handle {} = Handle -> + {[{Ref, Handle} | Open], Closed} + end + end, {[], []}, RefNewOrReopens). + +sort_handles([], [], [], Acc) -> + {ok, lists:reverse(Acc)}; +sort_handles([{Ref, _} | RefHdls], [{Ref, Handle} | RefHdlsA], RefHdlsB, Acc) -> + sort_handles(RefHdls, RefHdlsA, RefHdlsB, [Handle | Acc]); +sort_handles([{Ref, _} | RefHdls], RefHdlsA, [{Ref, Handle} | RefHdlsB], Acc) -> + sort_handles(RefHdls, RefHdlsA, RefHdlsB, [Handle | Acc]). + put_handle(Ref, Handle = #handle { last_used_at = Then }) -> Now = now(), age_tree_update(Then, Now, Ref), put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). -with_age_tree(Fun) -> - put(fhc_age_tree, Fun(case get(fhc_age_tree) of - undefined -> gb_trees:empty(); - AgeTree -> AgeTree - end)). +with_age_tree(Fun) -> put_age_tree(Fun(get_age_tree())). 
-age_tree_insert(Now, Ref) -> - with_age_tree( - fun (Tree) -> - Tree1 = gb_trees:insert(Now, Ref, Tree), - {Oldest, _Ref} = gb_trees:smallest(Tree1), - gen_server:cast(?SERVER, {open, self(), Oldest}), - Tree1 - end). +get_age_tree() -> + case get(fhc_age_tree) of + undefined -> gb_trees:empty(); + AgeTree -> AgeTree + end. + +put_age_tree(Tree) -> put(fhc_age_tree, Tree). age_tree_update(Then, Now, Ref) -> with_age_tree( @@ -540,13 +623,7 @@ age_tree_delete(Then) -> with_age_tree( fun (Tree) -> Tree1 = gb_trees:delete_any(Then, Tree), - Oldest = case gb_trees:is_empty(Tree1) of - true -> - undefined; - false -> - {Oldest1, _Ref} = gb_trees:smallest(Tree1), - Oldest1 - end, + Oldest = oldest(Tree1, fun () -> undefined end), gen_server:cast(?SERVER, {close, self(), Oldest}), Tree1 end). @@ -562,48 +639,53 @@ age_tree_change() -> Tree end). -open1(Path, Mode, Options, Ref, Offset, NewOrReopen) -> - Mode1 = case NewOrReopen of - new -> Mode; - reopen -> [read | Mode] - end, - case file:open(Path, Mode1) of - {ok, Hdl} -> - WriteBufferSize = - case proplists:get_value(write_buffer, Options, unbuffered) of - unbuffered -> 0; - infinity -> infinity; - N when is_integer(N) -> N - end, - Now = now(), - Handle = #handle { hdl = Hdl, - offset = 0, - trusted_offset = 0, - is_dirty = false, - write_buffer_size = 0, - write_buffer_size_limit = WriteBufferSize, - write_buffer = [], - at_eof = false, - path = Path, - mode = Mode, - options = Options, - is_write = is_writer(Mode), - is_read = is_reader(Mode), - last_used_at = Now }, - {{ok, Offset1}, Handle1} = maybe_seek(Offset, Handle), - Handle2 = Handle1 #handle { trusted_offset = Offset1 }, - put({Ref, fhc_handle}, Handle2), - age_tree_insert(Now, Ref), - {ok, Handle2}; - {error, Reason} -> - {error, Reason} +oldest(Tree, DefaultFun) -> + case gb_trees:is_empty(Tree) of + true -> DefaultFun(); + false -> {Oldest, _Ref} = gb_trees:smallest(Tree), + Oldest + end. + +new_closed_handle(Path, Mode, Options) -> + WriteBufferSize = + case proplists:get_value(write_buffer, Options, unbuffered) of + unbuffered -> 0; + infinity -> infinity; + N when is_integer(N) -> N + end, + Ref = make_ref(), + put({Ref, fhc_handle}, #handle { hdl = closed, + offset = 0, + trusted_offset = 0, + is_dirty = false, + write_buffer_size = 0, + write_buffer_size_limit = WriteBufferSize, + write_buffer = [], + at_eof = false, + path = Path, + mode = Mode, + options = Options, + is_write = is_writer(Mode), + is_read = is_reader(Mode), + last_used_at = undefined }), + {ok, Ref}. + +soft_close(Ref, Handle) -> + {Res, Handle1} = soft_close(Handle), + case Res of + ok -> put({Ref, fhc_handle}, Handle1), + true; + _ -> put_handle(Ref, Handle1), + false end. soft_close(Handle = #handle { hdl = closed }) -> {ok, Handle}; soft_close(Handle) -> case write_buffer(Handle) of - {ok, #handle { hdl = Hdl, offset = Offset, is_dirty = IsDirty, + {ok, #handle { hdl = Hdl, + offset = Offset, + is_dirty = IsDirty, last_used_at = Then } = Handle1 } -> ok = case IsDirty of true -> file:sync(Hdl); @@ -611,8 +693,10 @@ soft_close(Handle) -> end, ok = file:close(Hdl), age_tree_delete(Then), - {ok, Handle1 #handle { hdl = closed, trusted_offset = Offset, - is_dirty = false }}; + {ok, Handle1 #handle { hdl = closed, + trusted_offset = Offset, + is_dirty = false, + last_used_at = undefined }}; {_Error, _Handle} = Result -> Result end. 
@@ -699,116 +783,309 @@ init([]) -> Watermark > 0) -> Watermark; _ -> - ulimit() + case ulimit() of + infinity -> infinity; + unknown -> ?FILE_HANDLES_LIMIT_OTHER; + Lim -> lists:max([2, Lim - ?RESERVED_FOR_OTHERS]) + end end, - error_logger:info_msg("Limiting to approx ~p file handles~n", [Limit]), - {ok, #fhc_state { elders = dict:new(), limit = Limit, count = 0, - obtains = [], callbacks = dict:new(), - client_mrefs = dict:new(), timer_ref = undefined }}. - -handle_call(obtain, From, State = #fhc_state { count = Count }) -> - State1 = #fhc_state { count = Count1, limit = Limit, obtains = Obtains } = - maybe_reduce(State #fhc_state { count = Count + 1 }), - case Limit /= infinity andalso Count1 >= Limit of - true -> {noreply, State1 #fhc_state { obtains = [From | Obtains], - count = Count1 - 1 }}; - false -> {reply, ok, State1} - end. - -handle_cast({register_callback, Pid, MFA}, - State = #fhc_state { callbacks = Callbacks }) -> - {noreply, ensure_mref( - Pid, State #fhc_state { - callbacks = dict:store(Pid, MFA, Callbacks) })}; - -handle_cast({open, Pid, EldestUnusedSince}, State = - #fhc_state { elders = Elders, count = Count }) -> + ObtainLimit = obtain_limit(Limit), + error_logger:info_msg("Limiting to approx ~p file handles (~p sockets)~n", + [Limit, ObtainLimit]), + Clients = ets:new(?CLIENT_ETS_TABLE, [set, private, {keypos, #cstate.pid}]), + {ok, #fhc_state { elders = dict:new(), + limit = Limit, + open_count = 0, + open_pending = pending_new(), + obtain_limit = ObtainLimit, + obtain_count = 0, + obtain_pending = pending_new(), + clients = Clients, + timer_ref = undefined }}. + +handle_call({open, Pid, Requested, EldestUnusedSince}, From, + State = #fhc_state { open_count = Count, + open_pending = Pending, + elders = Elders, + clients = Clients }) + when EldestUnusedSince =/= undefined -> Elders1 = dict:store(Pid, EldestUnusedSince, Elders), - {noreply, maybe_reduce( - ensure_mref(Pid, State #fhc_state { elders = Elders1, - count = Count + 1 }))}; + Item = #pending { kind = open, + pid = Pid, + requested = Requested, + from = From }, + ok = track_client(Pid, Clients), + State1 = State #fhc_state { elders = Elders1 }, + case needs_reduce(State1 #fhc_state { open_count = Count + Requested }) of + true -> case ets:lookup(Clients, Pid) of + [#cstate { opened = 0 }] -> + true = ets:update_element( + Clients, Pid, {#cstate.blocked, true}), + {noreply, + reduce(State1 #fhc_state { + open_pending = pending_in(Item, Pending) })}; + [#cstate { opened = Opened }] -> + true = ets:update_element( + Clients, Pid, + {#cstate.pending_closes, Opened}), + {reply, close, State1} + end; + false -> {noreply, run_pending_item(Item, State1)} + end; -handle_cast({update, Pid, EldestUnusedSince}, State = - #fhc_state { elders = Elders }) -> +handle_call({obtain, Pid}, From, State = #fhc_state { obtain_limit = Limit, + obtain_count = Count, + obtain_pending = Pending, + clients = Clients }) + when Limit =/= infinity andalso Count >= Limit -> + ok = track_client(Pid, Clients), + true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), + Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, + {noreply, State #fhc_state { obtain_pending = pending_in(Item, Pending) }}; +handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, + obtain_pending = Pending, + clients = Clients }) -> + Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, + ok = track_client(Pid, Clients), + case needs_reduce(State #fhc_state { obtain_count = Count + 1 }) of + 
true -> + true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), + {noreply, reduce(State #fhc_state { + obtain_pending = pending_in(Item, Pending) })}; + false -> + {noreply, run_pending_item(Item, State)} + end; +handle_call({set_limit, Limit}, _From, State) -> + {reply, ok, maybe_reduce( + process_pending(State #fhc_state { + limit = Limit, + obtain_limit = obtain_limit(Limit) }))}; +handle_call(get_limit, _From, State = #fhc_state { limit = Limit }) -> + {reply, Limit, State}. + +handle_cast({register_callback, Pid, MFA}, + State = #fhc_state { clients = Clients }) -> + ok = track_client(Pid, Clients), + true = ets:update_element(Clients, Pid, {#cstate.callback, MFA}), + {noreply, State}; + +handle_cast({update, Pid, EldestUnusedSince}, + State = #fhc_state { elders = Elders }) + when EldestUnusedSince =/= undefined -> Elders1 = dict:store(Pid, EldestUnusedSince, Elders), %% don't call maybe_reduce from here otherwise we can create a %% storm of messages - {noreply, ensure_mref(Pid, State #fhc_state { elders = Elders1 })}; + {noreply, State #fhc_state { elders = Elders1 }}; -handle_cast({close, Pid, EldestUnusedSince}, State = - #fhc_state { elders = Elders, count = Count }) -> +handle_cast({close, Pid, EldestUnusedSince}, + State = #fhc_state { elders = Elders, clients = Clients }) -> Elders1 = case EldestUnusedSince of undefined -> dict:erase(Pid, Elders); _ -> dict:store(Pid, EldestUnusedSince, Elders) end, - {noreply, process_obtains( - ensure_mref(Pid, State #fhc_state { elders = Elders1, - count = Count - 1 }))}; + ets:update_counter(Clients, Pid, {#cstate.pending_closes, -1, 0, 0}), + {noreply, process_pending( + update_counts(open, Pid, -1, + State #fhc_state { elders = Elders1 }))}; + +handle_cast({transfer, FromPid, ToPid}, State) -> + ok = track_client(ToPid, State#fhc_state.clients), + {noreply, process_pending( + update_counts(obtain, ToPid, +1, + update_counts(obtain, FromPid, -1, State)))}; handle_cast(check_counts, State) -> - {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}; - -handle_cast({release_on_death, Pid}, State) -> - _MRef = erlang:monitor(process, Pid), - {noreply, State}. - -handle_info({'DOWN', MRef, process, Pid, _Reason}, State = - #fhc_state { count = Count, callbacks = Callbacks, - client_mrefs = ClientMRefs, elders = Elders }) -> - {noreply, process_obtains( - case dict:find(Pid, ClientMRefs) of - {ok, MRef} -> State #fhc_state { - elders = dict:erase(Pid, Elders), - client_mrefs = dict:erase(Pid, ClientMRefs), - callbacks = dict:erase(Pid, Callbacks) }; - _ -> State #fhc_state { count = Count - 1 } - end)}. - -terminate(_Reason, State) -> + {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}. + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #fhc_state { elders = Elders, + open_count = OpenCount, + open_pending = OpenPending, + obtain_count = ObtainCount, + obtain_pending = ObtainPending, + clients = Clients }) -> + [#cstate { opened = Opened, obtained = Obtained }] = + ets:lookup(Clients, Pid), + true = ets:delete(Clients, Pid), + FilterFun = fun (#pending { pid = Pid1 }) -> Pid1 =/= Pid end, + {noreply, process_pending( + State #fhc_state { + open_count = OpenCount - Opened, + open_pending = filter_pending(FilterFun, OpenPending), + obtain_count = ObtainCount - Obtained, + obtain_pending = filter_pending(FilterFun, ObtainPending), + elders = dict:erase(Pid, Elders) })}. + +terminate(_Reason, State = #fhc_state { clients = Clients }) -> + ets:delete(Clients), State. 
code_change(_OldVsn, State, _Extra) -> {ok, State}. %%---------------------------------------------------------------------------- +%% pending queue abstraction helpers +%%---------------------------------------------------------------------------- + +queue_fold(Fun, Init, Q) -> + case queue:out(Q) of + {empty, _Q} -> Init; + {{value, V}, Q1} -> queue_fold(Fun, Fun(V, Init), Q1) + end. + +filter_pending(Fun, {Count, Queue}) -> + {Delta, Queue1} = + queue_fold(fun (Item, {DeltaN, QueueN}) -> + case Fun(Item) of + true -> {DeltaN, queue:in(Item, QueueN)}; + false -> {DeltaN - requested(Item), QueueN} + end + end, {0, queue:new()}, Queue), + {Count + Delta, Queue1}. + +pending_new() -> + {0, queue:new()}. + +pending_in(Item = #pending { requested = Requested }, {Count, Queue}) -> + {Count + Requested, queue:in(Item, Queue)}. + +pending_out({0, _Queue} = Pending) -> + {empty, Pending}; +pending_out({N, Queue}) -> + {{value, #pending { requested = Requested }} = Result, Queue1} = + queue:out(Queue), + {Result, {N - Requested, Queue1}}. + +pending_count({Count, _Queue}) -> + Count. + +pending_is_empty({0, _Queue}) -> + true; +pending_is_empty({_N, _Queue}) -> + false. + +%%---------------------------------------------------------------------------- %% server helpers %%---------------------------------------------------------------------------- -process_obtains(State = #fhc_state { obtains = [] }) -> - State; -process_obtains(State = #fhc_state { limit = Limit, count = Count }) - when Limit /= infinity andalso Count >= Limit -> +obtain_limit(infinity) -> infinity; +obtain_limit(Limit) -> case ?OBTAIN_LIMIT(Limit) of + OLimit when OLimit < 0 -> 0; + OLimit -> OLimit + end. + +requested({_Kind, _Pid, Requested, _From}) -> + Requested. + +process_pending(State = #fhc_state { limit = infinity }) -> State; -process_obtains(State = #fhc_state { limit = Limit, count = Count, - obtains = Obtains }) -> - ObtainsLen = length(Obtains), - ObtainableLen = lists:min([ObtainsLen, Limit - Count]), - Take = ObtainsLen - ObtainableLen, - {ObtainsNew, ObtainableRev} = lists:split(Take, Obtains), - [gen_server:reply(From, ok) || From <- ObtainableRev], - State #fhc_state { count = Count + ObtainableLen, obtains = ObtainsNew }. - -maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders, - callbacks = Callbacks, timer_ref = TRef }) - when Limit /= infinity andalso Count >= Limit -> +process_pending(State) -> + process_open(process_obtain(State)). + +process_open(State = #fhc_state { limit = Limit, + open_pending = Pending, + open_count = OpenCount, + obtain_count = ObtainCount }) -> + {Pending1, State1} = + process_pending(Pending, Limit - (ObtainCount + OpenCount), State), + State1 #fhc_state { open_pending = Pending1 }. + +process_obtain(State = #fhc_state { limit = Limit, + obtain_pending = Pending, + obtain_limit = ObtainLimit, + obtain_count = ObtainCount, + open_count = OpenCount }) -> + Quota = lists:min([ObtainLimit - ObtainCount, + Limit - (ObtainCount + OpenCount)]), + {Pending1, State1} = process_pending(Pending, Quota, State), + State1 #fhc_state { obtain_pending = Pending1 }. 
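As a worked example of the new limit bookkeeping (the numbers below assume `ulimit -n` reports 1024 and are illustrative, not taken from the patch): `ulimit/0` now returns the raw value, `init/1` subtracts `?RESERVED_FOR_OTHERS`, and `obtain_limit/1` applies `?OBTAIN_LIMIT`, so the startup banner would read "Limiting to approx 924 file handles (829 sockets)".

```erlang
%% Erlang shell transcript; assumes `ulimit -n` = 1024 (illustrative only).
1> Limit = lists:max([2, 1024 - 100]).     %% ?RESERVED_FOR_OTHERS = 100
924
2> ObtainLimit = trunc(Limit * 0.9 - 2).   %% ?OBTAIN_LIMIT(Limit)
829
```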
+ +process_pending(Pending, Quota, State) when Quota =< 0 -> + {Pending, State}; +process_pending(Pending, Quota, State) -> + case pending_out(Pending) of + {empty, _Pending} -> + {Pending, State}; + {{value, #pending { requested = Requested }}, _Pending1} + when Requested > Quota -> + {Pending, State}; + {{value, #pending { requested = Requested } = Item}, Pending1} -> + process_pending(Pending1, Quota - Requested, + run_pending_item(Item, State)) + end. + +run_pending_item(#pending { kind = Kind, + pid = Pid, + requested = Requested, + from = From }, + State = #fhc_state { clients = Clients }) -> + gen_server:reply(From, ok), + true = ets:update_element(Clients, Pid, {#cstate.blocked, false}), + update_counts(Kind, Pid, Requested, State). + +update_counts(Kind, Pid, Delta, + State = #fhc_state { open_count = OpenCount, + obtain_count = ObtainCount, + clients = Clients }) -> + {OpenDelta, ObtainDelta} = update_counts1(Kind, Pid, Delta, Clients), + State #fhc_state { open_count = OpenCount + OpenDelta, + obtain_count = ObtainCount + ObtainDelta }. + +update_counts1(open, Pid, Delta, Clients) -> + ets:update_counter(Clients, Pid, {#cstate.opened, Delta}), + {Delta, 0}; +update_counts1(obtain, Pid, Delta, Clients) -> + ets:update_counter(Clients, Pid, {#cstate.obtained, Delta}), + {0, Delta}. + +maybe_reduce(State) -> + case needs_reduce(State) of + true -> reduce(State); + false -> State + end. + +needs_reduce(#fhc_state { limit = Limit, + open_count = OpenCount, + open_pending = OpenPending, + obtain_count = ObtainCount, + obtain_limit = ObtainLimit, + obtain_pending = ObtainPending }) -> + Limit =/= infinity + andalso ((OpenCount + ObtainCount > Limit) + orelse (not pending_is_empty(OpenPending)) + orelse (ObtainCount < ObtainLimit + andalso not pending_is_empty(ObtainPending))). + +reduce(State = #fhc_state { open_pending = OpenPending, + obtain_pending = ObtainPending, + elders = Elders, + clients = Clients, + timer_ref = TRef }) -> Now = now(), - {Pids, Sum, ClientCount} = - dict:fold(fun (_Pid, undefined, Accs) -> - Accs; - (Pid, Eldest, {PidsAcc, SumAcc, CountAcc}) -> - {[Pid|PidsAcc], SumAcc + timer:now_diff(Now, Eldest), - CountAcc + 1} + {CStates, Sum, ClientCount} = + dict:fold(fun (Pid, Eldest, {CStatesAcc, SumAcc, CountAcc} = Accs) -> + [#cstate { pending_closes = PendingCloses, + opened = Opened, + blocked = Blocked } = CState] = + ets:lookup(Clients, Pid), + case Blocked orelse PendingCloses =:= Opened of + true -> Accs; + false -> {[CState | CStatesAcc], + SumAcc + timer:now_diff(Now, Eldest), + CountAcc + 1} + end end, {[], 0, 0}, Elders), - case Pids of + case CStates of [] -> ok; - _ -> AverageAge = Sum / ClientCount, - lists:foreach( - fun (Pid) -> - case dict:find(Pid, Callbacks) of - error -> ok; - {ok, {M, F, A}} -> apply(M, F, A ++ [AverageAge]) - end - end, Pids) + _ -> case (Sum / ClientCount) - + (1000 * ?FILE_HANDLES_CHECK_INTERVAL) of + AverageAge when AverageAge > 0 -> + notify_age(CStates, AverageAge); + _ -> + notify_age0(Clients, CStates, + pending_count(OpenPending) + + pending_count(ObtainPending)) + end end, case TRef of undefined -> {ok, TRef1} = timer:apply_after( @@ -816,16 +1093,47 @@ maybe_reduce(State = #fhc_state { limit = Limit, count = Count, elders = Elders, gen_server, cast, [?SERVER, check_counts]), State #fhc_state { timer_ref = TRef1 }; _ -> State - end; -maybe_reduce(State) -> - State. + end. 
-%% Googling around suggests that Windows has a limit somewhere around -%% 16M, eg -%% http://blogs.technet.com/markrussinovich/archive/2009/09/29/3283844.aspx -%% For everything else, assume ulimit exists. Further googling -%% suggests that BSDs (incl OS X), solaris and linux all agree that -%% ulimit -n is file handles +notify_age(CStates, AverageAge) -> + lists:foreach( + fun (#cstate { callback = undefined }) -> ok; + (#cstate { callback = {M, F, A} }) -> apply(M, F, A ++ [AverageAge]) + end, CStates). + +notify_age0(Clients, CStates, Required) -> + Notifications = + [CState || CState <- CStates, CState#cstate.callback =/= undefined], + {L1, L2} = lists:split(random:uniform(length(Notifications)), + Notifications), + notify(Clients, Required, L2 ++ L1). + +notify(_Clients, _Required, []) -> + ok; +notify(_Clients, Required, _Notifications) when Required =< 0 -> + ok; +notify(Clients, Required, [#cstate{ pid = Pid, + callback = {M, F, A}, + opened = Opened } | Notifications]) -> + apply(M, F, A ++ [0]), + ets:update_element(Clients, Pid, {#cstate.pending_closes, Opened}), + notify(Clients, Required - Opened, Notifications). + +track_client(Pid, Clients) -> + case ets:insert_new(Clients, #cstate { pid = Pid, + callback = undefined, + opened = 0, + obtained = 0, + blocked = false, + pending_closes = 0 }) of + true -> _MRef = erlang:monitor(process, Pid), + ok; + false -> ok + end. + +%% For all unices, assume ulimit exists. Further googling suggests +%% that BSDs (incl OS X), solaris and linux all agree that ulimit -n +%% is file handles ulimit() -> case os:type() of {win32, _OsName} -> @@ -839,24 +1147,14 @@ ulimit() -> "unlimited" -> infinity; String = [C|_] when $0 =< C andalso C =< $9 -> - Num = list_to_integer( - lists:takewhile( - fun (D) -> $0 =< D andalso D =< $9 end, String)) - - ?RESERVED_FOR_OTHERS, - lists:max([1, Num]); + list_to_integer( + lists:takewhile( + fun (D) -> $0 =< D andalso D =< $9 end, String)); _ -> %% probably a variant of %% "/bin/sh: line 1: ulimit: command not found\n" - ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS + unknown end; _ -> - ?FILE_HANDLES_LIMIT_OTHER - ?RESERVED_FOR_OTHERS - end. - -ensure_mref(Pid, State = #fhc_state { client_mrefs = ClientMRefs }) -> - case dict:find(Pid, ClientMRefs) of - {ok, _MRef} -> State; - error -> MRef = erlang:monitor(process, Pid), - State #fhc_state { - client_mrefs = dict:store(Pid, MRef, ClientMRefs) } + unknown end. diff --git a/src/gatherer.erl b/src/gatherer.erl new file mode 100644 index 0000000000..1e03d6c41c --- /dev/null +++ b/src/gatherer.erl @@ -0,0 +1,145 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(gatherer). + +-behaviour(gen_server2). + +-export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). +-spec(stop/1 :: (pid()) -> 'ok'). +-spec(fork/1 :: (pid()) -> 'ok'). +-spec(finish/1 :: (pid()) -> 'ok'). +-spec(in/2 :: (pid(), any()) -> 'ok'). +-spec(out/1 :: (pid()) -> {'value', any()} | 'empty'). + +-endif. + +%%---------------------------------------------------------------------------- + +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + +%%---------------------------------------------------------------------------- + +-record(gstate, { forks, values, blocked }). + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server2:start_link(?MODULE, [], [{timeout, infinity}]). + +stop(Pid) -> + gen_server2:call(Pid, stop, infinity). + +fork(Pid) -> + gen_server2:call(Pid, fork, infinity). + +finish(Pid) -> + gen_server2:cast(Pid, finish). + +in(Pid, Value) -> + gen_server2:cast(Pid, {in, Value}). + +out(Pid) -> + gen_server2:call(Pid, out, infinity). + +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, #gstate { forks = 0, values = queue:new(), blocked = queue:new() }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; + +handle_call(fork, _From, State = #gstate { forks = Forks }) -> + {reply, ok, State #gstate { forks = Forks + 1 }, hibernate}; + +handle_call(out, From, State = #gstate { forks = Forks, + values = Values, + blocked = Blocked }) -> + case queue:out(Values) of + {empty, _} -> + case Forks of + 0 -> {reply, empty, State, hibernate}; + _ -> {noreply, + State #gstate { blocked = queue:in(From, Blocked) }, + hibernate} + end; + {{value, _Value} = V, NewValues} -> + {reply, V, State #gstate { values = NewValues }, hibernate} + end; + +handle_call(Msg, _From, State) -> + {stop, {unexpected_call, Msg}, State}. + +handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> + NewForks = Forks - 1, + NewBlocked = case NewForks of + 0 -> [gen_server2:reply(From, empty) || + From <- queue:to_list(Blocked)], + queue:new(); + _ -> Blocked + end, + {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, + hibernate}; + +handle_cast({in, Value}, State = #gstate { values = Values, + blocked = Blocked }) -> + {noreply, case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in(Value, Values) }; + {{value, From}, NewBlocked} -> + gen_server2:reply(From, {value, Value}), + State #gstate { blocked = NewBlocked } + end, hibernate}; + +handle_cast(Msg, State) -> + {stop, {unexpected_cast, Msg}, State}. + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
+ +terminate(_Reason, State) -> + State. diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 49ae63c1d5..9fb9e2fea7 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -164,7 +164,7 @@ cast/2, pcast/3, reply/2, abcast/2, abcast/3, multi_call/2, multi_call/3, multi_call/4, - enter_loop/3, enter_loop/4, enter_loop/5, wake_hib/7]). + enter_loop/3, enter_loop/4, enter_loop/5, enter_loop/6, wake_hib/7]). -export([behaviour_info/1]). @@ -976,7 +976,7 @@ print_event(Dev, Event, Name) -> terminate(Reason, Name, Msg, Mod, State, Debug) -> case catch Mod:terminate(Reason, State) of {'EXIT', R} -> - error_info(R, Name, Msg, State, Debug), + error_info(R, Reason, Name, Msg, State, Debug), exit(R); _ -> case Reason of @@ -987,42 +987,44 @@ terminate(Reason, Name, Msg, Mod, State, Debug) -> {shutdown,_}=Shutdown -> exit(Shutdown); _ -> - error_info(Reason, Name, Msg, State, Debug), + error_info(Reason, undefined, Name, Msg, State, Debug), exit(Reason) end end. -error_info(_Reason, application_controller, _Msg, _State, _Debug) -> +error_info(_Reason, _RootCause, application_controller, _Msg, _State, _Debug) -> %% OTP-5811 Don't send an error report if it's the system process %% application_controller which is terminating - let init take care %% of it instead ok; -error_info(Reason, Name, Msg, State, Debug) -> - Reason1 = - case Reason of - {undef,[{M,F,A}|MFAs]} -> - case code:is_loaded(M) of - false -> - {'module could not be loaded',[{M,F,A}|MFAs]}; - _ -> - case erlang:function_exported(M, F, length(A)) of - true -> - Reason; - false -> - {'function not exported',[{M,F,A}|MFAs]} - end - end; - _ -> - Reason - end, - format("** Generic server ~p terminating \n" - "** Last message in was ~p~n" - "** When Server state == ~p~n" - "** Reason for termination == ~n** ~p~n", - [Name, Msg, State, Reason1]), +error_info(Reason, RootCause, Name, Msg, State, Debug) -> + Reason1 = error_reason(Reason), + Fmt = + "** Generic server ~p terminating~n" + "** Last message in was ~p~n" + "** When Server state == ~p~n" + "** Reason for termination == ~n** ~p~n", + case RootCause of + undefined -> format(Fmt, [Name, Msg, State, Reason1]); + _ -> format(Fmt ++ "** In 'terminate' callback " + "with reason ==~n** ~p~n", + [Name, Msg, State, Reason1, + error_reason(RootCause)]) + end, sys:print_log(Debug), ok. +error_reason({undef,[{M,F,A}|MFAs]} = Reason) -> + case code:is_loaded(M) of + false -> {'module could not be loaded',[{M,F,A}|MFAs]}; + _ -> case erlang:function_exported(M, F, length(A)) of + true -> Reason; + false -> {'function not exported',[{M,F,A}|MFAs]} + end + end; +error_reason(Reason) -> + Reason. + %%% --------------------------------------------------- %%% Misc. functions. %%% --------------------------------------------------- diff --git a/src/pg_local.erl b/src/pg_local.erl index f5ded123d7..49fa873ae4 100644 --- a/src/pg_local.erl +++ b/src/pg_local.erl @@ -45,8 +45,8 @@ -type(name() :: term()). --spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), term())). --spec(start/0 :: () -> rabbit_types:ok_or_error2(pid(), term())). +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). +-spec(start/0 :: () -> {'ok', pid()} | {'error', any()}). -spec(join/2 :: (name(), pid()) -> 'ok'). -spec(leave/2 :: (name(), pid()) -> 'ok'). -spec(get_members/1 :: (name()) -> [pid()]). diff --git a/src/rabbit.erl b/src/rabbit.erl index 18045b94fc..c257497070 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -83,9 +83,10 @@ {requires, external_infrastructure}, {enables, kernel_ready}]}). 
--rabbit_boot_step({rabbit_hooks, - [{description, "internal event notification system"}, - {mfa, {rabbit_hooks, start, []}}, +-rabbit_boot_step({rabbit_event, + [{description, "statistics event manager"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_event]}}, {requires, external_infrastructure}, {enables, kernel_ready}]}). @@ -204,8 +205,7 @@ %%---------------------------------------------------------------------------- prepare() -> - ok = ensure_working_log_handlers(), - ok = rabbit_mnesia:ensure_mnesia_dir(). + ok = ensure_working_log_handlers(). start() -> try @@ -426,9 +426,9 @@ print_banner() -> "| ~s +---+ |~n" "| |~n" "+-------------------+~n" - "AMQP ~p-~p~n~s~n~s~n~n", + "~s~n~s~n~s~n~n", [Product, string:right([$v|Version], ProductLen), - ?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR, + ?PROTOCOL_VERSION, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]), Settings = [{"node", node()}, {"app descriptor", app_location()}, diff --git a/src/rabbit_access_control.erl b/src/rabbit_access_control.erl index 30bae25e5a..9cfe1ca8df 100644 --- a/src/rabbit_access_control.erl +++ b/src/rabbit_access_control.erl @@ -38,7 +38,7 @@ -export([add_user/2, delete_user/1, change_password/2, list_users/0, lookup_user/1]). -export([add_vhost/1, delete_vhost/1, list_vhosts/0]). --export([set_permissions/5, clear_permissions/2, +-export([set_permissions/5, set_permissions/6, clear_permissions/2, list_vhost_permissions/1, list_user_permissions/1]). %%---------------------------------------------------------------------------- @@ -51,13 +51,20 @@ -type(username() :: binary()). -type(password() :: binary()). -type(regexp() :: binary()). - --spec(check_login/2 :: (binary(), binary()) -> rabbit_types:user()). --spec(user_pass_login/2 :: (username(), password()) -> rabbit_types:user()). +-type(scope() :: binary()). + +-spec(check_login/2 :: + (binary(), binary()) -> rabbit_types:user() | + rabbit_types:channel_exit()). +-spec(user_pass_login/2 :: + (username(), password()) + -> rabbit_types:user() | rabbit_types:channel_exit()). -spec(check_vhost_access/2 :: - (rabbit_types:user(), rabbit_types:vhost()) -> 'ok'). + (rabbit_types:user(), rabbit_types:vhost()) + -> 'ok' | rabbit_types:channel_exit()). -spec(check_resource_access/3 :: - (username(), rabbit_types:r(atom()), permission_atom()) -> 'ok'). + (username(), rabbit_types:r(atom()), permission_atom()) + -> 'ok' | rabbit_types:channel_exit()). -spec(add_user/2 :: (username(), password()) -> 'ok'). -spec(delete_user/1 :: (username()) -> 'ok'). -spec(change_password/2 :: (username(), password()) -> 'ok'). @@ -65,11 +72,15 @@ -spec(lookup_user/1 :: (username()) -> rabbit_types:ok(rabbit_types:user()) | rabbit_types:error('not_found')). --spec(add_vhost/1 :: (rabbit_types:vhost()) -> 'ok'). --spec(delete_vhost/1 :: (rabbit_types:vhost()) -> 'ok'). +-spec(add_vhost/1 :: + (rabbit_types:vhost()) -> 'ok'). +-spec(delete_vhost/1 :: + (rabbit_types:vhost()) -> 'ok'). -spec(list_vhosts/0 :: () -> [rabbit_types:vhost()]). -spec(set_permissions/5 ::(username(), rabbit_types:vhost(), regexp(), regexp(), regexp()) -> 'ok'). +-spec(set_permissions/6 ::(scope(), username(), rabbit_types:vhost(), + regexp(), regexp(), regexp()) -> 'ok'). -spec(clear_permissions/2 :: (username(), rabbit_types:vhost()) -> 'ok'). -spec(list_vhost_permissions/1 :: (rabbit_types:vhost()) @@ -149,6 +160,7 @@ check_vhost_access(#user{username = Username}, VHostPath) -> [VHostPath, Username]) end. 
+permission_index(scope) -> #permission.scope; permission_index(configure) -> #permission.configure; permission_index(write) -> #permission.write; permission_index(read) -> #permission.read. @@ -159,10 +171,6 @@ check_resource_access(Username, check_resource_access(Username, R#resource{name = <<"amq.default">>}, Permission); -check_resource_access(_Username, - #resource{name = <<"amq.gen",_/binary>>}, - _Permission) -> - ok; check_resource_access(Username, R = #resource{virtual_host = VHostPath, name = Name}, Permission) -> @@ -172,16 +180,19 @@ check_resource_access(Username, [] -> false; [#user_permission{permission = P}] -> - PermRegexp = case element(permission_index(Permission), P) of - %% <<"^$">> breaks Emacs' erlang mode - <<"">> -> <<$^, $$>>; - RE -> RE - end, - case regexp:match( - binary_to_list(Name), - binary_to_list(PermRegexp)) of - {match, _, _} -> true; - nomatch -> false + case {Name, P} of + {<<"amq.gen",_/binary>>, #permission{scope = client}} -> + true; + _ -> + PermRegexp = case element(permission_index(Permission), P) of + %% <<"^$">> breaks Emacs' erlang mode + <<"">> -> <<$^, $$>>; + RE -> RE + end, + case re:run(Name, PermRegexp, [{capture, none}]) of + match -> true; + nomatch -> false + end end end, if Res -> ok; @@ -294,7 +305,7 @@ internal_delete_vhost(VHostPath) -> ok = rabbit_exchange:delete(Name, false) end, rabbit_exchange:list(VHostPath)), - lists:foreach(fun ({Username, _, _, _}) -> + lists:foreach(fun ({Username, _, _, _, _}) -> ok = clear_permissions(Username, VHostPath) end, list_vhost_permissions(VHostPath)), @@ -306,13 +317,22 @@ list_vhosts() -> validate_regexp(RegexpBin) -> Regexp = binary_to_list(RegexpBin), - case regexp:parse(Regexp) of + case re:compile(Regexp) of {ok, _} -> ok; {error, Reason} -> throw({error, {invalid_regexp, Regexp, Reason}}) end. set_permissions(Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm) -> + set_permissions(<<"client">>, Username, VHostPath, ConfigurePerm, + WritePerm, ReadPerm). + +set_permissions(ScopeBin, Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm) -> lists:map(fun validate_regexp/1, [ConfigurePerm, WritePerm, ReadPerm]), + Scope = case ScopeBin of + <<"client">> -> client; + <<"all">> -> all; + _ -> throw({error, {invalid_scope, ScopeBin}}) + end, rabbit_misc:execute_mnesia_transaction( rabbit_misc:with_user_and_vhost( Username, VHostPath, @@ -322,12 +342,14 @@ set_permissions(Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm) -> username = Username, virtual_host = VHostPath}, permission = #permission{ + scope = Scope, configure = ConfigurePerm, write = WritePerm, read = ReadPerm}}, write) end)). + clear_permissions(Username, VHostPath) -> rabbit_misc:execute_mnesia_transaction( rabbit_misc:with_user_and_vhost( @@ -339,22 +361,23 @@ clear_permissions(Username, VHostPath) -> end)). list_vhost_permissions(VHostPath) -> - [{Username, ConfigurePerm, WritePerm, ReadPerm} || - {Username, _, ConfigurePerm, WritePerm, ReadPerm} <- + [{Username, ConfigurePerm, WritePerm, ReadPerm, Scope} || + {Username, _, ConfigurePerm, WritePerm, ReadPerm, Scope} <- list_permissions(rabbit_misc:with_vhost( VHostPath, match_user_vhost('_', VHostPath)))]. list_user_permissions(Username) -> - [{VHostPath, ConfigurePerm, WritePerm, ReadPerm} || - {_, VHostPath, ConfigurePerm, WritePerm, ReadPerm} <- + [{VHostPath, ConfigurePerm, WritePerm, ReadPerm, Scope} || + {_, VHostPath, ConfigurePerm, WritePerm, ReadPerm, Scope} <- list_permissions(rabbit_misc:with_user( Username, match_user_vhost(Username, '_')))]. 
list_permissions(QueryThunk) -> - [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm} || + [{Username, VHostPath, ConfigurePerm, WritePerm, ReadPerm, Scope} || #user_permission{user_vhost = #user_vhost{username = Username, virtual_host = VHostPath}, permission = #permission{ + scope = Scope, configure = ConfigurePerm, write = WritePerm, read = ReadPerm}} <- diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index f1b527681c..6b9ac56059 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -31,16 +31,17 @@ -module(rabbit_amqqueue). --export([start/0, declare/5, delete/3, purge/1]). +-export([start/0, stop/0, declare/5, delete/3, purge/1]). -export([internal_declare/2, internal_delete/1, maybe_run_queue_via_backing_queue/2, update_ram_duration/1, set_ram_duration_target/2, - set_maximum_since_use/2]). + set_maximum_since_use/2, maybe_expire/1]). -export([pseudo_queue/2]). -export([lookup/1, with/2, with_or_die/2, assert_equivalence/5, check_exclusive_access/2, with_exclusive_access_or_die/3, - stat/1, deliver/2, requeue/3, ack/4]). + stat/1, deliver/2, requeue/3, ack/4, reject/4]). -export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). +-export([emit_stats/1]). -export([consumers/1, consumers_all/1]). -export([basic_get/3, basic_consume/7, basic_cancel/4]). -export([notify_sent/2, unblock/2, flush_all/2]). @@ -55,6 +56,8 @@ -include("rabbit.hrl"). -include_lib("stdlib/include/qlc.hrl"). +-define(EXPIRES_TYPES, [byte, short, signedint, long]). + %%---------------------------------------------------------------------------- -ifdef(use_specs). @@ -71,21 +74,28 @@ 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). -spec(start/0 :: () -> 'ok'). +-spec(stop/0 :: () -> 'ok'). -spec(declare/5 :: (name(), boolean(), boolean(), rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) - -> {'new' | 'existing', rabbit_types:amqqueue()}). + -> {'new' | 'existing', rabbit_types:amqqueue()} | + rabbit_types:channel_exit()). -spec(lookup/1 :: (name()) -> rabbit_types:ok(rabbit_types:amqqueue()) | rabbit_types:error('not_found')). -spec(with/2 :: (name(), qfun(A)) -> A | rabbit_types:error('not_found')). --spec(with_or_die/2 :: (name(), qfun(A)) -> A). +-spec(with_or_die/2 :: + (name(), qfun(A)) -> A | rabbit_types:channel_exit()). -spec(assert_equivalence/5 :: (rabbit_types:amqqueue(), boolean(), boolean(), - rabbit_framing:amqp_table(), rabbit_types:maybe(pid)) - -> ok). --spec(check_exclusive_access/2 :: (rabbit_types:amqqueue(), pid()) -> 'ok'). --spec(with_exclusive_access_or_die/3 :: (name(), pid(), qfun(A)) -> A). + rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) + -> 'ok' | rabbit_types:channel_exit() | + rabbit_types:connection_exit()). +-spec(check_exclusive_access/2 :: + (rabbit_types:amqqueue(), pid()) + -> 'ok' | rabbit_types:channel_exit()). +-spec(with_exclusive_access_or_die/3 :: + (name(), pid(), qfun(A)) -> A | rabbit_types:channel_exit()). -spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:amqqueue()]). -spec(info_keys/0 :: () -> [rabbit_types:info_key()]). -spec(info/1 :: (rabbit_types:amqqueue()) -> [rabbit_types:info()]). @@ -104,6 +114,7 @@ -spec(stat/1 :: (rabbit_types:amqqueue()) -> {'ok', non_neg_integer(), non_neg_integer()}). +-spec(emit_stats/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(delete/3 :: (rabbit_types:amqqueue(), 'false', 'false') -> qlen(); @@ -121,6 +132,7 @@ -spec(ack/4 :: (pid(), rabbit_types:maybe(rabbit_types:txn()), [msg_id()], pid()) -> 'ok'). 
+-spec(reject/4 :: (pid(), [msg_id()], boolean(), pid()) -> 'ok'). -spec(commit_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> ok_or_errors()). -spec(rollback_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> 'ok'). -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()). @@ -139,12 +151,15 @@ -spec(internal_declare/2 :: (rabbit_types:amqqueue(), boolean()) -> rabbit_types:amqqueue() | 'not_found'). --spec(internal_delete/1 :: (name()) -> rabbit_types:ok_or_error('not_found')). +-spec(internal_delete/1 :: + (name()) -> rabbit_types:ok_or_error('not_found') | + rabbit_types:connection_exit()). -spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). -spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). +-spec(maybe_expire/1 :: (pid()) -> 'ok'). -spec(on_node_down/1 :: (node()) -> 'ok'). -spec(pseudo_queue/2 :: (binary(), pid()) -> rabbit_types:amqqueue()). @@ -154,7 +169,7 @@ start() -> DurableQueues = find_durable_queues(), - {ok, BQ} = application:get_env(backing_queue_module), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), ok = BQ:start([QName || #amqqueue{name = QName} <- DurableQueues]), {ok,_} = supervisor:start_child( rabbit_sup, @@ -164,6 +179,12 @@ start() -> _RealDurableQueues = recover_durable_queues(DurableQueues), ok. +stop() -> + ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), + ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), + ok = BQ:stop(). + find_durable_queues() -> Node = node(), %% TODO: use dirty ops instead @@ -176,9 +197,11 @@ find_durable_queues() -> recover_durable_queues(DurableQueues) -> Qs = [start_queue_process(Q) || Q <- DurableQueues], - [Q || Q <- Qs, gen_server2:call(Q#amqqueue.pid, {init, true}) == Q]. + [Q || Q <- Qs, + gen_server2:call(Q#amqqueue.pid, {init, true}, infinity) == Q]. declare(QueueName, Durable, AutoDelete, Args, Owner) -> + ok = check_declare_arguments(QueueName, Args), Q = start_queue_process(#amqqueue{name = QueueName, durable = Durable, auto_delete = AutoDelete, @@ -246,11 +269,13 @@ with(Name, F) -> with_or_die(Name, F) -> with(Name, F, fun () -> rabbit_misc:not_found(Name) end). -assert_equivalence(#amqqueue{durable = Durable, auto_delete = AutoDelete} = Q, - Durable, AutoDelete, _Args, Owner) -> +assert_equivalence(#amqqueue{durable = Durable, + auto_delete = AutoDelete} = Q, + Durable, AutoDelete, RequiredArgs, Owner) -> + assert_args_equivalence(Q, RequiredArgs), check_exclusive_access(Q, Owner, strict); assert_equivalence(#amqqueue{name = QueueName}, - _Durable, _AutoDelete, _Args, _Owner) -> + _Durable, _AutoDelete, _RequiredArgs, _Owner) -> rabbit_misc:protocol_error( not_allowed, "parameters for ~s not equivalent", [rabbit_misc:rs(QueueName)]). @@ -271,6 +296,31 @@ with_exclusive_access_or_die(Name, ReaderPid, F) -> with_or_die(Name, fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end). +assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, + RequiredArgs) -> + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName, + [<<"x-expires">>]). 
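%% Sketch (not part of the patch): a queue declared with the x-expires
%% argument that assert_args_equivalence/2 above checks on redeclaration.
%% The queue name, vhost and the 30-minute value are invented; the value
%% is interpreted as milliseconds by the expiry timer.
QueueName = rabbit_misc:r(<<"/">>, queue, <<"leased.queue">>),
Args      = [{<<"x-expires">>, long, 1800000}],
{new, _Q} = rabbit_amqqueue:declare(QueueName, true, false, Args, none).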
+ +check_declare_arguments(QueueName, Args) -> + [case Fun(rabbit_misc:table_lookup(Args, Key)) of + ok -> ok; + {error, Error} -> rabbit_misc:protocol_error( + precondition_failed, + "invalid arg '~s' for ~s: ~w", + [Key, rabbit_misc:rs(QueueName), Error]) + end || {Key, Fun} <- [{<<"x-expires">>, fun check_expires_argument/1}]], + ok. + +check_expires_argument(undefined) -> + ok; +check_expires_argument({Type, Expires}) when Expires > 0 -> + case lists:member(Type, ?EXPIRES_TYPES) of + true -> ok; + false -> {error, {expires_not_of_acceptable_type, Type, Expires}} + end; +check_expires_argument({_Type, _Expires}) -> + {error, expires_zero_or_less}. + list(VHostPath) -> mnesia:dirty_match_object( rabbit_queue, @@ -305,6 +355,9 @@ consumers_all(VHostPath) -> stat(#amqqueue{pid = QPid}) -> delegate_call(QPid, stat, infinity). +emit_stats(#amqqueue{pid = QPid}) -> + delegate_pcast(QPid, 7, emit_stats). + delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> delegate_call(QPid, {delete, IfUnused, IfEmpty}, infinity). @@ -328,9 +381,11 @@ requeue(QPid, MsgIds, ChPid) -> ack(QPid, Txn, MsgIds, ChPid) -> delegate_pcast(QPid, 7, {ack, Txn, MsgIds, ChPid}). +reject(QPid, MsgIds, Requeue, ChPid) -> + delegate_pcast(QPid, 7, {reject, MsgIds, Requeue, ChPid}). + commit_all(QPids, Txn, ChPid) -> safe_delegate_call_ok( - fun (QPid) -> exit({queue_disappeared, QPid}) end, fun (QPid) -> gen_server2:call(QPid, {commit, Txn, ChPid}, infinity) end, QPids). @@ -340,9 +395,6 @@ rollback_all(QPids, Txn, ChPid) -> notify_down_all(QPids, ChPid) -> safe_delegate_call_ok( - %% we don't care if the queue process has terminated in the - %% meantime - fun (_) -> ok end, fun (QPid) -> gen_server2:call(QPid, {notify_down, ChPid}, infinity) end, QPids). @@ -399,7 +451,7 @@ internal_delete(QueueName) -> end. maybe_run_queue_via_backing_queue(QPid, Fun) -> - gen_server2:pcall(QPid, 7, {maybe_run_queue_via_backing_queue, Fun}, + gen_server2:pcall(QPid, 6, {maybe_run_queue_via_backing_queue, Fun}, infinity). update_ram_duration(QPid) -> @@ -411,6 +463,9 @@ set_ram_duration_target(QPid, Duration) -> set_maximum_since_use(QPid, Age) -> gen_server2:pcast(QPid, 8, {set_maximum_since_use, Age}). +maybe_expire(QPid) -> + gen_server2:pcast(QPid, 8, maybe_expire). + on_node_down(Node) -> [Hook() || Hook <- rabbit_misc:execute_mnesia_transaction( @@ -434,11 +489,11 @@ pseudo_queue(QueueName, Pid) -> arguments = [], pid = Pid}. -safe_delegate_call_ok(H, F, Pids) -> +safe_delegate_call_ok(F, Pids) -> {_, Bad} = delegate:invoke(Pids, fun (Pid) -> rabbit_misc:with_exit_handler( - fun () -> H(Pid) end, + fun () -> ok end, fun () -> F(Pid) end) end), case Bad of diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 2fb60e9675..90a0503b00 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -56,8 +56,11 @@ backing_queue_state, active_consumers, blocked_consumers, + expires, sync_timer_ref, - rate_timer_ref + rate_timer_ref, + expiry_timer_ref, + stats_timer }). -record(consumer, {tag, ack_required}). @@ -72,13 +75,8 @@ txn, unsent_message_count}). --define(INFO_KEYS, - [name, - durable, - auto_delete, - arguments, - pid, - owner_pid, +-define(STATISTICS_KEYS, + [pid, exclusive_consumer_pid, exclusive_consumer_tag, messages_ready, @@ -89,6 +87,17 @@ backing_queue_status ]). +-define(CREATION_EVENT_KEYS, + [pid, + name, + durable, + auto_delete, + arguments, + owner_pid + ]). + +-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). 
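%% Illustration only (check_expires_argument/1 is module-internal): the
%% results below follow directly from the clauses above; the concrete
%% numbers are arbitrary.
ok = check_expires_argument(undefined),
ok = check_expires_argument({long, 1800000}),
{error, expires_zero_or_less} = check_expires_argument({long, 0}),
{error, {expires_not_of_acceptable_type, table, 5}} =
    check_expires_argument({table, 5}).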
+ %%---------------------------------------------------------------------------- start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). @@ -102,15 +111,18 @@ init(Q) -> process_flag(trap_exit, true), {ok, BQ} = application:get_env(backing_queue_module), - {ok, #q{q = Q#amqqueue{pid = self()}, - exclusive_consumer = none, - has_had_consumers = false, - backing_queue = BQ, + {ok, #q{q = Q#amqqueue{pid = self()}, + exclusive_consumer = none, + has_had_consumers = false, + backing_queue = BQ, backing_queue_state = undefined, - active_consumers = queue:new(), - blocked_consumers = queue:new(), - sync_timer_ref = undefined, - rate_timer_ref = undefined}, hibernate, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + expires = undefined, + sync_timer_ref = undefined, + rate_timer_ref = undefined, + expiry_timer_ref = undefined, + stats_timer = rabbit_event:init_stats_timer()}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. terminate(shutdown, State = #q{backing_queue = BQ}) -> @@ -132,6 +144,12 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- +init_expires(State = #q{q = #amqqueue{arguments = Arguments}}) -> + case rabbit_misc:table_lookup(Arguments, <<"x-expires">>) of + {_Type, Expires} -> ensure_expiry_timer(State#q{expires = Expires}); + undefined -> State + end. + declare(Recover, From, State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined}) -> @@ -145,7 +163,11 @@ declare(Recover, From, self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), BQS = BQ:init(QName, IsDurable, Recover), - noreply(State#q{backing_queue_state = BQS}); + rabbit_event:notify( + queue_created, + [{Item, i(Item, State)} || + Item <- ?CREATION_EVENT_KEYS]), + noreply(init_expires(State#q{backing_queue_state = BQS})); Q1 -> {stop, normal, {existing, Q1}, State} end. @@ -163,6 +185,7 @@ terminate_shutdown(Fun, State) -> BQ:tx_rollback(Txn, BQSN), BQSN1 end, BQS, all_ch_record()), + rabbit_event:notify(queue_deleted, [{pid, self()}]), State1#q{backing_queue_state = Fun(BQS1)} end. @@ -179,16 +202,17 @@ noreply(NewState) -> next_state(State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = ensure_rate_timer(State), - case BQ:needs_sync(BQS)of - true -> {ensure_sync_timer(State1), 0}; - false -> {stop_sync_timer(State1), hibernate} + State2 = ensure_stats_timer(State1), + case BQ:needs_idle_timeout(BQS)of + true -> {ensure_sync_timer(State2), 0}; + false -> {stop_sync_timer(State2), hibernate} end. ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, rabbit_amqqueue, maybe_run_queue_via_backing_queue, - [self(), fun (BQS) -> BQ:sync(BQS) end]), + [self(), fun (BQS) -> BQ:idle_timeout(BQS) end]), State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> State. @@ -218,6 +242,39 @@ stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), State#q{rate_timer_ref = undefined}. +stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) -> + State; +stop_expiry_timer(State = #q{expiry_timer_ref = TRef}) -> + {ok, cancel} = timer:cancel(TRef), + State#q{expiry_timer_ref = undefined}. + +%% We only wish to expire where there are no consumers *and* when +%% basic.get hasn't been called for the configured period. 
+ensure_expiry_timer(State = #q{expires = undefined}) -> + State; +ensure_expiry_timer(State = #q{expires = Expires}) -> + case is_unused(State) of + true -> + NewState = stop_expiry_timer(State), + {ok, TRef} = timer:apply_after( + Expires, rabbit_amqqueue, maybe_expire, [self()]), + NewState#q{expiry_timer_ref = TRef}; + false -> + State + end. + +ensure_stats_timer(State = #q{stats_timer = StatsTimer, + q = Q}) -> + State#q{stats_timer = rabbit_event:ensure_stats_timer( + StatsTimer, + fun() -> emit_stats(State) end, + fun() -> rabbit_amqqueue:emit_stats(Q) end)}. + +stop_stats_timer(State = #q{stats_timer = StatsTimer}) -> + State#q{stats_timer = rabbit_event:stop_stats_timer( + StatsTimer, + fun() -> emit_stats(State) end)}. + assert_invariant(#q{active_consumers = AC, backing_queue = BQ, backing_queue_state = BQS}) -> true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)). @@ -439,7 +496,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> _ -> rollback_transaction(Txn, ChPid, State1) end, - {ok, requeue_and_run(sets:to_list(ChAckTags), State2)} + {ok, requeue_and_run(sets:to_list(ChAckTags), + ensure_expiry_timer(State2))} end end. @@ -528,6 +586,10 @@ i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> i(Item, _) -> throw({bad_argument, Item}). +emit_stats(State) -> + rabbit_event:notify(queue_stats, + [{Item, i(Item, State)} || Item <- ?STATISTICS_KEYS]). + %--------------------------------------------------------------------------- handle_call({init, Recover}, From, @@ -541,6 +603,7 @@ handle_call({init, Recover}, From, declare(Recover, From, State); _ -> #q{q = #amqqueue{name = QName, durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined} = State, + gen_server2:reply(From, not_found), case Recover of true -> ok; _ -> rabbit_log:warning( @@ -548,7 +611,7 @@ handle_call({init, Recover}, From, end, BQS = BQ:init(QName, IsDurable, Recover), %% Rely on terminate to delete the queue. 
- {stop, normal, not_found, State#q{backing_queue_state = BQS}} + {stop, normal, State#q{backing_queue_state = BQS}} end; handle_call(info, _From, State) -> @@ -610,8 +673,9 @@ handle_call({basic_get, ChPid, NoAck}, _From, State = #q{q = #amqqueue{name = QName}, backing_queue_state = BQS, backing_queue = BQ}) -> AckRequired = not NoAck, + State1 = ensure_expiry_timer(State), case BQ:fetch(AckRequired, BQS) of - {empty, BQS1} -> reply(empty, State#q{backing_queue_state = BQS1}); + {empty, BQS1} -> reply(empty, State1#q{backing_queue_state = BQS1}); {{Message, IsDelivered, AckTag, Remaining}, BQS1} -> case AckRequired of true -> C = #cr{acktags = ChAckTags} = ch_record(ChPid), @@ -620,7 +684,7 @@ handle_call({basic_get, ChPid, NoAck}, _From, false -> ok end, Msg = {QName, self(), AckTag, IsDelivered, Message}, - reply({ok, Remaining, Msg}, State#q{backing_queue_state = BQS1}) + reply({ok, Remaining, Msg}, State1#q{backing_queue_state = BQS1}) end; handle_call({basic_consume, NoAck, ChPid, LimiterPid, @@ -687,7 +751,7 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, ChPid, ConsumerTag, State#q.blocked_consumers)}, case should_auto_delete(NewState) of - false -> reply(ok, NewState); + false -> reply(ok, ensure_expiry_timer(NewState)); true -> {stop, normal, ok, NewState} end end; @@ -719,8 +783,6 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> gen_server2:reply(From, ok), case lookup_ch(ChPid) of not_found -> - rabbit_log:warning("Ignoring requeue from unknown ch: ~p~n", - [ChPid]), noreply(State); C = #cr{acktags = ChAckTags} -> ChAckTags1 = subtract_acks(ChAckTags, AckTags), @@ -749,7 +811,22 @@ handle_cast({ack, Txn, AckTags, ChPid}, _ -> {C#cr{txn = Txn}, BQ:tx_ack(Txn, AckTags, BQS)} end, store_ch_record(C1), - noreply(State #q { backing_queue_state = BQS1 }) + noreply(State#q{backing_queue_state = BQS1}) + end; + +handle_cast({reject, AckTags, Requeue, ChPid}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + case lookup_ch(ChPid) of + not_found -> + noreply(State); + C = #cr{acktags = ChAckTags} -> + ChAckTags1 = subtract_acks(ChAckTags, AckTags), + store_ch_record(C#cr{acktags = ChAckTags1}), + noreply(case Requeue of + true -> requeue_and_run(AckTags, State); + false -> BQS1 = BQ:ack(AckTags, BQS), + State #q { backing_queue_state = BQS1 } + end) end; handle_cast({rollback, Txn, ChPid}, State) -> @@ -803,6 +880,17 @@ handle_cast({set_ram_duration_target, Duration}, handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State); + +handle_cast(maybe_expire, State) -> + case is_unused(State) of + true -> ?LOGDEBUG("Queue lease expired for ~p~n", [State#q.q]), + {stop, normal, State}; + false -> noreply(ensure_expiry_timer(State)) + end; + +handle_cast(emit_stats, State) -> + emit_stats(State), noreply(State). handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, @@ -822,7 +910,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_info(timeout, State = #q{backing_queue = BQ}) -> noreply(maybe_run_queue_via_backing_queue( - fun (BQS) -> BQ:sync(BQS) end, State)); + fun (BQS) -> BQ:idle_timeout(BQS) end, State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; @@ -840,4 +928,5 @@ handle_pre_hibernate(State = #q{backing_queue = BQ, DesiredDuration = rabbit_memory_monitor:report_ram_duration(self(), infinity), BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - {hibernate, stop_rate_timer(State#q{backing_queue_state = BQS2})}. 
+ {hibernate, stop_stats_timer( + stop_rate_timer(State#q{backing_queue_state = BQS2}))}. diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 432d62900b..2230c507e9 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -42,6 +42,11 @@ behaviour_info(callbacks) -> %% shared resources. {start, 1}, + %% Called to tear down any state/resources. NB: Implementations + %% should not depend on this function being called on shutdown + %% and instead should hook into the rabbit supervision hierarchy. + {stop, 0}, + %% Initialise the backing queue and its state. {init, 3}, @@ -113,14 +118,15 @@ behaviour_info(callbacks) -> %% queue. {ram_duration, 1}, - %% Should 'sync' be called as soon as the queue process can - %% manage (either on an empty mailbox, or when a timer fires)? - {needs_sync, 1}, + %% Should 'idle_timeout' be called as soon as the queue process + %% can manage (either on an empty mailbox, or when a timer + %% fires)? + {needs_idle_timeout, 1}, - %% Called (eventually) after needs_sync returns 'true'. Note this - %% may be called more than once for each 'true' returned from - %% needs_sync. - {sync, 1}, + %% Called (eventually) after needs_idle_timeout returns + %% 'true'. Note this may be called more than once for each 'true' + %% returned from needs_idle_timeout. + {idle_timeout, 1}, %% Called immediately before the queue hibernates. {handle_pre_hibernate, 1}, diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 03a19961fd..d62fc07cb0 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -48,11 +48,11 @@ ({ok, rabbit_router:routing_result(), [pid()]} | rabbit_types:error('not_found'))). --spec(publish/1 :: (rabbit_types:delivery()) -> publish_result()). +-spec(publish/1 :: + (rabbit_types:delivery()) -> publish_result()). -spec(delivery/4 :: (boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), - rabbit_types:message()) - -> rabbit_types:delivery()). + rabbit_types:message()) -> rabbit_types:delivery()). -spec(message/4 :: (rabbit_exchange:name(), rabbit_router:routing_key(), properties_input(), binary()) @@ -97,10 +97,13 @@ delivery(Mandatory, Immediate, Txn, Message) -> sender = self(), message = Message}. build_content(Properties, BodyBin) -> - {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), #content{class_id = ClassId, properties = Properties, properties_bin = none, + protocol = none, payload_fragments_rev = [BodyBin]}. from_content(Content) -> @@ -108,7 +111,9 @@ from_content(Content) -> properties = Props, payload_fragments_rev = FragmentsRev} = rabbit_binary_parser:ensure_content_decoded(Content), - {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), {Props, list_to_binary(lists:reverse(FragmentsRev))}. message(ExchangeName, RoutingKeyBin, RawProperties, BodyBin) -> diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl index 0e6ebe57bc..056ab1b574 100644 --- a/src/rabbit_binary_generator.erl +++ b/src/rabbit_binary_generator.erl @@ -41,12 +41,12 @@ % See definition of check_empty_content_body_frame_size/0, an assertion called at startup. -define(EMPTY_CONTENT_BODY_FRAME_SIZE, 8). 
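%% For reference (not part of the patch): the hard-coded framing module
%% used by rabbit_basic above resolves the publish method to its AMQP
%% 0-9-1 class/method identifiers.
{60, 40} = rabbit_framing_amqp_0_9_1:method_id('basic.publish').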
--export([build_simple_method_frame/2, - build_simple_content_frames/3, +-export([build_simple_method_frame/3, + build_simple_content_frames/4, build_heartbeat_frame/0]). -export([generate_table/1, encode_properties/2]). -export([check_empty_content_body_frame_size/0]). --export([ensure_content_encoded/1, clear_encoded_content/1]). +-export([ensure_content_encoded/2, clear_encoded_content/1]). -import(lists). @@ -56,20 +56,22 @@ -type(frame() :: [binary()]). --spec(build_simple_method_frame/2 :: - (rabbit_channel:channel_number(), rabbit_framing:amqp_method_record()) +-spec(build_simple_method_frame/3 :: + (rabbit_channel:channel_number(), rabbit_framing:amqp_method_record(), + rabbit_types:protocol()) -> frame()). --spec(build_simple_content_frames/3 :: +-spec(build_simple_content_frames/4 :: (rabbit_channel:channel_number(), rabbit_types:content(), - non_neg_integer()) + non_neg_integer(), rabbit_types:protocol()) -> [frame()]). -spec(build_heartbeat_frame/0 :: () -> frame()). -spec(generate_table/1 :: (rabbit_framing:amqp_table()) -> binary()). -spec(encode_properties/2 :: ([rabbit_framing:amqp_property_type()], [any()]) -> binary()). -spec(check_empty_content_body_frame_size/0 :: () -> 'ok'). --spec(ensure_content_encoded/1 :: - (rabbit_types:content()) -> rabbit_types:encoded_content()). +-spec(ensure_content_encoded/2 :: + (rabbit_types:content(), rabbit_types:protocol()) -> + rabbit_types:encoded_content()). -spec(clear_encoded_content/1 :: (rabbit_types:content()) -> rabbit_types:unencoded_content()). @@ -77,30 +79,24 @@ %%---------------------------------------------------------------------------- -build_simple_method_frame(ChannelInt, MethodRecord) -> - MethodFields = rabbit_framing:encode_method_fields(MethodRecord), +build_simple_method_frame(ChannelInt, MethodRecord, Protocol) -> + MethodFields = Protocol:encode_method_fields(MethodRecord), MethodName = rabbit_misc:method_record_type(MethodRecord), - {ClassId, MethodId} = rabbit_framing:method_id(MethodName), + {ClassId, MethodId} = Protocol:method_id(MethodName), create_frame(1, ChannelInt, [<<ClassId:16, MethodId:16>>, MethodFields]). -build_simple_content_frames(ChannelInt, - #content{class_id = ClassId, - properties = ContentProperties, - properties_bin = ContentPropertiesBin, - payload_fragments_rev = PayloadFragmentsRev}, - FrameMax) -> - {BodySize, ContentFrames} = build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt), +build_simple_content_frames(ChannelInt, Content, FrameMax, Protocol) -> + #content{class_id = ClassId, + properties_bin = ContentPropertiesBin, + payload_fragments_rev = PayloadFragmentsRev} = + ensure_content_encoded(Content, Protocol), + {BodySize, ContentFrames} = + build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt), HeaderFrame = create_frame(2, ChannelInt, [<<ClassId:16, 0:16, BodySize:64>>, - maybe_encode_properties(ContentProperties, ContentPropertiesBin)]), + ContentPropertiesBin]), [HeaderFrame | ContentFrames]. -maybe_encode_properties(_ContentProperties, ContentPropertiesBin) - when is_binary(ContentPropertiesBin) -> - ContentPropertiesBin; -maybe_encode_properties(ContentProperties, none) -> - rabbit_framing:encode_properties(ContentProperties). - build_content_frames(FragsRev, FrameMax, ChannelInt) -> BodyPayloadMax = if FrameMax == 0 -> iolist_size(FragsRev); @@ -283,13 +279,25 @@ check_empty_content_body_frame_size() -> ComputedSize, ?EMPTY_CONTENT_BODY_FRAME_SIZE}) end. 
-ensure_content_encoded(Content = #content{properties_bin = PropsBin}) - when PropsBin =/= 'none' -> +ensure_content_encoded(Content = #content{properties_bin = PropBin, + protocol = Protocol}, Protocol) + when PropBin =/= none -> Content; -ensure_content_encoded(Content = #content{properties = Props}) -> - Content #content{properties_bin = rabbit_framing:encode_properties(Props)}. - -clear_encoded_content(Content = #content{properties_bin = none}) -> +ensure_content_encoded(Content = #content{properties = none, + properties_bin = PropBin, + protocol = Protocol}, Protocol1) + when PropBin =/= none -> + Props = Protocol:decode_properties(Content#content.class_id, PropBin), + Content#content{properties = Props, + properties_bin = Protocol1:encode_properties(Props), + protocol = Protocol1}; +ensure_content_encoded(Content = #content{properties = Props}, Protocol) + when Props =/= none -> + Content#content{properties_bin = Protocol:encode_properties(Props), + protocol = Protocol}. + +clear_encoded_content(Content = #content{properties_bin = none, + protocol = none}) -> Content; clear_encoded_content(Content = #content{properties = none}) -> %% Only clear when we can rebuild the properties_bin later in @@ -297,4 +305,4 @@ clear_encoded_content(Content = #content{properties = none}) -> %% one of properties and properties_bin can be 'none' Content; clear_encoded_content(Content = #content{}) -> - Content#content{properties_bin = none}. + Content#content{properties_bin = none, protocol = none}. diff --git a/src/rabbit_binary_parser.erl b/src/rabbit_binary_parser.erl index 69e34440b8..ebf063f031 100644 --- a/src/rabbit_binary_parser.erl +++ b/src/rabbit_binary_parser.erl @@ -163,11 +163,12 @@ parse_property(table, <<Len:32/unsigned, Table:Len/binary, Rest/binary>>) -> {parse_table(Table), Rest}. ensure_content_decoded(Content = #content{properties = Props}) - when Props =/= 'none' -> + when Props =/= none -> Content; -ensure_content_decoded(Content = #content{properties_bin = PropBin}) - when is_binary(PropBin) -> - Content#content{properties = rabbit_framing:decode_properties( +ensure_content_decoded(Content = #content{properties_bin = PropBin, + protocol = Protocol}) + when PropBin =/= none -> + Content#content{properties = Protocol:decode_properties( Content#content.class_id, PropBin)}. clear_decoded_content(Content = #content{properties = none}) -> diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index c4db3ace73..835d3f0da7 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -35,50 +35,52 @@ -behaviour(gen_server2). --export([start_link/6, do/2, do/3, shutdown/1]). --export([send_command/2, deliver/4, conserve_memory/2, flushed/2]). +-export([start_link/7, do/2, do/3, shutdown/1]). +-export([send_command/2, deliver/4, flushed/2]). -export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1]). - --export([flow_timeout/2]). +-export([emit_stats/1, flush/1]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1]). -record(ch, {state, channel, reader_pid, writer_pid, limiter_pid, - transaction_id, tx_participants, next_tag, + start_limiter_fun, transaction_id, tx_participants, next_tag, uncommitted_ack_q, unacked_message_q, username, virtual_host, most_recently_declared_queue, - consumer_mapping, blocking, queue_collector_pid, flow}). - --record(flow, {server, client, pending}). + consumer_mapping, blocking, queue_collector_pid, stats_timer}). -define(MAX_PERMISSION_CACHE_SIZE, 12). 
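%% Sketch (not part of the patch): intended use of the protocol-aware
%% ensure_content_encoded/2 above. Content is assumed to have been built
%% elsewhere, e.g. by rabbit_basic:build_content/2.
Encoded = rabbit_binary_generator:ensure_content_encoded(
            Content, rabbit_framing_amqp_0_9_1),
%% Encoded now carries a properties_bin encoded for (and a protocol field
%% naming) rabbit_framing_amqp_0_9_1; a content already encoded for that
%% protocol is returned unchanged, and one encoded for a different
%% protocol is decoded and re-encoded.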
--define(FLOW_OK_TIMEOUT, 10000). %% 10 seconds --define(INFO_KEYS, +-define(STATISTICS_KEYS, [pid, - connection, - number, - user, - vhost, transactional, consumer_count, messages_unacknowledged, acks_uncommitted, prefetch_count]). +-define(CREATION_EVENT_KEYS, + [pid, + connection, + number, + user, + vhost]). + +-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). + %%---------------------------------------------------------------------------- -ifdef(use_specs). -export_type([channel_number/0]). --type(ref() :: any()). -type(channel_number() :: non_neg_integer()). --spec(start_link/6 :: +-spec(start_link/7 :: (channel_number(), pid(), pid(), rabbit_access_control:username(), - rabbit_types:vhost(), pid()) -> pid()). + rabbit_types:vhost(), pid(), + fun ((non_neg_integer()) -> rabbit_types:ok(pid()))) -> + rabbit_types:ok_pid_or_error()). -spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). -spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(), rabbit_types:maybe(rabbit_types:content())) -> 'ok'). @@ -87,25 +89,23 @@ -spec(deliver/4 :: (pid(), rabbit_types:ctag(), boolean(), rabbit_amqqueue:qmsg()) -> 'ok'). --spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). -spec(flushed/2 :: (pid(), pid()) -> 'ok'). --spec(flow_timeout/2 :: (pid(), ref()) -> 'ok'). -spec(list/0 :: () -> [pid()]). -spec(info_keys/0 :: () -> [rabbit_types:info_key()]). -spec(info/1 :: (pid()) -> [rabbit_types:info()]). -spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]). -spec(info_all/0 :: () -> [[rabbit_types:info()]]). -spec(info_all/1 :: ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]). +-spec(emit_stats/1 :: (pid()) -> 'ok'). -endif. %%---------------------------------------------------------------------------- -start_link(Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid) -> - {ok, Pid} = gen_server2:start_link( - ?MODULE, [Channel, ReaderPid, WriterPid, - Username, VHost, CollectorPid], []), - Pid. +start_link(Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid, + StartLimiterFun) -> + gen_server2:start_link(?MODULE, [Channel, ReaderPid, WriterPid, Username, + VHost, CollectorPid, StartLimiterFun], []). do(Pid, Method) -> do(Pid, Method, none). @@ -122,15 +122,9 @@ send_command(Pid, Msg) -> deliver(Pid, ConsumerTag, AckRequired, Msg) -> gen_server2:cast(Pid, {deliver, ConsumerTag, AckRequired, Msg}). -conserve_memory(Pid, Conserve) -> - gen_server2:pcast(Pid, 8, {conserve_memory, Conserve}). - flushed(Pid, QPid) -> gen_server2:cast(Pid, {flushed, QPid}). -flow_timeout(Pid, Ref) -> - gen_server2:pcast(Pid, 7, {flow_timeout, Ref}). - list() -> pg_local:get_members(rabbit_channels). @@ -151,31 +145,40 @@ info_all() -> info_all(Items) -> rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list()). +emit_stats(Pid) -> + gen_server2:pcast(Pid, 7, emit_stats). + +flush(Pid) -> + gen_server2:call(Pid, flush). 
+ %%--------------------------------------------------------------------------- -init([Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid]) -> +init([Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid, + StartLimiterFun]) -> process_flag(trap_exit, true), - link(WriterPid), ok = pg_local:join(rabbit_channels, self()), - {ok, #ch{state = starting, - channel = Channel, - reader_pid = ReaderPid, - writer_pid = WriterPid, - limiter_pid = undefined, - transaction_id = none, - tx_participants = sets:new(), - next_tag = 1, - uncommitted_ack_q = queue:new(), - unacked_message_q = queue:new(), - username = Username, - virtual_host = VHost, - most_recently_declared_queue = <<>>, - consumer_mapping = dict:new(), - blocking = dict:new(), - queue_collector_pid = CollectorPid, - flow = #flow{server = true, client = true, - pending = none}}, - hibernate, + State = #ch{state = starting, + channel = Channel, + reader_pid = ReaderPid, + writer_pid = WriterPid, + limiter_pid = undefined, + start_limiter_fun = StartLimiterFun, + transaction_id = none, + tx_participants = sets:new(), + next_tag = 1, + uncommitted_ack_q = queue:new(), + unacked_message_q = queue:new(), + username = Username, + virtual_host = VHost, + most_recently_declared_queue = <<>>, + consumer_mapping = dict:new(), + blocking = dict:new(), + queue_collector_pid = CollectorPid, + stats_timer = rabbit_event:init_stats_timer()}, + rabbit_event:notify( + channel_created, + [{Item, i(Item, State)} || Item <- ?CREATION_EVENT_KEYS]), + {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call(info, _From, State) -> @@ -187,6 +190,9 @@ handle_call({info, Items}, _From, State) -> catch Error -> reply({error, Error}, State) end; +handle_call(flush, _From, State) -> + reply(ok, State); + handle_call(_Request, _From, State) -> noreply(State). @@ -225,40 +231,25 @@ handle_cast({deliver, ConsumerTag, AckRequired, Msg}, next_tag = DeliveryTag}) -> State1 = lock_message(AckRequired, {DeliveryTag, ConsumerTag, Msg}, State), ok = internal_deliver(WriterPid, true, ConsumerTag, DeliveryTag, Msg), + {_QName, QPid, _MsgId, _Redelivered, _Msg} = Msg, + maybe_incr_stats([{QPid, 1}], + case AckRequired of + true -> deliver; + false -> deliver_no_ack + end, State), noreply(State1#ch{next_tag = DeliveryTag + 1}); -handle_cast({conserve_memory, true}, State = #ch{state = starting}) -> - noreply(State); -handle_cast({conserve_memory, false}, State = #ch{state = starting}) -> - ok = rabbit_writer:send_command(State#ch.writer_pid, #'channel.open_ok'{}), - noreply(State#ch{state = running}); -handle_cast({conserve_memory, Conserve}, State = #ch{state = running}) -> - flow_control(not Conserve, State); -handle_cast({conserve_memory, _Conserve}, State) -> - noreply(State); - -handle_cast({flow_timeout, Ref}, - State = #ch{flow = #flow{client = Flow, pending = {Ref, _TRef}}}) -> - {stop, normal, terminating( - rabbit_misc:amqp_error( - precondition_failed, - "timeout waiting for channel.flow_ok{active=~w}", - [not Flow], none), State)}; -handle_cast({flow_timeout, _Ref}, State) -> +handle_cast(emit_stats, State) -> + internal_emit_stats(State), {noreply, State}. -handle_info({'EXIT', WriterPid, Reason = {writer, send_failed, _Error}}, - State = #ch{writer_pid = WriterPid}) -> - State#ch.reader_pid ! 
{channel_exit, State#ch.channel, Reason}, - {stop, normal, State}; -handle_info({'EXIT', _Pid, Reason}, State) -> - {stop, Reason, State}; handle_info({'DOWN', _MRef, process, QPid, _Reason}, State) -> + erase_queue_stats(QPid), {noreply, queue_blocked(QPid, State)}. handle_pre_hibernate(State) -> ok = clear_permission_cache(), - {hibernate, State}. + {hibernate, stop_stats_timer(State)}. terminate(_Reason, State = #ch{state = terminating}) -> terminate(State); @@ -266,8 +257,10 @@ terminate(_Reason, State = #ch{state = terminating}) -> terminate(Reason, State) -> Res = rollback_and_notify(State), case Reason of - normal -> ok = Res; - _ -> ok + normal -> ok = Res; + shutdown -> ok = Res; + {shutdown, _Term} -> ok = Res; + _ -> ok end, terminate(State). @@ -276,9 +269,23 @@ code_change(_OldVsn, State, _Extra) -> %%--------------------------------------------------------------------------- -reply(Reply, NewState) -> {reply, Reply, NewState, hibernate}. +reply(Reply, NewState) -> + {reply, Reply, ensure_stats_timer(NewState), hibernate}. + +noreply(NewState) -> + {noreply, ensure_stats_timer(NewState), hibernate}. -noreply(NewState) -> {noreply, NewState, hibernate}. +ensure_stats_timer(State = #ch{stats_timer = StatsTimer}) -> + ChPid = self(), + State#ch{stats_timer = rabbit_event:ensure_stats_timer( + StatsTimer, + fun() -> internal_emit_stats(State) end, + fun() -> emit_stats(ChPid) end)}. + +stop_stats_timer(State = #ch{stats_timer = StatsTimer}) -> + State#ch{stats_timer = rabbit_event:stop_stats_timer( + StatsTimer, + fun() -> internal_emit_stats(State) end)}. return_ok(State, true, _Msg) -> {noreply, State}; return_ok(State, false, Msg) -> {reply, Msg, State}. @@ -385,10 +392,7 @@ queue_blocked(QPid, State = #ch{blocking = Blocking}) -> end. handle_method(#'channel.open'{}, _, State = #ch{state = starting}) -> - case rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}) of - true -> {noreply, State}; - false -> {reply, #'channel.open_ok'{}, State#ch{state = running}} - end; + {reply, #'channel.open_ok'{}, State#ch{state = running}}; handle_method(#'channel.open'{}, _, _State) -> rabbit_misc:protocol_error( @@ -399,16 +403,12 @@ handle_method(_Method, _, #ch{state = starting}) -> handle_method(#'channel.close'{}, _, State = #ch{writer_pid = WriterPid}) -> ok = rollback_and_notify(State), - ok = rabbit_writer:send_command(WriterPid, #'channel.close_ok'{}), + ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}), stop; handle_method(#'access.request'{},_, State) -> {reply, #'access.request_ok'{ticket = 1}, State}; -handle_method(#'basic.publish'{}, _, #ch{flow = #flow{client = false}}) -> - rabbit_misc:protocol_error( - command_invalid, - "basic.publish received after channel.flow_ok{active=false}", []); handle_method(#'basic.publish'{exchange = ExchangeNameBin, routing_key = RoutingKey, mandatory = Mandatory, @@ -437,6 +437,9 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, unroutable -> ok = basic_return(Message, WriterPid, no_route); not_delivered -> ok = basic_return(Message, WriterPid, no_consumers) end, + maybe_incr_stats([{ExchangeName, 1} | + [{{QPid, ExchangeName}, 1} || + QPid <- DeliveredQPids]], publish, State), {noreply, case TxnKey of none -> State; _ -> add_tx_participants(DeliveredQPids, State) @@ -447,7 +450,9 @@ handle_method(#'basic.ack'{delivery_tag = DeliveryTag, _, State = #ch{transaction_id = TxnKey, unacked_message_q = UAMQ}) -> {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple), - Participants = ack(TxnKey, 
Acked), + QIncs = ack(TxnKey, Acked), + Participants = [QPid || {QPid, _} <- QIncs], + maybe_incr_stats(QIncs, ack, State), {noreply, case TxnKey of none -> ok = notify_limiter(State#ch.limiter_pid, Acked), State#ch{unacked_message_q = Remaining}; @@ -470,11 +475,16 @@ handle_method(#'basic.get'{queue = QueueNameBin, QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:basic_get(Q, self(), NoAck) end) of {ok, MessageCount, - Msg = {_QName, _QPid, _MsgId, Redelivered, + Msg = {_QName, QPid, _MsgId, Redelivered, #basic_message{exchange_name = ExchangeName, routing_key = RoutingKey, content = Content}}} -> State1 = lock_message(not(NoAck), {DeliveryTag, none, Msg}, State), + maybe_incr_stats([{QPid, 1}], + case NoAck of + true -> get_no_ack; + false -> get + end, State), ok = rabbit_writer:send_command( WriterPid, #'basic.get_ok'{delivery_tag = DeliveryTag, @@ -638,6 +648,17 @@ handle_method(#'basic.recover'{requeue = Requeue}, Content, State) -> ok = rabbit_writer:send_command(WriterPid, #'basic.recover_ok'{}), {noreply, State2}; +handle_method(#'basic.reject'{delivery_tag = DeliveryTag, + requeue = Requeue}, + _, State = #ch{ unacked_message_q = UAMQ}) -> + {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, false), + ok = fold_per_queue( + fun (QPid, MsgIds, ok) -> + rabbit_amqqueue:reject(QPid, MsgIds, Requeue, self()) + end, ok, Acked), + ok = notify_limiter(State#ch.limiter_pid, Acked), + {noreply, State#ch{unacked_message_q = Remaining}}; + handle_method(#'exchange.declare'{exchange = ExchangeNameBin, type = TypeNameBin, passive = false, @@ -735,7 +756,8 @@ handle_method(#'queue.declare'{queue = QueueNameBin, %% the connection shuts down. ok = case Owner of none -> ok; - _ -> rabbit_queue_collector:register(CollectorPid, Q) + _ -> rabbit_queue_collector:register( + CollectorPid, Q) end, return_queue_declare_ok(QueueName, NoWait, 0, 0, State); {existing, _Q} -> @@ -853,48 +875,12 @@ handle_method(#'channel.flow'{active = false}, _, blocking = dict:from_list(Queues)}} end; -handle_method(#'channel.flow_ok'{active = Active}, _, - State = #ch{flow = #flow{server = Active, client = Flow, - pending = {_Ref, TRef}} = F}) - when Flow =:= not Active -> - {ok, cancel} = timer:cancel(TRef), - {noreply, State#ch{flow = F#flow{client = Active, pending = none}}}; -handle_method(#'channel.flow_ok'{active = Active}, _, - State = #ch{flow = #flow{server = Flow, client = Flow, - pending = {_Ref, TRef}}}) - when Flow =:= not Active -> - {ok, cancel} = timer:cancel(TRef), - {noreply, issue_flow(Flow, State)}; -handle_method(#'channel.flow_ok'{}, _, #ch{flow = #flow{pending = none}}) -> - rabbit_misc:protocol_error( - command_invalid, "unsolicited channel.flow_ok", []); -handle_method(#'channel.flow_ok'{active = Active}, _, _State) -> - rabbit_misc:protocol_error( - command_invalid, - "received channel.flow_ok{active=~w} has incorrect polarity", [Active]); - handle_method(_MethodRecord, _Content, _State) -> rabbit_misc:protocol_error( command_invalid, "unimplemented method", []). %%---------------------------------------------------------------------------- -flow_control(Active, State = #ch{flow = #flow{server = Flow, pending = none}}) - when Flow =:= not Active -> - ok = clear_permission_cache(), - noreply(issue_flow(Active, State)); -flow_control(Active, State = #ch{flow = F}) -> - noreply(State#ch{flow = F#flow{server = Active}}). 
- -issue_flow(Active, State) -> - ok = rabbit_writer:send_command( - State#ch.writer_pid, #'channel.flow'{active = Active}), - Ref = make_ref(), - {ok, TRef} = timer:apply_after(?FLOW_OK_TIMEOUT, ?MODULE, flow_timeout, - [self(), Ref]), - State#ch{flow = #flow{server = Active, client = not Active, - pending = {Ref, TRef}}}. - binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, ReturnMethod, NoWait, State = #ch{virtual_host = VHostPath, @@ -938,7 +924,7 @@ basic_return(#basic_message{exchange_name = ExchangeName, content = Content}, WriterPid, Reason) -> {_Close, ReplyCode, ReplyText} = - rabbit_framing:lookup_amqp_exception(Reason), + rabbit_framing_amqp_0_9_1:lookup_amqp_exception(Reason), ok = rabbit_writer:send_command( WriterPid, #'basic.return'{reply_code = ReplyCode, @@ -967,7 +953,7 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) -> end; {empty, _} -> rabbit_misc:protocol_error( - not_found, "unknown delivery tag ~w", [DeliveryTag]) + precondition_failed, "unknown delivery tag ~w", [DeliveryTag]) end. add_tx_participants(MoreP, State = #ch{tx_participants = Participants}) -> @@ -978,7 +964,7 @@ ack(TxnKey, UAQ) -> fold_per_queue( fun (QPid, MsgIds, L) -> ok = rabbit_amqqueue:ack(QPid, TxnKey, MsgIds, self()), - [QPid | L] + [{QPid, length(MsgIds)} | L] end, [], UAQ). make_tx_id() -> rabbit_guid:guid(). @@ -1030,8 +1016,8 @@ fold_per_queue(F, Acc0, UAQ) -> dict:fold(fun (QPid, MsgIds, Acc) -> F(QPid, MsgIds, Acc) end, Acc0, D). -start_limiter(State = #ch{unacked_message_q = UAMQ}) -> - LPid = rabbit_limiter:start_link(self(), queue:len(UAMQ)), +start_limiter(State = #ch{unacked_message_q = UAMQ, start_limiter_fun = SLF}) -> + {ok, LPid} = SLF(queue:len(UAMQ)), ok = limit_queues(LPid, State), LPid. @@ -1103,10 +1089,9 @@ internal_deliver(WriterPid, Notify, ConsumerTag, DeliveryTag, false -> rabbit_writer:send_command(WriterPid, M, Content) end. -terminate(#ch{writer_pid = WriterPid, limiter_pid = LimiterPid}) -> +terminate(_State) -> pg_local:leave(rabbit_channels, self()), - rabbit_writer:shutdown(WriterPid), - rabbit_limiter:shutdown(LimiterPid). + rabbit_event:notify(channel_closed, [{pid, self()}]). infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. @@ -1127,3 +1112,60 @@ i(prefetch_count, #ch{limiter_pid = LimiterPid}) -> rabbit_limiter:get_limit(LimiterPid); i(Item, _) -> throw({bad_argument, Item}). + +maybe_incr_stats(QXIncs, Measure, #ch{stats_timer = StatsTimer}) -> + case rabbit_event:stats_level(StatsTimer) of + fine -> [incr_stats(QX, Inc, Measure) || {QX, Inc} <- QXIncs]; + _ -> ok + end. + +incr_stats({QPid, _} = QX, Inc, Measure) -> + maybe_monitor(QPid), + update_measures(queue_exchange_stats, QX, Inc, Measure); +incr_stats(QPid, Inc, Measure) when is_pid(QPid) -> + maybe_monitor(QPid), + update_measures(queue_stats, QPid, Inc, Measure); +incr_stats(X, Inc, Measure) -> + update_measures(exchange_stats, X, Inc, Measure). + +maybe_monitor(QPid) -> + case get({monitoring, QPid}) of + undefined -> erlang:monitor(process, QPid), + put({monitoring, QPid}, true); + _ -> ok + end. + +update_measures(Type, QX, Inc, Measure) -> + Measures = case get({Type, QX}) of + undefined -> []; + D -> D + end, + Cur = case orddict:find(Measure, Measures) of + error -> 0; + {ok, C} -> C + end, + put({Type, QX}, + orddict:store(Measure, Cur + Inc, Measures)). 
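%% The accumulation pattern used by update_measures/4 above, shown in
%% isolation (the 'ack' measure and the increments are invented):
QPid = self(),
put({queue_stats, QPid}, orddict:store(ack, 3, orddict:new())),
{ok, 3} = orddict:find(ack, get({queue_stats, QPid})),
%% a further increment of 2 via update_measures would leave {ok, 5} here.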
+ +internal_emit_stats(State = #ch{stats_timer = StatsTimer}) -> + CoarseStats = [{Item, i(Item, State)} || Item <- ?STATISTICS_KEYS], + case rabbit_event:stats_level(StatsTimer) of + coarse -> + rabbit_event:notify(channel_stats, CoarseStats); + fine -> + FineStats = + [{channel_queue_stats, + [{QPid, Stats} || {{queue_stats, QPid}, Stats} <- get()]}, + {channel_exchange_stats, + [{X, Stats} || {{exchange_stats, X}, Stats} <- get()]}, + {channel_queue_exchange_stats, + [{QX, Stats} || + {{queue_exchange_stats, QX}, Stats} <- get()]}], + rabbit_event:notify(channel_stats, CoarseStats ++ FineStats) + end. + +erase_queue_stats(QPid) -> + erase({monitoring, QPid}), + erase({queue_stats, QPid}), + [erase({queue_exchange_stats, QX}) || + {{queue_exchange_stats, QX = {QPid0, _}}, _} <- get(), QPid =:= QPid0]. diff --git a/src/rabbit_channel_sup.erl b/src/rabbit_channel_sup.erl new file mode 100644 index 0000000000..02199a6516 --- /dev/null +++ b/src/rabbit_channel_sup.erl @@ -0,0 +1,96 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_channel_sup). + +-behaviour(supervisor2). + +-export([start_link/1]). + +-export([init/1]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([start_link_args/0]). + +-type(start_link_args() :: + {rabbit_types:protocol(), rabbit_net:socket(), + rabbit_channel:channel_number(), non_neg_integer(), pid(), + rabbit_access_control:username(), rabbit_types:vhost(), pid()}). + +-spec(start_link/1 :: (start_link_args()) -> {'ok', pid(), pid()}). + +-endif. 
+ +%%---------------------------------------------------------------------------- + +start_link({Protocol, Sock, Channel, FrameMax, ReaderPid, Username, VHost, + Collector}) -> + {ok, SupPid} = supervisor2:start_link(?MODULE, []), + {ok, WriterPid} = + supervisor2:start_child( + SupPid, + {writer, {rabbit_writer, start_link, + [Sock, Channel, FrameMax, Protocol, ReaderPid]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_writer]}), + {ok, ChannelPid} = + supervisor2:start_child( + SupPid, + {channel, {rabbit_channel, start_link, + [Channel, ReaderPid, WriterPid, Username, VHost, + Collector, start_limiter_fun(SupPid)]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), + {ok, FramingChannelPid} = + supervisor2:start_child( + SupPid, + {framing_channel, {rabbit_framing_channel, start_link, + [ReaderPid, ChannelPid, Protocol]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_framing_channel]}), + {ok, SupPid, FramingChannelPid}. + +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, {{one_for_all, 0, 1}, []}}. + +start_limiter_fun(SupPid) -> + fun (UnackedCount) -> + Me = self(), + {ok, _Pid} = + supervisor2:start_child( + SupPid, + {limiter, {rabbit_limiter, start_link, [Me, UnackedCount]}, + transient, ?MAX_WAIT, worker, [rabbit_limiter]}) + end. diff --git a/src/rabbit_hooks.erl b/src/rabbit_channel_sup_sup.erl index 3fc84c1e09..d193880555 100644 --- a/src/rabbit_hooks.erl +++ b/src/rabbit_channel_sup_sup.erl @@ -29,45 +29,37 @@ %% Contributor(s): ______________________________________. %% --module(rabbit_hooks). +-module(rabbit_channel_sup_sup). --export([start/0]). --export([subscribe/3, unsubscribe/2, trigger/2, notify_remote/5]). +-behaviour(supervisor2). --define(TableName, rabbit_hooks). +-export([start_link/0, start_channel/2]). + +-export([init/1]). + +%%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(start/0 :: () -> 'ok'). --spec(subscribe/3 :: (atom(), atom(), {atom(), atom(), list()}) -> 'ok'). --spec(unsubscribe/2 :: (atom(), atom()) -> 'ok'). --spec(trigger/2 :: (atom(), list()) -> 'ok'). --spec(notify_remote/5 :: (atom(), atom(), list(), pid(), list()) -> 'ok'). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). +-spec(start_channel/2 :: (pid(), rabbit_channel_sup:start_link_args()) -> + {'ok', pid(), pid()}). -endif. -start() -> - ets:new(?TableName, [bag, public, named_table]), - ok. +%%---------------------------------------------------------------------------- -subscribe(Hook, HandlerName, Handler) -> - ets:insert(?TableName, {Hook, HandlerName, Handler}), - ok. +start_link() -> + supervisor2:start_link(?MODULE, []). -unsubscribe(Hook, HandlerName) -> - ets:match_delete(?TableName, {Hook, HandlerName, '_'}), - ok. +start_channel(Pid, Args) -> + {ok, ChSupPid, _} = Result = supervisor2:start_child(Pid, [Args]), + link(ChSupPid), + Result. -trigger(Hook, Args) -> - Hooks = ets:lookup(?TableName, Hook), - [case catch apply(M, F, [Hook, Name, Args | A]) of - {'EXIT', Reason} -> - rabbit_log:warning("Failed to execute handler ~p for hook ~p: ~p", - [Name, Hook, Reason]); - _ -> ok - end || {_, Name, {M, F, A}} <- Hooks], - ok. +%%---------------------------------------------------------------------------- -notify_remote(Hook, HandlerName, Args, Pid, PidArgs) -> - Pid ! {rabbitmq_hook, [Hook, HandlerName, Args | PidArgs]}, - ok. 
+init([]) -> + {ok, {{simple_one_for_one_terminate, 0, 1}, + [{channel_sup, {rabbit_channel_sup, start_link, []}, + temporary, infinity, supervisor, [rabbit_channel_sup]}]}}. diff --git a/src/rabbit_connection_sup.erl b/src/rabbit_connection_sup.erl new file mode 100644 index 0000000000..69e21d73cc --- /dev/null +++ b/src/rabbit_connection_sup.erl @@ -0,0 +1,99 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_connection_sup). + +-behaviour(supervisor2). + +-export([start_link/0, reader/1]). + +-export([init/1]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid(), pid()}). +-spec(reader/1 :: (pid()) -> pid()). + +-endif. + +%%-------------------------------------------------------------------------- + +start_link() -> + {ok, SupPid} = supervisor2:start_link(?MODULE, []), + {ok, ChannelSupSupPid} = + supervisor2:start_child( + SupPid, + {channel_sup_sup, {rabbit_channel_sup_sup, start_link, []}, + intrinsic, infinity, supervisor, [rabbit_channel_sup_sup]}), + {ok, Collector} = + supervisor2:start_child( + SupPid, + {collector, {rabbit_queue_collector, start_link, []}, + intrinsic, ?MAX_WAIT, worker, [rabbit_queue_collector]}), + {ok, ReaderPid} = + supervisor2:start_child( + SupPid, + {reader, {rabbit_reader, start_link, + [ChannelSupSupPid, Collector, start_heartbeat_fun(SupPid)]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_reader]}), + {ok, SupPid, ReaderPid}. + +reader(Pid) -> + hd(supervisor2:find_child(Pid, reader)). + +%%-------------------------------------------------------------------------- + +init([]) -> + {ok, {{one_for_all, 0, 1}, []}}. + +start_heartbeat_fun(SupPid) -> + fun (_Sock, 0) -> + none; + (Sock, TimeoutSec) -> + Parent = self(), + {ok, Sender} = + supervisor2:start_child( + SupPid, {heartbeat_sender, + {rabbit_heartbeat, start_heartbeat_sender, + [Parent, Sock, TimeoutSec]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_heartbeat]}), + {ok, Receiver} = + supervisor2:start_child( + SupPid, {heartbeat_receiver, + {rabbit_heartbeat, start_heartbeat_receiver, + [Parent, Sock, TimeoutSec]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_heartbeat]}), + {Sender, Receiver} + end. 
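%% Sketch (not part of the patch): how the reader is expected to apply
%% the closure returned by start_heartbeat_fun/1 above; SupPid and Sock
%% are placeholders.
SHF = start_heartbeat_fun(SupPid),
none = SHF(Sock, 0),                  %% heartbeats disabled
{_Sender, _Receiver} = SHF(Sock, 60). %% 60 second heartbeat interval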
diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 6e6ad06cb3..f0b623c2dc 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -32,20 +32,25 @@ -module(rabbit_control). -include("rabbit.hrl"). --export([start/0, stop/0, action/4]). - --record(params, {quiet, node, command, args}). +-export([start/0, stop/0, action/5]). -define(RPC_TIMEOUT, infinity). +-define(QUIET_OPT, "-q"). +-define(NODE_OPT, "-n"). +-define(VHOST_OPT, "-p"). +-define(SCOPE_OPT, "-s"). + %%---------------------------------------------------------------------------- -ifdef(use_specs). -spec(start/0 :: () -> no_return()). -spec(stop/0 :: () -> 'ok'). --spec(action/4 :: (atom(), node(), [string()], - fun ((string(), [any()]) -> 'ok')) -> 'ok'). +-spec(action/5 :: + (atom(), node(), [string()], [{string(), any()}], + fun ((string(), [any()]) -> 'ok')) + -> 'ok'). -spec(usage/0 :: () -> no_return()). -endif. @@ -55,18 +60,33 @@ start() -> {ok, [[NodeStr|_]|_]} = init:get_argument(nodename), FullCommand = init:get_plain_arguments(), - #params{quiet = Quiet, node = Node, command = Command, args = Args} = - parse_args(FullCommand, #params{quiet = false, - node = rabbit_misc:makenode(NodeStr)}), + case FullCommand of + [] -> usage(); + _ -> ok + end, + {[Command0 | Args], Opts} = + rabbit_misc:get_options( + [{flag, ?QUIET_OPT}, {option, ?NODE_OPT, NodeStr}, + {option, ?VHOST_OPT, "/"}, {option, ?SCOPE_OPT, "client"}], + FullCommand), + Opts1 = lists:map(fun({K, V}) -> + case K of + ?NODE_OPT -> {?NODE_OPT, rabbit_misc:makenode(V)}; + _ -> {K, V} + end + end, Opts), + Command = list_to_atom(Command0), + Quiet = proplists:get_bool(?QUIET_OPT, Opts1), + Node = proplists:get_value(?NODE_OPT, Opts1), Inform = case Quiet of true -> fun (_Format, _Args1) -> ok end; false -> fun (Format, Args1) -> io:format(Format ++ " ...~n", Args1) - end + end end, %% The reason we don't use a try/catch here is that rpc:call turns %% thrown errors into normal return values - case catch action(Command, Node, Args, Inform) of + case catch action(Command, Node, Args, Opts, Inform) of ok -> case Quiet of true -> ok; @@ -118,15 +138,6 @@ print_badrpc_diagnostics(Node) -> fmt_stderr("- current node cookie hash: ~s", [rabbit_misc:cookie_hash()]), ok. -parse_args(["-n", NodeS | Args], Params) -> - parse_args(Args, Params#params{node = rabbit_misc:makenode(NodeS)}); -parse_args(["-q" | Args], Params) -> - parse_args(Args, Params#params{quiet = true}); -parse_args([Command | Args], Params) -> - Params#params{command = list_to_atom(Command), args = Args}; -parse_args([], _) -> - usage(). - stop() -> ok. @@ -134,39 +145,39 @@ usage() -> io:format("~s", [rabbit_ctl_usage:usage()]), halt(1). 
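%% Rough illustration (not part of the patch) of what the option parsing
%% above appears to yield for a hypothetical invocation; defaults are
%% filled in for options not given on the command line:
%%
%%   rabbitmqctl -p /test -s all set_permissions guest "^x-.*" ".*" ".*"
%%
%% would give, roughly,
%%
%%   {["set_permissions", "guest", "^x-.*", ".*", ".*"],
%%    [{"-n", NodeStr}, {"-p", "/test"}, {"-s", "all"}]}
%%
%% so Command0 = "set_permissions" and the -p/-s values reach the action
%% clauses via the Opts proplist.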
-action(stop, Node, [], Inform) -> +action(stop, Node, [], _Opts, Inform) -> Inform("Stopping and halting node ~p", [Node]), call(Node, {rabbit, stop_and_halt, []}); -action(stop_app, Node, [], Inform) -> +action(stop_app, Node, [], _Opts, Inform) -> Inform("Stopping node ~p", [Node]), call(Node, {rabbit, stop, []}); -action(start_app, Node, [], Inform) -> +action(start_app, Node, [], _Opts, Inform) -> Inform("Starting node ~p", [Node]), call(Node, {rabbit, start, []}); -action(reset, Node, [], Inform) -> +action(reset, Node, [], _Opts, Inform) -> Inform("Resetting node ~p", [Node]), call(Node, {rabbit_mnesia, reset, []}); -action(force_reset, Node, [], Inform) -> +action(force_reset, Node, [], _Opts, Inform) -> Inform("Forcefully resetting node ~p", [Node]), call(Node, {rabbit_mnesia, force_reset, []}); -action(cluster, Node, ClusterNodeSs, Inform) -> +action(cluster, Node, ClusterNodeSs, _Opts, Inform) -> ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), Inform("Clustering node ~p with ~p", [Node, ClusterNodes]), rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); -action(force_cluster, Node, ClusterNodeSs, Inform) -> +action(force_cluster, Node, ClusterNodeSs, _Opts, Inform) -> ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", [Node, ClusterNodes]), rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); -action(status, Node, [], Inform) -> +action(status, Node, [], _Opts, Inform) -> Inform("Status of node ~p", [Node]), case call(Node, {rabbit, status, []}) of {badrpc, _} = Res -> Res; @@ -174,129 +185,117 @@ action(status, Node, [], Inform) -> ok end; -action(rotate_logs, Node, [], Inform) -> +action(rotate_logs, Node, [], _Opts, Inform) -> Inform("Reopening logs for node ~p", [Node]), call(Node, {rabbit, rotate_logs, [""]}); -action(rotate_logs, Node, Args = [Suffix], Inform) -> +action(rotate_logs, Node, Args = [Suffix], _Opts, Inform) -> Inform("Rotating logs to files with suffix ~p", [Suffix]), call(Node, {rabbit, rotate_logs, Args}); -action(close_connection, Node, [PidStr, Explanation], Inform) -> +action(close_connection, Node, [PidStr, Explanation], _Opts, Inform) -> Inform("Closing connection ~s", [PidStr]), rpc_call(Node, rabbit_networking, close_connection, [rabbit_misc:string_to_pid(PidStr), Explanation]); -action(add_user, Node, Args = [Username, _Password], Inform) -> +action(add_user, Node, Args = [Username, _Password], _Opts, Inform) -> Inform("Creating user ~p", [Username]), call(Node, {rabbit_access_control, add_user, Args}); -action(delete_user, Node, Args = [_Username], Inform) -> +action(delete_user, Node, Args = [_Username], _Opts, Inform) -> Inform("Deleting user ~p", Args), call(Node, {rabbit_access_control, delete_user, Args}); -action(change_password, Node, Args = [Username, _Newpassword], Inform) -> +action(change_password, Node, Args = [Username, _Newpassword], _Opts, Inform) -> Inform("Changing password for user ~p", [Username]), call(Node, {rabbit_access_control, change_password, Args}); -action(list_users, Node, [], Inform) -> +action(list_users, Node, [], _Opts, Inform) -> Inform("Listing users", []), display_list(call(Node, {rabbit_access_control, list_users, []})); -action(add_vhost, Node, Args = [_VHostPath], Inform) -> +action(add_vhost, Node, Args = [_VHostPath], _Opts, Inform) -> Inform("Creating vhost ~p", Args), call(Node, {rabbit_access_control, add_vhost, Args}); -action(delete_vhost, Node, Args = [_VHostPath], Inform) -> 
+action(delete_vhost, Node, Args = [_VHostPath], _Opts, Inform) -> Inform("Deleting vhost ~p", Args), call(Node, {rabbit_access_control, delete_vhost, Args}); -action(list_vhosts, Node, [], Inform) -> +action(list_vhosts, Node, [], _Opts, Inform) -> Inform("Listing vhosts", []), display_list(call(Node, {rabbit_access_control, list_vhosts, []})); -action(list_user_permissions, Node, Args = [_Username], Inform) -> +action(list_user_permissions, Node, Args = [_Username], _Opts, Inform) -> Inform("Listing permissions for user ~p", Args), display_list(call(Node, {rabbit_access_control, list_user_permissions, Args})); -action(list_queues, Node, Args, Inform) -> +action(list_queues, Node, Args, Opts, Inform) -> Inform("Listing queues", []), - {VHostArg, RemainingArgs} = parse_vhost_flag_bin(Args), - ArgAtoms = default_if_empty(RemainingArgs, [name, messages]), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + ArgAtoms = default_if_empty(Args, [name, messages]), display_info_list(rpc_call(Node, rabbit_amqqueue, info_all, [VHostArg, ArgAtoms]), ArgAtoms); -action(list_exchanges, Node, Args, Inform) -> +action(list_exchanges, Node, Args, Opts, Inform) -> Inform("Listing exchanges", []), - {VHostArg, RemainingArgs} = parse_vhost_flag_bin(Args), - ArgAtoms = default_if_empty(RemainingArgs, [name, type]), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + ArgAtoms = default_if_empty(Args, [name, type]), display_info_list(rpc_call(Node, rabbit_exchange, info_all, [VHostArg, ArgAtoms]), ArgAtoms); -action(list_bindings, Node, Args, Inform) -> +action(list_bindings, Node, _Args, Opts, Inform) -> Inform("Listing bindings", []), - {VHostArg, _} = parse_vhost_flag_bin(Args), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), InfoKeys = [exchange_name, queue_name, routing_key, args], display_info_list( [lists:zip(InfoKeys, tuple_to_list(X)) || X <- rpc_call(Node, rabbit_exchange, list_bindings, [VHostArg])], InfoKeys); -action(list_connections, Node, Args, Inform) -> +action(list_connections, Node, Args, _Opts, Inform) -> Inform("Listing connections", []), ArgAtoms = default_if_empty(Args, [user, peer_address, peer_port, state]), display_info_list(rpc_call(Node, rabbit_networking, connection_info_all, [ArgAtoms]), ArgAtoms); -action(list_channels, Node, Args, Inform) -> +action(list_channels, Node, Args, _Opts, Inform) -> Inform("Listing channels", []), ArgAtoms = default_if_empty(Args, [pid, user, transactional, consumer_count, messages_unacknowledged]), display_info_list(rpc_call(Node, rabbit_channel, info_all, [ArgAtoms]), ArgAtoms); -action(list_consumers, Node, Args, Inform) -> +action(list_consumers, Node, _Args, Opts, Inform) -> Inform("Listing consumers", []), - {VHostArg, _} = parse_vhost_flag_bin(Args), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), InfoKeys = [queue_name, channel_pid, consumer_tag, ack_required], display_info_list( [lists:zip(InfoKeys, tuple_to_list(X)) || X <- rpc_call(Node, rabbit_amqqueue, consumers_all, [VHostArg])], InfoKeys); -action(Command, Node, Args, Inform) -> - {VHost, RemainingArgs} = parse_vhost_flag(Args), - action(Command, Node, VHost, RemainingArgs, Inform). 
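Putting this together, a command line such as the one in the comment below reaches the code as an action/5 call in which the vhost and scope travel in the options list rather than in the positional arguments; the set_permissions clause that follows picks them out with proplists:get_value/2 (user, vhost and permission patterns are illustrative, remaining default options elided):

%% rabbitmqctl set_permissions -p /myvhost guest ".*" ".*" ".*"
action(set_permissions, Node,
       ["guest", ".*", ".*", ".*"],
       [{"-p", "/myvhost"}, {"-s", "client"}],
       Inform)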
- -action(set_permissions, Node, VHost, [Username, CPerm, WPerm, RPerm], Inform) -> +action(set_permissions, Node, [Username, CPerm, WPerm, RPerm], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), + Scope = proplists:get_value(?SCOPE_OPT, Opts), Inform("Setting permissions for user ~p in vhost ~p", [Username, VHost]), call(Node, {rabbit_access_control, set_permissions, - [Username, VHost, CPerm, WPerm, RPerm]}); + [Scope, Username, VHost, CPerm, WPerm, RPerm]}); -action(clear_permissions, Node, VHost, [Username], Inform) -> +action(clear_permissions, Node, [Username], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), Inform("Clearing permissions for user ~p in vhost ~p", [Username, VHost]), call(Node, {rabbit_access_control, clear_permissions, [Username, VHost]}); -action(list_permissions, Node, VHost, [], Inform) -> +action(list_permissions, Node, [], Opts, Inform) -> + VHost = proplists:get_value(?VHOST_OPT, Opts), Inform("Listing permissions in vhost ~p", [VHost]), display_list(call(Node, {rabbit_access_control, list_vhost_permissions, [VHost]})). -parse_vhost_flag(Args) when is_list(Args) -> - case Args of - ["-p", VHost | RemainingArgs] -> - {VHost, RemainingArgs}; - RemainingArgs -> - {"/", RemainingArgs} - end. - -parse_vhost_flag_bin(Args) -> - {VHost, RemainingArgs} = parse_vhost_flag(Args), - {list_to_binary(VHost), RemainingArgs}. - default_if_empty(List, Default) when is_list(List) -> if List == [] -> Default; @@ -357,6 +356,8 @@ rpc_call(Node, Mod, Fun, Args) -> %% characters. We don't escape characters above 127, since they may %% form part of UTF-8 strings. +escape(Atom) when is_atom(Atom) -> + escape(atom_to_list(Atom)); escape(Bin) when is_binary(Bin) -> escape(binary_to_list(Bin)); escape(L) when is_list(L) -> diff --git a/src/rabbit_dialyzer.erl b/src/rabbit_dialyzer.erl index 0ec6beb676..51bd6b1f93 100644 --- a/src/rabbit_dialyzer.erl +++ b/src/rabbit_dialyzer.erl @@ -56,7 +56,7 @@ create_basic_plt(BasicPltPath) -> ok. add_to_plt(PltPath, FilesString) -> - {ok, Files} = regexp:split(FilesString, " "), + Files = string:tokens(FilesString, " "), DialyzerWarnings = dialyzer:run([{analysis_type, plt_add}, {init_plt, PltPath}, {output_plt, PltPath}, @@ -65,7 +65,7 @@ add_to_plt(PltPath, FilesString) -> ok. dialyze_files(PltPath, ModifiedFiles) -> - {ok, Files} = regexp:split(ModifiedFiles, " "), + Files = string:tokens(ModifiedFiles, " "), DialyzerWarnings = dialyzer:run([{init_plt, PltPath}, {files, Files}]), case DialyzerWarnings of diff --git a/src/rabbit_event.erl b/src/rabbit_event.erl new file mode 100644 index 0000000000..0f00537a6a --- /dev/null +++ b/src/rabbit_event.erl @@ -0,0 +1,138 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_event). + +-include("rabbit.hrl"). + +-export([start_link/0]). +-export([init_stats_timer/0, ensure_stats_timer/3, stop_stats_timer/2]). +-export([ensure_stats_timer_after/2, reset_stats_timer_after/1]). +-export([stats_level/1]). +-export([notify/2]). + +%%---------------------------------------------------------------------------- + +-record(state, {level, timer}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([event_type/0, event_props/0, event_timestamp/0, event/0]). + +-type(event_type() :: atom()). +-type(event_props() :: term()). +-type(event_timestamp() :: + {non_neg_integer(), non_neg_integer(), non_neg_integer()}). + +-type(event() :: #event { + type :: event_type(), + props :: event_props(), + timestamp :: event_timestamp() + }). + +-type(level() :: 'none' | 'coarse' | 'fine'). + +-opaque(state() :: #state { + level :: level(), + timer :: atom() + }). + +-type(timer_fun() :: fun (() -> 'ok')). + +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). +-spec(init_stats_timer/0 :: () -> state()). +-spec(ensure_stats_timer/3 :: (state(), timer_fun(), timer_fun()) -> state()). +-spec(stop_stats_timer/2 :: (state(), timer_fun()) -> state()). +-spec(ensure_stats_timer_after/2 :: (state(), timer_fun()) -> state()). +-spec(reset_stats_timer_after/1 :: (state()) -> state()). +-spec(stats_level/1 :: (state()) -> level()). +-spec(notify/2 :: (event_type(), event_props()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_event:start_link({local, ?MODULE}). + +init_stats_timer() -> + {ok, StatsLevel} = application:get_env(rabbit, collect_statistics), + #state{level = StatsLevel, timer = undefined}. + +ensure_stats_timer(State = #state{level = none}, _NowFun, _TimerFun) -> + State; +ensure_stats_timer(State = #state{timer = undefined}, NowFun, TimerFun) -> + NowFun(), + {ok, TRef} = timer:apply_interval(?STATS_INTERVAL, + erlang, apply, [TimerFun, []]), + State#state{timer = TRef}; +ensure_stats_timer(State, _NowFun, _TimerFun) -> + State. + +stop_stats_timer(State = #state{level = none}, _NowFun) -> + State; +stop_stats_timer(State = #state{timer = undefined}, _NowFun) -> + State; +stop_stats_timer(State = #state{timer = TRef}, NowFun) -> + {ok, cancel} = timer:cancel(TRef), + NowFun(), + State#state{timer = undefined}. + +ensure_stats_timer_after(State = #state{level = none}, _TimerFun) -> + State; +ensure_stats_timer_after(State = #state{timer = undefined}, TimerFun) -> + {ok, TRef} = timer:apply_after(?STATS_INTERVAL, + erlang, apply, [TimerFun, []]), + State#state{timer = TRef}; +ensure_stats_timer_after(State, _TimerFun) -> + State. + +reset_stats_timer_after(State) -> + State#state{timer = undefined}. + +stats_level(#state{level = Level}) -> + Level. 
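A minimal sketch of how a stats-emitting process might drive this timer API, assuming it lives in a module of its own; the event type and properties are made up for illustration, and ?STATS_INTERVAL comes from rabbit.hrl:

stats_timer_example() ->
    EmitStats = fun () -> rabbit_event:notify(my_stats, [{pid, self()}]) end,
    State0 = rabbit_event:init_stats_timer(),
    %% calls EmitStats immediately and then every ?STATS_INTERVAL ms,
    %% unless statistics collection is configured as 'none'
    State1 = rabbit_event:ensure_stats_timer(State0, EmitStats, EmitStats),
    %% cancels the interval timer and emits one final snapshot
    _State2 = rabbit_event:stop_stats_timer(State1, EmitStats),
    ok.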
+ +notify(Type, Props) -> + try + %% TODO: switch to os:timestamp() when we drop support for + %% Erlang/OTP < R13B01 + gen_event:notify(rabbit_event, #event{type = Type, + props = Props, + timestamp = now()}) + catch error:badarg -> + %% badarg means rabbit_event is no longer registered. We never + %% unregister it so the great likelihood is that we're shutting + %% down the broker but some events were backed up. Ignore it. + ok + end. diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl index d91ebe9ba9..af4eb1bd79 100644 --- a/src/rabbit_exchange.erl +++ b/src/rabbit_exchange.erl @@ -49,7 +49,6 @@ -import(mnesia). -import(sets). -import(lists). --import(regexp). %%---------------------------------------------------------------------------- @@ -72,17 +71,21 @@ -spec(declare/5 :: (name(), type(), boolean(), boolean(), rabbit_framing:amqp_table()) -> rabbit_types:exchange()). --spec(check_type/1 :: (binary()) -> atom()). +-spec(check_type/1 :: + (binary()) -> atom() | rabbit_types:connection_exit()). -spec(assert_equivalence/5 :: (rabbit_types:exchange(), atom(), boolean(), boolean(), rabbit_framing:amqp_table()) - -> 'ok'). + -> 'ok' | rabbit_types:connection_exit()). -spec(assert_args_equivalence/2 :: - (rabbit_types:exchange(), rabbit_framing:amqp_table()) -> 'ok'). + (rabbit_types:exchange(), rabbit_framing:amqp_table()) + -> 'ok' | rabbit_types:connection_exit()). -spec(lookup/1 :: (name()) -> rabbit_types:ok(rabbit_types:exchange()) | rabbit_types:error('not_found')). --spec(lookup_or_die/1 :: (name()) -> rabbit_types:exchange()). +-spec(lookup_or_die/1 :: + (name()) -> rabbit_types:exchange() | + rabbit_types:channel_exit()). -spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:exchange()]). -spec(info_keys/0 :: () -> [rabbit_types:info_key()]). -spec(info/1 :: (rabbit_types:exchange()) -> [rabbit_types:info()]). @@ -96,8 +99,7 @@ -> {rabbit_router:routing_result(), [pid()]}). -spec(add_binding/5 :: (name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), - rabbit_framing:amqp_table(), inner_fun()) - -> bind_res()). + rabbit_framing:amqp_table(), inner_fun()) -> bind_res()). -spec(delete_binding/5 :: (name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), rabbit_framing:amqp_table(), inner_fun()) @@ -107,9 +109,9 @@ -> [{name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), rabbit_framing:amqp_table()}]). -spec(delete_queue_bindings/1 :: - (rabbit_amqqueue:name()) -> fun (() -> none())). + (rabbit_amqqueue:name()) -> fun (() -> any())). -spec(delete_transient_queue_bindings/1 :: - (rabbit_amqqueue:name()) -> fun (() -> none())). + (rabbit_amqqueue:name()) -> fun (() -> any())). -spec(delete/2 :: (name(), boolean())-> 'ok' | rabbit_types:error('not_found') | @@ -190,6 +192,9 @@ declare(ExchangeName, Type, Durable, AutoDelete, Args) -> end end) of {new, X} -> TypeModule:create(X), + rabbit_event:notify( + exchange_created, + [{Item, i(Item, Exchange)} || Item <- ?INFO_KEYS]), X; {existing, X} -> X; Err -> Err @@ -197,12 +202,8 @@ declare(ExchangeName, Type, Durable, AutoDelete, Args) -> %% Used with atoms from records; e.g., the type is expected to exist. type_to_module(T) -> - case rabbit_exchange_type_registry:lookup_module(T) of - {ok, Module} -> Module; - {error, not_found} -> rabbit_misc:protocol_error( - command_invalid, - "invalid exchange type '~s'", [T]) - end. + {ok, Module} = rabbit_exchange_type_registry:lookup_module(T), + Module. %% Used with binaries sent over the wire; the type may not exist. 
check_type(TypeBin) -> @@ -211,16 +212,19 @@ check_type(TypeBin) -> rabbit_misc:protocol_error( command_invalid, "unknown exchange type '~s'", [TypeBin]); T -> - _Module = type_to_module(T), - T + case rabbit_exchange_type_registry:lookup_module(T) of + {error, not_found} -> rabbit_misc:protocol_error( + command_invalid, + "invalid exchange type '~s'", [T]); + {ok, _Module} -> T + end end. assert_equivalence(X = #exchange{ durable = Durable, auto_delete = AutoDelete, type = Type}, - Type, Durable, AutoDelete, - RequiredArgs) -> - ok = (type_to_module(Type)):assert_args_equivalence(X, RequiredArgs); + Type, Durable, AutoDelete, RequiredArgs) -> + (type_to_module(Type)):assert_args_equivalence(X, RequiredArgs); assert_equivalence(#exchange{ name = Name }, _Type, _Durable, _AutoDelete, _Args) -> rabbit_misc:protocol_error( @@ -228,23 +232,14 @@ assert_equivalence(#exchange{ name = Name }, _Type, _Durable, _AutoDelete, "cannot redeclare ~s with different type, durable or autodelete value", [rabbit_misc:rs(Name)]). -alternate_exchange_value(Args) -> - lists:keysearch(<<"alternate-exchange">>, 1, Args). - assert_args_equivalence(#exchange{ name = Name, arguments = Args }, RequiredArgs) -> %% The spec says "Arguments are compared for semantic %% equivalence". The only arg we care about is %% "alternate-exchange". - Ae1 = alternate_exchange_value(RequiredArgs), - Ae2 = alternate_exchange_value(Args), - if Ae1==Ae2 -> ok; - true -> rabbit_misc:protocol_error( - not_allowed, - "cannot redeclare ~s with inequivalent args", - [rabbit_misc:rs(Name)]) - end. + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, Name, + [<<"alternate-exchange">>]). lookup(Name) -> rabbit_misc:dirty_read({rabbit_exchange, Name}). @@ -388,7 +383,6 @@ cleanup_deleted_queue_bindings1(ExchangeName, Bindings) -> [X] = mnesia:read({rabbit_exchange, ExchangeName}), {maybe_auto_delete(X), Bindings}. - delete_forward_routes(Route) -> ok = mnesia:delete_object(rabbit_route, Route, write), ok = mnesia:delete_object(rabbit_durable_route, Route, write). @@ -437,6 +431,12 @@ add_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) -> X#exchange.durable andalso Q#amqqueue.durable, fun mnesia:write/3), + rabbit_event:notify( + binding_created, + [{exchange_name, ExchangeName}, + {queue_name, QueueName}, + {routing_key, RoutingKey}, + {arguments, Arguments}]), {new, X, B}; [_R] -> {existing, X, B} @@ -469,6 +469,10 @@ delete_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) -> X#exchange.durable andalso Q#amqqueue.durable, fun mnesia:delete_object/3), + rabbit_event:notify( + binding_deleted, + [{exchange_name, ExchangeName}, + {queue_name, QueueName}]), {maybe_auto_delete(X), B}; {error, _} = E -> E @@ -587,6 +591,7 @@ unconditional_delete(Exchange = #exchange{name = ExchangeName}) -> Bindings = delete_exchange_bindings(ExchangeName), ok = mnesia:delete({rabbit_durable_exchange, ExchangeName}), ok = mnesia:delete({rabbit_exchange, ExchangeName}), + rabbit_event:notify(exchange_deleted, [{name, ExchangeName}]), {deleted, Exchange, Bindings}. %%---------------------------------------------------------------------------- diff --git a/src/rabbit_exchange_type_registry.erl b/src/rabbit_exchange_type_registry.erl index 7906fbee72..f15275b538 100644 --- a/src/rabbit_exchange_type_registry.erl +++ b/src/rabbit_exchange_type_registry.erl @@ -45,8 +45,7 @@ -ifdef(use_specs). --spec(start_link/0 :: - () -> 'ignore' | rabbit_types:ok_or_error2(pid(), term())). 
+-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(register/2 :: (binary(), atom()) -> 'ok'). -spec(binary_to_type/1 :: (binary()) -> atom() | rabbit_types:error('not_found')). diff --git a/src/rabbit_exchange_type_topic.erl b/src/rabbit_exchange_type_topic.erl index a374cfee7f..e796acf327 100644 --- a/src/rabbit_exchange_type_topic.erl +++ b/src/rabbit_exchange_type_topic.erl @@ -67,8 +67,7 @@ publish(#exchange{name = Name}, Delivery = Delivery). split_topic_key(Key) -> - {ok, KeySplit} = regexp:split(binary_to_list(Key), "\\."), - KeySplit. + string:tokens(binary_to_list(Key), "."). topic_matches(PatternKey, RoutingKey) -> P = split_topic_key(PatternKey), diff --git a/src/rabbit_framing_channel.erl b/src/rabbit_framing_channel.erl index bc1a2a0835..cb53185f6b 100644 --- a/src/rabbit_framing_channel.erl +++ b/src/rabbit_framing_channel.erl @@ -32,21 +32,16 @@ -module(rabbit_framing_channel). -include("rabbit.hrl"). --export([start_link/2, process/2, shutdown/1]). +-export([start_link/3, process/2, shutdown/1]). %% internal --export([mainloop/1]). +-export([mainloop/3]). %%-------------------------------------------------------------------- -start_link(StartFun, StartArgs) -> - spawn_link( - fun () -> - %% we trap exits so that a normal termination of the - %% channel or reader process terminates us too. - process_flag(trap_exit, true), - mainloop(apply(StartFun, StartArgs)) - end). +start_link(Parent, ChannelPid, Protocol) -> + {ok, proc_lib:spawn_link( + fun () -> mainloop(Parent, ChannelPid, Protocol) end)}. process(Pid, Frame) -> Pid ! {frame, Frame}, @@ -60,53 +55,61 @@ shutdown(Pid) -> read_frame(ChannelPid) -> receive - %% converting the exit signal into one of our own ensures that - %% the reader sees the right pid (i.e. ours) when a channel - %% exits. Similarly in the other direction, though it is not - %% really relevant there since the channel is not specifically - %% watching out for reader exit signals. - {'EXIT', _Pid, Reason} -> exit(Reason); {frame, Frame} -> Frame; terminate -> rabbit_channel:shutdown(ChannelPid), read_frame(ChannelPid); Msg -> exit({unexpected_message, Msg}) end. -mainloop(ChannelPid) -> - {method, MethodName, FieldsBin} = read_frame(ChannelPid), - Method = rabbit_framing:decode_method_fields(MethodName, FieldsBin), - case rabbit_framing:method_has_content(MethodName) of - true -> {ClassId, _MethodId} = rabbit_framing:method_id(MethodName), - rabbit_channel:do(ChannelPid, Method, - collect_content(ChannelPid, ClassId)); - false -> rabbit_channel:do(ChannelPid, Method) - end, - ?MODULE:mainloop(ChannelPid). +mainloop(Parent, ChannelPid, Protocol) -> + case read_frame(ChannelPid) of + {method, MethodName, FieldsBin} -> + Method = Protocol:decode_method_fields(MethodName, FieldsBin), + case Protocol:method_has_content(MethodName) of + true -> {ClassId, _MethodId} = Protocol:method_id(MethodName), + case collect_content(ChannelPid, ClassId, Protocol) of + {ok, Content} -> + rabbit_channel:do(ChannelPid, Method, Content), + ?MODULE:mainloop(Parent, ChannelPid, Protocol); + {error, Reason} -> + channel_exit(Parent, Reason, MethodName) + end; + false -> rabbit_channel:do(ChannelPid, Method), + ?MODULE:mainloop(Parent, ChannelPid, Protocol) + end; + _ -> + channel_exit(Parent, {unexpected_frame, + "expected method frame, " + "got non method frame instead", + []}, none) + end. 
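For reference, the frame tuples that the rewritten mainloop/3 consumes are delivered through process/2; a basic.publish carrying a 12-byte body would be fed to the framing channel roughly as below, where FramingPid, FieldsBin and PropertiesBin are placeholders for the framing channel pid, the encoded method fields and the encoded properties, and 60 is the AMQP class id of the basic class:

rabbit_framing_channel:process(FramingPid, {method, 'basic.publish', FieldsBin}),
rabbit_framing_channel:process(FramingPid, {content_header, 60, 0, 12, PropertiesBin}),
rabbit_framing_channel:process(FramingPid, {content_body, <<"hello, world">>}),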
-collect_content(ChannelPid, ClassId) -> +collect_content(ChannelPid, ClassId, Protocol) -> case read_frame(ChannelPid) of {content_header, ClassId, 0, BodySize, PropertiesBin} -> - Payload = collect_content_payload(ChannelPid, BodySize, []), - #content{class_id = ClassId, - properties = none, - properties_bin = PropertiesBin, - payload_fragments_rev = Payload}; + case collect_content_payload(ChannelPid, BodySize, []) of + {ok, Payload} -> {ok, #content{ + class_id = ClassId, + properties = none, + properties_bin = PropertiesBin, + protocol = Protocol, + payload_fragments_rev = Payload}}; + Error -> Error + end; {content_header, HeaderClassId, 0, _BodySize, _PropertiesBin} -> - rabbit_misc:protocol_error( - command_invalid, - "expected content header for class ~w, " - "got one for class ~w instead", - [ClassId, HeaderClassId]); + {error, {unexpected_frame, + "expected content header for class ~w, " + "got one for class ~w instead", + [ClassId, HeaderClassId]}}; _ -> - rabbit_misc:protocol_error( - command_invalid, - "expected content header for class ~w, " - "got non content header frame instead", - [ClassId]) + {error, {unexpected_frame, + "expected content header for class ~w, " + "got non content header frame instead", + [ClassId]}} end. collect_content_payload(_ChannelPid, 0, Acc) -> - Acc; + {ok, Acc}; collect_content_payload(ChannelPid, RemainingByteCount, Acc) -> case read_frame(ChannelPid) of {content_body, FragmentBin} -> @@ -114,8 +117,13 @@ collect_content_payload(ChannelPid, RemainingByteCount, Acc) -> RemainingByteCount - size(FragmentBin), [FragmentBin | Acc]); _ -> - rabbit_misc:protocol_error( - command_invalid, - "expected content body, got non content body frame instead", - []) + {error, {unexpected_frame, + "expected content body, " + "got non content body frame instead", + []}} end. + +channel_exit(Parent, {ErrorName, ExplanationFormat, Params}, MethodName) -> + Reason = rabbit_misc:amqp_error(ErrorName, ExplanationFormat, Params, + MethodName), + Parent ! {channel_exit, self(), Reason}. diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index af1c629f41..e7d0c10177 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -52,7 +52,7 @@ -type(guid() :: binary()). --spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(guid/0 :: () -> guid()). -spec(string_guid/1 :: (any()) -> string()). -spec(binstring_guid/1 :: (any()) -> binary()). diff --git a/src/rabbit_heartbeat.erl b/src/rabbit_heartbeat.erl index 4556570567..a9945af1d4 100644 --- a/src/rabbit_heartbeat.erl +++ b/src/rabbit_heartbeat.erl @@ -31,70 +31,102 @@ -module(rabbit_heartbeat). --export([start_heartbeat/2]). +-export([start_heartbeat_sender/3, start_heartbeat_receiver/3, + pause_monitor/1, resume_monitor/1]). -start_heartbeat(_Sock, 0) -> - none; -start_heartbeat(Sock, TimeoutSec) -> - Parent = self(), - %% we check for incoming data every interval, and time out after - %% two checks with no change. As a result we will time out between - %% 2 and 3 intervals after the last data has been received. - spawn_link(fun () -> heartbeater(Sock, TimeoutSec * 1000, - recv_oct, 1, - fun () -> - Parent ! timeout, - stop - end, - erlang:monitor(process, Parent)) end), +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([heartbeaters/0]). + +-type(heartbeaters() :: rabbit_types:maybe({pid(), pid()})). 
+ +-spec(start_heartbeat_sender/3 :: + (pid(), rabbit_net:socket(), non_neg_integer()) -> + rabbit_types:ok(pid())). +-spec(start_heartbeat_receiver/3 :: + (pid(), rabbit_net:socket(), non_neg_integer()) -> + rabbit_types:ok(pid())). + +-spec(pause_monitor/1 :: (heartbeaters()) -> 'ok'). +-spec(resume_monitor/1 :: (heartbeaters()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_heartbeat_sender(_Parent, Sock, TimeoutSec) -> %% the 'div 2' is there so that we don't end up waiting for nearly %% 2 * TimeoutSec before sending a heartbeat in the boundary case %% where the last message was sent just after a heartbeat. - spawn_link(fun () -> heartbeater(Sock, TimeoutSec * 1000 div 2, - send_oct, 0, - fun () -> - catch rabbit_net:send(Sock, rabbit_binary_generator:build_heartbeat_frame()), - continue - end, - erlang:monitor(process, Parent)) end), + heartbeater( + {Sock, TimeoutSec * 1000 div 2, send_oct, 0, + fun () -> + catch rabbit_net:send( + Sock, rabbit_binary_generator:build_heartbeat_frame()), + continue + end}). + +start_heartbeat_receiver(Parent, Sock, TimeoutSec) -> + %% we check for incoming data every interval, and time out after + %% two checks with no change. As a result we will time out between + %% 2 and 3 intervals after the last data has been received. + heartbeater({Sock, TimeoutSec * 1000, recv_oct, 1, fun () -> + Parent ! timeout, + stop + end}). + +pause_monitor(none) -> + ok; +pause_monitor({_Sender, Receiver}) -> + Receiver ! pause, + ok. + +resume_monitor(none) -> + ok; +resume_monitor({_Sender, Receiver}) -> + Receiver ! resume, ok. -%% Y-combinator, posted by Vladimir Sekissov to the Erlang mailing list -%% http://www.erlang.org/ml-archive/erlang-questions/200301/msg00053.html -y(X) -> - F = fun (P) -> X(fun (A) -> (P(P))(A) end) end, - F(F). - -heartbeater(Sock, TimeoutMillisec, StatName, Threshold, Handler, MonitorRef) -> - Heartbeat = - fun (F) -> - fun ({StatVal, SameCount}) -> - receive - {'DOWN', MonitorRef, process, _Object, _Info} -> ok; - Other -> exit({unexpected_message, Other}) - after TimeoutMillisec -> - case rabbit_net:getstat(Sock, [StatName]) of - {ok, [{StatName, NewStatVal}]} -> - if NewStatVal =/= StatVal -> - F({NewStatVal, 0}); - SameCount < Threshold -> - F({NewStatVal, SameCount + 1}); - true -> - case Handler() of - stop -> ok; - continue -> F({NewStatVal, 0}) - end - end; - {error, einval} -> - %% the socket is dead, most - %% likely because the - %% connection is being shut - %% down -> terminate - ok; - {error, Reason} -> - exit({cannot_get_socket_stats, Reason}) - end - end - end - end, - (y(Heartbeat))({0, 0}). +%%---------------------------------------------------------------------------- + +heartbeater(Params) -> + {ok, proc_lib:spawn_link(fun () -> heartbeater(Params, {0, 0}) end)}. 
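A sketch of how a reader process might use the new heartbeat API; Sock stands for a connected socket and 10 is an illustrative heartbeat interval in seconds:

{ok, Sender}   = rabbit_heartbeat:start_heartbeat_sender(self(), Sock, 10),
{ok, Receiver} = rabbit_heartbeat:start_heartbeat_receiver(self(), Sock, 10),
Heartbeaters   = {Sender, Receiver},
%% temporarily stop watching for inbound traffic ...
ok = rabbit_heartbeat:pause_monitor(Heartbeaters),
%% ... and start watching again; the receiver sends 'timeout' to the
%% parent if no data arrives for roughly two to three intervals
ok = rabbit_heartbeat:resume_monitor(Heartbeaters),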
+ +heartbeater({Sock, TimeoutMillisec, StatName, Threshold, Handler} = Params, + {StatVal, SameCount}) -> + Recurse = fun (V) -> heartbeater(Params, V) end, + receive + pause -> + receive + resume -> + Recurse({0, 0}); + Other -> + exit({unexpected_message, Other}) + end; + Other -> + exit({unexpected_message, Other}) + after TimeoutMillisec -> + case rabbit_net:getstat(Sock, [StatName]) of + {ok, [{StatName, NewStatVal}]} -> + if NewStatVal =/= StatVal -> + Recurse({NewStatVal, 0}); + SameCount < Threshold -> + Recurse({NewStatVal, SameCount + 1}); + true -> + case Handler() of + stop -> ok; + continue -> Recurse({NewStatVal, 0}) + end + end; + {error, einval} -> + %% the socket is dead, most likely because the + %% connection is being shut down -> terminate + ok; + {error, Reason} -> + exit({cannot_get_socket_stats, Reason}) + end + end. diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index 8214b976c4..4e0dad8422 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -34,10 +34,10 @@ -export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, needs_sync/1, sync/1, - handle_pre_hibernate/1, status/1]). + set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, + idle_timeout/1, handle_pre_hibernate/1, status/1]). --export([start/1]). +-export([start/1, stop/0]). -behaviour(rabbit_backing_queue). @@ -61,6 +61,9 @@ start(DurableQueues) -> ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]). +stop() -> + ok = rabbit_sup:stop_child(rabbit_persister). + init(QName, IsDurable, Recover) -> Q = queue:from_list(case IsDurable andalso Recover of true -> rabbit_persister:queue_content(QName); @@ -197,9 +200,9 @@ set_ram_duration_target(_DurationTarget, State) -> State. ram_duration(State) -> {0, State}. -needs_sync(_State) -> false. +needs_idle_timeout(_State) -> false. -sync(State) -> State. +idle_timeout(State) -> State. handle_pre_hibernate(State) -> State. diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 878af02976..da7078f1ba 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -35,7 +35,7 @@ -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). --export([start_link/2, shutdown/1]). +-export([start_link/2]). -export([limit/2, can_send/3, ack/2, register/2, unregister/2]). -export([get_limit/1, block/1, unblock/1]). @@ -45,8 +45,8 @@ -type(maybe_pid() :: pid() | 'undefined'). --spec(start_link/2 :: (pid(), non_neg_integer()) -> pid()). --spec(shutdown/1 :: (maybe_pid()) -> 'ok'). +-spec(start_link/2 :: (pid(), non_neg_integer()) -> + rabbit_types:ok_pid_or_error()). -spec(limit/2 :: (maybe_pid(), non_neg_integer()) -> 'ok' | 'stopped'). -spec(can_send/3 :: (maybe_pid(), pid(), boolean()) -> boolean()). -spec(ack/2 :: (maybe_pid(), non_neg_integer()) -> 'ok'). @@ -74,20 +74,12 @@ %%---------------------------------------------------------------------------- start_link(ChPid, UnackedMsgCount) -> - {ok, Pid} = gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []), - Pid. - -shutdown(undefined) -> - ok; -shutdown(LimiterPid) -> - true = unlink(LimiterPid), - gen_server2:cast(LimiterPid, shutdown). + gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []). 
limit(undefined, 0) -> ok; limit(LimiterPid, PrefetchCount) -> - unlink_on_stopped(LimiterPid, - gen_server2:call(LimiterPid, {limit, PrefetchCount})). + gen_server2:call(LimiterPid, {limit, PrefetchCount}). %% Ask the limiter whether the queue can deliver a message without %% breaching a limit @@ -125,8 +117,7 @@ block(LimiterPid) -> unblock(undefined) -> ok; unblock(LimiterPid) -> - unlink_on_stopped(LimiterPid, - gen_server2:call(LimiterPid, unblock, infinity)). + gen_server2:call(LimiterPid, unblock, infinity). %%---------------------------------------------------------------------------- %% gen_server callbacks @@ -165,9 +156,6 @@ handle_call(unblock, _From, State) -> {stop, State1} -> {stop, normal, stopped, State1} end. -handle_cast(shutdown, State) -> - {stop, normal, State}; - handle_cast({ack, Count}, State = #lim{volume = Volume}) -> NewVolume = if Volume == 0 -> 0; true -> Volume - Count @@ -247,9 +235,3 @@ notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) -> ok end, State#lim{queues = NewQueues}. - -unlink_on_stopped(LimiterPid, stopped) -> - ok = rabbit_misc:unlink_and_capture_exit(LimiterPid), - stopped; -unlink_on_stopped(_LimiterPid, Result) -> - Result. diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl index 85bcbca04a..863f77e7eb 100644 --- a/src/rabbit_log.erl +++ b/src/rabbit_log.erl @@ -50,7 +50,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(debug/1 :: (string()) -> 'ok'). -spec(debug/2 :: (string(), [any()]) -> 'ok'). -spec(info/1 :: (string()) -> 'ok'). diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index bdf3807531..f87b62713a 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -86,7 +86,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(update/0 :: () -> 'ok'). -spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok'). -spec(deregister/1 :: (pid()) -> 'ok'). diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index fcc9fc7e54..086d260e24 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -38,9 +38,9 @@ -export([method_record_type/1, polite_pause/0, polite_pause/1]). -export([die/1, frame_error/2, amqp_error/4, protocol_error/3, protocol_error/4, protocol_error/1]). --export([not_found/1]). --export([get_config/1, get_config/2, set_config/2]). +-export([not_found/1, assert_args_equivalence/4]). -export([dirty_read/1]). +-export([table_lookup/2]). -export([r/3, r/2, r_arg/4, rs/1]). -export([enable_cover/0, report_cover/0]). -export([enable_cover/1, report_cover/1]). @@ -61,7 +61,9 @@ -export([sort_field_table/1]). -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). --export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). +-export([recursive_delete/1, dict_cons/3, orddict_cons/3, + unlink_and_capture_exit/1]). +-export([get_options/2]). -import(mnesia). -import(lists). @@ -77,32 +79,39 @@ -type(ok_or_error() :: rabbit_types:ok_or_error(any())). -type(thunk(T) :: fun(() -> T)). -type(resource_name() :: binary()). +-type(optdef() :: {flag, string()} | {option, string(), any()}). +-type(channel_or_connection_exit() + :: rabbit_types:channel_exit() | rabbit_types:connection_exit()). 
-spec(method_record_type/1 :: (rabbit_framing:amqp_method_record()) -> rabbit_framing:amqp_method_name()). -spec(polite_pause/0 :: () -> 'done'). -spec(polite_pause/1 :: (non_neg_integer()) -> 'done'). --spec(die/1 :: (rabbit_framing:amqp_exception()) -> no_return()). +-spec(die/1 :: + (rabbit_framing:amqp_exception()) -> channel_or_connection_exit()). -spec(frame_error/2 :: (rabbit_framing:amqp_method_name(), binary()) - -> no_return()). + -> rabbit_types:connection_exit()). -spec(amqp_error/4 :: (rabbit_framing:amqp_exception(), string(), [any()], rabbit_framing:amqp_method_name()) -> rabbit_types:amqp_error()). -spec(protocol_error/3 :: (rabbit_framing:amqp_exception(), string(), [any()]) - -> no_return()). + -> channel_or_connection_exit()). -spec(protocol_error/4 :: (rabbit_framing:amqp_exception(), string(), [any()], - rabbit_framing:amqp_method_name()) - -> no_return()). --spec(protocol_error/1 :: (rabbit_types:amqp_error()) -> no_return()). --spec(not_found/1 :: (rabbit_types:r(atom())) -> no_return()). --spec(get_config/1 :: - (atom()) -> rabbit_types:ok_or_error2(any(), 'not_found')). --spec(get_config/2 :: (atom(), A) -> A). --spec(set_config/2 :: (atom(), any()) -> 'ok'). + rabbit_framing:amqp_method_name()) -> channel_or_connection_exit()). +-spec(protocol_error/1 :: + (rabbit_types:amqp_error()) -> channel_or_connection_exit()). +-spec(not_found/1 :: (rabbit_types:r(atom())) -> rabbit_types:channel_exit()). +-spec(assert_args_equivalence/4 :: (rabbit_framing:amqp_table(), + rabbit_framing:amqp_table(), + rabbit_types:r(any()), [binary()]) -> + 'ok' | rabbit_types:connection_exit()). -spec(dirty_read/1 :: ({atom(), any()}) -> rabbit_types:ok_or_error2(any(), 'not_found')). +-spec(table_lookup/2 :: + (rabbit_framing:amqp_table(), binary()) + -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}). -spec(r/2 :: (rabbit_types:vhost(), K) -> rabbit_types:r3(rabbit_types:vhost(), K, '_') when is_subtype(K, atom())). @@ -168,8 +177,13 @@ -spec(recursive_delete/1 :: ([file:filename()]) -> rabbit_types:ok_or_error({file:filename(), any()})). --spec(dict_cons/3 :: (any(), any(), dict:dictionary()) -> dict:dictionary()). +-spec(dict_cons/3 :: (any(), any(), dict:dictionary()) -> + dict:dictionary()). +-spec(orddict_cons/3 :: (any(), any(), orddict:dictionary()) -> + orddict:dictionary()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). +-spec(get_options/2 :: ([optdef()], [string()]) + -> {[string()], [{string(), any()}]}). -endif. @@ -207,27 +221,32 @@ protocol_error(#amqp_error{} = Error) -> not_found(R) -> protocol_error(not_found, "no ~s", [rs(R)]). -get_config(Key) -> - case dirty_read({rabbit_config, Key}) of - {ok, {rabbit_config, Key, V}} -> {ok, V}; - Other -> Other - end. +assert_args_equivalence(Orig, New, Name, Keys) -> + [assert_args_equivalence1(Orig, New, Name, Key) || Key <- Keys], + ok. -get_config(Key, DefaultValue) -> - case get_config(Key) of - {ok, V} -> V; - {error, not_found} -> DefaultValue +assert_args_equivalence1(Orig, New, Name, Key) -> + case {table_lookup(Orig, Key), table_lookup(New, Key)} of + {Same, Same} -> ok; + {Orig1, New1} -> protocol_error( + not_allowed, + "inequivalent arg '~s' for ~s: " + "required ~w, received ~w", + [Key, rabbit_misc:rs(Name), New1, Orig1]) end. -set_config(Key, Value) -> - ok = mnesia:dirty_write({rabbit_config, Key, Value}). - dirty_read(ReadSpec) -> case mnesia:dirty_read(ReadSpec) of [Result] -> {ok, Result}; [] -> {error, not_found} end. 
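To make the new helper concrete, a sketch of the two outcomes of assert_args_equivalence/4, matching its use from rabbit_exchange earlier in this diff; the resource name and argument values are illustrative:

XName = rabbit_misc:r(<<"/">>, exchange, <<"my-exchange">>),
Args  = [{<<"alternate-exchange">>, longstr, <<"my-ae">>}],
%% identical values for the checked key: returns ok
ok = rabbit_misc:assert_args_equivalence(Args, Args, XName,
                                         [<<"alternate-exchange">>]),
%% a differing (here missing) value: exits with a not_allowed protocol error
rabbit_misc:assert_args_equivalence(Args, [], XName,
                                    [<<"alternate-exchange">>]),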
+table_lookup(Table, Key) -> + case lists:keysearch(Key, 1, Table) of + {value, {_, TypeBin, ValueBin}} -> {TypeBin, ValueBin}; + false -> undefined + end. + r(#resource{virtual_host = VHostPath}, Kind, Name) when is_binary(Name) -> #resource{virtual_host = VHostPath, kind = Kind, name = Name}; @@ -240,9 +259,9 @@ r(VHostPath, Kind) when is_binary(VHostPath) -> r_arg(#resource{virtual_host = VHostPath}, Kind, Table, Key) -> r_arg(VHostPath, Kind, Table, Key); r_arg(VHostPath, Kind, Table, Key) -> - case lists:keysearch(Key, 1, Table) of - {value, {_, longstr, NameBin}} -> r(VHostPath, Kind, NameBin); - false -> undefined + case table_lookup(Table, Key) of + {longstr, NameBin} -> r(VHostPath, Kind, NameBin); + undefined -> undefined end. rs(#resource{virtual_host = VHostPath, kind = Kind, name = Name}) -> @@ -585,7 +604,7 @@ string_to_pid(Str) -> binary_to_term(<<131,103,NodeEnc/binary,Id:32,Ser:32,0:8>>); nomatch -> throw(Err) - end. + end. version_compare(A, B, lte) -> case version_compare(A, B) of @@ -661,8 +680,44 @@ recursive_delete1(Path) -> dict_cons(Key, Value, Dict) -> dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). +orddict_cons(Key, Value, Dict) -> + orddict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). + unlink_and_capture_exit(Pid) -> unlink(Pid), receive {'EXIT', Pid, _} -> ok after 0 -> ok end. + +% Separate flags and options from arguments. +% get_options([{flag, "-q"}, {option, "-p", "/"}], +% ["set_permissions","-p","/","guest", +% "-q",".*",".*",".*"]) +% == {["set_permissions","guest",".*",".*",".*"], +% [{"-q",true},{"-p","/"}]} +get_options(Defs, As) -> + lists:foldl(fun(Def, {AsIn, RsIn}) -> + {AsOut, Value} = case Def of + {flag, Key} -> + get_flag(Key, AsIn); + {option, Key, Default} -> + get_option(Key, Default, AsIn) + end, + {AsOut, [{Key, Value} | RsIn]} + end, {As, []}, Defs). + +get_option(K, _Default, [K, V | As]) -> + {As, V}; +get_option(K, Default, [Nk | As]) -> + {As1, V} = get_option(K, Default, As), + {[Nk | As1], V}; +get_option(_, Default, As) -> + {As, Default}. + +get_flag(K, [K | As]) -> + {As, true}; +get_flag(K, [Nk | As]) -> + {As1, V} = get_flag(K, As), + {[Nk | As1], V}; +get_flag(_, []) -> + {[], false}. diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index e2b6927f8c..a321488897 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -77,7 +77,7 @@ status() -> {disc, disc_copies}, {ram, ram_copies}], begin - Nodes = mnesia:table_info(schema, CopyType), + Nodes = nodes_of_type(CopyType), Nodes =/= [] end]; no -> case mnesia:system_info(db_nodes) of @@ -91,7 +91,6 @@ init() -> ok = ensure_mnesia_running(), ok = ensure_mnesia_dir(), ok = init_db(read_cluster_nodes_config(), true), - ok = wait_for_tables(), ok. is_db_empty() -> @@ -114,7 +113,6 @@ cluster(ClusterNodes, Force) -> rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), try ok = init_db(ClusterNodes, Force), - ok = wait_for_tables(), ok = create_cluster_nodes_config(ClusterNodes) after mnesia:stop() @@ -144,58 +142,96 @@ empty_ram_only_tables() -> %%-------------------------------------------------------------------- +nodes_of_type(Type) -> + %% This function should return the nodes of a certain type (ram, + %% disc or disc_only) in the current cluster. The type of nodes + %% is determined when the cluster is initially configured. + %% Specifically, we check whether a certain table, which we know + %% will be written to disk on a disc node, is stored on disk or in + %% RAM. 
+ mnesia:table_info(rabbit_durable_exchange, Type). + table_definitions() -> [{rabbit_user, [{record_name, user}, {attributes, record_info(fields, user)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #user{_='_'}}]}, {rabbit_user_permission, [{record_name, user_permission}, {attributes, record_info(fields, user_permission)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #user_permission{user_vhost = #user_vhost{_='_'}, + permission = #permission{_='_'}, + _='_'}}]}, {rabbit_vhost, [{record_name, vhost}, {attributes, record_info(fields, vhost)}, - {disc_copies, [node()]}]}, - {rabbit_config, - [{disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #vhost{_='_'}}]}, {rabbit_listener, [{record_name, listener}, {attributes, record_info(fields, listener)}, - {type, bag}]}, + {type, bag}, + {match, #listener{_='_'}}]}, {rabbit_durable_route, [{record_name, route}, {attributes, record_info(fields, route)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #route{binding = binding_match(), _='_'}}]}, {rabbit_route, [{record_name, route}, {attributes, record_info(fields, route)}, - {type, ordered_set}]}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, {rabbit_reverse_route, [{record_name, reverse_route}, {attributes, record_info(fields, reverse_route)}, - {type, ordered_set}]}, + {type, ordered_set}, + {match, #reverse_route{reverse_binding = reverse_binding_match(), + _='_'}}]}, + %% Consider the implications to nodes_of_type/1 before altering + %% the next entry. {rabbit_durable_exchange, [{record_name, exchange}, {attributes, record_info(fields, exchange)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, {rabbit_exchange, [{record_name, exchange}, - {attributes, record_info(fields, exchange)}]}, + {attributes, record_info(fields, exchange)}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, {rabbit_durable_queue, [{record_name, amqqueue}, {attributes, record_info(fields, amqqueue)}, - {disc_copies, [node()]}]}, + {disc_copies, [node()]}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}, {rabbit_queue, [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}]}]. + {attributes, record_info(fields, amqqueue)}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}]. + +binding_match() -> + #binding{queue_name = queue_name_match(), + exchange_name = exchange_name_match(), + _='_'}. +reverse_binding_match() -> + #reverse_binding{queue_name = queue_name_match(), + exchange_name = exchange_name_match(), + _='_'}. +exchange_name_match() -> + resource_match(exchange). +queue_name_match() -> + resource_match(queue). +resource_match(Kind) -> + #resource{kind = Kind, _='_'}. table_names() -> [Tab || {Tab, _} <- table_definitions()]. replicated_table_names() -> - [Tab || {Tab, Attrs} <- table_definitions(), - not lists:member({local_content, true}, Attrs) + [Tab || {Tab, TabDef} <- table_definitions(), + not lists:member({local_content, true}, TabDef) ]. dir() -> mnesia:system_info(directory). @@ -220,11 +256,53 @@ ensure_mnesia_not_running() -> yes -> throw({error, mnesia_unexpectedly_running}) end. +ensure_schema_integrity() -> + case check_schema_integrity() of + ok -> + ok; + {error, Reason} -> + throw({error, {schema_integrity_check_failed, Reason}}) + end. 
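The new {match, ...} entries in table_definitions/0 are plain match patterns; a sketch of how one of them is applied to stored rows, using the same mechanism as read_test_table/2 below (SomeQueueName is a placeholder key; the table and records are the ones used in this file):

Pattern   = #amqqueue{name = #resource{kind = queue, _ = '_'}, _ = '_'},
MatchComp = ets:match_spec_compile([{Pattern, [], ['$_']}]),
Rows      = mnesia:dirty_read(rabbit_durable_queue, SomeQueueName),
%% a well-formed row matches itself, so any shape mismatch shows up as
%% the output list differing from the input list
Rows      = ets:match_spec_run(Rows, MatchComp),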
+ check_schema_integrity() -> - %%TODO: more thorough checks - case catch [mnesia:table_info(Tab, version) || Tab <- table_names()] of - {'EXIT', Reason} -> {error, Reason}; - _ -> ok + Tables = mnesia:system_info(tables), + case [Error || {Tab, TabDef} <- table_definitions(), + case lists:member(Tab, Tables) of + false -> + Error = {table_missing, Tab}, + true; + true -> + {_, ExpAttrs} = proplists:lookup(attributes, TabDef), + Attrs = mnesia:table_info(Tab, attributes), + Error = {table_attributes_mismatch, Tab, + ExpAttrs, Attrs}, + Attrs /= ExpAttrs + end] of + [] -> check_table_integrity(); + Errors -> {error, Errors} + end. + +check_table_integrity() -> + ok = wait_for_tables(), + case lists:all(fun ({Tab, TabDef}) -> + {_, Match} = proplists:lookup(match, TabDef), + read_test_table(Tab, Match) + end, table_definitions()) of + true -> ok; + false -> {error, invalid_table_content} + end. + +read_test_table(Tab, Match) -> + case mnesia:dirty_first(Tab) of + '$end_of_table' -> + true; + Key -> + ObjList = mnesia:dirty_read(Tab, Key), + MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), + case ets:match_spec_run(ObjList, MatchComp) of + ObjList -> true; + _ -> false + end end. %% The cluster node config file contains some or all of the disk nodes @@ -253,20 +331,9 @@ read_cluster_nodes_config() -> case rabbit_misc:read_term_file(FileName) of {ok, [ClusterNodes]} -> ClusterNodes; {error, enoent} -> - case application:get_env(cluster_config) of + case application:get_env(cluster_nodes) of undefined -> []; - {ok, DefaultFileName} -> - case file:consult(DefaultFileName) of - {ok, [ClusterNodes]} -> ClusterNodes; - {error, enoent} -> - error_logger:warning_msg( - "default cluster config file ~p does not exist~n", - [DefaultFileName]), - []; - {error, Reason} -> - throw({error, {cannot_read_cluster_nodes_config, - DefaultFileName, Reason}}) - end + {ok, ClusterNodes} -> ClusterNodes end; {error, Reason} -> throw({error, {cannot_read_cluster_nodes_config, @@ -333,8 +400,9 @@ init_db(ClusterNodes, Force) -> ok = create_local_table_copies(case IsDiskNode of true -> disc; false -> ram - end) - end; + end), + ok = ensure_schema_integrity() + end; {error, Reason} -> %% one reason we may end up here is if we try to join %% nodes together that are currently running standalone or @@ -349,7 +417,9 @@ create_schema() -> cannot_create_schema), rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), - create_tables(). + ok = create_tables(), + ok = ensure_schema_integrity(), + ok = wait_for_tables(). move_db() -> mnesia:stop(), @@ -374,12 +444,13 @@ move_db() -> ok. create_tables() -> - lists:foreach(fun ({Tab, TabArgs}) -> - case mnesia:create_table(Tab, TabArgs) of + lists:foreach(fun ({Tab, TabDef}) -> + TabDef1 = proplists:delete(match, TabDef), + case mnesia:create_table(Tab, TabDef1) of {atomic, ok} -> ok; {aborted, Reason} -> throw({error, {table_creation_failed, - Tab, TabArgs, Reason}}) + Tab, TabDef1, Reason}}) end end, table_definitions()), @@ -434,17 +505,12 @@ wait_for_replicated_tables() -> wait_for_tables(replicated_table_names()). wait_for_tables() -> wait_for_tables(table_names()). 
wait_for_tables(TableNames) -> - case check_schema_integrity() of - ok -> - case mnesia:wait_for_tables(TableNames, 30000) of - ok -> ok; - {timeout, BadTabs} -> - throw({error, {timeout_waiting_for_tables, BadTabs}}); - {error, Reason} -> - throw({error, {failed_waiting_for_tables, Reason}}) - end; + case mnesia:wait_for_tables(TableNames, 30000) of + ok -> ok; + {timeout, BadTabs} -> + throw({error, {timeout_waiting_for_tables, BadTabs}}); {error, Reason} -> - throw({error, {schema_integrity_check_failed, Reason}}) + throw({error, {failed_waiting_for_tables, Reason}}) end. reset(Force) -> diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl new file mode 100644 index 0000000000..4f1784392e --- /dev/null +++ b/src/rabbit_msg_file.erl @@ -0,0 +1,136 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_file). + +-export([append/3, read/2, scan/2]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK_MARKER, 255). +-define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). +-define(GUID_SIZE_BYTES, 16). +-define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). +-define(SCAN_BLOCK_SIZE, 4194304). %% 4MB + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(io_device() :: any()). +-type(position() :: non_neg_integer()). +-type(msg_size() :: non_neg_integer()). +-type(file_size() :: non_neg_integer()). + +-spec(append/3 :: (io_device(), rabbit_guid:guid(), msg()) -> + rabbit_types:ok_or_error2(msg_size(), any())). +-spec(read/2 :: (io_device(), msg_size()) -> + rabbit_types:ok_or_error2({rabbit_guid:guid(), msg()}, + any())). +-spec(scan/2 :: (io_device(), file_size()) -> + {'ok', [{rabbit_guid:guid(), msg_size(), position()}], + position()}). + +-endif. 
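From the defines above and the append/3 implementation that follows, the on-disk layout of a single message entry can be read off; a worked example for a message whose term_to_binary form is 100 bytes:

%%   <<Size:64,                 Size = 100 + 16 (guid bytes)  = 116
%%     Guid:16/binary,
%%     MsgBodyBin:100/binary,
%%     255:8>>                  ?WRITE_OK_MARKER
%%
%% bytes on disk = Size + ?FILE_PACKING_ADJUSTMENT = 116 + 9 = 125,
%% which is the total that append/3 returns as {ok, TotalSize} and that
%% read/2 later expects to be handed back.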
+ +%%---------------------------------------------------------------------------- + +append(FileHdl, Guid, MsgBody) + when is_binary(Guid) andalso size(Guid) =:= ?GUID_SIZE_BYTES -> + MsgBodyBin = term_to_binary(MsgBody), + MsgBodyBinSize = size(MsgBodyBin), + Size = MsgBodyBinSize + ?GUID_SIZE_BYTES, + case file_handle_cache:append(FileHdl, + <<Size:?INTEGER_SIZE_BITS, + Guid:?GUID_SIZE_BYTES/binary, + MsgBodyBin:MsgBodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of + ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; + KO -> KO + end. + +read(FileHdl, TotalSize) -> + Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, + BodyBinSize = Size - ?GUID_SIZE_BYTES, + case file_handle_cache:read(FileHdl, TotalSize) of + {ok, <<Size:?INTEGER_SIZE_BITS, + Guid:?GUID_SIZE_BYTES/binary, + MsgBodyBin:BodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} -> + {ok, {Guid, binary_to_term(MsgBodyBin)}}; + KO -> KO + end. + +scan(FileHdl, FileSize) when FileSize >= 0 -> + scan(FileHdl, FileSize, <<>>, 0, [], 0). + +scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> + {ok, Acc, ScanOffset}; +scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> + Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), + case file_handle_cache:read(FileHdl, Read) of + {ok, Data1} -> + {Data2, Acc1, ScanOffset1} = + scan(<<Data/binary, Data1/binary>>, Acc, ScanOffset), + ReadOffset1 = ReadOffset + size(Data1), + scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); + _KO -> + {ok, Acc, ScanOffset} + end. + +scan(<<>>, Acc, Offset) -> + {<<>>, Acc, Offset}; +scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> + {<<>>, Acc, Offset}; %% Nothing to do other than stop. +scan(<<Size:?INTEGER_SIZE_BITS, GuidAndMsg:Size/binary, + WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Acc, Offset) -> + TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, + case WriteMarker of + ?WRITE_OK_MARKER -> + %% Here we take option 5 from + %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in + %% which we read the Guid as a number, and then convert it + %% back to a binary in order to work around bugs in + %% Erlang's GC. + <<GuidNum:?GUID_SIZE_BITS, _Msg/binary>> = + <<GuidAndMsg:Size/binary>>, + <<Guid:?GUID_SIZE_BYTES/binary>> = <<GuidNum:?GUID_SIZE_BITS>>, + scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); + _ -> + scan(Rest, Acc, Offset + TotalSize) + end; +scan(Data, Acc, Offset) -> + {Data, Acc, Offset}. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl new file mode 100644 index 0000000000..a9c7db76f0 --- /dev/null +++ b/src/rabbit_msg_store.erl @@ -0,0 +1,1668 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store). + +-behaviour(gen_server2). + +-export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, + sync/3, client_init/2, client_terminate/2, + client_delete_and_terminate/3, successfully_recovered_state/1]). + +-export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(SYNC_INTERVAL, 5). %% milliseconds +-define(CLEAN_FILENAME, "clean.dot"). +-define(FILE_SUMMARY_FILENAME, "file_summary.ets"). + +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read]). +-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). +-define(WRITE_MODE, [write]). + +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). + +-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + +%%---------------------------------------------------------------------------- + +-record(msstate, + { dir, %% store directory + index_module, %% the module for index ops + index_state, %% where are messages? + current_file, %% current file name as number + current_file_handle, %% current file handle since the last fsync? + file_handle_cache, %% file handle cache + on_sync, %% pending sync requests + sync_timer_ref, %% TRef for our interval timer + sum_valid_data, %% sum of valid data in all files + sum_file_size, %% sum of file sizes + pending_gc_completion, %% things to do once GC completes + gc_active, %% is the GC currently working? + gc_pid, %% pid of our GC + file_handles_ets, %% tid of the shared file handles table + file_summary_ets, %% tid of the file summary table + dedup_cache_ets, %% tid of dedup cache table + cur_file_cache_ets, %% tid of current file cache table + client_refs, %% set of references of all registered clients + successfully_recovered, %% boolean: did we recover state? + file_size_limit %% how big are our files allowed to get? + }). + +-record(client_msstate, + { file_handle_cache, + index_state, + index_module, + dir, + gc_pid, + file_handles_ets, + file_summary_ets, + dedup_cache_ets, + cur_file_cache_ets + }). + +-record(file_summary, + {file, valid_total_size, contiguous_top, left, right, file_size, + locked, readers}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(server() :: pid() | atom()). +-type(file_num() :: non_neg_integer()). +-type(client_msstate() :: #client_msstate { + file_handle_cache :: dict:dictionary(), + index_state :: any(), + index_module :: atom(), + dir :: file:filename(), + gc_pid :: pid(), + file_handles_ets :: ets:tid(), + file_summary_ets :: ets:tid(), + dedup_cache_ets :: ets:tid(), + cur_file_cache_ets :: ets:tid() }). 
+-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})), + A}). + +-spec(start_link/4 :: + (atom(), file:filename(), [binary()] | 'undefined', + startup_fun_state()) -> rabbit_types:ok_pid_or_error()). +-spec(write/4 :: (server(), rabbit_guid:guid(), msg(), client_msstate()) -> + rabbit_types:ok(client_msstate())). +-spec(read/3 :: (server(), rabbit_guid:guid(), client_msstate()) -> + {rabbit_types:ok(msg()) | 'not_found', client_msstate()}). +-spec(contains/2 :: (server(), rabbit_guid:guid()) -> boolean()). +-spec(remove/2 :: (server(), [rabbit_guid:guid()]) -> 'ok'). +-spec(release/2 :: (server(), [rabbit_guid:guid()]) -> 'ok'). +-spec(sync/3 :: (server(), [rabbit_guid:guid()], fun (() -> any())) -> 'ok'). +-spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> + 'ok'). +-spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). +-spec(client_init/2 :: (server(), binary()) -> client_msstate()). +-spec(client_terminate/2 :: (client_msstate(), server()) -> 'ok'). +-spec(client_delete_and_terminate/3 :: + (client_msstate(), server(), binary()) -> 'ok'). +-spec(successfully_recovered_state/1 :: (server()) -> boolean()). + +-spec(gc/3 :: (non_neg_integer(), non_neg_integer(), + {ets:tid(), file:filename(), atom(), any()}) -> + 'concurrent_readers' | non_neg_integer()). + +-endif. + +%%---------------------------------------------------------------------------- + +%% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION +%% It is not recommended to set this to < 0.5 +-define(GARBAGE_FRACTION, 0.5). + +%% The components: +%% +%% Index: this is a mapping from Guid to #msg_location{}: +%% {Guid, RefCount, File, Offset, TotalSize} +%% By default, it's in ets, but it's also pluggable. +%% FileSummary: this is an ets table which maps File to #file_summary{}: +%% {File, ValidTotalSize, ContiguousTop, Left, Right, +%% FileSize, Locked, Readers} +%% +%% The basic idea is that messages are appended to the current file up +%% until that file becomes too big (> file_size_limit). At that point, +%% the file is closed and a new file is created on the _right_ of the +%% old file which is used for new messages. Files are named +%% numerically ascending, thus the file with the lowest name is the +%% eldest file. +%% +%% We need to keep track of which messages are in which files (this is +%% the Index); how much useful data is in each file and which files +%% are on the left and right of each other. This is the purpose of the +%% FileSummary ets table. +%% +%% As messages are removed from files, holes appear in these +%% files. The field ValidTotalSize contains the total amount of useful +%% data left in the file, whilst ContiguousTop contains the amount of +%% valid data right at the start of each file. These are needed for +%% garbage collection. +%% +%% When we discover that a file is now empty, we delete it. When we +%% discover that it can be combined with the useful data in either its +%% left or right neighbour, and overall, across all the files, we have +%% ((the amount of garbage) / (the sum of all file sizes)) > +%% ?GARBAGE_FRACTION, we start a garbage collection run concurrently, +%% which will compact the two files together. This keeps disk +%% utilisation high and aids performance. We deliberately do this +%% lazily in order to prevent doing GC on files which are soon to be +%% emptied (and hence deleted) soon. +%% +%% Given the compaction between two files, the left file (i.e. 
elder +%% file) is considered the ultimate destination for the good data in +%% the right file. If necessary, the good data in the left file which +%% is fragmented throughout the file is written out to a temporary +%% file, then read back in to form a contiguous chunk of good data at +%% the start of the left file. Thus the left file is garbage collected +%% and compacted. Then the good data from the right file is copied +%% onto the end of the left file. Index and FileSummary tables are +%% updated. +%% +%% On non-clean startup, we scan the files we discover, dealing with +%% the possibilites of a crash having occured during a compaction +%% (this consists of tidyup - the compaction is deliberately designed +%% such that data is duplicated on disk rather than risking it being +%% lost), and rebuild the FileSummary ets table and Index. +%% +%% So, with this design, messages move to the left. Eventually, they +%% should end up in a contiguous block on the left and are then never +%% rewritten. But this isn't quite the case. If in a file there is one +%% message that is being ignored, for some reason, and messages in the +%% file to the right and in the current block are being read all the +%% time then it will repeatedly be the case that the good data from +%% both files can be combined and will be written out to a new +%% file. Whenever this happens, our shunned message will be rewritten. +%% +%% So, provided that we combine messages in the right order, +%% (i.e. left file, bottom to top, right file, bottom to top), +%% eventually our shunned message will end up at the bottom of the +%% left file. The compaction/combining algorithm is smart enough to +%% read in good data from the left file that is scattered throughout +%% (i.e. C and D in the below diagram), then truncate the file to just +%% above B (i.e. truncate to the limit of the good contiguous region +%% at the start of the file), then write C and D on top and then write +%% E, F and G from the right file on top. Thus contiguous blocks of +%% good data at the bottom of files are not rewritten (yes, this is +%% the data the size of which is tracked by the ContiguousTop +%% variable. Judicious use of a mirror is required). +%% +%% +-------+ +-------+ +-------+ +%% | X | | G | | G | +%% +-------+ +-------+ +-------+ +%% | D | | X | | F | +%% +-------+ +-------+ +-------+ +%% | X | | X | | E | +%% +-------+ +-------+ +-------+ +%% | C | | F | ===> | D | +%% +-------+ +-------+ +-------+ +%% | X | | X | | C | +%% +-------+ +-------+ +-------+ +%% | B | | X | | B | +%% +-------+ +-------+ +-------+ +%% | A | | E | | A | +%% +-------+ +-------+ +-------+ +%% left right left +%% +%% From this reasoning, we do have a bound on the number of times the +%% message is rewritten. From when it is inserted, there can be no +%% files inserted between it and the head of the queue, and the worst +%% case is that everytime it is rewritten, it moves one position lower +%% in the file (for it to stay at the same position requires that +%% there are no holes beneath it, which means truncate would be used +%% and so it would not be rewritten at all). Thus this seems to +%% suggest the limit is the number of messages ahead of it in the +%% queue, though it's likely that that's pessimistic, given the +%% requirements for compaction/combination of files. 
+%% +%% The other property is that we have is the bound on the lowest +%% utilisation, which should be 50% - worst case is that all files are +%% fractionally over half full and can't be combined (equivalent is +%% alternating full files and files with only one tiny message in +%% them). +%% +%% Messages are reference-counted. When a message with the same guid +%% is written several times we only store it once, and only remove it +%% from the store when it has been removed the same number of times. +%% +%% The reference counts do not persist. Therefore the initialisation +%% function must be provided with a generator that produces ref count +%% deltas for all recovered messages. This is only used on startup +%% when the shutdown was non-clean. +%% +%% Read messages with a reference count greater than one are entered +%% into a message cache. The purpose of the cache is not especially +%% performance, though it can help there too, but prevention of memory +%% explosion. It ensures that as messages with a high reference count +%% are read from several processes they are read back as the same +%% binary object rather than multiples of identical binary +%% objects. +%% +%% Reads can be performed directly by clients without calling to the +%% server. This is safe because multiple file handles can be used to +%% read files. However, locking is used by the concurrent GC to make +%% sure that reads are not attempted from files which are in the +%% process of being garbage collected. +%% +%% The server automatically defers reads, removes and contains calls +%% that occur which refer to files which are currently being +%% GC'd. Contains calls are only deferred in order to ensure they do +%% not overtake removes. +%% +%% The current file to which messages are being written has a +%% write-back cache. This is written to immediately by clients and can +%% be read from by clients too. This means that there are only ever +%% writes made to the current file, thus eliminating delays due to +%% flushing write buffers in order to be able to safely read from the +%% current file. The one exception to this is that on start up, the +%% cache is not populated with msgs found in the current file, and +%% thus in this case only, reads may have to come from the file +%% itself. The effect of this is that even if the msg_store process is +%% heavily overloaded, clients can still write and read messages with +%% very low latency and not block at all. +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. + +%%---------------------------------------------------------------------------- +%% public API +%%---------------------------------------------------------------------------- + +start_link(Server, Dir, ClientRefs, StartupFunState) -> + gen_server2:start_link({local, Server}, ?MODULE, + [Server, Dir, ClientRefs, StartupFunState], + [{timeout, infinity}]). + +write(Server, Guid, Msg, + CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) -> + ok = update_msg_cache(CurFileCacheEts, Guid, Msg), + {gen_server2:cast(Server, {write, Guid}), CState}. + +read(Server, Guid, + CState = #client_msstate { dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> + %% 1. Check the dedup cache + case fetch_and_increment_cache(DedupCacheEts, Guid) of + not_found -> + %% 2. 
Check the cur file cache + case ets:lookup(CurFileCacheEts, Guid) of + [] -> + Defer = fun() -> {gen_server2:pcall( + Server, 2, {read, Guid}, infinity), + CState} end, + case index_lookup(Guid, CState) of + not_found -> Defer(); + MsgLocation -> client_read1(Server, MsgLocation, Defer, + CState) + end; + [{Guid, Msg, _CacheRefCount}] -> + %% Although we've found it, we don't know the + %% refcount, so can't insert into dedup cache + {{ok, Msg}, CState} + end; + Msg -> + {{ok, Msg}, CState} + end. + +contains(Server, Guid) -> gen_server2:call(Server, {contains, Guid}, infinity). +remove(_Server, []) -> ok; +remove(Server, Guids) -> gen_server2:cast(Server, {remove, Guids}). +release(_Server, []) -> ok; +release(Server, Guids) -> gen_server2:cast(Server, {release, Guids}). +sync(Server, Guids, K) -> gen_server2:cast(Server, {sync, Guids, K}). +sync(Server) -> gen_server2:pcast(Server, 8, sync). %% internal + +gc_done(Server, Reclaimed, Source, Destination) -> + gen_server2:pcast(Server, 8, {gc_done, Reclaimed, Source, Destination}). + +set_maximum_since_use(Server, Age) -> + gen_server2:pcast(Server, 8, {set_maximum_since_use, Age}). + +client_init(Server, Ref) -> + {IState, IModule, Dir, GCPid, + FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} = + gen_server2:pcall(Server, 7, {new_client_state, Ref}, infinity), + #client_msstate { file_handle_cache = dict:new(), + index_state = IState, + index_module = IModule, + dir = Dir, + gc_pid = GCPid, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }. + +client_terminate(CState, Server) -> + close_all_handles(CState), + ok = gen_server2:call(Server, client_terminate, infinity). + +client_delete_and_terminate(CState, Server, Ref) -> + close_all_handles(CState), + ok = gen_server2:cast(Server, {client_delete, Ref}). + +successfully_recovered_state(Server) -> + gen_server2:pcall(Server, 7, successfully_recovered_state, infinity). + +%%---------------------------------------------------------------------------- +%% Client-side-only helpers +%%---------------------------------------------------------------------------- + +client_read1(Server, + #msg_location { guid = Guid, file = File } = MsgLocation, + Defer, + CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> + case ets:lookup(FileSummaryEts, File) of + [] -> %% File has been GC'd and no longer exists. Go around again. + read(Server, Guid, CState); + [#file_summary { locked = Locked, right = Right }] -> + client_read2(Server, Locked, Right, MsgLocation, Defer, CState) + end. + +client_read2(_Server, false, undefined, _MsgLocation, Defer, _CState) -> + %% Although we've already checked both caches and not found the + %% message there, the message is apparently in the + %% current_file. We can only arrive here if we are trying to read + %% a message which we have not written, which is very odd, so just + %% defer. + %% + %% OR, on startup, the cur_file_cache is not populated with the + %% contents of the current file, thus reads from the current file + %% will end up here and will need to be deferred. + Defer(); +client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) -> + %% Of course, in the mean time, the GC could have run and our msg + %% is actually in a different file, unlocked. However, defering is + %% the safest and simplest thing to do. 
+ Defer(); +client_read2(Server, false, _Right, + MsgLocation = #msg_location { guid = Guid, file = File }, + Defer, + CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> + %% It's entirely possible that everything we're doing from here on + %% is for the wrong file, or a non-existent file, as a GC may have + %% finished. + safe_ets_update_counter( + FileSummaryEts, File, {#file_summary.readers, +1}, + fun (_) -> client_read3(Server, MsgLocation, Defer, CState) end, + fun () -> read(Server, Guid, CState) end). + +client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, + CState = #client_msstate { file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + gc_pid = GCPid }) -> + Release = + fun() -> ok = case ets:update_counter(FileSummaryEts, File, + {#file_summary.readers, -1}) of + 0 -> case ets:lookup(FileSummaryEts, File) of + [#file_summary { locked = true }] -> + rabbit_msg_store_gc:no_readers( + GCPid, File); + _ -> ok + end; + _ -> ok + end + end, + %% If a GC involving the file hasn't already started, it won't + %% start now. Need to check again to see if we've been locked in + %% the meantime, between lookup and update_counter (thus GC + %% started before our +1. In fact, it could have finished by now + %% too). + case ets:lookup(FileSummaryEts, File) of + [] -> %% GC has deleted our file, just go round again. + read(Server, Guid, CState); + [#file_summary { locked = true }] -> + %% If we get a badarg here, then the GC has finished and + %% deleted our file. Try going around again. Otherwise, + %% just defer. + %% + %% badarg scenario: we lookup, msg_store locks, GC starts, + %% GC ends, we +1 readers, msg_store ets:deletes (and + %% unlocks the dest) + try Release(), + Defer() + catch error:badarg -> read(Server, Guid, CState) + end; + [#file_summary { locked = false }] -> + %% Ok, we're definitely safe to continue - a GC involving + %% the file cannot start up now, and isn't running, so + %% nothing will tell us from now on to close the handle if + %% it's already open. + %% + %% Finally, we need to recheck that the msg is still at + %% the same place - it's possible an entire GC ran between + %% us doing the lookup and the +1 on the readers. (Same as + %% badarg scenario above, but we don't have a missing file + %% - we just have the /wrong/ file). + case index_lookup(Guid, CState) of + #msg_location { file = File } = MsgLocation -> + %% Still the same file. + mark_handle_open(FileHandlesEts, File), + + CState1 = close_all_indicated(CState), + {Msg, CState2} = %% This will never be the current file + read_from_disk(MsgLocation, CState1, DedupCacheEts), + Release(), %% this MUST NOT fail with badarg + {{ok, Msg}, CState2}; + MsgLocation -> %% different file! + Release(), %% this MUST NOT fail with badarg + client_read1(Server, MsgLocation, Defer, CState) + end + end. 
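
Editorial note: the client-side read path above rarely has to enter the server; everything is driven through the opaque #client_msstate{} record returned by client_init/2. As a rough illustration of how a caller threads that state through the public API sketched in the specs earlier (the store name and client reference below are hypothetical, and the store must already be running), a round trip might look like this:

-module(msg_store_client_example).
-export([round_trip/2]).

%% Illustrative sketch only, not part of this patch. Guid must be a
%% 16-byte binary; msg_store_persistent is an assumed server name.
round_trip(Guid, Msg) when is_binary(Guid) ->
    CState0 = rabbit_msg_store:client_init(msg_store_persistent,
                                           <<"example-client-ref">>),
    {ok, CState1} = rabbit_msg_store:write(msg_store_persistent,
                                           Guid, Msg, CState0),
    %% served from the current-file cache or dedup cache where possible
    {{ok, Msg}, CState2} = rabbit_msg_store:read(msg_store_persistent,
                                                 Guid, CState1),
    ok = rabbit_msg_store:remove(msg_store_persistent, [Guid]),
    ok = rabbit_msg_store:client_terminate(CState2, msg_store_persistent).
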
+ +%%---------------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------------- + +init([Server, BaseDir, ClientRefs, StartupFunState]) -> + process_flag(trap_exit, true), + + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, + [self()]), + + Dir = filename:join(BaseDir, atom_to_list(Server)), + + {ok, IndexModule} = application:get_env(msg_store_index_module), + rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]), + + AttemptFileSummaryRecovery = + case ClientRefs of + undefined -> ok = rabbit_misc:recursive_delete([Dir]), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + false; + _ -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + recover_crashed_compactions(Dir) + end, + + %% if we found crashed compactions we trust neither the + %% file_summary nor the location index. Note the file_summary is + %% left empty here if it can't be recovered. + {FileSummaryRecovered, FileSummaryEts} = + recover_file_summary(AttemptFileSummaryRecovery, Dir), + + {CleanShutdown, IndexState, ClientRefs1} = + recover_index_and_client_refs(IndexModule, FileSummaryRecovered, + ClientRefs, Dir, Server), + %% CleanShutdown => msg location index and file_summary both + %% recovered correctly. + true = case {FileSummaryRecovered, CleanShutdown} of + {true, false} -> ets:delete_all_objects(FileSummaryEts); + _ -> true + end, + %% CleanShutdown <=> msg location index and file_summary both + %% recovered correctly. + + DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), + FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, + [ordered_set, public]), + CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]), + + {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit), + + State = #msstate { dir = Dir, + index_module = IndexModule, + index_state = IndexState, + current_file = 0, + current_file_handle = undefined, + file_handle_cache = dict:new(), + on_sync = [], + sync_timer_ref = undefined, + sum_valid_data = 0, + sum_file_size = 0, + pending_gc_completion = [], + gc_active = false, + gc_pid = undefined, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts, + client_refs = ClientRefs1, + successfully_recovered = CleanShutdown, + file_size_limit = FileSizeLimit + }, + + %% If we didn't recover the msg location index then we need to + %% rebuild it now. + {Offset, State1 = #msstate { current_file = CurFile }} = + build_index(CleanShutdown, StartupFunState, State), + + %% read is only needed so that we can seek + {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile), + [read | ?WRITE_MODE]), + {ok, Offset} = file_handle_cache:position(CurHdl, Offset), + ok = file_handle_cache:truncate(CurHdl), + + {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule, + FileSummaryEts), + + {ok, maybe_compact( + State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }), + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
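
Editorial note: init/1 takes both the index implementation and the per-file size limit from the application environment (msg_store_index_module and msg_store_file_size_limit). A configuration sketch, assuming the store runs inside the rabbit application and using the ets-based index added elsewhere in this changeset; the 16 MB figure is illustrative, not a default taken from this diff:

%% e.g. in a sys.config-style term; values are illustrative
[{rabbit, [{msg_store_index_module, rabbit_msg_store_ets_index},
           {msg_store_file_size_limit, 16777216}]}].
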
+ +handle_call({read, Guid}, From, State) -> + State1 = read_message(Guid, From, State), + noreply(State1); + +handle_call({contains, Guid}, From, State) -> + State1 = contains_message(Guid, From, State), + noreply(State1); + +handle_call({new_client_state, CRef}, _From, + State = #msstate { dir = Dir, + index_state = IndexState, + index_module = IndexModule, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts, + client_refs = ClientRefs, + gc_pid = GCPid }) -> + reply({IndexState, IndexModule, Dir, GCPid, + FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts}, + State #msstate { client_refs = sets:add_element(CRef, ClientRefs) }); + +handle_call(successfully_recovered_state, _From, State) -> + reply(State #msstate.successfully_recovered, State); + +handle_call(client_terminate, _From, State) -> + reply(ok, State). + +handle_cast({write, Guid}, + State = #msstate { current_file_handle = CurHdl, + current_file = CurFile, + sum_valid_data = SumValid, + sum_file_size = SumFileSize, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts }) -> + true = 0 =< ets:update_counter(CurFileCacheEts, Guid, {3, -1}), + [{Guid, Msg, _CacheRefCount}] = ets:lookup(CurFileCacheEts, Guid), + case index_lookup(Guid, State) of + not_found -> + %% New message, lots to do + {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), + {ok, TotalSize} = rabbit_msg_file:append(CurHdl, Guid, Msg), + ok = index_insert(#msg_location { + guid = Guid, ref_count = 1, file = CurFile, + offset = CurOffset, total_size = TotalSize }, + State), + [#file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + right = undefined, + locked = false, + file_size = FileSize }] = + ets:lookup(FileSummaryEts, CurFile), + ValidTotalSize1 = ValidTotalSize + TotalSize, + ContiguousTop1 = case CurOffset =:= ContiguousTop of + true -> ValidTotalSize1; + false -> ContiguousTop + end, + true = ets:update_element( + FileSummaryEts, CurFile, + [{#file_summary.valid_total_size, ValidTotalSize1}, + {#file_summary.contiguous_top, ContiguousTop1}, + {#file_summary.file_size, FileSize + TotalSize}]), + NextOffset = CurOffset + TotalSize, + noreply( + maybe_roll_to_new_file( + NextOffset, State #msstate { + sum_valid_data = SumValid + TotalSize, + sum_file_size = SumFileSize + TotalSize })); + #msg_location { ref_count = RefCount } -> + %% We already know about it, just update counter. 
Only + %% update field otherwise bad interaction with concurrent GC + ok = index_update_fields(Guid, + {#msg_location.ref_count, RefCount + 1}, + State), + noreply(State) + end; + +handle_cast({remove, Guids}, State) -> + State1 = lists:foldl( + fun (Guid, State2) -> remove_message(Guid, State2) end, + State, Guids), + noreply(maybe_compact(State1)); + +handle_cast({release, Guids}, State = + #msstate { dedup_cache_ets = DedupCacheEts }) -> + lists:foreach( + fun (Guid) -> decrement_cache(DedupCacheEts, Guid) end, Guids), + noreply(State); + +handle_cast({sync, Guids, K}, + State = #msstate { current_file = CurFile, + current_file_handle = CurHdl, + on_sync = Syncs }) -> + {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl), + case lists:any(fun (Guid) -> + #msg_location { file = File, offset = Offset } = + index_lookup(Guid, State), + File =:= CurFile andalso Offset >= SyncOffset + end, Guids) of + false -> K(), + noreply(State); + true -> noreply(State #msstate { on_sync = [K | Syncs] }) + end; + +handle_cast(sync, State) -> + noreply(internal_sync(State)); + +handle_cast({gc_done, Reclaimed, Src, Dst}, + State = #msstate { sum_file_size = SumFileSize, + gc_active = {Src, Dst}, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts }) -> + %% GC done, so now ensure that any clients that have open fhs to + %% those files close them before using them again. This has to be + %% done here (given it's done in the msg_store, and not the gc), + %% and not when starting up the GC, because if done when starting + %% up the GC, the client could find the close, and close and + %% reopen the fh, whilst the GC is waiting for readers to + %% disappear, before it's actually done the GC. + true = mark_handle_to_close(FileHandlesEts, Src), + true = mark_handle_to_close(FileHandlesEts, Dst), + %% we always move data left, so Src has gone and was on the + %% right, so need to make dest = source.right.left, and also + %% dest.right = source.right + [#file_summary { left = Dst, + right = SrcRight, + locked = true, + readers = 0 }] = ets:lookup(FileSummaryEts, Src), + %% this could fail if SrcRight =:= undefined + ets:update_element(FileSummaryEts, SrcRight, {#file_summary.left, Dst}), + true = ets:update_element(FileSummaryEts, Dst, + [{#file_summary.locked, false}, + {#file_summary.right, SrcRight}]), + true = ets:delete(FileSummaryEts, Src), + noreply( + maybe_compact(run_pending( + State #msstate { sum_file_size = SumFileSize - Reclaimed, + gc_active = false }))); + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State); + +handle_cast({client_delete, CRef}, + State = #msstate { client_refs = ClientRefs }) -> + noreply( + State #msstate { client_refs = sets:del_element(CRef, ClientRefs) }). + +handle_info(timeout, State) -> + noreply(internal_sync(State)); + +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}. + +terminate(_Reason, State = #msstate { index_state = IndexState, + index_module = IndexModule, + current_file_handle = CurHdl, + gc_pid = GCPid, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts, + client_refs = ClientRefs, + dir = Dir }) -> + %% stop the gc first, otherwise it could be working and we pull + %% out the ets tables from under it. 
+ ok = rabbit_msg_store_gc:stop(GCPid), + State1 = case CurHdl of + undefined -> State; + _ -> State2 = internal_sync(State), + file_handle_cache:close(CurHdl), + State2 + end, + State3 = close_all_handles(State1), + store_file_summary(FileSummaryEts, Dir), + [ets:delete(T) || + T <- [FileSummaryEts, DedupCacheEts, FileHandlesEts, CurFileCacheEts]], + IndexModule:terminate(IndexState), + store_recovery_terms([{client_refs, sets:to_list(ClientRefs)}, + {index_module, IndexModule}], Dir), + State3 #msstate { index_state = undefined, + current_file_handle = undefined }. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------------- +%% general helper functions +%%---------------------------------------------------------------------------- + +noreply(State) -> + {State1, Timeout} = next_state(State), + {noreply, State1, Timeout}. + +reply(Reply, State) -> + {State1, Timeout} = next_state(State), + {reply, Reply, State1, Timeout}. + +next_state(State = #msstate { on_sync = [], sync_timer_ref = undefined }) -> + {State, hibernate}; +next_state(State = #msstate { sync_timer_ref = undefined }) -> + {start_sync_timer(State), 0}; +next_state(State = #msstate { on_sync = [] }) -> + {stop_sync_timer(State), hibernate}; +next_state(State) -> + {State, 0}. + +start_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, [self()]), + State #msstate { sync_timer_ref = TRef }. + +stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> + State; +stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #msstate { sync_timer_ref = undefined }. + +internal_sync(State = #msstate { current_file_handle = CurHdl, + on_sync = Syncs }) -> + State1 = stop_sync_timer(State), + case Syncs of + [] -> State1; + _ -> ok = file_handle_cache:sync(CurHdl), + lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)), + State1 #msstate { on_sync = [] } + end. + +read_message(Guid, From, + State = #msstate { dedup_cache_ets = DedupCacheEts }) -> + case index_lookup(Guid, State) of + not_found -> + gen_server2:reply(From, not_found), + State; + MsgLocation -> + case fetch_and_increment_cache(DedupCacheEts, Guid) of + not_found -> read_message1(From, MsgLocation, State); + Msg -> gen_server2:reply(From, {ok, Msg}), + State + end + end. 
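
Editorial note: the on_sync list and the ?SYNC_INTERVAL timer handled above mean that sync/3 callers hand the store a continuation which is run only once the named messages are known to have been fsynced (immediately, if they already were). A hedged usage sketch in which a caller asks to be notified; the message format is illustrative:

%% Sketch only, not part of this patch.
notify_when_synced(Server, Guids) ->
    Self = self(),
    ok = rabbit_msg_store:sync(Server, Guids,
                               fun () -> Self ! {msgs_on_disk, Guids} end).
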
+ +read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, + file = File, offset = Offset } = MsgLoc, + State = #msstate { current_file = CurFile, + current_file_handle = CurHdl, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> + case File =:= CurFile of + true -> {Msg, State1} = + %% can return [] if msg in file existed on startup + case ets:lookup(CurFileCacheEts, Guid) of + [] -> + {ok, RawOffSet} = + file_handle_cache:current_raw_offset(CurHdl), + ok = case Offset >= RawOffSet of + true -> file_handle_cache:flush(CurHdl); + false -> ok + end, + read_from_disk(MsgLoc, State, DedupCacheEts); + [{Guid, Msg1, _CacheRefCount}] -> + ok = maybe_insert_into_cache( + DedupCacheEts, RefCount, Guid, Msg1), + {Msg1, State} + end, + gen_server2:reply(From, {ok, Msg}), + State1; + false -> [#file_summary { locked = Locked }] = + ets:lookup(FileSummaryEts, File), + case Locked of + true -> add_to_pending_gc_completion({read, Guid, From}, + State); + false -> {Msg, State1} = + read_from_disk(MsgLoc, State, DedupCacheEts), + gen_server2:reply(From, {ok, Msg}), + State1 + end + end. + +read_from_disk(#msg_location { guid = Guid, ref_count = RefCount, + file = File, offset = Offset, + total_size = TotalSize }, + State, DedupCacheEts) -> + {Hdl, State1} = get_read_handle(File, State), + {ok, Offset} = file_handle_cache:position(Hdl, Offset), + {ok, {Guid, Msg}} = + case rabbit_msg_file:read(Hdl, TotalSize) of + {ok, {Guid, _}} = Obj -> + Obj; + Rest -> + {error, {misread, [{old_state, State}, + {file_num, File}, + {offset, Offset}, + {guid, Guid}, + {read, Rest}, + {proc_dict, get()} + ]}} + end, + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), + {Msg, State1}. + +contains_message(Guid, From, State = #msstate { gc_active = GCActive }) -> + case index_lookup(Guid, State) of + not_found -> + gen_server2:reply(From, false), + State; + #msg_location { file = File } -> + case GCActive of + {A, B} when File =:= A orelse File =:= B -> + add_to_pending_gc_completion( + {contains, Guid, From}, State); + _ -> + gen_server2:reply(From, true), + State + end + end. + +remove_message(Guid, State = #msstate { sum_valid_data = SumValid, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts }) -> + #msg_location { ref_count = RefCount, file = File, + offset = Offset, total_size = TotalSize } = + index_lookup(Guid, State), + case RefCount of + 1 -> + %% don't remove from CUR_FILE_CACHE_ETS_NAME here because + %% there may be further writes in the mailbox for the same + %% msg. + ok = remove_cache_entry(DedupCacheEts, Guid), + [#file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + locked = Locked }] = + ets:lookup(FileSummaryEts, File), + case Locked of + true -> + add_to_pending_gc_completion({remove, Guid}, State); + false -> + ok = index_delete(Guid, State), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + ValidTotalSize1 = ValidTotalSize - TotalSize, + true = ets:update_element( + FileSummaryEts, File, + [{#file_summary.valid_total_size, ValidTotalSize1}, + {#file_summary.contiguous_top, ContiguousTop1}]), + State1 = delete_file_if_empty(File, State), + State1 #msstate { sum_valid_data = SumValid - TotalSize } + end; + _ when 1 < RefCount -> + ok = decrement_cache(DedupCacheEts, Guid), + %% only update field, otherwise bad interaction with concurrent GC + ok = index_update_fields(Guid, + {#msg_location.ref_count, RefCount - 1}, + State), + State + end. 
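
Editorial note: remove_message/2 above is where the reference-count contract bites: a guid written N times must be removed N times before its data becomes garbage. A small sketch of that contract from a client's point of view (server name is hypothetical; write/4 and remove/2 are casts and contains/2 is a call, so the sketch relies on the server processing them in order):

%% Sketch only, not part of this patch.
refcount_demo(Server, Guid, Msg, CState0) ->
    {ok, CState1} = rabbit_msg_store:write(Server, Guid, Msg, CState0),
    {ok, CState2} = rabbit_msg_store:write(Server, Guid, Msg, CState1),
    ok = rabbit_msg_store:remove(Server, [Guid]),
    true = rabbit_msg_store:contains(Server, Guid),  %% one reference left
    ok = rabbit_msg_store:remove(Server, [Guid]),
    false = rabbit_msg_store:contains(Server, Guid),
    CState2.
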
+ +add_to_pending_gc_completion( + Op, State = #msstate { pending_gc_completion = Pending }) -> + State #msstate { pending_gc_completion = [Op | Pending] }. + +run_pending(State = #msstate { pending_gc_completion = [] }) -> + State; +run_pending(State = #msstate { pending_gc_completion = Pending }) -> + State1 = State #msstate { pending_gc_completion = [] }, + lists:foldl(fun run_pending/2, State1, lists:reverse(Pending)). + +run_pending({read, Guid, From}, State) -> + read_message(Guid, From, State); +run_pending({contains, Guid, From}, State) -> + contains_message(Guid, From, State); +run_pending({remove, Guid}, State) -> + remove_message(Guid, State). + +safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) -> + try + SuccessFun(ets:update_counter(Tab, Key, UpdateOp)) + catch error:badarg -> FailThunk() + end. + +safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) -> + safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk). + +%%---------------------------------------------------------------------------- +%% file helper functions +%%---------------------------------------------------------------------------- + +open_file(Dir, FileName, Mode) -> + file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, + [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). + +close_handle(Key, CState = #client_msstate { file_handle_cache = FHC }) -> + CState #client_msstate { file_handle_cache = close_handle(Key, FHC) }; + +close_handle(Key, State = #msstate { file_handle_cache = FHC }) -> + State #msstate { file_handle_cache = close_handle(Key, FHC) }; + +close_handle(Key, FHC) -> + case dict:find(Key, FHC) of + {ok, Hdl} -> ok = file_handle_cache:close(Hdl), + dict:erase(Key, FHC); + error -> FHC + end. + +mark_handle_open(FileHandlesEts, File) -> + %% This is fine to fail (already exists) + ets:insert_new(FileHandlesEts, {{self(), File}, open}), + true. + +mark_handle_to_close(FileHandlesEts, File) -> + [ ets:update_element(FileHandlesEts, Key, {2, close}) + || {Key, open} <- ets:match_object(FileHandlesEts, {{'_', File}, open}) ], + true. + +close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = + CState) -> + Objs = ets:match_object(FileHandlesEts, {{self(), '_'}, close}), + lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> + true = ets:delete(FileHandlesEts, Key), + close_handle(File, CStateM) + end, CState, Objs). + +close_all_handles(CState = #client_msstate { file_handles_ets = FileHandlesEts, + file_handle_cache = FHC }) -> + Self = self(), + ok = dict:fold(fun (File, Hdl, ok) -> + true = ets:delete(FileHandlesEts, {Self, File}), + file_handle_cache:close(Hdl) + end, ok, FHC), + CState #client_msstate { file_handle_cache = dict:new() }; + +close_all_handles(State = #msstate { file_handle_cache = FHC }) -> + ok = dict:fold(fun (_Key, Hdl, ok) -> file_handle_cache:close(Hdl) end, + ok, FHC), + State #msstate { file_handle_cache = dict:new() }. + +get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = FHC, + dir = Dir }) -> + {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir), + {Hdl, CState #client_msstate { file_handle_cache = FHC2 }}; + +get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC, + dir = Dir }) -> + {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir), + {Hdl, State #msstate { file_handle_cache = FHC2 }}. 
+ +get_read_handle(FileNum, FHC, Dir) -> + case dict:find(FileNum, FHC) of + {ok, Hdl} -> {Hdl, FHC}; + error -> {ok, Hdl} = open_file(Dir, filenum_to_name(FileNum), + ?READ_MODE), + {Hdl, dict:store(FileNum, Hdl, FHC)} + end. + +preallocate(Hdl, FileSizeLimit, FinalPos) -> + {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), + ok = file_handle_cache:truncate(Hdl), + {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos), + ok. + +truncate_and_extend_file(Hdl, Lowpoint, Highpoint) -> + {ok, Lowpoint} = file_handle_cache:position(Hdl, Lowpoint), + ok = file_handle_cache:truncate(Hdl), + ok = preallocate(Hdl, Highpoint, Lowpoint). + +form_filename(Dir, Name) -> filename:join(Dir, Name). + +filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. + +filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). + +list_sorted_file_names(Dir, Ext) -> + lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, + filelib:wildcard("*" ++ Ext, Dir)). + +%%---------------------------------------------------------------------------- +%% message cache helper functions +%%---------------------------------------------------------------------------- + +maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) + when RefCount > 1 -> + update_msg_cache(DedupCacheEts, Guid, Msg); +maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) -> + ok. + +update_msg_cache(CacheEts, Guid, Msg) -> + case ets:insert_new(CacheEts, {Guid, Msg, 1}) of + true -> ok; + false -> safe_ets_update_counter_ok( + CacheEts, Guid, {3, +1}, + fun () -> update_msg_cache(CacheEts, Guid, Msg) end) + end. + +remove_cache_entry(DedupCacheEts, Guid) -> + true = ets:delete(DedupCacheEts, Guid), + ok. + +fetch_and_increment_cache(DedupCacheEts, Guid) -> + case ets:lookup(DedupCacheEts, Guid) of + [] -> + not_found; + [{_Guid, Msg, _RefCount}] -> + safe_ets_update_counter_ok( + DedupCacheEts, Guid, {3, +1}, + %% someone has deleted us in the meantime, insert us + fun () -> ok = update_msg_cache(DedupCacheEts, Guid, Msg) end), + Msg + end. + +decrement_cache(DedupCacheEts, Guid) -> + true = safe_ets_update_counter( + DedupCacheEts, Guid, {3, -1}, + fun (N) when N =< 0 -> true = ets:delete(DedupCacheEts, Guid); + (_N) -> true + end, + %% Guid is not in there because although it's been + %% delivered, it's never actually been read (think: + %% persistent message held in RAM) + fun () -> true end), + ok. + +%%---------------------------------------------------------------------------- +%% index +%%---------------------------------------------------------------------------- + +index_lookup(Key, #client_msstate { index_module = Index, + index_state = State }) -> + Index:lookup(Key, State); + +index_lookup(Key, #msstate { index_module = Index, index_state = State }) -> + Index:lookup(Key, State). + +index_insert(Obj, #msstate { index_module = Index, index_state = State }) -> + Index:insert(Obj, State). + +index_update(Obj, #msstate { index_module = Index, index_state = State }) -> + Index:update(Obj, State). + +index_update_fields(Key, Updates, #msstate { index_module = Index, + index_state = State }) -> + Index:update_fields(Key, Updates, State). + +index_delete(Key, #msstate { index_module = Index, index_state = State }) -> + Index:delete(Key, State). + +index_delete_by_file(File, #msstate { index_module = Index, + index_state = State }) -> + Index:delete_by_file(File, State). 
+ +%%---------------------------------------------------------------------------- +%% shutdown and recovery +%%---------------------------------------------------------------------------- + +recover_index_and_client_refs(IndexModule, _Recover, undefined, Dir, _Server) -> + {false, IndexModule:new(Dir), sets:new()}; +recover_index_and_client_refs(IndexModule, false, _ClientRefs, Dir, Server) -> + rabbit_log:warning("~w: rebuilding indices from scratch~n", [Server]), + {false, IndexModule:new(Dir), sets:new()}; +recover_index_and_client_refs(IndexModule, true, ClientRefs, Dir, Server) -> + Fresh = fun (ErrorMsg, ErrorArgs) -> + rabbit_log:warning("~w: " ++ ErrorMsg ++ "~n" + "rebuilding indices from scratch~n", + [Server | ErrorArgs]), + {false, IndexModule:new(Dir), sets:new()} + end, + case read_recovery_terms(Dir) of + {false, Error} -> + Fresh("failed to read recovery terms: ~p", [Error]); + {true, Terms} -> + RecClientRefs = proplists:get_value(client_refs, Terms, []), + RecIndexModule = proplists:get_value(index_module, Terms), + case (lists:sort(ClientRefs) =:= lists:sort(RecClientRefs) + andalso IndexModule =:= RecIndexModule) of + true -> case IndexModule:recover(Dir) of + {ok, IndexState1} -> + {true, IndexState1, + sets:from_list(ClientRefs)}; + {error, Error} -> + Fresh("failed to recover index: ~p", [Error]) + end; + false -> Fresh("recovery terms differ from present", []) + end + end. + +store_recovery_terms(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). + +read_recovery_terms(Dir) -> + Path = filename:join(Dir, ?CLEAN_FILENAME), + case rabbit_misc:read_term_file(Path) of + {ok, Terms} -> case file:delete(Path) of + ok -> {true, Terms}; + {error, Error} -> {false, Error} + end; + {error, Error} -> {false, Error} + end. + +store_file_summary(Tid, Dir) -> + ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME), + [{extended_info, [object_count]}]). + +recover_file_summary(false, _Dir) -> + %% TODO: the only reason for this to be an *ordered*_set is so + %% that a) maybe_compact can start a traversal from the eldest + %% file, and b) build_index in fast recovery mode can easily + %% identify the current file. It's awkward to have both that + %% odering and the left/right pointers in the entries - replacing + %% the former with some additional bit of state would be easy, but + %% ditching the latter would be neater. + {false, ets:new(rabbit_msg_store_file_summary, + [ordered_set, public, {keypos, #file_summary.file}])}; +recover_file_summary(true, Dir) -> + Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {true, Tid}; + {error, _Error} -> recover_file_summary(false, Dir) + end. + +count_msg_refs(Gen, Seed, State) -> + case Gen(Seed) of + finished -> + ok; + {_Guid, 0, Next} -> + count_msg_refs(Gen, Next, State); + {Guid, Delta, Next} -> + ok = case index_lookup(Guid, State) of + not_found -> + index_insert(#msg_location { guid = Guid, + file = undefined, + ref_count = Delta }, + State); + #msg_location { ref_count = RefCount } = StoreEntry -> + NewRefCount = RefCount + Delta, + case NewRefCount of + 0 -> index_delete(Guid, State); + _ -> index_update(StoreEntry #msg_location { + ref_count = NewRefCount }, + State) + end + end, + count_msg_refs(Gen, Next, State) + end. 
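
Editorial note: count_msg_refs/3 drives the {Gen, Seed} pair handed to start_link/4 when the previous shutdown was not clean. A minimal illustrative generator that replays ref-count deltas from a plain list, matching the startup_fun_state() spec; in the broker the real seed comes from the queue indices, so this exists only to show the expected shape of the callback:

%% Seed is a list of {Guid, Delta} pairs; 'finished' ends the scan.
ref_count_gen([])                     -> finished;
ref_count_gen([{Guid, Delta} | Rest]) -> {Guid, Delta, Rest}.

%% e.g. rabbit_msg_store:start_link(some_store, Dir, ClientRefs,
%%                                  {fun ref_count_gen/1, Deltas})
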
+ +recover_crashed_compactions(Dir) -> + FileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION), + TmpFileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION_TMP), + lists:foreach( + fun (TmpFileName) -> + NonTmpRelatedFileName = + filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, + true = lists:member(NonTmpRelatedFileName, FileNames), + ok = recover_crashed_compaction( + Dir, TmpFileName, NonTmpRelatedFileName) + end, TmpFileNames), + TmpFileNames == []. + +recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) -> + %% Because a msg can legitimately appear multiple times in the + %% same file, identifying the contents of the tmp file and where + %% they came from is non-trivial. If we are recovering a crashed + %% compaction then we will be rebuilding the index, which can cope + %% with duplicates appearing. Thus the simplest and safest thing + %% to do is to append the contents of the tmp file to its main + %% file. + {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_MODE), + {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName, + ?READ_MODE ++ ?WRITE_MODE), + {ok, _End} = file_handle_cache:position(MainHdl, eof), + Size = filelib:file_size(form_filename(Dir, TmpFileName)), + {ok, Size} = file_handle_cache:copy(TmpHdl, MainHdl, Size), + ok = file_handle_cache:close(MainHdl), + ok = file_handle_cache:delete(TmpHdl), + ok. + +scan_file_for_valid_messages(Dir, FileName) -> + case open_file(Dir, FileName, ?READ_MODE) of + {ok, Hdl} -> Valid = rabbit_msg_file:scan( + Hdl, filelib:file_size( + form_filename(Dir, FileName))), + %% if something really bad has happened, + %% the close could fail, but ignore + file_handle_cache:close(Hdl), + Valid; + {error, enoent} -> {ok, [], 0}; + {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} + end. + +%% Takes the list in *ascending* order (i.e. eldest message +%% first). This is the opposite of what scan_file_for_valid_messages +%% produces. The list of msgs that is produced is youngest first. +find_contiguous_block_prefix(L) -> find_contiguous_block_prefix(L, 0, []). + +find_contiguous_block_prefix([], ExpectedOffset, Guids) -> + {ExpectedOffset, Guids}; +find_contiguous_block_prefix([{Guid, TotalSize, ExpectedOffset} | Tail], + ExpectedOffset, Guids) -> + ExpectedOffset1 = ExpectedOffset + TotalSize, + find_contiguous_block_prefix(Tail, ExpectedOffset1, [Guid | Guids]); +find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, Guids) -> + {ExpectedOffset, Guids}. + +build_index(true, _StartupFunState, + State = #msstate { file_summary_ets = FileSummaryEts }) -> + ets:foldl( + fun (#file_summary { valid_total_size = ValidTotalSize, + file_size = FileSize, + file = File }, + {_Offset, State1 = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize }}) -> + {FileSize, State1 #msstate { + sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize, + current_file = File }} + end, {0, State}, FileSummaryEts); +build_index(false, {MsgRefDeltaGen, MsgRefDeltaGenInit}, + State = #msstate { dir = Dir }) -> + ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), + {ok, Pid} = gatherer:start_link(), + case [filename_to_num(FileName) || + FileName <- list_sorted_file_names(Dir, ?FILE_EXTENSION)] of + [] -> build_index(Pid, undefined, [State #msstate.current_file], + State); + Files -> {Offset, State1} = build_index(Pid, undefined, Files, State), + {Offset, lists:foldl(fun delete_file_if_empty/2, + State1, Files)} + end. 
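
Editorial note: find_contiguous_block_prefix/1 above reports how far the file is contiguously valid from offset 0, which is what seeds ContiguousTop during index rebuild. A worked example of the expected result (guids shortened for readability; real ones are 16-byte binaries):

%% Messages of 10 bytes at offsets 0, 10 and 30: the third sits after a
%% hole, so only the first two count towards the contiguous prefix.
{20, [<<"g2">>, <<"g1">>]} =
    find_contiguous_block_prefix([{<<"g1">>, 10, 0},
                                  {<<"g2">>, 10, 10},
                                  {<<"g3">>, 10, 30}]).
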
+ +build_index(Gatherer, Left, [], + State = #msstate { file_summary_ets = FileSummaryEts, + sum_valid_data = SumValid, + sum_file_size = SumFileSize }) -> + case gatherer:out(Gatherer) of + empty -> + ok = gatherer:stop(Gatherer), + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), + ok = index_delete_by_file(undefined, State), + Offset = case ets:lookup(FileSummaryEts, Left) of + [] -> 0; + [#file_summary { file_size = FileSize }] -> FileSize + end, + {Offset, State #msstate { current_file = Left }}; + {value, #file_summary { valid_total_size = ValidTotalSize, + file_size = FileSize } = FileSummary} -> + true = ets:insert_new(FileSummaryEts, FileSummary), + build_index(Gatherer, Left, [], + State #msstate { + sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize }) + end; +build_index(Gatherer, Left, [File|Files], State) -> + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> build_index_worker(Gatherer, State, + Left, File, Files) + end), + build_index(Gatherer, File, Files, State). + +build_index_worker(Gatherer, State = #msstate { dir = Dir }, + Left, File, Files) -> + {ok, Messages, FileSize} = + scan_file_for_valid_messages(Dir, filenum_to_name(File)), + {ValidMessages, ValidTotalSize} = + lists:foldl( + fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + case index_lookup(Guid, State) of + #msg_location { file = undefined } = StoreEntry -> + ok = index_update(StoreEntry #msg_location { + file = File, offset = Offset, + total_size = TotalSize }, + State), + {[Obj | VMAcc], VTSAcc + TotalSize}; + _ -> + {VMAcc, VTSAcc} + end + end, {[], 0}, Messages), + %% foldl reverses lists, find_contiguous_block_prefix needs + %% msgs eldest first, so, ValidMessages is the right way round + {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), + {Right, FileSize1} = + case Files of + %% if it's the last file, we'll truncate to remove any + %% rubbish above the last valid message. This affects the + %% file size. + [] -> {undefined, case ValidMessages of + [] -> 0; + _ -> {_Guid, TotalSize, Offset} = + lists:last(ValidMessages), + Offset + TotalSize + end}; + [F|_] -> {F, FileSize} + end, + ok = gatherer:in(Gatherer, #file_summary { + file = File, + valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + left = Left, + right = Right, + file_size = FileSize1, + locked = false, + readers = 0 }), + ok = gatherer:finish(Gatherer). 
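
Editorial note: maybe_compact/1, which follows, only looks for a source/destination pair once the files on disk total more than twice the per-file limit and the garbage fraction exceeds ?GARBAGE_FRACTION. With illustrative numbers (a 16 MB limit, 40 MB on disk, 18 MB of it still valid):

%% (SumFileSize - SumValid) / SumFileSize = (40 - 18) / 40 = 0.55 > 0.5,
%% and 40 > 2 * 16, so a GC candidate pair would be searched for.
true = (40 > 2 * 16) andalso ((40 - 18) / 40 > 0.5).
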
+ +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation -- internal +%%---------------------------------------------------------------------------- + +maybe_roll_to_new_file( + Offset, + State = #msstate { dir = Dir, + current_file_handle = CurHdl, + current_file = CurFile, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts, + file_size_limit = FileSizeLimit }) + when Offset >= FileSizeLimit -> + State1 = internal_sync(State), + ok = file_handle_cache:close(CurHdl), + NextFile = CurFile + 1, + {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), + true = ets:insert_new(FileSummaryEts, #file_summary { + file = NextFile, + valid_total_size = 0, + contiguous_top = 0, + left = CurFile, + right = undefined, + file_size = 0, + locked = false, + readers = 0 }), + true = ets:update_element(FileSummaryEts, CurFile, + {#file_summary.right, NextFile}), + true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}), + maybe_compact(State1 #msstate { current_file_handle = NextHdl, + current_file = NextFile }); +maybe_roll_to_new_file(_, State) -> + State. + +maybe_compact(State = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize, + gc_active = false, + gc_pid = GCPid, + file_summary_ets = FileSummaryEts, + file_size_limit = FileSizeLimit }) + when (SumFileSize > 2 * FileSizeLimit andalso + (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) -> + %% TODO: the algorithm here is sub-optimal - it may result in a + %% complete traversal of FileSummaryEts. + case ets:first(FileSummaryEts) of + '$end_of_table' -> + State; + First -> + case find_files_to_gc(FileSummaryEts, FileSizeLimit, + ets:lookup(FileSummaryEts, First)) of + not_found -> + State; + {Src, Dst} -> + State1 = close_handle(Src, close_handle(Dst, State)), + true = ets:update_element(FileSummaryEts, Src, + {#file_summary.locked, true}), + true = ets:update_element(FileSummaryEts, Dst, + {#file_summary.locked, true}), + ok = rabbit_msg_store_gc:gc(GCPid, Src, Dst), + State1 #msstate { gc_active = {Src, Dst} } + end + end; +maybe_compact(State) -> + State. + +find_files_to_gc(FileSummaryEts, FileSizeLimit, + [#file_summary { file = Dst, + valid_total_size = DstValid, + right = Src }]) -> + case Src of + undefined -> + not_found; + _ -> + [#file_summary { file = Src, + valid_total_size = SrcValid, + left = Dst, + right = SrcRight }] = Next = + ets:lookup(FileSummaryEts, Src), + case SrcRight of + undefined -> not_found; + _ -> case DstValid + SrcValid =< FileSizeLimit of + true -> {Src, Dst}; + false -> find_files_to_gc( + FileSummaryEts, FileSizeLimit, Next) + end + end + end. + +delete_file_if_empty(File, State = #msstate { current_file = File }) -> + State; +delete_file_if_empty(File, State = #msstate { + dir = Dir, + sum_file_size = SumFileSize, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts }) -> + [#file_summary { valid_total_size = ValidData, + left = Left, + right = Right, + file_size = FileSize, + locked = false }] = + ets:lookup(FileSummaryEts, File), + case ValidData of + %% we should NEVER find the current file in here hence right + %% should always be a file, not undefined + 0 -> case {Left, Right} of + {undefined, _} when Right =/= undefined -> + %% the eldest file is empty. 
+ true = ets:update_element( + FileSummaryEts, Right, + {#file_summary.left, undefined}); + {_, _} when Right =/= undefined -> + true = ets:update_element(FileSummaryEts, Right, + {#file_summary.left, Left}), + true = ets:update_element(FileSummaryEts, Left, + {#file_summary.right, Right}) + end, + true = mark_handle_to_close(FileHandlesEts, File), + true = ets:delete(FileSummaryEts, File), + State1 = close_handle(File, State), + ok = file:delete(form_filename(Dir, filenum_to_name(File))), + State1 #msstate { sum_file_size = SumFileSize - FileSize }; + _ -> State + end. + +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation -- external +%%---------------------------------------------------------------------------- + +gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> + [SrcObj = #file_summary { + readers = SrcReaders, + left = DstFile, + file_size = SrcFileSize, + locked = true }] = ets:lookup(FileSummaryEts, SrcFile), + [DstObj = #file_summary { + readers = DstReaders, + right = SrcFile, + file_size = DstFileSize, + locked = true }] = ets:lookup(FileSummaryEts, DstFile), + + case SrcReaders =:= 0 andalso DstReaders =:= 0 of + true -> TotalValidData = combine_files(SrcObj, DstObj, State), + %% don't update dest.right, because it could be + %% changing at the same time + true = ets:update_element( + FileSummaryEts, DstFile, + [{#file_summary.valid_total_size, TotalValidData}, + {#file_summary.contiguous_top, TotalValidData}, + {#file_summary.file_size, TotalValidData}]), + SrcFileSize + DstFileSize - TotalValidData; + false -> concurrent_readers + end. + +combine_files(#file_summary { file = Source, + valid_total_size = SourceValid, + left = Destination }, + #file_summary { file = Destination, + valid_total_size = DestinationValid, + contiguous_top = DestinationContiguousTop, + right = Source }, + State = {_FileSummaryEts, Dir, _Index, _IndexState}) -> + SourceName = filenum_to_name(Source), + DestinationName = filenum_to_name(Destination), + {ok, SourceHdl} = open_file(Dir, SourceName, + ?READ_AHEAD_MODE), + {ok, DestinationHdl} = open_file(Dir, DestinationName, + ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ExpectedSize = SourceValid + DestinationValid, + %% if DestinationValid =:= DestinationContiguousTop then we don't + %% need a tmp file + %% if they're not equal, then we need to write out everything past + %% the DestinationContiguousTop to a tmp file then truncate, + %% copy back in, and then copy over from Source + %% otherwise we just truncate straight away and copy over from Source + case DestinationContiguousTop =:= DestinationValid of + true -> + ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize); + false -> + {DestinationWorkList, DestinationValid} = + find_unremoved_messages_in_file(Destination, State), + Worklist = + lists:dropwhile( + fun (#msg_location { offset = Offset }) + when Offset =/= DestinationContiguousTop -> + %% it cannot be that Offset =:= + %% DestinationContiguousTop because if it + %% was then DestinationContiguousTop would + %% have been extended by TotalSize + Offset < DestinationContiguousTop + end, DestinationWorkList), + Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ok = copy_messages( + Worklist, DestinationContiguousTop, DestinationValid, + DestinationHdl, TmpHdl, Destination, State), + TmpSize = DestinationValid - 
DestinationContiguousTop, + %% so now Tmp contains everything we need to salvage from + %% Destination, and index_state has been updated to + %% reflect the compaction of Destination so truncate + %% Destination and copy from Tmp back to the end + {ok, 0} = file_handle_cache:position(TmpHdl, 0), + ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize), + {ok, TmpSize} = + file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), + %% position in DestinationHdl should now be DestinationValid + ok = file_handle_cache:sync(DestinationHdl), + ok = file_handle_cache:delete(TmpHdl) + end, + {SourceWorkList, SourceValid} = + find_unremoved_messages_in_file(Source, State), + ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, + SourceHdl, DestinationHdl, Destination, State), + %% tidy up + ok = file_handle_cache:close(DestinationHdl), + ok = file_handle_cache:delete(SourceHdl), + ExpectedSize. + +find_unremoved_messages_in_file(File, + {_FileSummaryEts, Dir, Index, IndexState}) -> + %% Messages here will be end-of-file at start-of-list + {ok, Messages, _FileSize} = + scan_file_for_valid_messages(Dir, filenum_to_name(File)), + %% foldl will reverse so will end up with msgs in ascending offset order + lists:foldl(fun ({Guid, TotalSize, Offset}, Acc = {List, Size}) -> + case Index:lookup(Guid, IndexState) of + #msg_location { file = File, total_size = TotalSize, + offset = Offset } = Entry -> + {[ Entry | List ], TotalSize + Size}; + _ -> + Acc + end + end, {[], 0}, Messages). + +copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, + Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> + Copy = fun ({BlockStart, BlockEnd}) -> + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file_handle_cache:position(SourceHdl, BlockStart), + {ok, BSize} = + file_handle_cache:copy(SourceHdl, DestinationHdl, BSize) + end, + case + lists:foldl( + fun (#msg_location { guid = Guid, offset = Offset, + total_size = TotalSize }, + {CurOffset, Block = {BlockStart, BlockEnd}}) -> + %% CurOffset is in the DestinationFile. + %% Offset, BlockStart and BlockEnd are in the SourceFile + %% update MsgLocation to reflect change of file and offset + ok = Index:update_fields(Guid, + [{#msg_location.file, Destination}, + {#msg_location.offset, CurOffset}], + IndexState), + {CurOffset + TotalSize, + case BlockEnd of + undefined -> + %% base case, called only for the first list elem + {Offset, Offset + TotalSize}; + Offset -> + %% extend the current block because the + %% next msg follows straight on + {BlockStart, BlockEnd + TotalSize}; + _ -> + %% found a gap, so actually do the work for + %% the previous block + Copy(Block), + {Offset, Offset + TotalSize} + end} + end, {InitOffset, {undefined, undefined}}, WorkList) of + {FinalOffset, Block} -> + case WorkList of + [] -> ok; + _ -> Copy(Block), %% do the last remaining block + ok = file_handle_cache:sync(DestinationHdl) + end; + {FinalOffsetZ, _Block} -> + {gc_error, [{expected, FinalOffset}, + {got, FinalOffsetZ}, + {destination, Destination}]} + end. diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl new file mode 100644 index 0000000000..1eb3c11fb5 --- /dev/null +++ b/src/rabbit_msg_store_ets_index.erl @@ -0,0 +1,90 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. 
You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_ets_index). + +-behaviour(rabbit_msg_store_index). + +-export([new/1, recover/1, + lookup/2, insert/2, update/2, update_fields/3, delete/2, + delete_by_file/2, terminate/1]). + +-define(MSG_LOC_NAME, rabbit_msg_store_ets_index). +-define(FILENAME, "msg_store_index.ets"). + +-include("rabbit_msg_store_index.hrl"). + +-record(state, { table, dir }). + +new(Dir) -> + file:delete(filename:join(Dir, ?FILENAME)), + Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.guid}]), + #state { table = Tid, dir = Dir }. + +recover(Dir) -> + Path = filename:join(Dir, ?FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {ok, #state { table = Tid, dir = Dir }}; + Error -> Error + end. + +lookup(Key, State) -> + case ets:lookup(State #state.table, Key) of + [] -> not_found; + [Entry] -> Entry + end. + +insert(Obj, State) -> + true = ets:insert_new(State #state.table, Obj), + ok. + +update(Obj, State) -> + true = ets:insert(State #state.table, Obj), + ok. + +update_fields(Key, Updates, State) -> + true = ets:update_element(State #state.table, Key, Updates), + ok. + +delete(Key, State) -> + true = ets:delete(State #state.table, Key), + ok. + +delete_by_file(File, State) -> + MatchHead = #msg_location { file = File, _ = '_' }, + ets:select_delete(State #state.table, [{MatchHead, [], [true]}]), + ok. + +terminate(#state { table = MsgLocations, dir = Dir }) -> + ok = ets:tab2file(MsgLocations, filename:join(Dir, ?FILENAME), + [{extended_info, [object_count]}]), + ets:delete(MsgLocations). diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl new file mode 100644 index 0000000000..c7948b7eb3 --- /dev/null +++ b/src/rabbit_msg_store_gc.erl @@ -0,0 +1,141 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. 
+%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_gc). + +-behaviour(gen_server2). + +-export([start_link/4, gc/3, no_readers/2, stop/1]). + +-export([set_maximum_since_use/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(gcstate, + {dir, + index_state, + index_module, + parent, + file_summary_ets, + scheduled + }). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/4 :: (file:filename(), any(), atom(), ets:tid()) -> + rabbit_types:ok_pid_or_error()). +-spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok'). +-spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok'). +-spec(stop/1 :: (pid()) -> 'ok'). +-spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> + gen_server2:start_link( + ?MODULE, [self(), Dir, IndexState, IndexModule, FileSummaryEts], + [{timeout, infinity}]). + +gc(Server, Source, Destination) -> + gen_server2:cast(Server, {gc, Source, Destination}). + +no_readers(Server, File) -> + gen_server2:cast(Server, {no_readers, File}). + +stop(Server) -> + gen_server2:call(Server, stop, infinity). + +set_maximum_since_use(Pid, Age) -> + gen_server2:pcast(Pid, 8, {set_maximum_since_use, Age}). + +%%---------------------------------------------------------------------------- + +init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, + [self()]), + {ok, #gcstate { dir = Dir, + index_state = IndexState, + index_module = IndexModule, + parent = Parent, + file_summary_ets = FileSummaryEts, + scheduled = undefined }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast({gc, Source, Destination}, + State = #gcstate { scheduled = undefined }) -> + {noreply, attempt_gc(State #gcstate { scheduled = {Source, Destination} }), + hibernate}; + +handle_cast({no_readers, File}, + State = #gcstate { scheduled = {Source, Destination} }) + when File =:= Source orelse File =:= Destination -> + {noreply, attempt_gc(State), hibernate}; + +handle_cast({no_readers, _File}, State) -> + {noreply, State, hibernate}; + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + {noreply, State, hibernate}. + +handle_info(Info, State) -> + {stop, {unhandled_info, Info}, State}. + +terminate(_Reason, State) -> + State. 
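A {gc, Source, Destination} request is attempted immediately; if rabbit_msg_store:gc/3 finds live readers on either file it returns concurrent_readers and the request stays parked in the 'scheduled' field until a no_readers notification for one of the two files triggers a retry. A minimal sketch of that decision, using a hypothetical helper name that is not part of the patch:

    %% illustrative only: mirrors the readers check rabbit_msg_store:gc/3 performs
    maybe_attempt_gc(SrcReaders, DstReaders)
      when SrcReaders =:= 0 andalso DstReaders =:= 0 -> attempt;
    maybe_attempt_gc(_SrcReaders, _DstReaders)       -> wait_for_no_readers.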
+ +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +attempt_gc(State = #gcstate { dir = Dir, + index_state = IndexState, + index_module = Index, + parent = Parent, + file_summary_ets = FileSummaryEts, + scheduled = {Source, Destination} }) -> + case rabbit_msg_store:gc(Source, Destination, + {FileSummaryEts, Dir, Index, IndexState}) of + concurrent_readers -> State; + Reclaimed -> ok = rabbit_msg_store:gc_done( + Parent, Reclaimed, Source, Destination), + State #gcstate { scheduled = undefined } + end. diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl new file mode 100644 index 0000000000..0ed64a9d81 --- /dev/null +++ b/src/rabbit_msg_store_index.erl @@ -0,0 +1,47 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_index). + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [{new, 1}, + {recover, 1}, + {lookup, 2}, + {insert, 2}, + {update, 2}, + {update_fields, 3}, + {delete, 2}, + {delete_by_file, 2}, + {terminate, 1}]; +behaviour_info(_Other) -> + undefined. diff --git a/src/rabbit_multi.erl b/src/rabbit_multi.erl index 5db1d77a32..c7a5a60027 100644 --- a/src/rabbit_multi.erl +++ b/src/rabbit_multi.erl @@ -93,7 +93,14 @@ usage() -> action(start_all, [NodeCount], RpcTimeout) -> io:format("Starting all nodes...~n", []), application:load(rabbit), - NodeName = rabbit_misc:nodeparts(getenv("RABBITMQ_NODENAME")), + {_NodeNamePrefix, NodeHost} = NodeName = rabbit_misc:nodeparts( + getenv("RABBITMQ_NODENAME")), + case net_adm:names(NodeHost) of + {error, EpmdReason} -> + throw({cannot_connect_to_epmd, NodeHost, EpmdReason}); + {ok, _} -> + ok + end, {NodePids, Running} = case list_to_integer(NodeCount) of 1 -> {NodePid, Started} = start_node(rabbit_misc:makenode(NodeName), @@ -309,9 +316,9 @@ is_dead(Pid) -> {win32, fun () -> Res = os:cmd("tasklist /nh /fi \"pid eq " ++ PidS ++ "\""), - case regexp:first_match(Res, "erl.exe") of - {match, _, _} -> false; - _ -> true + case re:run(Res, "erl\\.exe", [{capture, none}]) of + match -> false; + _ -> true end end}]). 
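The rabbit_multi change above replaces the deprecated regexp module with re. A hedged sketch of the essential difference: re:run/3 with {capture, none} returns a bare match | nomatch, which is all is_dead/1 needs (is_erl_listed/1 is a hypothetical name, not in the patch):

    is_erl_listed(TasklistOutput) ->
        case re:run(TasklistOutput, "erl\\.exe", [{capture, none}]) of
            match   -> true;
            nomatch -> false
        end.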
diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index 3a3357ba9d..08272afed4 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -107,7 +107,15 @@ boot_ssl() -> ok; {ok, SslListeners} -> ok = rabbit_misc:start_applications([crypto, public_key, ssl]), - {ok, SslOpts} = application:get_env(ssl_options), + {ok, SslOptsConfig} = application:get_env(ssl_options), + SslOpts = + case proplists:get_value(verify, SslOptsConfig, verify_none) of + verify_none -> SslOptsConfig; + verify_peer -> [{verify_fun, fun([]) -> true; + ([_|_]) -> false + end} + | SslOptsConfig] + end, [start_ssl_listener(Host, Port, SslOpts) || {Host, Port} <- SslListeners], ok end. @@ -118,7 +126,7 @@ start() -> {rabbit_tcp_client_sup, {tcp_client_sup, start_link, [{local, rabbit_tcp_client_sup}, - {rabbit_reader,start_link,[]}]}, + {rabbit_connection_sup,start_link,[]}]}, transient, infinity, supervisor, [tcp_client_sup]}), ok. @@ -204,10 +212,10 @@ on_node_down(Node) -> ok = mnesia:dirty_delete(rabbit_listener, Node). start_client(Sock, SockTransform) -> - {ok, Child} = supervisor:start_child(rabbit_tcp_client_sup, []), - ok = rabbit_net:controlling_process(Sock, Child), - Child ! {go, Sock, SockTransform}, - Child. + {ok, _Child, Reader} = supervisor:start_child(rabbit_tcp_client_sup, []), + ok = rabbit_net:controlling_process(Sock, Reader), + Reader ! {go, Sock, SockTransform}, + Reader. start_client(Sock) -> start_client(Sock, fun (S) -> {ok, S} end). @@ -230,8 +238,9 @@ start_ssl_client(SslOpts, Sock) -> end). connections() -> - [Pid || {_, Pid, _, _} <- supervisor:which_children( - rabbit_tcp_client_sup)]. + [rabbit_connection_sup:reader(ConnSup) || + {_, ConnSup, supervisor, _} + <- supervisor:which_children(rabbit_tcp_client_sup)]. connection_info_keys() -> rabbit_reader:info_keys(). @@ -242,8 +251,7 @@ connection_info_all() -> cmap(fun (Q) -> connection_info(Q) end). connection_info_all(Items) -> cmap(fun (Q) -> connection_info(Q, Items) end). close_connection(Pid, Explanation) -> - case lists:any(fun ({_, ChildPid, _, _}) -> ChildPid =:= Pid end, - supervisor:which_children(rabbit_tcp_client_sup)) of + case lists:member(Pid, connections()) of true -> rabbit_reader:shutdown(Pid, Explanation); false -> throw({error, {not_a_connection_pid, Pid}}) end. diff --git a/src/rabbit_persister.erl b/src/rabbit_persister.erl index a427b13548..66e5cf6311 100644 --- a/src/rabbit_persister.erl +++ b/src/rabbit_persister.erl @@ -73,9 +73,8 @@ {deliver, pmsg()} | {ack, pmsg()}). --spec(start_link/1 :: - ([rabbit_amqqueue:name()]) - -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(start_link/1 :: ([rabbit_amqqueue:name()]) -> + rabbit_types:ok_pid_or_error()). -spec(transaction/1 :: ([work_item()]) -> 'ok'). -spec(extend_transaction/2 :: ({rabbit_types:txn(), rabbit_amqqueue:name()}, [work_item()]) diff --git a/src/rabbit_plugin_activator.erl b/src/rabbit_plugin_activator.erl index ef3c5cc250..b23776cd74 100644 --- a/src/rabbit_plugin_activator.erl +++ b/src/rabbit_plugin_activator.erl @@ -35,7 +35,6 @@ -define(DefaultPluginDir, "plugins"). -define(DefaultUnpackedPluginDir, "priv/plugins"). --define(DefaultRabbitEBin, "ebin"). -define(BaseApps, [rabbit]). 
%%---------------------------------------------------------------------------- @@ -52,23 +51,22 @@ %%---------------------------------------------------------------------------- start() -> + io:format("Activating RabbitMQ plugins ...~n"), %% Ensure Rabbit is loaded so we can access it's environment application:load(rabbit), %% Determine our various directories PluginDir = get_env(plugins_dir, ?DefaultPluginDir), UnpackedPluginDir = get_env(plugins_expand_dir, ?DefaultUnpackedPluginDir), - RabbitEBin = get_env(rabbit_ebin, ?DefaultRabbitEBin), - RootName = RabbitEBin ++ "/rabbit", + RootName = UnpackedPluginDir ++ "/rabbit", %% Unpack any .ez plugins unpack_ez_plugins(PluginDir, UnpackedPluginDir), %% Build a list of required apps based on the fixed set, and any plugins - RequiredApps = ?BaseApps ++ - find_plugins(PluginDir) ++ - find_plugins(UnpackedPluginDir), + PluginApps = find_plugins(PluginDir) ++ find_plugins(UnpackedPluginDir), + RequiredApps = ?BaseApps ++ PluginApps, %% Build the entire set of dependencies - this will load the %% applications along the way @@ -79,7 +77,7 @@ start() -> AppList end, AppVersions = [determine_version(App) || App <- AllApps], - {rabbit, RabbitVersion} = proplists:lookup(rabbit, AppVersions), + RabbitVersion = proplists:get_value(rabbit, AppVersions), %% Build the overall release descriptor RDesc = {release, @@ -87,7 +85,7 @@ start() -> {erts, erlang:system_info(version)}, AppVersions}, - %% Write it out to ebin/rabbit.rel + %% Write it out to $RABBITMQ_PLUGINS_EXPAND_DIR/rabbit.rel file:write_file(RootName ++ ".rel", io_lib:format("~p.~n", [RDesc])), %% Compile the script @@ -132,6 +130,10 @@ start() -> ok -> ok; error -> error("failed to compile boot script file ~s", [ScriptFile]) end, + io:format("~w plugins activated:~n", [length(PluginApps)]), + [io:format("* ~s-~s~n", [App, proplists:get_value(App, AppVersions)]) + || App <- PluginApps], + io:nl(), halt(), ok. @@ -149,29 +151,33 @@ determine_version(App) -> {ok, Vsn} = application:get_key(App, vsn), {App, Vsn}. -assert_dir(Dir) -> - case filelib:is_dir(Dir) of - true -> ok; - false -> ok = filelib:ensure_dir(Dir), - ok = file:make_dir(Dir) - end. - -delete_dir(Dir) -> - case filelib:is_dir(Dir) of +delete_recursively(Fn) -> + case filelib:is_dir(Fn) and not(is_symlink(Fn)) of true -> - case file:list_dir(Dir) of + case file:list_dir(Fn) of {ok, Files} -> - [case Dir ++ "/" ++ F of - Fn -> - case filelib:is_dir(Fn) and not(is_symlink(Fn)) of - true -> delete_dir(Fn); - false -> file:delete(Fn) - end - end || F <- Files] - end, - ok = file:del_dir(Dir); + case lists:foldl(fun ( Fn1, ok) -> delete_recursively( + Fn ++ "/" ++ Fn1); + (_Fn1, Err) -> Err + end, ok, Files) of + ok -> case file:del_dir(Fn) of + ok -> ok; + {error, E} -> {error, + {cannot_delete, Fn, E}} + end; + Err -> Err + end; + {error, E} -> + {error, {cannot_list_files, Fn, E}} + end; false -> - ok + case filelib:is_file(Fn) of + true -> case file:delete(Fn) of + ok -> ok; + {error, E} -> {error, {cannot_delete, Fn, E}} + end; + false -> ok + end end. is_symlink(Name) -> @@ -180,13 +186,18 @@ is_symlink(Name) -> _ -> false end. -unpack_ez_plugins(PluginSrcDir, PluginDestDir) -> +unpack_ez_plugins(SrcDir, DestDir) -> %% Eliminate the contents of the destination directory - delete_dir(PluginDestDir), - - assert_dir(PluginDestDir), - [unpack_ez_plugin(PluginName, PluginDestDir) || - PluginName <- filelib:wildcard(PluginSrcDir ++ "/*.ez")]. 
+ case delete_recursively(DestDir) of + ok -> ok; + {error, E} -> error("Could not delete dir ~s (~p)", [DestDir, E]) + end, + case filelib:ensure_dir(DestDir ++ "/") of + ok -> ok; + {error, E2} -> error("Could not create dir ~s (~p)", [DestDir, E2]) + end, + [unpack_ez_plugin(PluginName, DestDir) || + PluginName <- filelib:wildcard(SrcDir ++ "/*.ez")]. unpack_ez_plugin(PluginFn, PluginDestDir) -> zip:unzip(PluginFn, [{cwd, PluginDestDir}]), @@ -245,8 +256,8 @@ post_process_script(ScriptFile) -> {error, {failed_to_load_script, Reason}} end. -process_entry(Entry = {apply,{application,start_boot,[stdlib,permanent]}}) -> - [Entry, {apply,{rabbit,prepare,[]}}]; +process_entry(Entry = {apply,{application,start_boot,[rabbit,permanent]}}) -> + [{apply,{rabbit,prepare,[]}}, Entry]; process_entry(Entry) -> [Entry]. diff --git a/src/rabbit_queue_collector.erl b/src/rabbit_queue_collector.erl index ea3768d4b4..0a49b94d09 100644 --- a/src/rabbit_queue_collector.erl +++ b/src/rabbit_queue_collector.erl @@ -33,7 +33,7 @@ -behaviour(gen_server). --export([start_link/0, register/2, delete_all/1, shutdown/1]). +-export([start_link/0, register/2, delete_all/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -46,7 +46,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> rabbit_types:ok(pid())). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(register/2 :: (pid(), rabbit_types:amqqueue()) -> 'ok'). -spec(delete_all/1 :: (pid()) -> 'ok'). @@ -63,9 +63,6 @@ register(CollectorPid, Q) -> delete_all(CollectorPid) -> gen_server:call(CollectorPid, delete_all, infinity). -shutdown(CollectorPid) -> - gen_server:call(CollectorPid, shutdown, infinity). - %%---------------------------------------------------------------------------- init([]) -> @@ -87,13 +84,10 @@ handle_call(delete_all, _From, State = #state{queues = Queues}) -> rabbit_amqqueue:delete(Q, false, false) end) || {MonitorRef, Q} <- dict:to_list(Queues)], - {reply, ok, State}; - -handle_call(shutdown, _From, State) -> - {stop, normal, ok, State}. + {reply, ok, State}. -handle_cast(_Msg, State) -> - {noreply, State}. +handle_cast(Msg, State) -> + {stop, {unhandled_cast, Msg}, State}. handle_info({'DOWN', MonitorRef, process, _DownPid, _Reason}, State = #state{queues = Queues}) -> diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl new file mode 100644 index 0000000000..d6b8bb2889 --- /dev/null +++ b/src/rabbit_queue_index.erl @@ -0,0 +1,932 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. 
Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_index). + +-export([init/4, terminate/2, delete_and_terminate/1, publish/4, + deliver/2, ack/2, sync/2, flush/1, read/3, + next_segment_boundary/1, bounds/1, recover/1]). + +-define(CLEAN_FILENAME, "clean.dot"). + +%%---------------------------------------------------------------------------- + +%% The queue index is responsible for recording the order of messages +%% within a queue on disk. +%% +%% Because of the fact that the queue can decide at any point to send +%% a queue entry to disk, you can not rely on publishes appearing in +%% order. The only thing you can rely on is a message being published, +%% then delivered, then ack'd. +%% +%% In order to be able to clean up ack'd messages, we write to segment +%% files. These files have a fixed maximum size: ?SEGMENT_ENTRY_COUNT +%% publishes, delivers and acknowledgements. They are numbered, and so +%% it is known that the 0th segment contains messages 0 -> +%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages +%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. As +%% such, in the segment files, we only refer to message sequence ids +%% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a +%% fixed size. +%% +%% However, transient messages which are not sent to disk at any point +%% will cause gaps to appear in segment files. Therefore, we delete a +%% segment file whenever the number of publishes == number of acks +%% (note that although it is not fully enforced, it is assumed that a +%% message will never be ackd before it is delivered, thus this test +%% also implies == number of delivers). In practise, this does not +%% cause disk churn in the pathological case because of the journal +%% and caching (see below). +%% +%% Because of the fact that publishes, delivers and acks can occur all +%% over, we wish to avoid lots of seeking. Therefore we have a fixed +%% sized journal to which all actions are appended. When the number of +%% entries in this journal reaches max_journal_entries, the journal +%% entries are scattered out to their relevant files, and the journal +%% is truncated to zero size. Note that entries in the journal must +%% carry the full sequence id, thus the format of entries in the +%% journal is different to that in the segments. +%% +%% The journal is also kept fully in memory, pre-segmented: the state +%% contains a mapping from segment numbers to state-per-segment (this +%% state is held for all segments which have been "seen": thus a +%% segment which has been read but has no pending entries in the +%% journal is still held in this mapping. Also note that a dict is +%% used for this mapping, not an array because with an array, you will +%% always have entries from 0). Actions are stored directly in this +%% state. Thus at the point of flushing the journal, firstly no +%% reading from disk is necessary, but secondly if the known number of +%% acks and publishes in a segment are equal, given the known state of +%% the segment file combined with the journal, no writing needs to be +%% done to the segment file either (in fact it is deleted if it exists +%% at all). 
This is safe given that the set of acks is a subset of the +%% set of publishes. When it's necessary to sync messages because of +%% transactions, it's only necessary to fsync on the journal: when +%% entries are distributed from the journal to segment files, those +%% segments appended to are fsync'd prior to the journal being +%% truncated. +%% +%% This module is also responsible for scanning the queue index files +%% and seeding the message store on start up. +%% +%% Note that in general, the representation of a message's state as +%% the tuple: {('no_pub'|{Guid, IsPersistent}), ('del'|'no_del'), +%% ('ack'|'no_ack')} is richer than strictly necessary for most +%% operations. However, for startup, and to ensure the safe and +%% correct combination of journal entries with entries read from the +%% segment on disk, this richer representation vastly simplifies and +%% clarifies the code. +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. +%% +%%---------------------------------------------------------------------------- + +%% ---- Journal details ---- + +-define(JOURNAL_FILENAME, "journal.jif"). + +-define(PUB_PERSIST_JPREFIX, 2#00). +-define(PUB_TRANS_JPREFIX, 2#01). +-define(DEL_JPREFIX, 2#10). +-define(ACK_JPREFIX, 2#11). +-define(JPREFIX_BITS, 2). +-define(SEQ_BYTES, 8). +-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). + +%% ---- Segment details ---- + +-define(SEGMENT_EXTENSION, ".idx"). + +%% TODO: The segment size would be configurable, but deriving all the +%% other values is quite hairy and quite possibly noticably less +%% efficient, depending on how clever the compiler is when it comes to +%% binary generation/matching with constant vs variable lengths. + +-define(REL_SEQ_BITS, 14). +-define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). + +%% seq only is binary 00 followed by 14 bits of rel seq id +%% (range: 0 - 16383) +-define(REL_SEQ_ONLY_PREFIX, 00). +-define(REL_SEQ_ONLY_PREFIX_BITS, 2). +-define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2). + +%% publish record is binary 1 followed by a bit for is_persistent, +%% then 14 bits of rel seq id, and 128 bits of md5sum msg id +-define(PUBLISH_PREFIX, 1). +-define(PUBLISH_PREFIX_BITS, 1). + +-define(GUID_BYTES, 16). %% md5sum is 128 bit or 16 bytes +-define(GUID_BITS, (?GUID_BYTES * 8)). +%% 16 bytes for md5sum + 2 for seq, bits and prefix +-define(PUBLISH_RECORD_LENGTH_BYTES, ?GUID_BYTES + 2). + +%% 1 publish, 1 deliver, 1 ack per msg +-define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT * + (?PUBLISH_RECORD_LENGTH_BYTES + + (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). + +%% ---- misc ---- + +-define(PUB, {_, _}). %% {Guid, IsPersistent} + +-define(READ_MODE, [binary, raw, read, {read_ahead, ?SEGMENT_TOTAL_SIZE}]). + +%%---------------------------------------------------------------------------- + +-record(qistate, { dir, segments, journal_handle, dirty_count, + max_journal_entries }). + +-record(segment, { num, path, journal_entries, unacked }). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(hdl() :: ('undefined' | any())). +-type(segment() :: ('undefined' | + #segment { num :: non_neg_integer(), + path :: file:filename(), + journal_entries :: array(), + unacked :: non_neg_integer() + })). +-type(seq_id() :: integer()). +-type(seg_dict() :: {dict:dictionary(), [segment()]}). 
+-type(qistate() :: #qistate { dir :: file:filename(), + segments :: 'undefined' | seg_dict(), + journal_handle :: hdl(), + dirty_count :: integer(), + max_journal_entries :: non_neg_integer() + }). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})), + A}). + +-spec(init/4 :: (rabbit_amqqueue:name(), boolean(), boolean(), + fun ((rabbit_guid:guid()) -> boolean())) -> + {'undefined' | non_neg_integer(), [any()], qistate()}). +-spec(terminate/2 :: ([any()], qistate()) -> qistate()). +-spec(delete_and_terminate/1 :: (qistate()) -> qistate()). +-spec(publish/4 :: (rabbit_guid:guid(), seq_id(), boolean(), qistate()) -> + qistate()). +-spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(ack/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(flush/1 :: (qistate()) -> qistate()). +-spec(read/3 :: (seq_id(), seq_id(), qistate()) -> + {[{rabbit_guid:guid(), seq_id(), boolean(), boolean()}], + qistate()}). +-spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). +-spec(bounds/1 :: (qistate()) -> + {non_neg_integer(), non_neg_integer(), qistate()}). +-spec(recover/1 :: + ([rabbit_amqqueue:name()]) -> {[[any()]], startup_fun_state()}). + +-endif. + + +%%---------------------------------------------------------------------------- +%% public API +%%---------------------------------------------------------------------------- + +init(Name, Recover, MsgStoreRecovered, ContainsCheckFun) -> + State = #qistate { dir = Dir } = blank_state(Name, not Recover), + Terms = case read_shutdown_terms(Dir) of + {error, _} -> []; + {ok, Terms1} -> Terms1 + end, + CleanShutdown = detect_clean_shutdown(Dir), + {Count, State1} = + case CleanShutdown andalso MsgStoreRecovered of + true -> RecoveredCounts = proplists:get_value(segments, Terms, []), + init_clean(RecoveredCounts, State); + false -> init_dirty(CleanShutdown, ContainsCheckFun, State) + end, + {Count, Terms, State1}. + +terminate(Terms, State) -> + {SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), + store_clean_shutdown([{segments, SegmentCounts} | Terms], Dir), + State1. + +delete_and_terminate(State) -> + {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), + ok = rabbit_misc:recursive_delete([Dir]), + State1. + +publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> + ?GUID_BYTES = size(Guid), + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append( + JournalHdl, [<<(case IsPersistent of + true -> ?PUB_PERSIST_JPREFIX; + false -> ?PUB_TRANS_JPREFIX + end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid]), + maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)). + +deliver(SeqIds, State) -> + deliver_or_ack(del, SeqIds, State). + +ack(SeqIds, State) -> + deliver_or_ack(ack, SeqIds, State). + +sync([], State) -> + State; +sync(_SeqIds, State = #qistate { journal_handle = undefined }) -> + State; +sync(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> + %% The SeqIds here contains the SeqId of every publish and ack in + %% the transaction. Ideally we should go through these seqids and + %% only sync the journal if the pubs or acks appear in the + %% journal. However, this would be complex to do, and given that + %% the variable queue publishes and acks to the qi, and then + %% syncs, all in one operation, there is no possibility of the + %% seqids not being in the journal, provided the transaction isn't + %% emptied (handled above anyway). 
+ ok = file_handle_cache:sync(JournalHdl), + State. + +flush(State = #qistate { dirty_count = 0 }) -> State; +flush(State) -> flush_journal(State). + +read(StartEnd, StartEnd, State) -> + {[], State}; +read(Start, End, State = #qistate { segments = Segments, + dir = Dir }) when Start =< End -> + %% Start is inclusive, End is exclusive. + LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), + UpperB = {EndSeg, _EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End - 1), + {Messages, Segments1} = + lists:foldr(fun (Seg, Acc) -> + read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir) + end, {[], Segments}, lists:seq(StartSeg, EndSeg)), + {Messages, State #qistate { segments = Segments1 }}. + +next_segment_boundary(SeqId) -> + {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + reconstruct_seq_id(Seg + 1, 0). + +bounds(State = #qistate { segments = Segments }) -> + %% This is not particularly efficient, but only gets invoked on + %% queue initialisation. + SegNums = lists:sort(segment_nums(Segments)), + %% Don't bother trying to figure out the lowest seq_id, merely the + %% seq_id of the start of the lowest segment. That seq_id may not + %% actually exist, but that's fine. The important thing is that + %% the segment exists and the seq_id reported is on a segment + %% boundary. + %% + %% We also don't really care about the max seq_id. Just start the + %% next segment: it makes life much easier. + %% + %% SegNums is sorted, ascending. + {LowSeqId, NextSeqId} = + case SegNums of + [] -> {0, 0}; + [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), + reconstruct_seq_id(1 + lists:last(SegNums), 0)} + end, + {LowSeqId, NextSeqId, State}. + +recover(DurableQueues) -> + DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || + Queue <- DurableQueues ]), + QueuesDir = queues_dir(), + Directories = case file:list_dir(QueuesDir) of + {ok, Entries} -> [ Entry || Entry <- Entries, + filelib:is_dir( + filename:join( + QueuesDir, Entry)) ]; + {error, enoent} -> [] + end, + DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), + {DurableQueueNames, DurableTerms} = + lists:foldl( + fun (QueueDir, {DurableAcc, TermsAcc}) -> + case sets:is_element(QueueDir, DurableDirectories) of + true -> + TermsAcc1 = + case read_shutdown_terms( + filename:join(QueuesDir, QueueDir)) of + {error, _} -> TermsAcc; + {ok, Terms} -> [Terms | TermsAcc] + end, + {[dict:fetch(QueueDir, DurableDict) | DurableAcc], + TermsAcc1}; + false -> + Dir = filename:join(queues_dir(), QueueDir), + ok = rabbit_misc:recursive_delete([Dir]), + {DurableAcc, TermsAcc} + end + end, {[], []}, Directories), + {DurableTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. + +%%---------------------------------------------------------------------------- +%% startup and shutdown +%%---------------------------------------------------------------------------- + +blank_state(QueueName, EnsureFresh) -> + StrName = queue_name_to_dir_name(QueueName), + Dir = filename:join(queues_dir(), StrName), + ok = case EnsureFresh of + true -> false = filelib:is_file(Dir), %% is_file == is file or dir + ok; + false -> ok + end, + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + #qistate { dir = Dir, + segments = segments_new(), + journal_handle = undefined, + dirty_count = 0, + max_journal_entries = MaxJournal }. 
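A small worked example of the segment arithmetic used by the functions above, assuming the default ?SEGMENT_ENTRY_COUNT of 16384 (the function name is illustrative only):

    seq_id_example() ->
        SegmentEntryCount = 16384,
        SeqId  = 20000,
        Seg    = SeqId div SegmentEntryCount,          %% 1
        RelSeq = SeqId rem SegmentEntryCount,          %% 3616
        NextBoundary = (Seg + 1) * SegmentEntryCount,  %% 32768, cf. next_segment_boundary/1
        {Seg, RelSeq, NextBoundary}.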
+ +detect_clean_shutdown(Dir) -> + case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of + ok -> true; + {error, enoent} -> false + end. + +read_shutdown_terms(Dir) -> + rabbit_misc:read_term_file(filename:join(Dir, ?CLEAN_FILENAME)). + +store_clean_shutdown(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). + +init_clean(RecoveredCounts, State) -> + %% Load the journal. Since this is a clean recovery this (almost) + %% gets us back to where we were on shutdown. + State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State), + %% The journal loading only creates records for segments touched + %% by the journal, and the counts are based on the journal entries + %% only. We need *complete* counts for *all* segments. By an + %% amazing coincidence we stored that information on shutdown. + Segments1 = + lists:foldl( + fun ({Seg, UnackedCount}, SegmentsN) -> + Segment = segment_find_or_new(Seg, Dir, SegmentsN), + segment_store(Segment #segment { unacked = UnackedCount }, + SegmentsN) + end, Segments, RecoveredCounts), + %% the counts above include transient messages, which would be the + %% wrong thing to return + {undefined, State1 # qistate { segments = Segments1 }}. + +init_dirty(CleanShutdown, ContainsCheckFun, State) -> + %% Recover the journal completely. This will also load segments + %% which have entries in the journal and remove duplicates. The + %% counts will correctly reflect the combination of the segment + %% and the journal. + State1 = #qistate { dir = Dir, segments = Segments } = + recover_journal(State), + {Segments1, Count} = + %% Load each segment in turn and filter out messages that are + %% not in the msg_store, by adding acks to the journal. These + %% acks only go to the RAM journal as it doesn't matter if we + %% lose them. Also mark delivered if not clean shutdown. Also + %% find the number of unacked messages. + lists:foldl( + fun (Seg, {Segments2, CountAcc}) -> + Segment = #segment { unacked = UnackedCount } = + recover_segment(ContainsCheckFun, CleanShutdown, + segment_find_or_new(Seg, Dir, Segments2)), + {segment_store(Segment, Segments2), CountAcc + UnackedCount} + end, {Segments, 0}, all_segment_nums(State1)), + %% Unconditionally flush since the dirty_count doesn't get updated + %% by the above foldl. + State2 = flush_journal(State1 #qistate { segments = Segments1 }), + {Count, State2}. + +terminate(State = #qistate { journal_handle = JournalHdl, + segments = Segments }) -> + ok = case JournalHdl of + undefined -> ok; + _ -> file_handle_cache:close(JournalHdl) + end, + SegmentCounts = + segment_fold( + fun (#segment { num = Seg, unacked = UnackedCount }, Acc) -> + [{Seg, UnackedCount} | Acc] + end, [], Segments), + {SegmentCounts, State #qistate { journal_handle = undefined, + segments = undefined }}. + +recover_segment(ContainsCheckFun, CleanShutdown, + Segment = #segment { journal_entries = JEntries }) -> + {SegEntries, UnackedCount} = load_segment(false, Segment), + {SegEntries1, UnackedCountDelta} = + segment_plus_journal(SegEntries, JEntries), + array:sparse_foldl( + fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment1) -> + recover_message(ContainsCheckFun(Guid), CleanShutdown, + Del, RelSeq, Segment1) + end, + Segment #segment { unacked = UnackedCount + UnackedCountDelta }, + SegEntries1). 
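On a dirty start the index rescans every segment and asks the message store, via the ContainsCheckFun passed to init/4, whether each message still exists. A hedged sketch of what such a check function could look like when closed over a set of known guids (the helper name is hypothetical):

    contains_check_fun(KnownGuids) ->
        fun (Guid) -> sets:is_element(Guid, KnownGuids) end.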
+ +recover_message( true, true, _Del, _RelSeq, Segment) -> + Segment; +recover_message( true, false, del, _RelSeq, Segment) -> + Segment; +recover_message( true, false, no_del, RelSeq, Segment) -> + add_to_journal(RelSeq, del, Segment); +recover_message(false, _, del, RelSeq, Segment) -> + add_to_journal(RelSeq, ack, Segment); +recover_message(false, _, no_del, RelSeq, Segment) -> + add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)). + +queue_name_to_dir_name(Name = #resource { kind = queue }) -> + <<Num:128>> = erlang:md5(term_to_binary(Name)), + lists:flatten(io_lib:format("~.36B", [Num])). + +queues_dir() -> + filename:join(rabbit_mnesia:dir(), "queues"). + +%%---------------------------------------------------------------------------- +%% msg store startup delta function +%%---------------------------------------------------------------------------- + +queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> + {ok, Gatherer} = gatherer:start_link(), + [begin + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> queue_index_walker_reader(QueueName, Gatherer) + end) + end || QueueName <- DurableQueues], + queue_index_walker({next, Gatherer}); + +queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> + case gatherer:out(Gatherer) of + empty -> + ok = gatherer:stop(Gatherer), + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), + finished; + {value, {Guid, Count}} -> + {Guid, Count, {next, Gatherer}} + end. + +queue_index_walker_reader(QueueName, Gatherer) -> + State = #qistate { segments = Segments, dir = Dir } = + recover_journal(blank_state(QueueName, false)), + [ok = segment_entries_foldr( + fun (_RelSeq, {{Guid, true}, _IsDelivered, no_ack}, ok) -> + gatherer:in(Gatherer, {Guid, 1}); + (_RelSeq, _Value, Acc) -> + Acc + end, ok, segment_find_or_new(Seg, Dir, Segments)) || + Seg <- all_segment_nums(State)], + {_SegmentCounts, _State} = terminate(State), + ok = gatherer:finish(Gatherer). + +%%---------------------------------------------------------------------------- +%% journal manipulation +%%---------------------------------------------------------------------------- + +add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, + segments = Segments, + dir = Dir }) -> + {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + Segment = segment_find_or_new(Seg, Dir, Segments), + Segment1 = add_to_journal(RelSeq, Action, Segment), + State #qistate { dirty_count = DCount + 1, + segments = segment_store(Segment1, Segments) }; + +add_to_journal(RelSeq, Action, + Segment = #segment { journal_entries = JEntries, + unacked = UnackedCount }) -> + Segment1 = Segment #segment { + journal_entries = add_to_journal(RelSeq, Action, JEntries) }, + case Action of + del -> Segment1; + ack -> Segment1 #segment { unacked = UnackedCount - 1 }; + ?PUB -> Segment1 #segment { unacked = UnackedCount + 1 } + end; + +add_to_journal(RelSeq, Action, JEntries) -> + Val = case array:get(RelSeq, JEntries) of + undefined -> + case Action of + ?PUB -> {Action, no_del, no_ack}; + del -> {no_pub, del, no_ack}; + ack -> {no_pub, no_del, ack} + end; + ({Pub, no_del, no_ack}) when Action == del -> + {Pub, del, no_ack}; + ({Pub, Del, no_ack}) when Action == ack -> + {Pub, Del, ack} + end, + array:set(RelSeq, Val, JEntries). + +maybe_flush_journal(State = #qistate { dirty_count = DCount, + max_journal_entries = MaxJournal }) + when DCount > MaxJournal -> + flush_journal(State); +maybe_flush_journal(State) -> + State. 
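A worked example (illustrative; the wrapper name is not part of the patch) of the per-entry triple maintained by add_to_journal/3: a publish, a delivery and an ack for the same relative sequence number accumulate into a fully acked entry, which write_entry_to_segment/3 then skips at flush time:

    journal_entry_example() ->
        Guid = <<0:128>>,                          %% any 16-byte guid
        J0 = array:new([{default, undefined}]),
        J1 = add_to_journal(5, {Guid, true}, J0),  %% {{Guid,true}, no_del, no_ack}
        J2 = add_to_journal(5, del, J1),           %% {{Guid,true}, del,   no_ack}
        J3 = add_to_journal(5, ack, J2),           %% {{Guid,true}, del,   ack}
        array:get(5, J3).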
+ +flush_journal(State = #qistate { segments = Segments }) -> + Segments1 = + segment_fold( + fun (#segment { unacked = 0, path = Path }, SegmentsN) -> + case filelib:is_file(Path) of + true -> ok = file:delete(Path); + false -> ok + end, + SegmentsN; + (#segment {} = Segment, SegmentsN) -> + segment_store(append_journal_to_segment(Segment), SegmentsN) + end, segments_new(), Segments), + {JournalHdl, State1} = + get_journal_handle(State #qistate { segments = Segments1 }), + ok = file_handle_cache:clear(JournalHdl), + State1 #qistate { dirty_count = 0 }. + +append_journal_to_segment(#segment { journal_entries = JEntries, + path = Path } = Segment) -> + case array:sparse_size(JEntries) of + 0 -> Segment; + _ -> {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], + [{write_buffer, infinity}]), + array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries), + ok = file_handle_cache:close(Hdl), + Segment #segment { journal_entries = array_new() } + end. + +get_journal_handle(State = #qistate { journal_handle = undefined, + dir = Dir }) -> + Path = filename:join(Dir, ?JOURNAL_FILENAME), + {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], + [{write_buffer, infinity}]), + {Hdl, State #qistate { journal_handle = Hdl }}; +get_journal_handle(State = #qistate { journal_handle = Hdl }) -> + {Hdl, State}. + +%% Loading Journal. This isn't idempotent and will mess up the counts +%% if you call it more than once on the same state. Assumes the counts +%% are 0 to start with. +load_journal(State) -> + {JournalHdl, State1} = get_journal_handle(State), + {ok, 0} = file_handle_cache:position(JournalHdl, 0), + load_journal_entries(State1). + +%% ditto +recover_journal(State) -> + State1 = #qistate { segments = Segments } = load_journal(State), + Segments1 = + segment_map( + fun (Segment = #segment { journal_entries = JEntries, + unacked = UnackedCountInJournal }) -> + %% We want to keep ack'd entries in so that we can + %% remove them if duplicates are in the journal. The + %% counts here are purely from the segment itself. + {SegEntries, UnackedCountInSeg} = load_segment(true, Segment), + {JEntries1, UnackedCountDuplicates} = + journal_minus_segment(JEntries, SegEntries), + Segment #segment { journal_entries = JEntries1, + unacked = (UnackedCountInJournal + + UnackedCountInSeg - + UnackedCountDuplicates) } + end, Segments), + State1 #qistate { segments = Segments1 }. + +load_journal_entries(State = #qistate { journal_handle = Hdl }) -> + case file_handle_cache:read(Hdl, ?SEQ_BYTES) of + {ok, <<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>} -> + case Prefix of + ?DEL_JPREFIX -> + load_journal_entries(add_to_journal(SeqId, del, State)); + ?ACK_JPREFIX -> + load_journal_entries(add_to_journal(SeqId, ack, State)); + _ -> + case file_handle_cache:read(Hdl, ?GUID_BYTES) of + {ok, <<GuidNum:?GUID_BITS>>} -> + %% work around for binary data + %% fragmentation. See + %% rabbit_msg_file:read_next/2 + <<Guid:?GUID_BYTES/binary>> = + <<GuidNum:?GUID_BITS>>, + Publish = {Guid, case Prefix of + ?PUB_PERSIST_JPREFIX -> true; + ?PUB_TRANS_JPREFIX -> false + end}, + load_journal_entries( + add_to_journal(SeqId, Publish, State)); + _ErrOrEoF -> %% err, we've lost at least a publish + State + end + end; + _ErrOrEoF -> State + end. 
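For reference, a hedged illustration of the journal record layout that load_journal_entries/1 parses, assuming the constants defined earlier (2 prefix bits, 62 sequence-id bits, 16-byte guids): delivers and acks are 8 bytes each, publishes carry an additional 16-byte guid:

    journal_record_example() ->
        SeqId = 20000,
        DelRecord = <<2#10:2, SeqId:62>>,               %% ?DEL_JPREFIX record
        Guid      = <<0:128>>,
        PubRecord = <<2#00:2, SeqId:62, Guid/binary>>,  %% persistent publish
        {byte_size(DelRecord), byte_size(PubRecord)}.   %% {8, 24}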
+ +deliver_or_ack(_Kind, [], State) -> + State; +deliver_or_ack(Kind, SeqIds, State) -> + JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end, + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append( + JournalHdl, + [<<JPrefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>> || SeqId <- SeqIds]), + maybe_flush_journal(lists:foldl(fun (SeqId, StateN) -> + add_to_journal(SeqId, Kind, StateN) + end, State1, SeqIds)). + +%%---------------------------------------------------------------------------- +%% segment manipulation +%%---------------------------------------------------------------------------- + +seq_id_to_seg_and_rel_seq_id(SeqId) -> + { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. + +reconstruct_seq_id(Seg, RelSeq) -> + (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq. + +all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> + lists:sort( + sets:to_list( + lists:foldl( + fun (SegName, Set) -> + sets:add_element( + list_to_integer( + lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end, + SegName)), Set) + end, sets:from_list(segment_nums(Segments)), + filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))). + +segment_find_or_new(Seg, Dir, Segments) -> + case segment_find(Seg, Segments) of + {ok, Segment} -> Segment; + error -> SegName = integer_to_list(Seg) ++ ?SEGMENT_EXTENSION, + Path = filename:join(Dir, SegName), + #segment { num = Seg, + path = Path, + journal_entries = array_new(), + unacked = 0 } + end. + +segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) -> + {ok, Segment}; %% 1 or (2, matches head) +segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) -> + {ok, Segment}; %% 2, matches tail +segment_find(Seg, {Segments, _}) -> %% no match + dict:find(Seg, Segments). + +segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head) + {Segments, [#segment { num = Seg } | Tail]}) -> + {Segments, [Segment | Tail]}; +segment_store(Segment = #segment { num = Seg }, %% 2, matches tail + {Segments, [SegmentA, #segment { num = Seg }]}) -> + {Segments, [Segment, SegmentA]}; +segment_store(Segment = #segment { num = Seg }, {Segments, []}) -> + {dict:erase(Seg, Segments), [Segment]}; +segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) -> + {dict:erase(Seg, Segments), [Segment, SegmentA]}; +segment_store(Segment = #segment { num = Seg }, + {Segments, [SegmentA, SegmentB]}) -> + {dict:store(SegmentB#segment.num, SegmentB, dict:erase(Seg, Segments)), + [Segment, SegmentA]}. + +segment_fold(Fun, Acc, {Segments, CachedSegments}) -> + dict:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end, + lists:foldl(Fun, Acc, CachedSegments), Segments). + +segment_map(Fun, {Segments, CachedSegments}) -> + {dict:map(fun (_Seg, Segment) -> Fun(Segment) end, Segments), + lists:map(Fun, CachedSegments)}. + +segment_nums({Segments, CachedSegments}) -> + lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++ + dict:fetch_keys(Segments). + +segments_new() -> + {dict:new(), []}. 
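The {dict, cached-list} pair returned by segments_new/0 keeps the two most recently stored segments at the head of a short list, spilling older ones into the dict; segment_find/2 checks the list before the dict. A minimal usage sketch (illustrative only; these functions are not exported, so it would have to live inside this module):

    segment_cache_example(Dir) ->
        S0 = segments_new(),
        S1 = segment_store(segment_find_or_new(0, Dir, S0), S0),
        S2 = segment_store(segment_find_or_new(1, Dir, S1), S1),
        {ok, #segment { num = 1 }} = segment_find(1, S2),  %% cache hit at list head
        error = segment_find(7, S2),                       %% never seen: dict miss
        ok.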
+ +write_entry_to_segment(_RelSeq, {?PUB, del, ack}, Hdl) -> + Hdl; +write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> + ok = case Pub of + no_pub -> + ok; + {Guid, IsPersistent} -> + file_handle_cache:append( + Hdl, [<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, + (bool_to_int(IsPersistent)):1, + RelSeq:?REL_SEQ_BITS>>, Guid]) + end, + ok = case {Del, Ack} of + {no_del, no_ack} -> + ok; + _ -> + Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, + RelSeq:?REL_SEQ_BITS>>, + file_handle_cache:append( + Hdl, case {Del, Ack} of + {del, ack} -> [Binary, Binary]; + _ -> Binary + end) + end, + Hdl. + +read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq}, + {Messages, Segments}, Dir) -> + Segment = segment_find_or_new(Seg, Dir, Segments), + {segment_entries_foldr( + fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) + when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso + (Seg < EndSeg orelse EndRelSeq >= RelSeq) -> + [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), + IsPersistent, IsDelivered == del} | Acc ]; + (_RelSeq, _Value, Acc) -> + Acc + end, Messages, Segment), + segment_store(Segment, Segments)}. + +segment_entries_foldr(Fun, Init, + Segment = #segment { journal_entries = JEntries }) -> + {SegEntries, _UnackedCount} = load_segment(false, Segment), + {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries), + array:sparse_foldr(Fun, Init, SegEntries1). + +%% Loading segments +%% +%% Does not do any combining with the journal at all. +load_segment(KeepAcked, #segment { path = Path }) -> + case filelib:is_file(Path) of + false -> {array_new(), 0}; + true -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_MODE, []), + {ok, 0} = file_handle_cache:position(Hdl, bof), + Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0), + ok = file_handle_cache:close(Hdl), + Res + end. + +load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) -> + case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of + {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, + IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} -> + %% because we specify /binary, and binaries are complete + %% bytes, the size spec is in bytes, not bits. + {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES), + Obj = {{Guid, 1 == IsPersistentNum}, no_del, no_ack}, + SegEntries1 = array:set(RelSeq, Obj, SegEntries), + load_segment_entries(KeepAcked, Hdl, SegEntries1, + UnackedCount + 1); + {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, + RelSeq:?REL_SEQ_BITS>>} -> + {UnackedCountDelta, SegEntries1} = + case array:get(RelSeq, SegEntries) of + {Pub, no_del, no_ack} -> + { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)}; + {Pub, del, no_ack} when KeepAcked -> + {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)}; + {_Pub, del, no_ack} -> + {-1, array:reset(RelSeq, SegEntries)} + end, + load_segment_entries(KeepAcked, Hdl, SegEntries1, + UnackedCount + UnackedCountDelta); + _ErrOrEoF -> + {SegEntries, UnackedCount} + end. + +array_new() -> + array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). + +bool_to_int(true ) -> 1; +bool_to_int(false) -> 0. + +%%---------------------------------------------------------------------------- +%% journal & segment combination +%%---------------------------------------------------------------------------- + +%% Combine what we have just read from a segment file with what we're +%% holding for that segment in memory. There must be no duplicates. 
+segment_plus_journal(SegEntries, JEntries) -> + array:sparse_foldl( + fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) -> + SegEntry = array:get(RelSeq, SegEntriesOut), + {Obj, AdditionalUnackedDelta} = + segment_plus_journal1(SegEntry, JObj), + {case Obj of + undefined -> array:reset(RelSeq, SegEntriesOut); + _ -> array:set(RelSeq, Obj, SegEntriesOut) + end, + AdditionalUnacked + AdditionalUnackedDelta} + end, {SegEntries, 0}, JEntries). + +%% Here, the result is a tuple with the first element containing the +%% item which we may be adding to (for items only in the journal), +%% modifying in (bits in both), or, when returning 'undefined', +%% erasing from (ack in journal, not segment) the segment array. The +%% other element of the tuple is the delta for AdditionalUnacked. +segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) -> + {Obj, 1}; +segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) -> + {Obj, 1}; +segment_plus_journal1(undefined, {?PUB, del, ack}) -> + {undefined, 0}; + +segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) -> + {{Pub, del, no_ack}, 0}; +segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack}) -> + {undefined, -1}; +segment_plus_journal1({?PUB, del, no_ack}, {no_pub, no_del, ack}) -> + {undefined, -1}. + +%% Remove from the journal entries for a segment, items that are +%% duplicates of entries found in the segment itself. Used on start up +%% to clean up the journal. +journal_minus_segment(JEntries, SegEntries) -> + array:sparse_foldl( + fun (RelSeq, JObj, {JEntriesOut, UnackedRemoved}) -> + SegEntry = array:get(RelSeq, SegEntries), + {Obj, UnackedRemovedDelta} = + journal_minus_segment1(JObj, SegEntry), + {case Obj of + keep -> JEntriesOut; + undefined -> array:reset(RelSeq, JEntriesOut); + _ -> array:set(RelSeq, Obj, JEntriesOut) + end, + UnackedRemoved + UnackedRemovedDelta} + end, {JEntries, 0}, JEntries). + +%% Here, the result is a tuple with the first element containing the +%% item we are adding to or modifying in the (initially fresh) journal +%% array. If the item is 'undefined' we leave the journal array +%% alone. The other element of the tuple is the deltas for +%% UnackedRemoved. + +%% Both the same. 
Must be at least the publish +journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> + {undefined, 1}; +journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> + {undefined, 0}; + +%% Just publish in journal +journal_minus_segment1({?PUB, no_del, no_ack}, undefined) -> + {keep, 0}; + +%% Publish and deliver in journal +journal_minus_segment1({?PUB, del, no_ack}, undefined) -> + {keep, 0}; +journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, no_ack}, 1}; + +%% Publish, deliver and ack in journal +journal_minus_segment1({?PUB, del, ack}, undefined) -> + {keep, 0}; +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, ack}, 1}; +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> + {{no_pub, no_del, ack}, 1}; + +%% Just deliver in journal +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> + {undefined, 0}; + +%% Just ack in journal +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> + {undefined, -1}; + +%% Deliver and ack in journal +journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> + {{no_pub, no_del, ack}, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> + {undefined, -1}. diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index befbb0c1ec..6c685deed2 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -35,18 +35,19 @@ -include_lib("public_key/include/public_key.hrl"). --export([start_link/0, info_keys/0, info/1, info/2, shutdown/2]). +-export([start_link/3, info_keys/0, info/1, info/2, shutdown/2]). -export([system_continue/3, system_terminate/4, system_code_change/4]). --export([init/1, mainloop/3]). +-export([init/4, mainloop/2]). --export([server_properties/0]). +-export([conserve_memory/2, server_properties/0]). --export([analyze_frame/2]). +-export([analyze_frame/3]). + +-export([emit_stats/1]). -import(gen_tcp). --import(fprof). -import(inet). -import(prim_inet). @@ -59,14 +60,21 @@ %--------------------------------------------------------------------------- --record(v1, {sock, connection, callback, recv_ref, connection_state, - queue_collector}). +-record(v1, {parent, sock, connection, callback, recv_length, recv_ref, + connection_state, queue_collector, heartbeater, stats_timer, + channel_sup_sup_pid, start_heartbeat_fun}). + +-define(STATISTICS_KEYS, [pid, recv_oct, recv_cnt, send_oct, send_cnt, + send_pend, state, channels]). + +-define(CREATION_EVENT_KEYS, [pid, address, port, peer_address, peer_port, + protocol, user, vhost, timeout, frame_max, + client_properties]). --define(INFO_KEYS, - [pid, address, port, peer_address, peer_port, - recv_oct, recv_cnt, send_oct, send_cnt, send_pend, - state, channels, user, vhost, timeout, frame_max, client_properties, - ssl_subject, ssl_fingerprint, ssl_ca]). +-define(SSL_KEYS, + [ssl_subject, ssl_fingerprint, ssl_ca]). + +-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS ++ SSL_KEYS -- [pid]). 
%% connection lifecycle %% @@ -104,6 +112,17 @@ %% -> log error, mark channel as closing, *running* %% handshake_timeout -> ignore, *running* %% heartbeat timeout -> *throw* +%% conserve_memory=true -> *blocking* +%% blocking: +%% conserve_memory=true -> *blocking* +%% conserve_memory=false -> *running* +%% receive a method frame for a content-bearing method +%% -> process, stop receiving, *blocked* +%% ...rest same as 'running' +%% blocked: +%% conserve_memory=true -> *blocked* +%% conserve_memory=false -> resume receiving, *running* +%% ...rest same as 'running' %% closing: %% socket close -> *terminate* %% receive connection.close -> send connection.close_ok, @@ -137,35 +156,60 @@ %% %% TODO: refactor the code so that the above is obvious +-define(IS_RUNNING(State), + (State#v1.connection_state =:= running orelse + State#v1.connection_state =:= blocking orelse + State#v1.connection_state =:= blocked)). + %%---------------------------------------------------------------------------- -ifdef(use_specs). +-type(start_heartbeat_fun() :: + fun ((rabbit_networking:socket(), non_neg_integer()) -> + rabbit_heartbeat:heartbeaters())). + +-spec(start_link/3 :: (pid(), pid(), start_heartbeat_fun()) -> + rabbit_types:ok(pid())). -spec(info_keys/0 :: () -> [rabbit_types:info_key()]). -spec(info/1 :: (pid()) -> [rabbit_types:info()]). -spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]). +-spec(emit_stats/1 :: (pid()) -> 'ok'). -spec(shutdown/2 :: (pid(), string()) -> 'ok'). +-spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). -spec(server_properties/0 :: () -> rabbit_framing:amqp_table()). +%% These specs only exists to add no_return() to keep dialyzer happy +-spec(init/4 :: (pid(), pid(), pid(), start_heartbeat_fun()) -> no_return()). +-spec(start_connection/7 :: + (pid(), pid(), pid(), start_heartbeat_fun(), any(), + rabbit_networking:socket(), + fun ((rabbit_networking:socket()) -> + rabbit_types:ok_or_error2( + rabbit_networking:socket(), any()))) -> no_return()). + -endif. %%-------------------------------------------------------------------------- -start_link() -> - {ok, proc_lib:spawn_link(?MODULE, init, [self()])}. +start_link(ChannelSupSupPid, Collector, StartHeartbeatFun) -> + {ok, proc_lib:spawn_link(?MODULE, init, [self(), ChannelSupSupPid, + Collector, StartHeartbeatFun])}. shutdown(Pid, Explanation) -> gen_server:call(Pid, {shutdown, Explanation}, infinity). -init(Parent) -> +init(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun) -> Deb = sys:debug_options([]), receive {go, Sock, SockTransform} -> - start_connection(Parent, Deb, Sock, SockTransform) + start_connection( + Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, Sock, + SockTransform) end. system_continue(Parent, Deb, State) -> - ?MODULE:mainloop(Parent, Deb, State). + ?MODULE:mainloop(Deb, State#v1{parent = Parent}). system_terminate(Reason, _Parent, _Deb, _State) -> exit(Reason). @@ -184,32 +228,12 @@ info(Pid, Items) -> {error, Error} -> throw(Error) end. -setup_profiling() -> - Value = rabbit_misc:get_config(profiling_enabled, false), - case Value of - once -> - rabbit_log:info("Enabling profiling for this connection, " - "and disabling for subsequent.~n"), - rabbit_misc:set_config(profiling_enabled, false), - fprof:trace(start); - true -> - rabbit_log:info("Enabling profiling for this connection.~n"), - fprof:trace(start); - false -> - ok - end, - Value. +emit_stats(Pid) -> + gen_server:cast(Pid, emit_stats). 
-teardown_profiling(Value) -> - case Value of - false -> - ok; - _ -> - rabbit_log:info("Completing profiling for this connection.~n"), - fprof:trace(stop), - fprof:profile(), - fprof:analyse([{dest, []}, {cols, 100}]) - end. +conserve_memory(Pid, Conserve) -> + Pid ! {conserve_memory, Conserve}, + ok. server_properties() -> {ok, Product} = application:get_key(rabbit, id), @@ -233,7 +257,8 @@ socket_op(Sock, Fun) -> exit(normal) end. -start_connection(Parent, Deb, Sock, SockTransform) -> +start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, + Sock, SockTransform) -> process_flag(trap_exit, true), {PeerAddress, PeerPort} = socket_op(Sock, fun rabbit_net:peername/1), PeerAddressS = inet_parse:ntoa(PeerAddress), @@ -242,22 +267,29 @@ start_connection(Parent, Deb, Sock, SockTransform) -> ClientSock = socket_op(Sock, SockTransform), erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), handshake_timeout), - ProfilingValue = setup_profiling(), - {ok, Collector} = rabbit_queue_collector:start_link(), try - mainloop(Parent, Deb, switch_callback( - #v1{sock = ClientSock, - connection = #connection{ - user = none, - timeout_sec = ?HANDSHAKE_TIMEOUT, - frame_max = ?FRAME_MIN_SIZE, - vhost = none, - client_properties = none}, - callback = uninitialized_callback, - recv_ref = none, - connection_state = pre_init, - queue_collector = Collector}, - handshake, 8)) + mainloop(Deb, switch_callback( + #v1{parent = Parent, + sock = ClientSock, + connection = #connection{ + protocol = none, + user = none, + timeout_sec = ?HANDSHAKE_TIMEOUT, + frame_max = ?FRAME_MIN_SIZE, + vhost = none, + client_properties = none}, + callback = uninitialized_callback, + recv_length = 0, + recv_ref = none, + connection_state = pre_init, + queue_collector = Collector, + heartbeater = none, + stats_timer = + rabbit_event:init_stats_timer(), + channel_sup_sup_pid = ChannelSupSupPid, + start_heartbeat_fun = StartHeartbeatFun + }, + handshake, 8)) catch Ex -> (if Ex == connection_closed_abruptly -> fun rabbit_log:warning/2; @@ -274,21 +306,18 @@ start_connection(Parent, Deb, Sock, SockTransform) -> %% output to be sent, which results in unnecessary delays. %% %% gen_tcp:close(ClientSock), - teardown_profiling(ProfilingValue), - rabbit_queue_collector:shutdown(Collector), - rabbit_misc:unlink_and_capture_exit(Collector) + rabbit_event:notify(connection_closed, [{pid, self()}]) end, done. 
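%% How memory-pressure flow control reaches this reader: when
%% connection.open is handled (below) the reader registers itself with
%% rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), whose
%% return value seeds the initial state through
%% internal_conserve_memory/2. After that, rabbit_alarm invokes
%% conserve_memory/2 above whenever the memory alarm is set or cleared;
%% that just sends {conserve_memory, boolean()} to the reader, and
%% mainloop/2 turns the message into the running/blocking/blocked
%% transitions described in the state diagram above.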
-mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> +mainloop(Deb, State = #v1{parent = Parent, sock= Sock, recv_ref = Ref}) -> %%?LOGDEBUG("Reader mainloop: ~p bytes available, need ~p~n", [HaveBytes, WaitUntilNBytes]), receive {inet_async, Sock, Ref, {ok, Data}} -> {State1, Callback1, Length1} = handle_input(State#v1.callback, Data, State#v1{recv_ref = none}), - mainloop(Parent, Deb, - switch_callback(State1, Callback1, Length1)); + mainloop(Deb, switch_callback(State1, Callback1, Length1)); {inet_async, Sock, Ref, {error, closed}} -> if State#v1.connection_state =:= closed -> State; @@ -297,6 +326,8 @@ mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> end; {inet_async, Sock, Ref, {error, Reason}} -> throw({inet_error, Reason}); + {conserve_memory, Conserve} -> + mainloop(Deb, internal_conserve_memory(Conserve, State)); {'EXIT', Parent, Reason} -> terminate(io_lib:format("broker forced connection closure " "with reason '~w'", [Reason]), State), @@ -311,17 +342,17 @@ mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> exit(Reason); {channel_exit, _Chan, E = {writer, send_failed, _Error}} -> throw(E); - {channel_exit, Channel, Reason} -> - mainloop(Parent, Deb, handle_channel_exit(Channel, Reason, State)); - {'EXIT', Pid, Reason} -> - mainloop(Parent, Deb, handle_dependent_exit(Pid, Reason, State)); + {channel_exit, ChannelOrFrPid, Reason} -> + mainloop(Deb, handle_channel_exit(ChannelOrFrPid, Reason, State)); + {'EXIT', ChSupPid, Reason} -> + mainloop(Deb, handle_dependent_exit(ChSupPid, Reason, State)); terminate_connection -> State; handshake_timeout -> - if State#v1.connection_state =:= running orelse + if ?IS_RUNNING(State) orelse State#v1.connection_state =:= closing orelse State#v1.connection_state =:= closed -> - mainloop(Parent, Deb, State); + mainloop(Deb, State); true -> throw({handshake_timeout, State#v1.callback}) end; @@ -332,16 +363,21 @@ mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> gen_server:reply(From, ok), case ForceTermination of force -> ok; - normal -> mainloop(Parent, Deb, NewState) + normal -> mainloop(Deb, NewState) end; {'$gen_call', From, info} -> gen_server:reply(From, infos(?INFO_KEYS, State)), - mainloop(Parent, Deb, State); + mainloop(Deb, State); {'$gen_call', From, {info, Items}} -> gen_server:reply(From, try {ok, infos(Items, State)} catch Error -> {error, Error} end), - mainloop(Parent, Deb, State); + mainloop(Deb, State); + {'$gen_cast', emit_stats} -> + internal_emit_stats(State), + mainloop(Deb, State#v1{stats_timer = + rabbit_event:reset_stats_timer_after( + State#v1.stats_timer)}); {system, From, Request} -> sys:handle_system_msg(Request, From, Parent, ?MODULE, Deb, State); @@ -350,21 +386,44 @@ mainloop(Parent, Deb, State = #v1{sock= Sock, recv_ref = Ref}) -> exit({unexpected_message, Other}) end. -switch_callback(OldState, NewCallback, Length) -> +switch_callback(State = #v1{connection_state = blocked, + heartbeater = Heartbeater}, Callback, Length) -> + ok = rabbit_heartbeat:pause_monitor(Heartbeater), + State#v1{callback = Callback, recv_length = Length, recv_ref = none}; +switch_callback(State, Callback, Length) -> Ref = inet_op(fun () -> rabbit_net:async_recv( - OldState#v1.sock, Length, infinity) end), - OldState#v1{callback = NewCallback, - recv_ref = Ref}. + State#v1.sock, Length, infinity) end), + State#v1{callback = Callback, recv_length = Length, recv_ref = Ref}. 
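%% Note on the blocked clause above: leaving recv_ref = none means no
%% new async_recv is posted, so the reader simply stops pulling bytes
%% off the socket while blocked; the heartbeat monitor is paused,
%% presumably so the now-idle socket is not treated as a missed
%% heartbeat. The saved callback and recv_length let
%% internal_conserve_memory/2 (below) re-issue the receive and resume
%% the monitor once memory pressure clears.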
-terminate(Explanation, State = #v1{connection_state = running}) -> +terminate(Explanation, State) when ?IS_RUNNING(State) -> {normal, send_exception(State, 0, rabbit_misc:amqp_error( connection_forced, Explanation, [], none))}; terminate(_Explanation, State) -> {force, State}. -close_connection(State = #v1{connection = #connection{ +internal_conserve_memory(true, State = #v1{connection_state = running}) -> + State#v1{connection_state = blocking}; +internal_conserve_memory(false, State = #v1{connection_state = blocking}) -> + State#v1{connection_state = running}; +internal_conserve_memory(false, State = #v1{connection_state = blocked, + heartbeater = Heartbeater, + callback = Callback, + recv_length = Length, + recv_ref = none}) -> + ok = rabbit_heartbeat:resume_monitor(Heartbeater), + switch_callback(State#v1{connection_state = running}, Callback, Length); +internal_conserve_memory(_Conserve, State) -> + State. + +close_connection(State = #v1{queue_collector = Collector, + connection = #connection{ timeout_sec = TimeoutSec}}) -> + %% The spec says "Exclusive queues may only be accessed by the + %% current connection, and are deleted when that connection + %% closes." This does not strictly imply synchrony, but in + %% practice it seems to be what people assume. + rabbit_queue_collector:delete_all(Collector), %% We terminate the connection after the specified interval, but %% no later than ?CLOSING_TIMEOUT seconds. TimeoutMillisec = @@ -379,30 +438,45 @@ close_channel(Channel, State) -> put({channel, Channel}, closing), State. +handle_channel_exit(ChFrPid, Reason, State) when is_pid(ChFrPid) -> + {channel, Channel} = get({ch_fr_pid, ChFrPid}), + handle_exception(State, Channel, Reason); handle_channel_exit(Channel, Reason, State) -> handle_exception(State, Channel, Reason). -handle_dependent_exit(Pid, normal, State) -> - erase({chpid, Pid}), - maybe_close(State); -handle_dependent_exit(Pid, Reason, State) -> - case channel_cleanup(Pid) of - undefined -> exit({abnormal_dependent_exit, Pid, Reason}); - Channel -> maybe_close(handle_exception(State, Channel, Reason)) +handle_dependent_exit(ChSupPid, Reason, State) -> + case termination_kind(Reason) of + controlled -> + case erase({ch_sup_pid, ChSupPid}) of + undefined -> ok; + {_Channel, {ch_fr_pid, _ChFrPid} = ChFr} -> erase(ChFr) + end, + maybe_close(State); + uncontrolled -> + case channel_cleanup(ChSupPid) of + undefined -> + exit({abnormal_dependent_exit, ChSupPid, Reason}); + Channel -> + maybe_close(handle_exception(State, Channel, Reason)) + end end. -channel_cleanup(Pid) -> - case get({chpid, Pid}) of - undefined -> undefined; - {channel, Channel} -> erase({channel, Channel}), - erase({chpid, Pid}), - Channel +channel_cleanup(ChSupPid) -> + case get({ch_sup_pid, ChSupPid}) of + undefined -> undefined; + {{channel, Channel}, ChFr} -> erase({channel, Channel}), + erase(ChFr), + erase({ch_sup_pid, ChSupPid}), + Channel end. -all_channels() -> [Pid || {{chpid, Pid},_} <- get()]. +all_channels() -> [ChFrPid || {{ch_sup_pid, _ChSupPid}, + {_Channel, {ch_fr_pid, ChFrPid}}} <- get()]. 
terminate_channels() -> - NChannels = length([exit(Pid, normal) || Pid <- all_channels()]), + NChannels = + length([rabbit_framing_channel:shutdown(ChFrPid) + || ChFrPid <- all_channels()]), if NChannels > 0 -> Timeout = 1000 * ?CHANNEL_TERMINATION_TIMEOUT * NChannels, TimerRef = erlang:send_after(Timeout, self(), cancel_wait), @@ -420,14 +494,15 @@ wait_for_channel_termination(0, TimerRef) -> wait_for_channel_termination(N, TimerRef) -> receive - {'EXIT', Pid, Reason} -> - case channel_cleanup(Pid) of + {'EXIT', ChSupPid, Reason} -> + case channel_cleanup(ChSupPid) of undefined -> - exit({abnormal_dependent_exit, Pid, Reason}); + exit({abnormal_dependent_exit, ChSupPid, Reason}); Channel -> - case Reason of - normal -> ok; - _ -> + case termination_kind(Reason) of + controlled -> + ok; + uncontrolled -> rabbit_log:error( "connection ~p, channel ~p - " "error while terminating:~n~p~n", @@ -440,24 +515,28 @@ wait_for_channel_termination(N, TimerRef) -> end. maybe_close(State = #v1{connection_state = closing, - queue_collector = Collector}) -> + connection = #connection{protocol = Protocol}, + sock = Sock}) -> case all_channels() of [] -> - %% Spec says "Exclusive queues may only be accessed by the current - %% connection, and are deleted when that connection closes." - %% This does not strictly imply synchrony, but in practice it seems - %% to be what people assume. - rabbit_queue_collector:delete_all(Collector), - ok = send_on_channel0(State#v1.sock, #'connection.close_ok'{}), - close_connection(State); + NewState = close_connection(State), + ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), + NewState; _ -> State end; maybe_close(State) -> State. -handle_frame(Type, 0, Payload, State = #v1{connection_state = CS}) +termination_kind(normal) -> controlled; +termination_kind(shutdown) -> controlled; +termination_kind({shutdown, _Term}) -> controlled; +termination_kind(_) -> uncontrolled. 
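%% termination_kind/1 distinguishes deliberate exits from crashes:
%% 'normal', 'shutdown' and {shutdown, Term} are the reasons OTP uses
%% for orderly termination (e.g. a supervisor stopping a child), so
%% they only trigger cleanup of the channel bookkeeping; any other
%% reason is treated as a crash and is either reported via
%% handle_exception or logged in wait_for_channel_termination above.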
+ +handle_frame(Type, 0, Payload, + State = #v1{connection_state = CS, + connection = #connection{protocol = Protocol}}) when CS =:= closing; CS =:= closed -> - case analyze_frame(Type, Payload) of + case analyze_frame(Type, Payload, Protocol) of {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); _Other -> State @@ -465,31 +544,38 @@ handle_frame(Type, 0, Payload, State = #v1{connection_state = CS}) handle_frame(_Type, _Channel, _Payload, State = #v1{connection_state = CS}) when CS =:= closing; CS =:= closed -> State; -handle_frame(Type, 0, Payload, State) -> - case analyze_frame(Type, Payload) of +handle_frame(Type, 0, Payload, + State = #v1{connection = #connection{protocol = Protocol}}) -> + case analyze_frame(Type, Payload, Protocol) of error -> throw({unknown_frame, 0, Type, Payload}); heartbeat -> State; - trace -> State; {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); Other -> throw({unexpected_frame_on_channel0, Other}) end; -handle_frame(Type, Channel, Payload, State) -> - case analyze_frame(Type, Payload) of +handle_frame(Type, Channel, Payload, + State = #v1{connection = #connection{protocol = Protocol}}) -> + case analyze_frame(Type, Payload, Protocol) of error -> throw({unknown_frame, Channel, Type, Payload}); heartbeat -> throw({unexpected_heartbeat_frame, Channel}); - trace -> throw({unexpected_trace_frame, Channel}); AnalyzedFrame -> %%?LOGDEBUG("Ch ~p Frame ~p~n", [Channel, AnalyzedFrame]), case get({channel, Channel}) of - {chpid, ChPid} -> + {ch_fr_pid, ChFrPid} -> + ok = rabbit_framing_channel:process(ChFrPid, AnalyzedFrame), case AnalyzedFrame of {method, 'channel.close', _} -> - erase({channel, Channel}); - _ -> ok - end, - ok = rabbit_framing_channel:process(ChPid, AnalyzedFrame), - State; + erase({channel, Channel}), + State; + {method, MethodName, _} -> + case (State#v1.connection_state =:= blocking andalso + Protocol:method_has_content(MethodName)) of + true -> State#v1{connection_state = blocked}; + false -> State + end; + _ -> + State + end; closing -> %% According to the spec, after sending a %% channel.close we must ignore all frames except @@ -509,32 +595,37 @@ handle_frame(Type, Channel, Payload, State) -> end, State; undefined -> - case State#v1.connection_state of - running -> ok = send_to_new_channel( - Channel, AnalyzedFrame, State), - State; - Other -> throw({channel_frame_while_starting, - Channel, Other, AnalyzedFrame}) + case ?IS_RUNNING(State) of + true -> ok = send_to_new_channel( + Channel, AnalyzedFrame, State), + State; + false -> throw({channel_frame_while_starting, + Channel, State#v1.connection_state, + AnalyzedFrame}) end end end. 
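%% Sketch of the flow-control hand-off above: while in 'blocking', a
%% method frame is still forwarded to its channel, but if the method
%% carries content the connection flips to 'blocked' and reading stops
%% (the blocked clause of switch_callback/3 posts no new receive). For
%% example Protocol:method_has_content('basic.publish') is true, so a
%% publish is the kind of frame that triggers the transition, whereas a
%% content-free method such as 'basic.ack' leaves the connection in
%% 'blocking' with the socket still being read.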
-analyze_frame(?FRAME_METHOD, <<ClassId:16, MethodId:16, MethodFields/binary>>) -> - {method, rabbit_framing:lookup_method_name({ClassId, MethodId}), MethodFields}; -analyze_frame(?FRAME_HEADER, <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>) -> +analyze_frame(?FRAME_METHOD, + <<ClassId:16, MethodId:16, MethodFields/binary>>, + Protocol) -> + MethodName = Protocol:lookup_method_name({ClassId, MethodId}), + {method, MethodName, MethodFields}; +analyze_frame(?FRAME_HEADER, + <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>, + _Protocol) -> {content_header, ClassId, Weight, BodySize, Properties}; -analyze_frame(?FRAME_BODY, Body) -> +analyze_frame(?FRAME_BODY, Body, _Protocol) -> {content_body, Body}; -analyze_frame(?FRAME_TRACE, _Body) -> - trace; -analyze_frame(?FRAME_HEARTBEAT, <<>>) -> +analyze_frame(?FRAME_HEARTBEAT, <<>>, _Protocol) -> heartbeat; -analyze_frame(_Type, _Body) -> +analyze_frame(_Type, _Body, _Protocol) -> error. handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, State) -> %%?LOGDEBUG("Got frame header: ~p/~p/~p~n", [Type, Channel, PayloadSize]), - {State, {frame_payload, Type, Channel, PayloadSize}, PayloadSize + 1}; + {ensure_stats_timer(State), {frame_payload, Type, Channel, PayloadSize}, + PayloadSize + 1}; handle_input({frame_payload, Type, Channel, PayloadSize}, PayloadAndMarker, State) -> case PayloadAndMarker of @@ -546,54 +637,76 @@ handle_input({frame_payload, Type, Channel, PayloadSize}, PayloadAndMarker, Stat throw({bad_payload, PayloadAndMarker}) end; -handle_input(handshake, <<"AMQP",1,1,ProtocolMajor,ProtocolMinor>>, - State = #v1{sock = Sock, connection = Connection}) -> - case check_version({ProtocolMajor, ProtocolMinor}, - {?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR}) of - true -> - ok = send_on_channel0( - Sock, - #'connection.start'{ - version_major = ?PROTOCOL_VERSION_MAJOR, - version_minor = ?PROTOCOL_VERSION_MINOR, - server_properties = server_properties(), - mechanisms = <<"PLAIN AMQPLAIN">>, - locales = <<"en_US">> }), - {State#v1{connection = Connection#connection{ - timeout_sec = ?NORMAL_TIMEOUT}, - connection_state = starting}, - frame_header, 7}; - false -> - throw({bad_version, ProtocolMajor, ProtocolMinor}) - end; +%% The two rules pertaining to version negotiation: +%% +%% * If the server cannot support the protocol specified in the +%% protocol header, it MUST respond with a valid protocol header and +%% then close the socket connection. +%% +%% * The server MUST provide a protocol version that is lower than or +%% equal to that requested by the client in the protocol header. +handle_input(handshake, <<"AMQP", 0, 0, 9, 1>>, State) -> + start_connection({0, 9, 1}, rabbit_framing_amqp_0_9_1, State); + +%% This is the protocol header for 0-9, which we can safely treat as +%% though it were 0-9-1. +handle_input(handshake, <<"AMQP", 1, 1, 0, 9>>, State) -> + start_connection({0, 9, 0}, rabbit_framing_amqp_0_9_1, State); + +%% This is what most clients send for 0-8. The 0-8 spec, confusingly, +%% defines the version as 8-0. +handle_input(handshake, <<"AMQP", 1, 1, 8, 0>>, State) -> + start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State); + +%% The 0-8 spec as on the AMQP web site actually has this as the +%% protocol header; some libraries e.g., py-amqplib, send it when they +%% want 0-8. 
+handle_input(handshake, <<"AMQP", 1, 1, 9, 1>>, State) -> + start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State); + +handle_input(handshake, <<"AMQP", A, B, C, D>>, #v1{sock = Sock}) -> + refuse_connection(Sock, {bad_version, A, B, C, D}); handle_input(handshake, Other, #v1{sock = Sock}) -> - ok = inet_op(fun () -> rabbit_net:send( - Sock, <<"AMQP",1,1, - ?PROTOCOL_VERSION_MAJOR, - ?PROTOCOL_VERSION_MINOR>>) end), - throw({bad_header, Other}); + refuse_connection(Sock, {bad_header, Other}); handle_input(Callback, Data, _State) -> throw({bad_input, Callback, Data}). -%% the 0-8 spec, confusingly, defines the version as 8-0 -adjust_version({8,0}) -> {0,8}; -adjust_version(Version) -> Version. -check_version(ClientVersion, ServerVersion) -> - {ClientMajor, ClientMinor} = adjust_version(ClientVersion), - {ServerMajor, ServerMinor} = adjust_version(ServerVersion), - ClientMajor > ServerMajor - orelse - (ClientMajor == ServerMajor andalso - ClientMinor >= ServerMinor). +%% Offer a protocol version to the client. Connection.start only +%% includes a major and minor version number, Luckily 0-9 and 0-9-1 +%% are similar enough that clients will be happy with either. +start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision}, + Protocol, + State = #v1{sock = Sock, connection = Connection}) -> + Start = #'connection.start'{ version_major = ProtocolMajor, + version_minor = ProtocolMinor, + server_properties = server_properties(), + mechanisms = <<"PLAIN AMQPLAIN">>, + locales = <<"en_US">> }, + ok = send_on_channel0(Sock, Start, Protocol), + {State#v1{connection = Connection#connection{ + timeout_sec = ?NORMAL_TIMEOUT, + protocol = Protocol}, + connection_state = starting}, + frame_header, 7}. + +refuse_connection(Sock, Exception) -> + ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",0,0,9,1>>) end), + throw(Exception). + +ensure_stats_timer(State = #v1{stats_timer = StatsTimer}) -> + Self = self(), + State#v1{stats_timer = rabbit_event:ensure_stats_timer_after( + StatsTimer, + fun() -> emit_stats(Self) end)}. %%-------------------------------------------------------------------------- -handle_method0(MethodName, FieldsBin, State) -> +handle_method0(MethodName, FieldsBin, + State = #v1{connection = #connection{protocol = Protocol}}) -> try - handle_method0(rabbit_framing:decode_method_fields( - MethodName, FieldsBin), + handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin), State) catch exit:Reason -> CompleteReason = case Reason of @@ -601,13 +714,14 @@ handle_method0(MethodName, FieldsBin, State) -> Reason#amqp_error{method = MethodName}; OtherReason -> OtherReason end, - case State#v1.connection_state of - running -> send_exception(State, 0, CompleteReason); + case ?IS_RUNNING(State) of + true -> send_exception(State, 0, CompleteReason); %% We don't trust the client at this point - force %% them to wait for a bit so they can't DOS us with %% repeated failed logins etc. - Other -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), - throw({channel0_error, Other, CompleteReason}) + false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), + throw({channel0_error, State#v1.connection_state, + CompleteReason}) end end. 
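%% Recap of the handshake clauses above: the protocol headers accepted
%% are <<"AMQP",0,0,9,1>> (0-9-1), <<"AMQP",1,1,0,9>> (0-9, served as
%% 0-9-1), and <<"AMQP",1,1,8,0>> or <<"AMQP",1,1,9,1>> (both treated
%% as 0-8); any other header, or non-header garbage, makes
%% refuse_connection/2 write back the server's own header,
%% <<"AMQP",0,0,9,1>>, before throwing, which is exactly the behaviour
%% required by the two negotiation rules quoted above.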
@@ -615,14 +729,14 @@ handle_method0(#'connection.start_ok'{mechanism = Mechanism, response = Response, client_properties = ClientProperties}, State = #v1{connection_state = starting, - connection = Connection, + connection = Connection = + #connection{protocol = Protocol}, sock = Sock}) -> User = rabbit_access_control:check_login(Mechanism, Response), - ok = send_on_channel0( - Sock, - #'connection.tune'{channel_max = 0, + Tune = #'connection.tune'{channel_max = 0, frame_max = ?FRAME_MAX, - heartbeat = 0}), + heartbeat = 0}, + ok = send_on_channel0(Sock, Tune, Protocol), State#v1{connection_state = tuning, connection = Connection#connection{ user = User, @@ -631,7 +745,8 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax, heartbeat = ClientHeartbeat}, State = #v1{connection_state = tuning, connection = Connection, - sock = Sock}) -> + sock = Sock, + start_heartbeat_fun = SHF}) -> if (FrameMax /= 0) and (FrameMax < ?FRAME_MIN_SIZE) -> rabbit_misc:protocol_error( not_allowed, "frame_max=~w < ~w min size", @@ -641,53 +756,43 @@ handle_method0(#'connection.tune_ok'{frame_max = FrameMax, not_allowed, "frame_max=~w > ~w max size", [FrameMax, ?FRAME_MAX]); true -> - rabbit_heartbeat:start_heartbeat(Sock, ClientHeartbeat), + Heartbeater = SHF(Sock, ClientHeartbeat), State#v1{connection_state = opening, connection = Connection#connection{ timeout_sec = ClientHeartbeat, - frame_max = FrameMax}} + frame_max = FrameMax}, + heartbeater = Heartbeater} end; -handle_method0(#'connection.open'{virtual_host = VHostPath, - insist = Insist}, +handle_method0(#'connection.open'{virtual_host = VHostPath}, + State = #v1{connection_state = opening, connection = Connection = #connection{ - user = User}, + user = User, + protocol = Protocol}, sock = Sock}) -> ok = rabbit_access_control:check_vhost_access(User, VHostPath), NewConnection = Connection#connection{vhost = VHostPath}, - KnownHosts = format_listeners(rabbit_networking:active_listeners()), - Redirects = compute_redirects(Insist), - if Redirects == [] -> - ok = send_on_channel0( - Sock, - #'connection.open_ok'{known_hosts = KnownHosts}), - State#v1{connection_state = running, - connection = NewConnection}; - true -> - %% FIXME: 'host' is supposed to only contain one - %% address; but which one do we pick? This is - %% really a problem with the spec. - Host = format_listeners(Redirects), - rabbit_log:info("connection ~p redirecting to ~p~n", - [self(), Host]), - ok = send_on_channel0( - Sock, - #'connection.redirect'{host = Host, - known_hosts = KnownHosts}), - close_connection(State#v1{connection = NewConnection}) - end; -handle_method0(#'connection.close'{}, - State = #v1{connection_state = running}) -> + ok = send_on_channel0(Sock, #'connection.open_ok'{}, Protocol), + State1 = internal_conserve_memory( + rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), + State#v1{connection_state = running, + connection = NewConnection}), + rabbit_event:notify( + connection_created, + [{Item, i(Item, State1)} || Item <- ?CREATION_EVENT_KEYS]), + State1; +handle_method0(#'connection.close'{}, State) when ?IS_RUNNING(State) -> lists:foreach(fun rabbit_framing_channel:shutdown/1, all_channels()), maybe_close(State#v1{connection_state = closing}); handle_method0(#'connection.close'{}, State = #v1{connection_state = CS, + connection = #connection{protocol = Protocol}, sock = Sock}) when CS =:= closing; CS =:= closed -> %% We're already closed or closing, so we don't need to cleanup %% anything. 
- ok = send_on_channel0(Sock, #'connection.close_ok'{}), + ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), State; handle_method0(#'connection.close_ok'{}, State = #v1{connection_state = closed}) -> @@ -700,23 +805,8 @@ handle_method0(_Method, #v1{connection_state = S}) -> rabbit_misc:protocol_error( channel_error, "unexpected method in connection state ~w", [S]). -send_on_channel0(Sock, Method) -> - ok = rabbit_writer:internal_send_command(Sock, 0, Method). - -format_listeners(Listeners) -> - list_to_binary( - rabbit_misc:intersperse( - $,, - [io_lib:format("~s:~w", [Host, Port]) || - #listener{host = Host, port = Port} <- Listeners])). - -compute_redirects(true) -> []; -compute_redirects(false) -> - Node = node(), - LNode = rabbit_load:pick(), - if Node == LNode -> []; - true -> rabbit_networking:node_listeners(LNode) - end. +send_on_channel0(Sock, Method, Protocol) -> + ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol). %%-------------------------------------------------------------------------- @@ -757,6 +847,10 @@ i(state, #v1{connection_state = S}) -> S; i(channels, #v1{}) -> length(all_channels()); +i(protocol, #v1{connection = #connection{protocol = none}}) -> + none; +i(protocol, #v1{connection = #connection{protocol = Protocol}}) -> + Protocol:version(); i(user, #v1{connection = #connection{user = #user{username = Username}}}) -> Username; i(user, #v1{connection = #connection{user = none}}) -> @@ -785,19 +879,21 @@ get_ssl_info(F, Sock) -> %%-------------------------------------------------------------------------- -send_to_new_channel(Channel, AnalyzedFrame, - State = #v1{queue_collector = Collector}) -> - #v1{sock = Sock, connection = #connection{ - frame_max = FrameMax, - user = #user{username = Username}, - vhost = VHost}} = State, - WriterPid = rabbit_writer:start(Sock, Channel, FrameMax), - ChPid = rabbit_framing_channel:start_link( - fun rabbit_channel:start_link/6, - [Channel, self(), WriterPid, Username, VHost, Collector]), - put({channel, Channel}, {chpid, ChPid}), - put({chpid, ChPid}, {channel, Channel}), - ok = rabbit_framing_channel:process(ChPid, AnalyzedFrame). +send_to_new_channel(Channel, AnalyzedFrame, State) -> + #v1{sock = Sock, queue_collector = Collector, + channel_sup_sup_pid = ChanSupSup, + connection = #connection{protocol = Protocol, + frame_max = FrameMax, + user = #user{username = Username}, + vhost = VHost}} = State, + {ok, ChSupPid, ChFrPid} = + rabbit_channel_sup_sup:start_channel( + ChanSupSup, {Protocol, Sock, Channel, FrameMax, + self(), Username, VHost, Collector}), + put({channel, Channel}, {ch_fr_pid, ChFrPid}), + put({ch_sup_pid, ChSupPid}, {{channel, Channel}, {ch_fr_pid, ChFrPid}}), + put({ch_fr_pid, ChFrPid}, {channel, Channel}), + ok = rabbit_framing_channel:process(ChFrPid, AnalyzedFrame). log_channel_error(ConnectionState, Channel, Reason) -> rabbit_log:error("connection ~p (~p), channel ~p - error:~n~p~n", @@ -810,25 +906,27 @@ handle_exception(State = #v1{connection_state = CS}, Channel, Reason) -> log_channel_error(CS, Channel, Reason), send_exception(State, Channel, Reason). 
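%% How an exception propagates from here: map_exception/3 (below) gets
%% a {ShouldClose, Code, Text} suggestion from the protocol module via
%% lookup_amqp_exception/2; that suggestion, or the error arriving on
%% channel 0, forces a full connection close, in which case all
%% channels are terminated, the connection is closed and a
%% connection.close is written on channel 0. Otherwise only the
%% offending channel is marked closing and sent a channel.close.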
-send_exception(State, Channel, Reason) -> - {ShouldClose, CloseChannel, CloseMethod} = map_exception(Channel, Reason), +send_exception(State = #v1{connection = #connection{protocol = Protocol}}, + Channel, Reason) -> + {ShouldClose, CloseChannel, CloseMethod} = + map_exception(Channel, Reason, Protocol), NewState = case ShouldClose of true -> terminate_channels(), close_connection(State); false -> close_channel(Channel, State) end, ok = rabbit_writer:internal_send_command( - NewState#v1.sock, CloseChannel, CloseMethod), + NewState#v1.sock, CloseChannel, CloseMethod, Protocol), NewState. -map_exception(Channel, Reason) -> +map_exception(Channel, Reason, Protocol) -> {SuggestedClose, ReplyCode, ReplyText, FailedMethod} = - lookup_amqp_exception(Reason), + lookup_amqp_exception(Reason, Protocol), ShouldClose = SuggestedClose or (Channel == 0), {ClassId, MethodId} = case FailedMethod of {_, _} -> FailedMethod; none -> {0, 0}; - _ -> rabbit_framing:method_id(FailedMethod) + _ -> Protocol:method_id(FailedMethod) end, {CloseChannel, CloseMethod} = case ShouldClose of @@ -843,22 +941,16 @@ map_exception(Channel, Reason) -> end, {ShouldClose, CloseChannel, CloseMethod}. -%% FIXME: this clause can go when we move to AMQP spec >=8.1 -lookup_amqp_exception(#amqp_error{name = precondition_failed, - explanation = Expl, - method = Method}) -> - ExplBin = amqp_exception_explanation(<<"PRECONDITION_FAILED">>, Expl), - {false, 406, ExplBin, Method}; lookup_amqp_exception(#amqp_error{name = Name, explanation = Expl, - method = Method}) -> - {ShouldClose, Code, Text} = rabbit_framing:lookup_amqp_exception(Name), + method = Method}, + Protocol) -> + {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(Name), ExplBin = amqp_exception_explanation(Text, Expl), {ShouldClose, Code, ExplBin, Method}; -lookup_amqp_exception(Other) -> +lookup_amqp_exception(Other, Protocol) -> rabbit_log:warning("Non-AMQP exit reason '~p'~n", [Other]), - {ShouldClose, Code, Text} = - rabbit_framing:lookup_amqp_exception(internal_error), + {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(internal_error), {ShouldClose, Code, Text, none}. amqp_exception_explanation(Text, Expl) -> @@ -867,3 +959,7 @@ amqp_exception_explanation(Text, Expl) -> if size(CompleteTextBin) > 255 -> <<CompleteTextBin:252/binary, "...">>; true -> CompleteTextBin end. + +internal_emit_stats(State) -> + rabbit_event:notify(connection_stats, + [{Item, i(Item, State)} || Item <- ?STATISTICS_KEYS]). diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index d50b9f3126..ec049a1a2c 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -69,8 +69,8 @@ deliver(QPids, Delivery = #delivery{mandatory = false, deliver(QPids, Delivery) -> {Success, _} = delegate:invoke(QPids, - fun (Pid) -> - rabbit_amqqueue:deliver(Pid, Delivery) + fun (Pid) -> + rabbit_amqqueue:deliver(Pid, Delivery) end), {Routed, Handled} = lists:foldl(fun fold_deliveries/2, {false, []}, Success), diff --git a/src/rabbit_sup.erl b/src/rabbit_sup.erl index 2c5e51125e..97613d17a5 100644 --- a/src/rabbit_sup.erl +++ b/src/rabbit_sup.erl @@ -34,7 +34,7 @@ -behaviour(supervisor). -export([start_link/0, start_child/1, start_child/2, start_child/3, - start_restartable_child/1, start_restartable_child/2]). + start_restartable_child/1, start_restartable_child/2, stop_child/1]). -export([init/1]). @@ -69,5 +69,11 @@ start_restartable_child(Mod, Args) -> transient, infinity, supervisor, [rabbit_restartable_sup]}), ok. 
+stop_child(ChildId) -> + case supervisor:terminate_child(?SERVER, ChildId) of + ok -> supervisor:delete_child(?SERVER, ChildId); + E -> E + end. + init([]) -> {ok, {{one_for_all, 0, 1}, []}}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index ff7c07e37e..bdd3cdcd64 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -35,15 +35,15 @@ -export([all_tests/0, test_parsing/0]). -%% Exported so the hook mechanism can call back --export([handle_hook/3, bad_handle_hook/3, extra_arg_hook/5]). - -import(lists). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). -include_lib("kernel/include/file.hrl"). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + test_content_prop_roundtrip(Datum, Binary) -> Types = [element(1, E) || E <- Datum], Values = [element(2, E) || E <- Datum], @@ -51,16 +51,24 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> + application:set_env(rabbit, file_handles_high_watermark, 10, infinity), + ok = file_handle_cache:set_limit(10), + passed = test_file_handle_cache(), + passed = test_backing_queue(), passed = test_priority_queue(), + passed = test_bpqueue(), passed = test_pg_local(), passed = test_unfold(), + passed = test_supervisor_delayed_restart(), passed = test_parsing(), passed = test_content_framing(), + passed = test_content_transcoding(), passed = test_topic_matching(), passed = test_log_management(), passed = test_app_management(), passed = test_log_management_during_startup(), - passed = test_memory_pressure(), + passed = test_statistics(), + passed = test_option_parser(), passed = test_cluster_management(), passed = test_user_management(), passed = test_server_status(), @@ -207,6 +215,143 @@ test_priority_queue(Q) -> priority_queue:to_list(Q), priority_queue_out_all(Q)}. 
+test_bpqueue() -> + Q = bpqueue:new(), + true = bpqueue:is_empty(Q), + 0 = bpqueue:len(Q), + [] = bpqueue:to_list(Q), + + Q1 = bpqueue_test(fun bpqueue:in/3, fun bpqueue:out/1, + fun bpqueue:to_list/1, + fun bpqueue:foldl/3, fun bpqueue:map_fold_filter_l/4), + Q2 = bpqueue_test(fun bpqueue:in_r/3, fun bpqueue:out_r/1, + fun (QR) -> lists:reverse( + [{P, lists:reverse(L)} || + {P, L} <- bpqueue:to_list(QR)]) + end, + fun bpqueue:foldr/3, fun bpqueue:map_fold_filter_r/4), + + [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(bpqueue:join(Q, Q1)), + [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(bpqueue:join(Q2, Q)), + [{foo, [1, 2]}, {bar, [3, 3]}, {foo, [2,1]}] = + bpqueue:to_list(bpqueue:join(Q1, Q2)), + + [{foo, [1, 2]}, {bar, [3]}, {foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list(bpqueue:join(Q1, Q1)), + + [{foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list( + bpqueue:from_list( + [{x, []}, {foo, [1]}, {y, []}, {foo, [2]}, {bar, [3]}, {z, []}])), + + [{undefined, [a]}] = bpqueue:to_list(bpqueue:from_list([{undefined, [a]}])), + + {4, [a,b,c,d]} = + bpqueue:foldl( + fun (Prefix, Value, {Prefix, Acc}) -> + {Prefix + 1, [Value | Acc]} + end, + {0, []}, bpqueue:from_list([{0,[d]}, {1,[c]}, {2,[b]}, {3,[a]}])), + + [{bar,3}, {foo,2}, {foo,1}] = + bpqueue:foldr(fun (P, V, I) -> [{P,V} | I] end, [], Q2), + + BPQL = [{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], + BPQ = bpqueue:from_list(BPQL), + + %% no effect + {BPQL, 0} = bpqueue_mffl([none], {none, []}, BPQ), + {BPQL, 0} = bpqueue_mffl([foo,bar], {none, [1]}, BPQ), + {BPQL, 0} = bpqueue_mffl([bar], {none, [3]}, BPQ), + {BPQL, 0} = bpqueue_mffr([bar], {foo, [5]}, BPQ), + + %% process 1 item + {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffl([foo,bar], {foo, [2]}, BPQ), + {[{foo,[1,2,2]}, {bar,[-3,4,5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffl([bar], {bar, [4]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,-7]}], 1} = + bpqueue_mffr([foo,bar], {foo, [6]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4]}, {baz,[-5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffr([bar], {baz, [4]}, BPQ), + + %% change prefix + {[{bar,[-1,-2,-2,-3,-4,-5,-5,-6,-7]}], 9} = + bpqueue_mffl([foo,bar], {bar, []}, BPQ), + {[{bar,[-1,-2,-2,3,4,5]}, {foo,[5,6,7]}], 3} = + bpqueue_mffl([foo], {bar, [5]}, BPQ), + {[{bar,[-1,-2,-2,3,4,5,-5,-6]}, {foo,[7]}], 5} = + bpqueue_mffl([foo], {bar, [7]}, BPQ), + {[{foo,[1,2,2,-3,-4]}, {bar,[5]}, {foo,[5,6,7]}], 2} = + bpqueue_mffl([bar], {foo, [5]}, BPQ), + {[{bar,[-1,-2,-2,3,4,5,-5,-6,-7]}], 6} = + bpqueue_mffl([foo], {bar, []}, BPQ), + {[{foo,[1,2,2,-3,-4,-5,5,6,7]}], 3} = + bpqueue_mffl([bar], {foo, []}, BPQ), + + %% edge cases + {[{foo,[-1,-2,-2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 3} = + bpqueue_mffl([foo], {foo, [5]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[-5,-6,-7]}], 3} = + bpqueue_mffr([foo], {foo, [2]}, BPQ), + + passed. 
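%% What the expectations above exercise in map_fold_filter_l/r: the
%% first fun selects which block prefixes are touched at all; the
%% second maps each selected value to {NewPrefix, NewValue, NewAcc} or
%% returns 'stop' to halt early. Values that are unselected, or that
%% come after the stop, keep their original prefix and value, and
%% consecutive equal-prefix blocks are coalesced in the result (hence
%% the single merged {bar, [...]} runs). In these tests the accumulator
%% simply counts how many values were rewritten.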
+ +bpqueue_test(In, Out, List, Fold, MapFoldFilter) -> + Q = bpqueue:new(), + {empty, _Q} = Out(Q), + + ok = Fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, ok, Q), + {Q1M, 0} = MapFoldFilter(fun(_P) -> throw(explosion) end, + fun(_V, _N) -> throw(explosion) end, 0, Q), + [] = bpqueue:to_list(Q1M), + + Q1 = In(bar, 3, In(foo, 2, In(foo, 1, Q))), + false = bpqueue:is_empty(Q1), + 3 = bpqueue:len(Q1), + [{foo, [1, 2]}, {bar, [3]}] = List(Q1), + + {{value, foo, 1}, Q3} = Out(Q1), + {{value, foo, 2}, Q4} = Out(Q3), + {{value, bar, 3}, _Q5} = Out(Q4), + + F = fun (QN) -> + MapFoldFilter(fun (foo) -> true; + (_) -> false + end, + fun (2, _Num) -> stop; + (V, Num) -> {bar, -V, V - Num} end, + 0, QN) + end, + {Q6, 0} = F(Q), + [] = bpqueue:to_list(Q6), + {Q7, 1} = F(Q1), + [{bar, [-1]}, {foo, [2]}, {bar, [3]}] = List(Q7), + + Q1. + +bpqueue_mffl(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_l/4, FF1A, FF2A, BPQ). + +bpqueue_mffr(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_r/4, FF1A, FF2A, BPQ). + +bpqueue_mff(Fold, FF1A, FF2A, BPQ) -> + FF1 = fun (Prefixes) -> + fun (P) -> lists:member(P, Prefixes) end + end, + FF2 = fun ({Prefix, Stoppers}) -> + fun (Val, Num) -> + case lists:member(Val, Stoppers) of + true -> stop; + false -> {Prefix, -Val, 1 + Num} + end + end + end, + Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, + + Queue_to_list(Fold(FF1(FF1A), FF2(FF2A), 0, BPQ)). + test_simple_n_element_queue(N) -> Items = lists:seq(1, N), Q = priority_queue_in_all(priority_queue:new(), Items), @@ -360,8 +505,10 @@ test_content_framing(FrameMax, BodyBin) -> rabbit_binary_generator:build_simple_content_frames( 1, rabbit_binary_generator:ensure_content_encoded( - rabbit_basic:build_content(#'P_basic'{}, BodyBin)), - FrameMax), + rabbit_basic:build_content(#'P_basic'{}, BodyBin), + rabbit_framing_amqp_0_9_1), + FrameMax, + rabbit_framing_amqp_0_9_1), %% header is formatted correctly and the size is the total of the %% fragments <<_FrameHeader:7/binary, _ClassAndWeight:4/binary, @@ -389,6 +536,51 @@ test_content_framing() -> passed = test_content_framing(11, <<"More than one frame">>), passed. +test_content_transcoding() -> + %% there are no guarantees provided by 'clear' - it's just a hint + ClearDecoded = fun rabbit_binary_parser:clear_decoded_content/1, + ClearEncoded = fun rabbit_binary_generator:clear_encoded_content/1, + EnsureDecoded = + fun (C0) -> + C1 = rabbit_binary_parser:ensure_content_decoded(C0), + true = C1#content.properties =/= none, + C1 + end, + EnsureEncoded = + fun (Protocol) -> + fun (C0) -> + C1 = rabbit_binary_generator:ensure_content_encoded( + C0, Protocol), + true = C1#content.properties_bin =/= none, + C1 + end + end, + %% Beyond the assertions in Ensure*, the only testable guarantee + %% is that the operations should never fail. + %% + %% If we were using quickcheck we'd simply stuff all the above + %% into a generator for sequences of operations. In the absence of + %% quickcheck we pick particularly interesting sequences that: + %% + %% - execute every op twice since they are idempotent + %% - invoke clear_decoded, clear_encoded, decode and transcode + %% with one or both of decoded and encoded content present + [begin + sequence_with_content([Op]), + sequence_with_content([ClearEncoded, Op]), + sequence_with_content([ClearDecoded, Op]) + end || Op <- [ClearDecoded, ClearEncoded, EnsureDecoded, + EnsureEncoded(rabbit_framing_amqp_0_9_1), + EnsureEncoded(rabbit_framing_amqp_0_8)]], + passed. 
+ +sequence_with_content(Sequence) -> + lists:foldl(fun (F, V) -> F(F(V)) end, + rabbit_binary_generator:ensure_content_encoded( + rabbit_basic:build_content(#'P_basic'{}, <<>>), + rabbit_framing_amqp_0_9_1), + Sequence). + test_topic_match(P, R) -> test_topic_match(P, R, true). @@ -579,6 +771,30 @@ test_log_management_during_startup() -> ok = control_action(start_app, []), passed. +test_option_parser() -> + % command and arguments should just pass through + ok = check_get_options({["mock_command", "arg1", "arg2"], []}, + [], ["mock_command", "arg1", "arg2"]), + + % get flags + ok = check_get_options( + {["mock_command", "arg1"], [{"-f", true}, {"-f2", false}]}, + [{flag, "-f"}, {flag, "-f2"}], ["mock_command", "arg1", "-f"]), + + % get options + ok = check_get_options( + {["mock_command"], [{"-foo", "bar"}, {"-baz", "notbaz"}]}, + [{option, "-foo", "notfoo"}, {option, "-baz", "notbaz"}], + ["mock_command", "-foo", "bar"]), + + % shuffled and interleaved arguments and options + ok = check_get_options( + {["a1", "a2", "a3"], [{"-o1", "hello"}, {"-o2", "noto2"}, {"-f", true}]}, + [{option, "-o1", "noto1"}, {flag, "-f"}, {option, "-o2", "noto2"}], + ["-f", "a1", "-o1", "hello", "a2", "a3"]), + + passed. + test_cluster_management() -> %% 'cluster' and 'reset' should only work if the app is stopped @@ -714,7 +930,7 @@ test_cluster_management2(SecondaryNode) -> %% attempt to leave cluster when no other node is alive ok = control_action(cluster, [SecondaryNodeS, NodeS]), ok = control_action(start_app, []), - ok = control_action(stop_app, SecondaryNode, []), + ok = control_action(stop_app, SecondaryNode, [], []), ok = control_action(stop_app, []), {error, {no_running_cluster_nodes, _, _}} = control_action(reset, []), @@ -722,9 +938,9 @@ test_cluster_management2(SecondaryNode) -> %% leave system clustered, with the secondary node as a ram node ok = control_action(force_reset, []), ok = control_action(start_app, []), - ok = control_action(force_reset, SecondaryNode, []), - ok = control_action(cluster, SecondaryNode, [NodeS]), - ok = control_action(start_app, SecondaryNode, []), + ok = control_action(force_reset, SecondaryNode, [], []), + ok = control_action(cluster, SecondaryNode, [NodeS], []), + ok = control_action(start_app, SecondaryNode, [], []), passed. 
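%% The recurring change in these tests: control_action now takes an
%% explicit options proplist (see control_action/2,3,4 and
%% default_options/0 further down), so what used to be spliced into the
%% argument list as ["-p", "/testhost", ...] is now passed separately
%% as [{"-p", "/testhost"}], and calls against a remote node gain the
%% extra options argument (here usually []).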
@@ -744,9 +960,12 @@ test_user_management() -> {error, {no_such_user, _}} = control_action(list_user_permissions, ["foo"]), {error, {no_such_vhost, _}} = - control_action(list_permissions, ["-p", "/testhost"]), + control_action(list_permissions, [], [{"-p", "/testhost"}]), {error, {invalid_regexp, _, _}} = control_action(set_permissions, ["guest", "+foo", ".*", ".*"]), + {error, {invalid_scope, _}} = + control_action(set_permissions, ["guest", "foo", ".*", ".*"], + [{"-s", "cilent"}]), %% user creation ok = control_action(add_user, ["foo", "bar"]), @@ -762,16 +981,21 @@ test_user_management() -> ok = control_action(list_vhosts, []), %% user/vhost mapping - ok = control_action(set_permissions, ["-p", "/testhost", - "foo", ".*", ".*", ".*"]), - ok = control_action(set_permissions, ["-p", "/testhost", - "foo", ".*", ".*", ".*"]), - ok = control_action(list_permissions, ["-p", "/testhost"]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}, {"-s", "client"}]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}, {"-s", "all"}]), + ok = control_action(list_permissions, [], [{"-p", "/testhost"}]), + ok = control_action(list_permissions, [], [{"-p", "/testhost"}]), ok = control_action(list_user_permissions, ["foo"]), %% user/vhost unmapping - ok = control_action(clear_permissions, ["-p", "/testhost", "foo"]), - ok = control_action(clear_permissions, ["-p", "/testhost", "foo"]), + ok = control_action(clear_permissions, ["foo"], [{"-p", "/testhost"}]), + ok = control_action(clear_permissions, ["foo"], [{"-p", "/testhost"}]), %% vhost deletion ok = control_action(delete_vhost, ["/testhost"]), @@ -780,8 +1004,8 @@ test_user_management() -> %% deleting a populated vhost ok = control_action(add_vhost, ["/testhost"]), - ok = control_action(set_permissions, ["-p", "/testhost", - "foo", ".*", ".*", ".*"]), + ok = control_action(set_permissions, ["foo", ".*", ".*", ".*"], + [{"-p", "/testhost"}]), ok = control_action(delete_vhost, ["/testhost"]), %% user deletion @@ -794,8 +1018,9 @@ test_user_management() -> test_server_status() -> %% create a few things so there is some useful information to list Writer = spawn(fun () -> receive shutdown -> ok end end), - Ch = rabbit_channel:start_link(1, self(), Writer, <<"user">>, <<"/">>, - self()), + {ok, Ch} = rabbit_channel:start_link(1, self(), Writer, + <<"user">>, <<"/">>, self(), + fun (_) -> {ok, self()} end), [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>], {new, Queue = #amqqueue{}} <- [rabbit_amqqueue:declare( @@ -836,176 +1061,112 @@ test_server_status() -> %% cleanup [{ok, _} = rabbit_amqqueue:delete(QR, false, false) || QR <- [Q, Q2]], + + unlink(Ch), ok = rabbit_channel:shutdown(Ch), passed. 
-test_hooks() -> - %% Firing of hooks calls all hooks in an isolated manner - rabbit_hooks:subscribe(test_hook, test, {rabbit_tests, handle_hook, []}), - rabbit_hooks:subscribe(test_hook, test2, {rabbit_tests, handle_hook, []}), - rabbit_hooks:subscribe(test_hook2, test2, {rabbit_tests, handle_hook, []}), - rabbit_hooks:trigger(test_hook, [arg1, arg2]), - [arg1, arg2] = get(test_hook_test_fired), - [arg1, arg2] = get(test_hook_test2_fired), - undefined = get(test_hook2_test2_fired), - - %% Hook Deletion works - put(test_hook_test_fired, undefined), - put(test_hook_test2_fired, undefined), - rabbit_hooks:unsubscribe(test_hook, test), - rabbit_hooks:trigger(test_hook, [arg3, arg4]), - undefined = get(test_hook_test_fired), - [arg3, arg4] = get(test_hook_test2_fired), - undefined = get(test_hook2_test2_fired), - - %% Catches exceptions from bad hooks - rabbit_hooks:subscribe(test_hook3, test, {rabbit_tests, bad_handle_hook, []}), - ok = rabbit_hooks:trigger(test_hook3, []), - - %% Passing extra arguments to hooks - rabbit_hooks:subscribe(arg_hook, test, {rabbit_tests, extra_arg_hook, [1, 3]}), - rabbit_hooks:trigger(arg_hook, [arg1, arg2]), - {[arg1, arg2], 1, 3} = get(arg_hook_test_fired), - - %% Invoking Pids - Remote = fun () -> - receive - {rabbitmq_hook,[remote_test,test,[],Target]} -> - Target ! invoked - end - end, - P = spawn(Remote), - rabbit_hooks:subscribe(remote_test, test, {rabbit_hooks, notify_remote, [P, [self()]]}), - rabbit_hooks:trigger(remote_test, []), - receive - invoked -> ok - after 100 -> - io:format("Remote hook not invoked"), - throw(timeout) +test_spawn(Receiver) -> + Me = self(), + Writer = spawn(fun () -> Receiver(Me) end), + {ok, Ch} = rabbit_channel:start_link(1, Me, Writer, + <<"guest">>, <<"/">>, self(), + fun (_) -> {ok, self()} end), + ok = rabbit_channel:do(Ch, #'channel.open'{}), + receive #'channel.open_ok'{} -> ok + after 1000 -> throw(failed_to_receive_channel_open_ok) end, - passed. + {Writer, Ch}. -test_memory_pressure_receiver(Pid) -> +test_statistics_receiver(Pid) -> receive shutdown -> ok; {send_command, Method} -> - ok = case Method of - #'channel.flow'{} -> ok; - #'basic.qos_ok'{} -> ok; - #'channel.open_ok'{} -> ok - end, Pid ! Method, - test_memory_pressure_receiver(Pid); - sync -> - Pid ! sync, - test_memory_pressure_receiver(Pid) + test_statistics_receiver(Pid) end. -test_memory_pressure_receive_flow(Active) -> - receive #'channel.flow'{active = Active} -> ok - after 1000 -> throw(failed_to_receive_channel_flow) - end, - receive #'channel.flow'{} -> - throw(pipelining_sync_commands_detected) - after 0 -> - ok - end. - -test_memory_pressure_sync(Ch, Writer) -> - ok = rabbit_channel:do(Ch, #'basic.qos'{}), - Writer ! sync, - receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, - receive #'basic.qos_ok'{} -> ok - after 1000 -> throw(failed_to_receive_basic_qos_ok) +test_statistics_event_receiver(Pid) -> + receive + Foo -> + Pid ! Foo, + test_statistics_event_receiver(Pid) end. -test_memory_pressure_spawn() -> - Me = self(), - Writer = spawn(fun () -> test_memory_pressure_receiver(Me) end), - Ch = rabbit_channel:start_link(1, self(), Writer, <<"user">>, <<"/">>, - self()), - ok = rabbit_channel:do(Ch, #'channel.open'{}), - MRef = erlang:monitor(process, Ch), - receive #'channel.open_ok'{} -> ok - after 1000 -> throw(failed_to_receive_channel_open_ok) - end, - {Writer, Ch, MRef}. 
- -expect_normal_channel_termination(MRef, Ch) -> - receive {'DOWN', MRef, process, Ch, normal} -> ok - after 1000 -> throw(channel_failed_to_exit) +test_statistics_receive_event(Ch, Matcher) -> + rabbit_channel:flush(Ch), + rabbit_channel:emit_stats(Ch), + test_statistics_receive_event1(Ch, Matcher). + +test_statistics_receive_event1(Ch, Matcher) -> + receive #event{type = channel_stats, props = Props} -> + case Matcher(Props) of + true -> Props; + _ -> test_statistics_receive_event1(Ch, Matcher) + end + after 1000 -> throw(failed_to_receive_event) end. -test_memory_pressure() -> - {Writer0, Ch0, MRef0} = test_memory_pressure_spawn(), - [ok = rabbit_channel:conserve_memory(Ch0, Conserve) || - Conserve <- [false, false, true, false, true, true, false]], - ok = test_memory_pressure_sync(Ch0, Writer0), - receive {'DOWN', MRef0, process, Ch0, Info0} -> - throw({channel_died_early, Info0}) - after 0 -> ok - end, - - %% we should have just 1 active=false waiting for us - ok = test_memory_pressure_receive_flow(false), - - %% if we reply with flow_ok, we should immediately get an - %% active=true back - ok = rabbit_channel:do(Ch0, #'channel.flow_ok'{active = false}), - ok = test_memory_pressure_receive_flow(true), - - %% if we publish at this point, the channel should die - Content = rabbit_basic:build_content(#'P_basic'{}, <<>>), - ok = rabbit_channel:do(Ch0, #'basic.publish'{}, Content), - expect_normal_channel_termination(MRef0, Ch0), - - {Writer1, Ch1, MRef1} = test_memory_pressure_spawn(), - ok = rabbit_channel:conserve_memory(Ch1, true), - ok = test_memory_pressure_receive_flow(false), - ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}), - ok = test_memory_pressure_sync(Ch1, Writer1), - ok = rabbit_channel:conserve_memory(Ch1, false), - ok = test_memory_pressure_receive_flow(true), - %% send back the wrong flow_ok. Channel should die. - ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}), - expect_normal_channel_termination(MRef1, Ch1), - - {_Writer2, Ch2, MRef2} = test_memory_pressure_spawn(), - %% just out of the blue, send a flow_ok. Life should end. - ok = rabbit_channel:do(Ch2, #'channel.flow_ok'{active = true}), - expect_normal_channel_termination(MRef2, Ch2), - - {_Writer3, Ch3, MRef3} = test_memory_pressure_spawn(), - ok = rabbit_channel:conserve_memory(Ch3, true), - receive {'DOWN', MRef3, process, Ch3, _} -> - ok - after 12000 -> - throw(channel_failed_to_exit) - end, - - alarm_handler:set_alarm({vm_memory_high_watermark, []}), - Me = self(), - Writer4 = spawn(fun () -> test_memory_pressure_receiver(Me) end), - Ch4 = rabbit_channel:start_link(1, self(), Writer4, <<"user">>, <<"/">>, - self()), - ok = rabbit_channel:do(Ch4, #'channel.open'{}), - MRef4 = erlang:monitor(process, Ch4), - Writer4 ! sync, - receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, - receive #'channel.open_ok'{} -> throw(unexpected_channel_open_ok) - after 0 -> ok - end, - alarm_handler:clear_alarm(vm_memory_high_watermark), - Writer4 ! sync, - receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, - receive #'channel.open_ok'{} -> ok - after 1000 -> throw(failed_to_receive_channel_open_ok) - end, - rabbit_channel:shutdown(Ch4), - expect_normal_channel_termination(MRef4, Ch4), - +test_statistics() -> + application:set_env(rabbit, collect_statistics, fine), + + %% ATM this just tests the queue / exchange stats in channels. That's + %% by far the most complex code though. 
+ + %% Set up a channel and queue + {_Writer, Ch} = test_spawn(fun test_statistics_receiver/1), + rabbit_channel:do(Ch, #'queue.declare'{}), + QName = receive #'queue.declare_ok'{queue = Q0} -> + Q0 + after 1000 -> throw(failed_to_receive_queue_declare_ok) + end, + {ok, Q} = rabbit_amqqueue:lookup(rabbit_misc:r(<<"/">>, queue, QName)), + QPid = Q#amqqueue.pid, + X = rabbit_misc:r(<<"/">>, exchange, <<"">>), + + rabbit_tests_event_receiver:start(self()), + + %% Check stats empty + Event = test_statistics_receive_event(Ch, fun (_) -> true end), + [] = proplists:get_value(channel_queue_stats, Event), + [] = proplists:get_value(channel_exchange_stats, Event), + [] = proplists:get_value(channel_queue_exchange_stats, Event), + + %% Publish and get a message + rabbit_channel:do(Ch, #'basic.publish'{exchange = <<"">>, + routing_key = QName}, + rabbit_basic:build_content(#'P_basic'{}, <<"">>)), + rabbit_channel:do(Ch, #'basic.get'{queue = QName}), + + %% Check the stats reflect that + Event2 = test_statistics_receive_event( + Ch, + fun (E) -> + length(proplists:get_value( + channel_queue_exchange_stats, E)) > 0 + end), + [{QPid,[{get,1}]}] = proplists:get_value(channel_queue_stats, Event2), + [{X,[{publish,1}]}] = proplists:get_value(channel_exchange_stats, Event2), + [{{QPid,X},[{publish,1}]}] = + proplists:get_value(channel_queue_exchange_stats, Event2), + + %% Check the stats remove stuff on queue deletion + rabbit_channel:do(Ch, #'queue.delete'{queue = QName}), + Event3 = test_statistics_receive_event( + Ch, + fun (E) -> + length(proplists:get_value( + channel_queue_exchange_stats, E)) == 0 + end), + + [] = proplists:get_value(channel_queue_stats, Event3), + [{X,[{publish,1}]}] = proplists:get_value(channel_exchange_stats, Event3), + [] = proplists:get_value(channel_queue_exchange_stats, Event3), + + rabbit_channel:shutdown(Ch), + rabbit_tests_event_receiver:stop(), passed. test_delegates_async(SecondaryNode) -> @@ -1097,11 +1258,16 @@ test_delegates_sync(SecondaryNode) -> %--------------------------------------------------------------------- -control_action(Command, Args) -> control_action(Command, node(), Args). +control_action(Command, Args) -> + control_action(Command, node(), Args, default_options()). -control_action(Command, Node, Args) -> +control_action(Command, Args, NewOpts) -> + control_action(Command, node(), Args, + expand_options(default_options(), NewOpts)). + +control_action(Command, Node, Args, Opts) -> case catch rabbit_control:action( - Command, Node, Args, + Command, Node, Args, Opts, fun (Format, Args1) -> io:format(Format ++ " ...~n", Args1) end) of @@ -1115,13 +1281,28 @@ control_action(Command, Node, Args) -> info_action(Command, Args, CheckVHost) -> ok = control_action(Command, []), - if CheckVHost -> ok = control_action(Command, ["-p", "/"]); + if CheckVHost -> ok = control_action(Command, []); true -> ok end, ok = control_action(Command, lists:map(fun atom_to_list/1, Args)), {bad_argument, dummy} = control_action(Command, ["dummy"]), ok. +default_options() -> [{"-s", "client"}, {"-p", "/"}, {"-q", "false"}]. + +expand_options(As, Bs) -> + lists:foldl(fun({K, _}=A, R) -> + case proplists:is_defined(K, R) of + true -> R; + false -> [A | R] + end + end, Bs, As). + +check_get_options({ExpArgs, ExpOpts}, Defs, Args) -> + {ExpArgs, ResOpts} = rabbit_misc:get_options(Defs, Args), + true = lists:sort(ExpOpts) == lists:sort(ResOpts), % don't care about the order + ok. 
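%% rabbit_misc:get_options/2, as exercised by test_option_parser above,
%% appears to take a list of {flag, Name} / {option, Name, Default}
%% definitions plus the raw argument list and return {Args, Opts}; for
%% instance (up to the ordering of Opts):
%%     {["mock_command", "arg1"], [{"-f", true}, {"-f2", false}]} =
%%         rabbit_misc:get_options([{flag, "-f"}, {flag, "-f2"}],
%%                                 ["mock_command", "arg1", "-f"]),
%% with absent flags reported as false and absent options as their
%% declared defaults.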
+ empty_files(Files) -> [case file:read_file_info(File) of {ok, FInfo} -> FInfo#file_info.size == 0; @@ -1179,10 +1360,608 @@ delete_log_handlers(Handlers) -> Handler <- Handlers], ok. -handle_hook(HookName, Handler, Args) -> - A = atom_to_list(HookName) ++ "_" ++ atom_to_list(Handler) ++ "_fired", - put(list_to_atom(A), Args). -bad_handle_hook(_, _, _) -> - bad:bad(). -extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> - handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). +test_supervisor_delayed_restart() -> + test_sup:test_supervisor_delayed_restart(). + +test_file_handle_cache() -> + %% test copying when there is just one spare handle + Limit = file_handle_cache:get_limit(), + ok = file_handle_cache:set_limit(5), %% 1 or 2 sockets, 2 msg_stores + TmpDir = filename:join(rabbit_mnesia:dir(), "tmp"), + ok = filelib:ensure_dir(filename:join(TmpDir, "nothing")), + Pid = spawn(fun () -> {ok, Hdl} = file_handle_cache:open( + filename:join(TmpDir, "file3"), + [write], []), + receive close -> ok end, + file_handle_cache:delete(Hdl) + end), + Src = filename:join(TmpDir, "file1"), + Dst = filename:join(TmpDir, "file2"), + Content = <<"foo">>, + ok = file:write_file(Src, Content), + {ok, SrcHdl} = file_handle_cache:open(Src, [read], []), + {ok, DstHdl} = file_handle_cache:open(Dst, [write], []), + Size = size(Content), + {ok, Size} = file_handle_cache:copy(SrcHdl, DstHdl, Size), + ok = file_handle_cache:delete(SrcHdl), + file_handle_cache:delete(DstHdl), + Pid ! close, + ok = file_handle_cache:set_limit(Limit), + passed. + +test_backing_queue() -> + case application:get_env(rabbit, backing_queue_module) of + {ok, rabbit_variable_queue} -> + {ok, FileSizeLimit} = + application:get_env(rabbit, msg_store_file_size_limit), + application:set_env(rabbit, msg_store_file_size_limit, 512, + infinity), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + application:set_env(rabbit, queue_index_max_journal_entries, 128, + infinity), + passed = test_msg_store(), + application:set_env(rabbit, msg_store_file_size_limit, + FileSizeLimit, infinity), + passed = test_queue_index(), + passed = test_variable_queue(), + passed = test_queue_recover(), + application:set_env(rabbit, queue_index_max_journal_entries, + MaxJournal, infinity), + passed; + _ -> + passed + end. + +restart_msg_store_empty() -> + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + undefined, {fun (ok) -> finished end, ok}). + +guid_bin(X) -> + erlang:md5(term_to_binary(X)). + +msg_store_contains(Atom, Guids) -> + Atom = lists:foldl( + fun (Guid, Atom1) when Atom1 =:= Atom -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end, + Atom, Guids). + +msg_store_sync(Guids) -> + Ref = make_ref(), + Self = self(), + ok = rabbit_msg_store:sync(?PERSISTENT_MSG_STORE, Guids, + fun () -> Self ! {sync, Ref} end), + receive + {sync, Ref} -> ok + after + 10000 -> + io:format("Sync from msg_store missing for guids ~p~n", [Guids]), + throw(timeout) + end. + +msg_store_read(Guids, MSCState) -> + lists:foldl(fun (Guid, MSCStateM) -> + {{ok, Guid}, MSCStateN} = rabbit_msg_store:read( + ?PERSISTENT_MSG_STORE, + Guid, MSCStateM), + MSCStateN + end, MSCState, Guids). + +msg_store_write(Guids, MSCState) -> + lists:foldl(fun (Guid, {ok, MSCStateN}) -> + rabbit_msg_store:write(?PERSISTENT_MSG_STORE, + Guid, Guid, MSCStateN) + end, {ok, MSCState}, Guids). + +msg_store_remove(Guids) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids). 
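%% Pattern in the helpers above: read and write return an updated
%% client state, so msg_store_read/2 and msg_store_write/2 fold over
%% the guids threading MSCState through each call (the tests use each
%% guid as both its id and its payload), while contains, sync and
%% remove are plain calls against ?PERSISTENT_MSG_STORE that need no
%% client state.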
+ +foreach_with_msg_store_client(MsgStore, Ref, Fun, L) -> + rabbit_msg_store:client_terminate( + lists:foldl(fun (Guid, MSCState) -> Fun(Guid, MsgStore, MSCState) end, + rabbit_msg_store:client_init(MsgStore, Ref), L), MsgStore). + +test_msg_store() -> + restart_msg_store_empty(), + Self = self(), + Guids = [guid_bin(M) || M <- lists:seq(1,100)], + {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids), + %% check we don't contain any of the msgs we're about to publish + false = msg_store_contains(false, Guids), + Ref = rabbit_guid:guid(), + MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), + %% publish the first half + {ok, MSCState1} = msg_store_write(Guids1stHalf, MSCState), + %% sync on the first half + ok = msg_store_sync(Guids1stHalf), + %% publish the second half + {ok, MSCState2} = msg_store_write(Guids2ndHalf, MSCState1), + %% sync on the first half again - the msg_store will be dirty, but + %% we won't need the fsync + ok = msg_store_sync(Guids1stHalf), + %% check they're all in there + true = msg_store_contains(true, Guids), + %% publish the latter half twice so we hit the caching and ref count code + {ok, MSCState3} = msg_store_write(Guids2ndHalf, MSCState2), + %% check they're still all in there + true = msg_store_contains(true, Guids), + %% sync on the 2nd half, but do lots of individual syncs to try + %% and cause coalescing to happen + ok = lists:foldl( + fun (Guid, ok) -> rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, + [Guid], fun () -> Self ! {sync, Guid} end) + end, ok, Guids2ndHalf), + lists:foldl( + fun(Guid, ok) -> + receive + {sync, Guid} -> ok + after + 10000 -> + io:format("Sync from msg_store missing (guid: ~p)~n", + [Guid]), + throw(timeout) + end + end, ok, Guids2ndHalf), + %% it's very likely we're not dirty here, so the 1st half sync + %% should hit a different code path + ok = msg_store_sync(Guids1stHalf), + %% read them all + MSCState4 = msg_store_read(Guids, MSCState3), + %% read them all again - this will hit the cache, not disk + MSCState5 = msg_store_read(Guids, MSCState4), + %% remove them all + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids), + %% check first half doesn't exist + false = msg_store_contains(false, Guids1stHalf), + %% check second half does exist + true = msg_store_contains(true, Guids2ndHalf), + %% read the second half again + MSCState6 = msg_store_read(Guids2ndHalf, MSCState5), + %% release the second half, just for fun (aka code coverage) + ok = rabbit_msg_store:release(?PERSISTENT_MSG_STORE, Guids2ndHalf), + %% read the second half again, just for fun (aka code coverage) + MSCState7 = msg_store_read(Guids2ndHalf, MSCState6), + ok = rabbit_msg_store:client_terminate(MSCState7, ?PERSISTENT_MSG_STORE), + %% stop and restart, preserving every other msg in 2nd half + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + [], {fun ([]) -> finished; + ([Guid|GuidsTail]) + when length(GuidsTail) rem 2 == 0 -> + {Guid, 1, GuidsTail}; + ([Guid|GuidsTail]) -> + {Guid, 0, GuidsTail} + end, Guids2ndHalf}), + %% check we have the right msgs left + lists:foldl( + fun (Guid, Bool) -> + not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid)) + end, false, Guids2ndHalf), + %% restart empty + restart_msg_store_empty(), + %% check we don't contain any of the msgs + false = msg_store_contains(false, Guids), + %% publish the first half again + MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), + {ok, MSCState9} = msg_store_write(Guids1stHalf, MSCState8), + 
%% this should force some sort of sync internally otherwise misread + ok = rabbit_msg_store:client_terminate( + msg_store_read(Guids1stHalf, MSCState9), ?PERSISTENT_MSG_STORE), + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids1stHalf), + %% restart empty + restart_msg_store_empty(), %% now safe to reuse guids + %% push a lot of msgs in... at least 100 files worth + {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit), + PayloadSizeBits = 65536, + BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)), + GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], + Payload = << 0:PayloadSizeBits >>, + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, MsgStore, MSCStateM) -> + {ok, MSCStateN} = rabbit_msg_store:write( + MsgStore, Guid, Payload, MSCStateM), + MSCStateN + end, GuidsBig), + %% now read them to ensure we hit the fast client-side reading + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, MsgStore, MSCStateM) -> + {{ok, Payload}, MSCStateN} = rabbit_msg_store:read( + MsgStore, Guid, MSCStateM), + MSCStateN + end, GuidsBig), + %% .., then 3s by 1... + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount, 1, -3)]), + %% .., then remove 3s by 2, from the young end first. This hits + %% GC (under 50% good data left, but no empty files. Must GC). + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]), + %% .., then remove 3s by 3, from the young end first. This hits + %% GC... + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]), + %% ensure empty + false = msg_store_contains(false, GuidsBig), + %% restart empty + restart_msg_store_empty(), + passed. + +queue_name(Name) -> + rabbit_misc:r(<<"/">>, queue, Name). + +test_queue() -> + queue_name(<<"test">>). + +init_test_queue() -> + rabbit_queue_index:init( + test_queue(), true, false, + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end). + +restart_test_queue(Qi) -> + _ = rabbit_queue_index:terminate([], Qi), + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([test_queue()]), + init_test_queue(). + +empty_test_queue() -> + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([]), + {0, _Terms, Qi} = init_test_queue(), + _ = rabbit_queue_index:delete_and_terminate(Qi), + ok. + +with_empty_test_queue(Fun) -> + ok = empty_test_queue(), + {0, _Terms, Qi} = init_test_queue(), + rabbit_queue_index:delete_and_terminate(Fun(Qi)). + +queue_index_publish(SeqIds, Persistent, Qi) -> + Ref = rabbit_guid:guid(), + MsgStore = case Persistent of + true -> ?PERSISTENT_MSG_STORE; + false -> ?TRANSIENT_MSG_STORE + end, + {A, B, MSCStateEnd} = + lists:foldl( + fun (SeqId, {QiN, SeqIdsGuidsAcc, MSCStateN}) -> + Guid = rabbit_guid:guid(), + QiM = rabbit_queue_index:publish( + Guid, SeqId, Persistent, QiN), + {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, Guid, + Guid, MSCStateN), + {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc], MSCStateM} + end, {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)}, SeqIds), + ok = rabbit_msg_store:client_delete_and_terminate( + MSCStateEnd, MsgStore, Ref), + {A, B}. + +verify_read_with_published(_Delivered, _Persistent, [], _) -> + ok; +verify_read_with_published(Delivered, Persistent, + [{Guid, SeqId, Persistent, Delivered}|Read], + [{SeqId, Guid}|Published]) -> + verify_read_with_published(Delivered, Persistent, Read, Published); +verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> + ko. 
+ +test_queue_index() -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + TwoSegs = SegmentSize + SegmentSize, + MostOfASegment = trunc(SegmentSize*0.75), + SeqIdsA = lists:seq(0, MostOfASegment-1), + SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), + SeqIdsC = lists:seq(0, trunc(SegmentSize/2)), + SeqIdsD = lists:seq(0, SegmentSize*4), + + with_empty_test_queue( + fun (Qi0) -> + {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), + {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), + {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), + {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), + ok = verify_read_with_published(false, false, ReadA, + lists:reverse(SeqIdsGuidsA)), + %% should get length back as 0, as all the msgs were transient + {0, _Terms1, Qi6} = restart_test_queue(Qi4), + {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), + {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), + {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), + {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), + ok = verify_read_with_published(false, true, ReadB, + lists:reverse(SeqIdsGuidsB)), + %% should get length back as MostOfASegment + LenB = length(SeqIdsB), + {LenB, _Terms2, Qi12} = restart_test_queue(Qi10), + {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), + Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), + {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), + ok = verify_read_with_published(true, true, ReadC, + lists:reverse(SeqIdsGuidsB)), + Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), + Qi17 = rabbit_queue_index:flush(Qi16), + %% Everything will have gone now because #pubs == #acks + {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), + %% should get length back as 0 because all persistent + %% msgs have been acked + {0, _Terms3, Qi19} = restart_test_queue(Qi18), + Qi19 + end), + + %% These next bits are just to hit the auto deletion of segment files. + %% First, partials: + %% a) partial pub+del+ack, then move to new segment + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsC, Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], + false, Qi4), + Qi5 + end), + + %% b) partial pub+del, then move to new segment, then ack all in old segment + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + {Qi3, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], + false, Qi2), + Qi4 = rabbit_queue_index:ack(SeqIdsC, Qi3), + rabbit_queue_index:flush(Qi4) + end), + + %% c) just fill up several segments of all pubs, then +dels, then +acks + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsD, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsD, Qi2), + rabbit_queue_index:flush(Qi3) + end), + + %% d) get messages in all states to a segment, then flush, then do + %% the same again, don't flush and read. This will hit all + %% possibilities in combining the segment with the journal. 
+ with_empty_test_queue( + fun (Qi0) -> + {Qi1, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], + false, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {[], Qi8} = rabbit_queue_index:read(0, 4, Qi7), + {ReadD, Qi9} = rabbit_queue_index:read(4, 7, Qi8), + ok = verify_read_with_published(true, false, ReadD, + [Four, Five, Six]), + {ReadE, Qi10} = rabbit_queue_index:read(7, 9, Qi9), + ok = verify_read_with_published(false, false, ReadE, + [Seven, Eight]), + Qi10 + end), + + %% e) as for (d), but use terminate instead of read, which will + %% exercise journal_minus_segment, not segment_plus_journal. + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], + true, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + {5, _Terms9, Qi4} = restart_test_queue(Qi3), + {Qi5, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {5, _Terms10, Qi8} = restart_test_queue(Qi7), + Qi8 + end), + + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([]), + + passed. + +variable_queue_publish(IsPersistent, Count, VQ) -> + lists:foldl( + fun (_N, VQN) -> + rabbit_variable_queue:publish( + rabbit_basic:message( + rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = case IsPersistent of + true -> 2; + false -> 1 + end}, <<>>), VQN) + end, VQ, lists:seq(1, Count)). + +variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> + lists:foldl(fun (N, {VQN, AckTagsAcc}) -> + Rem = Len - N, + {{#basic_message { is_persistent = IsPersistent }, + IsDelivered, AckTagN, Rem}, VQM} = + rabbit_variable_queue:fetch(true, VQN), + {VQM, [AckTagN | AckTagsAcc]} + end, {VQ, []}, lists:seq(1, Count)). + +assert_prop(List, Prop, Value) -> + Value = proplists:get_value(Prop, List). + +assert_props(List, PropVals) -> + [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals]. + +with_fresh_variable_queue(Fun) -> + ok = empty_test_queue(), + VQ = rabbit_variable_queue:init(test_queue(), true, false), + S0 = rabbit_variable_queue:status(VQ), + assert_props(S0, [{q1, 0}, {q2, 0}, + {delta, {delta, undefined, 0, undefined}}, + {q3, 0}, {q4, 0}, + {len, 0}]), + _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)), + passed. + +test_variable_queue() -> + [passed = with_fresh_variable_queue(F) || + F <- [fun test_variable_queue_dynamic_duration_change/1, + fun test_variable_queue_partial_segments_delta_thing/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1]], + passed. 
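
To make the harness shape explicit, here is a hypothetical extra property written against the same helpers (the function name is invented; with_fresh_variable_queue/1, variable_queue_publish/3 and variable_queue_fetch/5 are defined above): publish ten transient messages, fetch and ack them all, and check the queue drains.

    test_variable_queue_publish_drain(VQ0) ->
        VQ1 = variable_queue_publish(false, 10, VQ0),
        10 = rabbit_variable_queue:len(VQ1),
        {VQ2, AckTags} = variable_queue_fetch(10, false, false, 10, VQ1),
        VQ3 = rabbit_variable_queue:ack(AckTags, VQ2),
        {empty, VQ4} = rabbit_variable_queue:fetch(true, VQ3),
        VQ4.

    %% and run it under the fresh-queue harness:
    %% passed = with_fresh_variable_queue(
    %%            fun test_variable_queue_publish_drain/1)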
+ +test_variable_queue_dynamic_duration_change(VQ0) -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + + %% start by sending in a couple of segments worth + Len = 2*SegmentSize, + VQ1 = variable_queue_publish(false, Len, VQ0), + + %% squeeze and relax queue + Churn = Len div 32, + VQ2 = publish_fetch_and_ack(Churn, Len, VQ1), + {Duration, VQ3} = rabbit_variable_queue:ram_duration(VQ2), + VQ7 = lists:foldl( + fun (Duration1, VQ4) -> + {_Duration, VQ5} = rabbit_variable_queue:ram_duration(VQ4), + io:format("~p:~n~p~n", + [Duration1, rabbit_variable_queue:status(VQ5)]), + VQ6 = rabbit_variable_queue:set_ram_duration_target( + Duration1, VQ5), + publish_fetch_and_ack(Churn, Len, VQ6) + end, VQ3, [Duration / 4, 0, Duration / 4, infinity]), + + %% drain + {VQ8, AckTags} = variable_queue_fetch(Len, false, false, Len, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags, VQ8), + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), + + VQ10. + +publish_fetch_and_ack(0, _Len, VQ0) -> + VQ0; +publish_fetch_and_ack(N, Len, VQ0) -> + VQ1 = variable_queue_publish(false, 1, VQ0), + {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), + publish_fetch_and_ack(N-1, Len, rabbit_variable_queue:ack([AckTag], VQ2)). + +test_variable_queue_partial_segments_delta_thing(VQ0) -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + HalfSegment = SegmentSize div 2, + OneAndAHalfSegment = SegmentSize + HalfSegment, + VQ1 = variable_queue_publish(true, OneAndAHalfSegment, VQ0), + {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), + VQ3 = check_variable_queue_status( + rabbit_variable_queue:set_ram_duration_target(0, VQ2), + %% one segment in q3 as betas, and half a segment in delta + [{delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment}]), + VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), + VQ5 = check_variable_queue_status( + variable_queue_publish(true, 1, VQ4), + %% one alpha, but it's in the same segment as the deltas + [{q1, 1}, + {delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment + 1}]), + {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, + SegmentSize + HalfSegment + 1, VQ5), + VQ7 = check_variable_queue_status( + VQ6, + %% the half segment should now be in q3 as betas + [{q1, 1}, + {delta, {delta, undefined, 0, undefined}}, + {q3, HalfSegment}, + {len, HalfSegment + 1}]), + {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, + HalfSegment + 1, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8), + %% should be empty now + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), + VQ10. + +check_variable_queue_status(VQ0, Props) -> + VQ1 = variable_queue_wait_for_shuffling_end(VQ0), + S = rabbit_variable_queue:status(VQ1), + io:format("~p~n", [S]), + assert_props(S, Props), + VQ1. + +variable_queue_wait_for_shuffling_end(VQ) -> + case rabbit_variable_queue:needs_idle_timeout(VQ) of + true -> variable_queue_wait_for_shuffling_end( + rabbit_variable_queue:idle_timeout(VQ)); + false -> VQ + end. 
+ +test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> + Count = 2 * rabbit_queue_index:next_segment_boundary(0), + VQ1 = variable_queue_publish(true, Count, VQ0), + VQ2 = variable_queue_publish(false, Count, VQ1), + VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), + {VQ4, _AckTags} = variable_queue_fetch(Count, true, false, + Count + Count, VQ3), + {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, + Count, VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {{_Msg1, true, _AckTag1, Count1}, VQ8} = + rabbit_variable_queue:fetch(true, VQ7), + VQ9 = variable_queue_publish(false, 1, VQ8), + VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9), + {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10), + {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11), + VQ12. + +test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> + VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), + VQ2 = variable_queue_publish(false, 4, VQ1), + {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2), + VQ4 = rabbit_variable_queue:requeue(AckTags, VQ3), + VQ5 = rabbit_variable_queue:idle_timeout(VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), + VQ8. + +test_queue_recover() -> + Count = 2 * rabbit_queue_index:next_segment_boundary(0), + TxID = rabbit_guid:guid(), + {new, #amqqueue { pid = QPid, name = QName }} = + rabbit_amqqueue:declare(test_queue(), true, false, [], none), + Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = 2}, <<>>), + Delivery = #delivery{mandatory = false, immediate = false, txn = TxID, + sender = self(), message = Msg}, + [true = rabbit_amqqueue:deliver(QPid, Delivery) || + _ <- lists:seq(1, Count)], + rabbit_amqqueue:commit_all([QPid], TxID, self()), + exit(QPid, kill), + MRef = erlang:monitor(process, QPid), + receive {'DOWN', MRef, process, QPid, _Info} -> ok + after 10000 -> exit(timeout_waiting_for_queue_death) + end, + rabbit_amqqueue:stop(), + ok = rabbit_amqqueue:start(), + rabbit_amqqueue:with_or_die( + QName, + fun (Q1 = #amqqueue { pid = QPid1 }) -> + CountMinusOne = Count - 1, + {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} = + rabbit_amqqueue:basic_get(Q1, self(), false), + exit(QPid1, shutdown), + VQ1 = rabbit_variable_queue:init(QName, true, true), + {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = + rabbit_variable_queue:fetch(true, VQ1), + _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2), + rabbit_amqqueue:internal_delete(QName) + end), + passed. diff --git a/src/rabbit_tests_event_receiver.erl b/src/rabbit_tests_event_receiver.erl new file mode 100644 index 0000000000..a92e3da75f --- /dev/null +++ b/src/rabbit_tests_event_receiver.erl @@ -0,0 +1,66 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. 
+%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_tests_event_receiver). + +-export([start/1, stop/0]). + +-export([init/1, handle_call/2, handle_event/2, handle_info/2, + terminate/2, code_change/3]). + +start(Pid) -> + gen_event:add_handler(rabbit_event, ?MODULE, [Pid]). + +stop() -> + gen_event:delete_handler(rabbit_event, ?MODULE, []). + +%%---------------------------------------------------------------------------- + +init([Pid]) -> + {ok, Pid}. + +handle_call(_Request, Pid) -> + {ok, not_understood, Pid}. + +handle_event(Event, Pid) -> + Pid ! Event, + {ok, Pid}. + +handle_info(_Info, Pid) -> + {ok, Pid}. + +terminate(_Arg, _Pid) -> + ok. + +code_change(_OldVsn, Pid, _Extra) -> + {ok, Pid}. + +%%---------------------------------------------------------------------------- diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl index 2e492b80bd..47e8bb0161 100644 --- a/src/rabbit_types.erl +++ b/src/rabbit_types.erl @@ -39,8 +39,12 @@ delivery/0, content/0, decoded_content/0, undecoded_content/0, unencoded_content/0, encoded_content/0, vhost/0, ctag/0, amqp_error/0, r/1, r2/2, r3/3, ssl_socket/0, listener/0, - binding/0, amqqueue/0, exchange/0, connection/0, user/0, - error/1, ok_or_error/1, ok_or_error2/2, ok/1]). + binding/0, amqqueue/0, exchange/0, connection/0, protocol/0, + user/0, ok/1, error/1, ok_or_error/1, ok_or_error2/2, + ok_pid_or_error/0, channel_exit/0, connection_exit/0]). + +-type(channel_exit() :: no_return()). +-type(connection_exit() :: no_return()). -type(maybe(T) :: T | 'none'). -type(vhost() :: binary()). @@ -133,6 +137,8 @@ -type(connection() :: pid()). +-type(protocol() :: 'rabbit_framing_amqp_0_8' | 'rabbit_framing_amqp_0_9_1'). + -type(user() :: #user{username :: rabbit_access_control:username(), password :: rabbit_access_control:password()}). @@ -141,5 +147,6 @@ -type(error(A) :: {'error', A}). -type(ok_or_error(A) :: 'ok' | error(A)). -type(ok_or_error2(A, B) :: ok(A) | error(B)). +-type(ok_pid_or_error() :: ok_or_error2(pid(), any())). -endif. % use_specs diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl new file mode 100644 index 0000000000..30d3a8aec1 --- /dev/null +++ b/src/rabbit_variable_queue.erl @@ -0,0 +1,1433 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. 
+%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_variable_queue). + +-export([init/3, terminate/1, delete_and_terminate/1, + purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, + tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, + requeue/2, len/1, is_empty/1, + set_ram_duration_target/2, ram_duration/1, + needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, + status/1]). + +-export([start/1, stop/0]). + +%% exported for testing only +-export([start_msg_store/2, stop_msg_store/0]). + +%%---------------------------------------------------------------------------- +%% Definitions: + +%% alpha: this is a message where both the message itself, and its +%% position within the queue are held in RAM +%% +%% beta: this is a message where the message itself is only held on +%% disk, but its position within the queue is held in RAM. +%% +%% gamma: this is a message where the message itself is only held on +%% disk, but its position is both in RAM and on disk. +%% +%% delta: this is a collection of messages, represented by a single +%% term, where the messages and their position are only held on +%% disk. +%% +%% Note that for persistent messages, the message and its position +%% within the queue are always held on disk, *in addition* to being in +%% one of the above classifications. +%% +%% Also note that within this code, the term gamma never +%% appears. Instead, gammas are defined by betas who have had their +%% queue position recorded on disk. +%% +%% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though +%% many of these steps are frequently skipped. q1 and q4 only hold +%% alphas, q2 and q3 hold both betas and gammas (as queues of queues, +%% using the bpqueue module where the block prefix determines whether +%% they're betas or gammas). When a message arrives, its +%% classification is determined. It is then added to the rightmost +%% appropriate queue. +%% +%% If a new message is determined to be a beta or gamma, q1 is +%% empty. If a new message is determined to be a delta, q1 and q2 are +%% empty (and actually q4 too). +%% +%% When removing messages from a queue, if q4 is empty then q3 is read +%% directly. If q3 becomes empty then the next segment's worth of +%% messages from delta are read into q3, reducing the size of +%% delta. If the queue is non empty, either q4 or q3 contain +%% entries. It is never permitted for delta to hold all the messages +%% in the queue. +%% +%% The duration indicated to us by the memory_monitor is used to +%% calculate, given our current ingress and egress rates, how many +%% messages we should hold in RAM. When we need to push alphas to +%% betas or betas to gammas, we favour writing out messages that are +%% further from the head of the queue. 
This minimises writes to disk, +%% as the messages closer to the tail of the queue stay in the queue +%% for longer, thus do not need to be replaced as quickly by sending +%% other messages to disk. +%% +%% Whilst messages are pushed to disk and forgotten from RAM as soon +%% as requested by a new setting of the queue RAM duration, the +%% inverse is not true: we only load messages back into RAM as +%% demanded as the queue is read from. Thus only publishes to the +%% queue will take up available spare capacity. +%% +%% When we report our duration to the memory monitor, we calculate +%% average ingress and egress rates over the last two samples, and +%% then calculate our duration based on the sum of the ingress and +%% egress rates. More than two samples could be used, but it's a +%% balance between responding quickly enough to changes in +%% producers/consumers versus ignoring temporary blips. The problem +%% with temporary blips is that with just a few queues, they can have +%% substantial impact on the calculation of the average duration and +%% hence cause unnecessary I/O. Another alternative is to increase the +%% amqqueue_process:RAM_DURATION_UPDATE_PERIOD to beyond 5 +%% seconds. However, that then runs the risk of being too slow to +%% inform the memory monitor of changes. Thus a 5 second interval, +%% plus a rolling average over the last two samples seems to work +%% well in practice. +%% +%% The sum of the ingress and egress rates is used because the egress +%% rate alone is not sufficient. Adding in the ingress rate means that +%% queues which are being flooded by messages are given more memory, +%% resulting in them being able to process the messages faster (by +%% doing less I/O, or at least deferring it) and thus helping keep +%% their mailboxes empty and thus the queue as a whole is more +%% responsive. If such a queue also has fast but previously idle +%% consumers, the consumer can then start to be driven as fast as it +%% can go, whereas if only egress rate was being used, the incoming +%% messages may have to be written to disk and then read back in, +%% resulting in the hard disk being a bottleneck in driving the +%% consumers. Generally, we want to give Rabbit every chance of +%% getting rid of messages as fast as possible and remaining +%% responsive, and using only the egress rate impacts that goal. +%% +%% If a queue is full of transient messages, then the transition from +%% betas to deltas will be potentially very expensive as millions of +%% entries must be written to disk by the queue_index module. This can +%% badly stall the queue. In order to avoid this, the proportion of +%% gammas / (betas+gammas) must not be lower than (betas+gammas) / +%% (alphas+betas+gammas). As the queue grows or available memory +%% shrinks, the latter ratio increases, requiring the conversion of +%% more gammas to betas in order to maintain the invariant. At the +%% point at which betas and gammas must be converted to deltas, there +%% should be very few betas remaining, thus the transition is fast (no +%% work needs to be done for the gamma -> delta transition). +%% +%% The conversion of betas to gammas is done in batches of exactly +%% ?IO_BATCH_SIZE. This value should not be too small, otherwise the +%% frequent operations on the queues of q2 and q3 will not be +%% effectively amortised (switching the direction of queue access +%% defeats amortisation), nor should it be too big, otherwise +%% converting a batch stalls the queue for too long. Therefore, it +%% must be just right. 
ram_index_count is used here and is the number
+%% of betas.
+%%
+%% The conversion from alphas to betas is also chunked, but only to
+%% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at
+%% any one time. This further smooths the effects of changes to the
+%% target_ram_msg_count and ensures the queue remains responsive
+%% even when there is a large amount of IO work to do. The
+%% idle_timeout callback is utilised to ensure that conversions are
+%% done as promptly as possible whilst ensuring the queue remains
+%% responsive.
+%%
+%% In the queue we keep track of both messages that are pending
+%% delivery and messages that are pending acks. This ensures that
+%% purging (deleting the former) and deletion (deleting the former and
+%% the latter) are both cheap and do not require any scanning through
+%% qi segments.
+%%
+%% Notes on Clean Shutdown
+%% (This documents behaviour in variable_queue, queue_index and
+%% msg_store.)
+%%
+%% In order to try to achieve as fast a start-up as possible, if a
+%% clean shutdown occurs, we try to save out state to disk to reduce
+%% work on startup. In the msg_store this takes the form of the
+%% index_module's state, plus the file_summary ets table, and client
+%% refs. In the VQ, this takes the form of the count of persistent
+%% messages in the queue and references into the msg_stores. The
+%% queue_index adds to these terms the details of its segments and
+%% stores the terms in the queue directory.
+%%
+%% Two message stores are used. One is created for persistent messages
+%% to durable queues that must survive restarts, and the other is used
+%% for all other messages that just happen to need to be written to
+%% disk. On start up we can therefore nuke the transient message
+%% store, and be sure that the messages in the persistent store are
+%% all that we need.
+%%
+%% The references to the msg_stores are there so that the msg_store
+%% knows to only trust its saved state if all of the queues it was
+%% previously talking to come up cleanly. Likewise, the queues
+%% themselves (esp queue_index) skip work in init if all the queues
+%% and msg_store were shutdown cleanly. This gives both good speed
+%% improvements and also robustness so that if anything possibly went
+%% wrong in shutdown (or there was subsequent manual tampering), all
+%% messages and queues that can be recovered are recovered, safely.
+%%
+%% To delete transient messages lazily, the variable_queue, on
+%% startup, stores the next_seq_id reported by the queue_index as the
+%% transient_threshold. From that point on, whenever it's reading a
+%% message off disk via the queue_index, if the seq_id is below this
+%% threshold and the message is transient then it drops the message
+%% (the message itself won't exist on disk because it would have been
+%% stored in the transient msg_store which would have had its saved
+%% state nuked on startup). This avoids the expensive operation of
+%% scanning the entire queue on startup in order to delete transient
+%% messages that were only pushed to disk to save memory.
+%%
+%%----------------------------------------------------------------------------
+
+-behaviour(rabbit_backing_queue).
+
+-record(vqstate,
+        { q1,
+          q2,
+          delta,
+          q3,
+          q4,
+          next_seq_id,
+          pending_ack,
+          index_state,
+          msg_store_clients,
+          on_sync,
+          durable,
+          transient_threshold,
+
+          len,
+          persistent_count,
+
+          duration_target,
+          target_ram_msg_count,
+          ram_msg_count,
+          ram_msg_count_prev,
+          ram_index_count,
+          out_counter,
+          in_counter,
+          rates
+        }).
+ +-record(rates, { egress, ingress, avg_egress, avg_ingress, timestamp }). + +-record(msg_status, + { seq_id, + guid, + msg, + is_persistent, + is_delivered, + msg_on_disk, + index_on_disk + }). + +-record(delta, + { start_seq_id, %% start_seq_id is inclusive + count, + end_seq_id %% end_seq_id is exclusive + }). + +-record(tx, { pending_messages, pending_acks }). + +-record(sync, { acks_persistent, acks_all, pubs, funs }). + +%% When we discover, on publish, that we should write some indices to +%% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of +%% betas that we must be due to write indices for before we do any +%% work at all. This is both a minimum and a maximum - we don't write +%% fewer than RAM_INDEX_BATCH_SIZE indices out in one go, and we don't +%% write more - we can always come back on the next publish to do +%% more. +-define(IO_BATCH_SIZE, 64). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}). +-type(seq_id() :: non_neg_integer()). +-type(ack() :: seq_id() | 'blank_ack'). + +-type(rates() :: #rates { egress :: {timestamp(), non_neg_integer()}, + ingress :: {timestamp(), non_neg_integer()}, + avg_egress :: float(), + avg_ingress :: float(), + timestamp :: timestamp() }). + +-type(delta() :: #delta { start_seq_id :: non_neg_integer(), + count :: non_neg_integer (), + end_seq_id :: non_neg_integer() }). + +-type(sync() :: #sync { acks_persistent :: [[seq_id()]], + acks_all :: [[seq_id()]], + pubs :: [[rabbit_guid:guid()]], + funs :: [fun (() -> any())] }). + +-type(state() :: #vqstate { + q1 :: queue(), + q2 :: bpqueue:bpqueue(), + delta :: delta(), + q3 :: bpqueue:bpqueue(), + q4 :: queue(), + next_seq_id :: seq_id(), + pending_ack :: dict:dictionary(), + index_state :: any(), + msg_store_clients :: 'undefined' | {{any(), binary()}, + {any(), binary()}}, + on_sync :: sync(), + durable :: boolean(), + + len :: non_neg_integer(), + persistent_count :: non_neg_integer(), + + transient_threshold :: non_neg_integer(), + duration_target :: number() | 'infinity', + target_ram_msg_count :: non_neg_integer() | 'infinity', + ram_msg_count :: non_neg_integer(), + ram_msg_count_prev :: non_neg_integer(), + ram_index_count :: non_neg_integer(), + out_counter :: non_neg_integer(), + in_counter :: non_neg_integer(), + rates :: rates() }). + +-include("rabbit_backing_queue_spec.hrl"). + +-endif. + +-define(BLANK_DELTA, #delta { start_seq_id = undefined, + count = 0, + end_seq_id = undefined }). +-define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, + count = 0, + end_seq_id = Z }). + +-define(BLANK_SYNC, #sync { acks_persistent = [], + acks_all = [], + pubs = [], + funs = [] }). + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +start(DurableQueues) -> + {AllTerms, StartFunState} = rabbit_queue_index:recover(DurableQueues), + start_msg_store( + [Ref || Terms <- AllTerms, + begin + Ref = proplists:get_value(persistent_ref, Terms), + Ref =/= undefined + end], + StartFunState). + +stop() -> stop_msg_store(). 
+ +start_msg_store(Refs, StartFunState) -> + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), + undefined, {fun (ok) -> finished end, ok}]), + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + Refs, StartFunState]). + +stop_msg_store() -> + ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), + ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE). + +init(QueueName, IsDurable, Recover) -> + {DeltaCount, Terms, IndexState} = + rabbit_queue_index:init( + QueueName, Recover, + rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end), + {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState), + + {PRef, TRef, Terms1} = + case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of + [] -> {proplists:get_value(persistent_ref, Terms), + proplists:get_value(transient_ref, Terms), + Terms}; + _ -> {rabbit_guid:guid(), rabbit_guid:guid(), []} + end, + DeltaCount1 = proplists:get_value(persistent_count, Terms1, DeltaCount), + Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of + true -> ?BLANK_DELTA; + false -> #delta { start_seq_id = LowSeqId, + count = DeltaCount1, + end_seq_id = NextSeqId } + end, + Now = now(), + PersistentClient = + case IsDurable of + true -> rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, PRef); + false -> undefined + end, + TransientClient = rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), + State = #vqstate { + q1 = queue:new(), + q2 = bpqueue:new(), + delta = Delta, + q3 = bpqueue:new(), + q4 = queue:new(), + next_seq_id = NextSeqId, + pending_ack = dict:new(), + index_state = IndexState1, + msg_store_clients = {{PersistentClient, PRef}, + {TransientClient, TRef}}, + on_sync = ?BLANK_SYNC, + durable = IsDurable, + transient_threshold = NextSeqId, + + len = DeltaCount1, + persistent_count = DeltaCount1, + + duration_target = infinity, + target_ram_msg_count = infinity, + ram_msg_count = 0, + ram_msg_count_prev = 0, + ram_index_count = 0, + out_counter = 0, + in_counter = 0, + rates = #rates { egress = {Now, 0}, + ingress = {Now, DeltaCount1}, + avg_egress = 0.0, + avg_ingress = 0.0, + timestamp = Now } }, + a(maybe_deltas_to_betas(State)). + +terminate(State) -> + State1 = #vqstate { persistent_count = PCount, + index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}} } = + remove_pending_ack(true, tx_commit_index(State)), + case MSCStateP of + undefined -> ok; + _ -> rabbit_msg_store:client_terminate( + MSCStateP, ?PERSISTENT_MSG_STORE) + end, + rabbit_msg_store:client_terminate(MSCStateT, ?TRANSIENT_MSG_STORE), + Terms = [{persistent_ref, PRef}, + {transient_ref, TRef}, + {persistent_count, PCount}], + a(State1 #vqstate { index_state = rabbit_queue_index:terminate( + Terms, IndexState), + msg_store_clients = undefined }). + +%% the only difference between purge and delete is that delete also +%% needs to delete everything that's been delivered and not ack'd. +delete_and_terminate(State) -> + %% TODO: there is no need to interact with qi at all - which we do + %% as part of 'purge' and 'remove_pending_ack', other than + %% deleting it. 
+ {_PurgeCount, State1} = purge(State), + State2 = #vqstate { index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}} } = + remove_pending_ack(false, State1), + IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState), + case MSCStateP of + undefined -> ok; + _ -> rabbit_msg_store:client_delete_and_terminate( + MSCStateP, ?PERSISTENT_MSG_STORE, PRef) + end, + rabbit_msg_store:client_delete_and_terminate( + MSCStateT, ?TRANSIENT_MSG_STORE, TRef), + a(State2 #vqstate { index_state = IndexState1, + msg_store_clients = undefined }). + +purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> + %% TODO: when there are no pending acks, which is a common case, + %% we could simply wipe the qi instead of issuing delivers and + %% acks for all the messages. + IndexState1 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, + IndexState), + State1 = #vqstate { q1 = Q1, index_state = IndexState2 } = + purge_betas_and_deltas(State #vqstate { q4 = queue:new(), + index_state = IndexState1 }), + IndexState3 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q1, + IndexState2), + {Len, a(State1 #vqstate { q1 = queue:new(), + index_state = IndexState3, + len = 0, + ram_msg_count = 0, + ram_index_count = 0, + persistent_count = 0 })}. + +publish(Msg, State) -> + {_SeqId, State1} = publish(Msg, false, false, State), + a(reduce_memory_use(State1)). + +publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> + {blank_ack, a(State)}; +publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, + State = #vqstate { len = 0, + next_seq_id = SeqId, + out_counter = OutCount, + in_counter = InCount, + persistent_count = PCount, + pending_ack = PA, + durable = IsDurable }) -> + IsPersistent1 = IsDurable andalso IsPersistent, + MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) + #msg_status { is_delivered = true }, + {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), + PA1 = record_pending_ack(m(MsgStatus1), PA), + PCount1 = PCount + one_if(IsPersistent1), + {SeqId, a(State1 #vqstate { next_seq_id = SeqId + 1, + out_counter = OutCount + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + pending_ack = PA1 })}. + +fetch(AckRequired, State = #vqstate { q4 = Q4, + ram_msg_count = RamMsgCount, + out_counter = OutCount, + index_state = IndexState, + len = Len, + persistent_count = PCount, + pending_ack = PA }) -> + case queue:out(Q4) of + {empty, _Q4} -> + case fetch_from_q3_to_q4(State) of + {empty, State1} = Result -> a(State1), Result; + {loaded, State1} -> fetch(AckRequired, State1) + end; + {{value, MsgStatus = #msg_status { + msg = Msg, guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, + Q4a} -> + + %% 1. Mark it delivered if necessary + IndexState1 = maybe_write_delivered( + IndexOnDisk andalso not IsDelivered, + SeqId, IndexState), + + %% 2. Remove from msg_store and queue index, if necessary + MsgStore = find_msg_store(IsPersistent), + Rem = fun () -> ok = rabbit_msg_store:remove(MsgStore, [Guid]) end, + Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end, + IndexState2 = + case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of + {false, true, false, _} -> Rem(), IndexState1; + {false, true, true, _} -> Rem(), Ack(); + { true, true, true, false} -> Ack(); + _ -> IndexState1 + end, + + %% 3. 
If an ack is required, add something sensible to PA + {AckTag, PA1} = case AckRequired of + true -> PA2 = record_pending_ack( + MsgStatus #msg_status { + is_delivered = true }, PA), + {SeqId, PA2}; + false -> {blank_ack, PA} + end, + + PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), + Len1 = Len - 1, + {{Msg, IsDelivered, AckTag, Len1}, + a(State #vqstate { q4 = Q4a, + ram_msg_count = RamMsgCount - 1, + out_counter = OutCount + 1, + index_state = IndexState2, + len = Len1, + persistent_count = PCount1, + pending_ack = PA1 })} + end. + +ack(AckTags, State) -> + a(ack(fun rabbit_msg_store:remove/2, + fun (_AckEntry, State1) -> State1 end, + AckTags, State)). + +tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, + State = #vqstate { durable = IsDurable, + msg_store_clients = MSCState }) -> + Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }), + a(case IsPersistent andalso IsDurable of + true -> MsgStatus = msg_status(true, undefined, Msg), + {#msg_status { msg_on_disk = true }, MSCState1} = + maybe_write_msg_to_disk(false, MsgStatus, MSCState), + State #vqstate { msg_store_clients = MSCState1 }; + false -> State + end). + +tx_ack(Txn, AckTags, State) -> + Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }), + State. + +tx_rollback(Txn, State = #vqstate { durable = IsDurable }) -> + #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), + erase_tx(Txn), + ok = case IsDurable of + true -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, + persistent_guids(Pubs)); + false -> ok + end, + {lists:append(AckTags), a(State)}. + +tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> + #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), + erase_tx(Txn), + PubsOrdered = lists:reverse(Pubs), + AckTags1 = lists:append(AckTags), + PersistentGuids = persistent_guids(PubsOrdered), + HasPersistentPubs = PersistentGuids =/= [], + {AckTags1, + a(case IsDurable andalso HasPersistentPubs of + true -> ok = rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, PersistentGuids, + msg_store_callback(PersistentGuids, + PubsOrdered, AckTags1, Fun)), + State; + false -> tx_commit_post_msg_store( + HasPersistentPubs, PubsOrdered, AckTags1, Fun, State) + end)}. + +requeue(AckTags, State) -> + a(reduce_memory_use( + ack(fun rabbit_msg_store:release/2, + fun (#msg_status { msg = Msg }, State1) -> + {_SeqId, State2} = publish(Msg, true, false, State1), + State2; + ({IsPersistent, Guid}, State1) -> + #vqstate { msg_store_clients = MSCState } = State1, + {{ok, Msg = #basic_message{}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + State2 = State1 #vqstate { msg_store_clients = MSCState1 }, + {_SeqId, State3} = publish(Msg, true, true, State2), + State3 + end, + AckTags, State))). + +len(#vqstate { len = Len }) -> Len. + +is_empty(State) -> 0 == len(State). 
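
Taken together, the public surface exercised by rabbit_tests boils down to a short publish/fetch/ack cycle. A minimal sketch, assuming start/1 has already brought up the message stores, and using an invented queue resource:

    QName = rabbit_misc:r(<<"/">>, queue, <<"example">>),
    VQ0   = rabbit_variable_queue:init(QName, true, false),
    Msg   = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>),
                                 <<>>, #'P_basic'{delivery_mode = 1}, <<>>),
    VQ1   = rabbit_variable_queue:publish(Msg, VQ0),
    1     = rabbit_variable_queue:len(VQ1),
    {{_Msg, false, AckTag, 0}, VQ2} = rabbit_variable_queue:fetch(true, VQ1),
    VQ3   = rabbit_variable_queue:ack([AckTag], VQ2),
    true  = rabbit_variable_queue:is_empty(VQ3)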
+ +set_ram_duration_target(DurationTarget, + State = #vqstate { + rates = #rates { avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate }, + target_ram_msg_count = TargetRamMsgCount }) -> + Rate = AvgEgressRate + AvgIngressRate, + TargetRamMsgCount1 = + case DurationTarget of + infinity -> infinity; + _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec + end, + State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, + duration_target = DurationTarget }, + a(case TargetRamMsgCount1 == infinity orelse + (TargetRamMsgCount =/= infinity andalso + TargetRamMsgCount1 >= TargetRamMsgCount) of + true -> State1; + false -> reduce_memory_use(State1) + end). + +ram_duration(State = #vqstate { + rates = #rates { egress = Egress, + ingress = Ingress, + timestamp = Timestamp } = Rates, + in_counter = InCount, + out_counter = OutCount, + ram_msg_count = RamMsgCount, + duration_target = DurationTarget, + ram_msg_count_prev = RamMsgCountPrev }) -> + Now = now(), + {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), + {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), + + Duration = %% msgs / (msgs/sec) == sec + case AvgEgressRate == 0 andalso AvgIngressRate == 0 of + true -> infinity; + false -> (RamMsgCountPrev + RamMsgCount) / + (2 * (AvgEgressRate + AvgIngressRate)) + end, + + {Duration, set_ram_duration_target( + DurationTarget, + State #vqstate { + rates = Rates #rates { + egress = Egress1, + ingress = Ingress1, + avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate, + timestamp = Now }, + in_counter = 0, + out_counter = 0, + ram_msg_count_prev = RamMsgCount })}. + +needs_idle_timeout(State = #vqstate { on_sync = ?BLANK_SYNC }) -> + {Res, _State} = reduce_memory_use(fun (_Quota, State1) -> State1 end, + fun (_Quota, State1) -> State1 end, + fun (State1) -> State1 end, + State), + Res; +needs_idle_timeout(_State) -> + true. + +idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))). + +handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> + State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. + +status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + len = Len, + pending_ack = PA, + on_sync = #sync { funs = From }, + target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, + next_seq_id = NextSeqId, + persistent_count = PersistentCount, + rates = #rates { + avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate } }) -> + [ {q1 , queue:len(Q1)}, + {q2 , bpqueue:len(Q2)}, + {delta , Delta}, + {q3 , bpqueue:len(Q3)}, + {q4 , queue:len(Q4)}, + {len , Len}, + {pending_acks , dict:size(PA)}, + {outstanding_txns , length(From)}, + {target_ram_msg_count , TargetRamMsgCount}, + {ram_msg_count , RamMsgCount}, + {ram_index_count , RamIndexCount}, + {next_seq_id , NextSeqId}, + {persistent_count , PersistentCount}, + {avg_egress_rate , AvgEgressRate}, + {avg_ingress_rate , AvgIngressRate} ]. 
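
To make the arithmetic in set_ram_duration_target/2 concrete (the rates below are invented): with an average egress of 30 msgs/s and ingress of 10 msgs/s, a duration target of 2 seconds allows trunc(2 * (30 + 10)) = 80 messages in RAM, and that figure is what then shows up as target_ram_msg_count in status/1; a target of infinity lifts the limit entirely.

    %% hypothetical numbers: 30 msgs/s out, 10 msgs/s in, 2s target
    TargetRamMsgCount = trunc(2 * (30 + 10))    %% = 80 messages kept in RAM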
+ +%%---------------------------------------------------------------------------- +%% Minor helpers +%%---------------------------------------------------------------------------- + +a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + len = Len, + persistent_count = PersistentCount, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount }) -> + E1 = queue:is_empty(Q1), + E2 = bpqueue:is_empty(Q2), + ED = Delta#delta.count == 0, + E3 = bpqueue:is_empty(Q3), + E4 = queue:is_empty(Q4), + LZ = Len == 0, + + true = E1 or not E3, + true = E2 or not ED, + true = ED or not E3, + true = LZ == (E3 and E4), + + true = Len >= 0, + true = PersistentCount >= 0, + true = RamMsgCount >= 0, + true = RamIndexCount >= 0, + + State. + +m(MsgStatus = #msg_status { msg = Msg, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }) -> + true = (not IsPersistent) or IndexOnDisk, + true = (not IndexOnDisk) or MsgOnDisk, + true = (Msg =/= undefined) or MsgOnDisk, + + MsgStatus. + +one_if(true ) -> 1; +one_if(false) -> 0. + +cons_if(true, E, L) -> [E | L]; +cons_if(false, _E, L) -> L. + +msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) -> + #msg_status { seq_id = SeqId, guid = Guid, msg = Msg, + is_persistent = IsPersistent, is_delivered = false, + msg_on_disk = false, index_on_disk = false }. + +find_msg_store(true) -> ?PERSISTENT_MSG_STORE; +find_msg_store(false) -> ?TRANSIENT_MSG_STORE. + +with_msg_store_state({{MSCStateP, PRef}, MSCStateT}, true, Fun) -> + {Result, MSCStateP1} = Fun(?PERSISTENT_MSG_STORE, MSCStateP), + {Result, {{MSCStateP1, PRef}, MSCStateT}}; +with_msg_store_state({MSCStateP, {MSCStateT, TRef}}, false, Fun) -> + {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), + {Result, {MSCStateP, {MSCStateT1, TRef}}}. + +read_from_msg_store(MSCState, IsPersistent, Guid) -> + with_msg_store_state( + MSCState, IsPersistent, + fun (MsgStore, MSCState1) -> + rabbit_msg_store:read(MsgStore, Guid, MSCState1) + end). + +maybe_write_delivered(false, _SeqId, IndexState) -> + IndexState; +maybe_write_delivered(true, SeqId, IndexState) -> + rabbit_queue_index:deliver([SeqId], IndexState). + +lookup_tx(Txn) -> case get({txn, Txn}) of + undefined -> #tx { pending_messages = [], + pending_acks = [] }; + V -> V + end. + +store_tx(Txn, Tx) -> put({txn, Txn}, Tx). + +erase_tx(Txn) -> erase({txn, Txn}). + +persistent_guids(Pubs) -> + [Guid || #basic_message { guid = Guid, is_persistent = true } <- Pubs]. + +betas_from_index_entries(List, TransientThreshold, IndexState) -> + {Filtered, Delivers, Acks} = + lists:foldr( + fun ({Guid, SeqId, IsPersistent, IsDelivered}, + {Filtered1, Delivers1, Acks1}) -> + case SeqId < TransientThreshold andalso not IsPersistent of + true -> {Filtered1, + cons_if(not IsDelivered, SeqId, Delivers1), + [SeqId | Acks1]}; + false -> {[m(#msg_status { msg = undefined, + guid = Guid, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + }) | Filtered1], + Delivers1, + Acks1} + end + end, {[], [], []}, List), + {bpqueue:from_list([{true, Filtered}]), + rabbit_queue_index:ack(Acks, + rabbit_queue_index:deliver(Delivers, IndexState))}. 
+ +%% the first arg is the older delta +combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) -> + ?BLANK_DELTA; +combine_deltas(?BLANK_DELTA_PATTERN(X), #delta { start_seq_id = Start, + count = Count, + end_seq_id = End } = B) -> + true = Start + Count =< End, %% ASSERTION + B; +combine_deltas(#delta { start_seq_id = Start, + count = Count, + end_seq_id = End } = A, ?BLANK_DELTA_PATTERN(Y)) -> + true = Start + Count =< End, %% ASSERTION + A; +combine_deltas(#delta { start_seq_id = StartLow, + count = CountLow, + end_seq_id = EndLow }, + #delta { start_seq_id = StartHigh, + count = CountHigh, + end_seq_id = EndHigh }) -> + Count = CountLow + CountHigh, + true = (StartLow =< StartHigh) %% ASSERTIONS + andalso ((StartLow + CountLow) =< EndLow) + andalso ((StartHigh + CountHigh) =< EndHigh) + andalso ((StartLow + Count) =< EndHigh), + #delta { start_seq_id = StartLow, count = Count, end_seq_id = EndHigh }. + +beta_fold(Fun, Init, Q) -> + bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). + +update_rate(Now, Then, Count, {OThen, OCount}) -> + %% avg over the current period and the previous + {1000000.0 * (Count + OCount) / timer:now_diff(Now, OThen), {Then, Count}}. + +%%---------------------------------------------------------------------------- +%% Internal major helpers for Public API +%%---------------------------------------------------------------------------- + +msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) -> + Self = self(), + F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( + Self, fun (StateN) -> tx_commit_post_msg_store( + true, Pubs, AckTags, Fun, StateN) + end) + end, + fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( + fun () -> rabbit_msg_store:remove( + ?PERSISTENT_MSG_STORE, + PersistentGuids) + end, F) + end) + end. + +tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, + State = #vqstate { + on_sync = OnSync = #sync { + acks_persistent = SPAcks, + acks_all = SAcks, + pubs = SPubs, + funs = SFuns }, + pending_ack = PA, + durable = IsDurable }) -> + PersistentAcks = + case IsDurable of + true -> [AckTag || AckTag <- AckTags, + case dict:fetch(AckTag, PA) of + #msg_status {} -> false; + {IsPersistent, _Guid} -> IsPersistent + end]; + false -> [] + end, + case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of + true -> State #vqstate { on_sync = #sync { + acks_persistent = [PersistentAcks | SPAcks], + acks_all = [AckTags | SAcks], + pubs = [Pubs | SPubs], + funs = [Fun | SFuns] }}; + false -> State1 = tx_commit_index( + State #vqstate { on_sync = #sync { + acks_persistent = [], + acks_all = [AckTags], + pubs = [Pubs], + funs = [Fun] } }), + State1 #vqstate { on_sync = OnSync } + end. 
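
A small worked example of combine_deltas/2 with invented sequence ids, showing the conventions it asserts (start_seq_id inclusive, end_seq_id exclusive, older delta first):

    Older = #delta { start_seq_id = 0,  count = 3, end_seq_id = 10 },
    Newer = #delta { start_seq_id = 10, count = 4, end_seq_id = 20 },
    #delta { start_seq_id = 0, count = 7, end_seq_id = 20 } =
        combine_deltas(Older, Newer)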
+ +tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) -> + State; +tx_commit_index(State = #vqstate { on_sync = #sync { + acks_persistent = SPAcks, + acks_all = SAcks, + pubs = SPubs, + funs = SFuns }, + durable = IsDurable }) -> + PAcks = lists:append(SPAcks), + Acks = lists:append(SAcks), + Pubs = lists:append(lists:reverse(SPubs)), + {SeqIds, State1 = #vqstate { index_state = IndexState }} = + lists:foldl( + fun (Msg = #basic_message { is_persistent = IsPersistent }, + {SeqIdsAcc, State2}) -> + IsPersistent1 = IsDurable andalso IsPersistent, + {SeqId, State3} = publish(Msg, false, IsPersistent1, State2), + {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3} + end, {PAcks, ack(Acks, State)}, Pubs), + IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), + [ Fun() || Fun <- lists:reverse(SFuns) ], + reduce_memory_use( + State1 #vqstate { index_state = IndexState1, on_sync = ?BLANK_SYNC }). + +purge_betas_and_deltas(State = #vqstate { q3 = Q3, + index_state = IndexState }) -> + case bpqueue:is_empty(Q3) of + true -> State; + false -> IndexState1 = remove_queue_entries(fun beta_fold/3, Q3, + IndexState), + purge_betas_and_deltas( + maybe_deltas_to_betas( + State #vqstate { q3 = bpqueue:new(), + index_state = IndexState1 })) + end. + +remove_queue_entries(Fold, Q, IndexState) -> + {GuidsByStore, Delivers, Acks} = + Fold(fun remove_queue_entries1/2, {orddict:new(), [], []}, Q), + ok = orddict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + rabbit_queue_index:ack(Acks, + rabbit_queue_index:deliver(Delivers, IndexState)). + +remove_queue_entries1( + #msg_status { guid = Guid, seq_id = SeqId, + is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, + {GuidsByStore, Delivers, Acks}) -> + {case MsgOnDisk of + true -> rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, + GuidsByStore); + false -> GuidsByStore + end, + cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers), + cons_if(IndexOnDisk, SeqId, Acks)}. + +%%---------------------------------------------------------------------------- +%% Internal gubbins for publishing +%%---------------------------------------------------------------------------- + +publish(Msg = #basic_message { is_persistent = IsPersistent }, + IsDelivered, MsgOnDisk, + State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4, + next_seq_id = SeqId, + len = Len, + in_counter = InCount, + persistent_count = PCount, + durable = IsDurable, + ram_msg_count = RamMsgCount }) -> + IsPersistent1 = IsDurable andalso IsPersistent, + MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) + #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk }, + {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), + State2 = case bpqueue:is_empty(Q3) of + false -> State1 #vqstate { q1 = queue:in(m(MsgStatus1), Q1) }; + true -> State1 #vqstate { q4 = queue:in(m(MsgStatus1), Q4) } + end, + PCount1 = PCount + one_if(IsPersistent1), + {SeqId, State2 #vqstate { next_seq_id = SeqId + 1, + len = Len + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + ram_msg_count = RamMsgCount + 1}}. 
+ +maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status { + msg_on_disk = true }, MSCState) -> + {MsgStatus, MSCState}; +maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { + msg = Msg, guid = Guid, + is_persistent = IsPersistent }, MSCState) + when Force orelse IsPersistent -> + {ok, MSCState1} = + with_msg_store_state( + MSCState, IsPersistent, + fun (MsgStore, MSCState2) -> + Msg1 = Msg #basic_message { + %% don't persist any recoverable decoded properties + content = rabbit_binary_parser:clear_decoded_content( + Msg #basic_message.content)}, + rabbit_msg_store:write(MsgStore, Guid, Msg1, MSCState2) + end), + {MsgStatus #msg_status { msg_on_disk = true }, MSCState1}; +maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) -> + {MsgStatus, MSCState}. + +maybe_write_index_to_disk(_Force, MsgStatus = #msg_status { + index_on_disk = true }, IndexState) -> + true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION + {MsgStatus, IndexState}; +maybe_write_index_to_disk(Force, MsgStatus = #msg_status { + guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered }, IndexState) + when Force orelse IsPersistent -> + true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION + IndexState1 = rabbit_queue_index:publish(Guid, SeqId, IsPersistent, + IndexState), + {MsgStatus #msg_status { index_on_disk = true }, + maybe_write_delivered(IsDelivered, SeqId, IndexState1)}; +maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> + {MsgStatus, IndexState}. + +maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, + State = #vqstate { index_state = IndexState, + msg_store_clients = MSCState }) -> + {MsgStatus1, MSCState1} = maybe_write_msg_to_disk( + ForceMsg, MsgStatus, MSCState), + {MsgStatus2, IndexState1} = maybe_write_index_to_disk( + ForceIndex, MsgStatus1, IndexState), + {MsgStatus2, State #vqstate { index_state = IndexState1, + msg_store_clients = MSCState1 }}. + +%%---------------------------------------------------------------------------- +%% Internal gubbins for acks +%%---------------------------------------------------------------------------- + +record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> + AckEntry = case MsgOnDisk of + true -> {IsPersistent, Guid}; + false -> MsgStatus + end, + dict:store(SeqId, AckEntry, PA). + +remove_pending_ack(KeepPersistent, + State = #vqstate { pending_ack = PA, + index_state = IndexState }) -> + {SeqIds, GuidsByStore} = dict:fold(fun accumulate_ack/3, + {[], orddict:new()}, PA), + State1 = State #vqstate { pending_ack = dict:new() }, + case KeepPersistent of + true -> case orddict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of + error -> State1; + {ok, Guids} -> ok = rabbit_msg_store:remove( + ?TRANSIENT_MSG_STORE, Guids), + State1 + end; + false -> IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = orddict:fold( + fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + State1 #vqstate { index_state = IndexState1 } + end. 
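record_pending_ack/2 above keeps only {IsPersistent, Guid} for a message that is already on disk, and the full #msg_status{} otherwise, so pending acks for on-disk messages stay cheap in RAM. A hypothetical illustration of the resulting dict shape (the guids and the stand-in status tuple are made up):

pending_ack_shape_demo() ->
    PA0 = dict:new(),
    %% message already on disk: only the compact pair is retained
    PA1 = dict:store(12, {true, <<"guid-0012">>}, PA0),
    %% message held only in RAM: the full status record would be stored;
    %% a plain tuple stands in for #msg_status{} here
    PA2 = dict:store(13, {msg_status, placeholder}, PA1),
    {ok, {true, <<"guid-0012">>}} = dict:find(12, PA2),
    2 = dict:size(PA2),
    ok.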
+ +ack(_MsgStoreFun, _Fun, [], State) -> + State; +ack(MsgStoreFun, Fun, AckTags, State) -> + {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, + persistent_count = PCount }} = + lists:foldl( + fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) -> + {ok, AckEntry} = dict:find(SeqId, PA), + {accumulate_ack(SeqId, AckEntry, Acc), + Fun(AckEntry, State2 #vqstate { + pending_ack = dict:erase(SeqId, PA) })} + end, {{[], orddict:new()}, State}, AckTags), + IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = orddict:fold(fun (MsgStore, Guids, ok) -> + MsgStoreFun(MsgStore, Guids) + end, ok, GuidsByStore), + PCount1 = PCount - case orddict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of + error -> 0; + {ok, Guids} -> length(Guids) + end, + State1 #vqstate { index_state = IndexState1, + persistent_count = PCount1 }. + +accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS + msg_on_disk = false, + index_on_disk = false }, Acc) -> + Acc; +accumulate_ack(SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) -> + {cons_if(IsPersistent, SeqId, SeqIdsAcc), + rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, Dict)}. + +%%---------------------------------------------------------------------------- +%% Phase changes +%%---------------------------------------------------------------------------- + +%% Determine whether a reduction in memory use is necessary, and call +%% functions to perform the required phase changes. The function can +%% also be used to just do the former, by passing in dummy phase +%% change functions. +%% +%% The function does not report on any needed beta->delta conversions, +%% though the conversion function for that is called as necessary. The +%% reason is twofold. Firstly, this is safe because the conversion is +%% only ever necessary just after a transition to a +%% target_ram_msg_count of zero or after an incremental alpha->beta +%% conversion. In the former case the conversion is performed straight +%% away (i.e. any betas present at the time are converted to deltas), +%% and in the latter case the need for a conversion is flagged up +%% anyway. Secondly, this is necessary because we do not have a +%% precise and cheap predicate for determining whether a beta->delta +%% conversion is necessary - due to the complexities of retaining up +%% one segment's worth of messages in q3 - and thus would risk +%% perpetually reporting the need for a conversion when no such +%% conversion is needed. That in turn could cause an infinite loop. +reduce_memory_use(AlphaBetaFun, BetaGammaFun, BetaDeltaFun, State) -> + {Reduce, State1} = case chunk_size(State #vqstate.ram_msg_count, + State #vqstate.target_ram_msg_count) of + 0 -> {false, State}; + S1 -> {true, AlphaBetaFun(S1, State)} + end, + case State1 #vqstate.target_ram_msg_count of + infinity -> {Reduce, State1}; + 0 -> {Reduce, BetaDeltaFun(State1)}; + _ -> case chunk_size(State1 #vqstate.ram_index_count, + permitted_ram_index_count(State1)) of + ?IO_BATCH_SIZE = S2 -> {true, BetaGammaFun(S2, State1)}; + _ -> {Reduce, State1} + end + end. + +reduce_memory_use(State) -> + {_, State1} = reduce_memory_use(fun push_alphas_to_betas/2, + fun limit_ram_index/2, + fun push_betas_to_deltas/1, + State), + State1. 
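As the comment above notes, reduce_memory_use/4 can also be used purely to ask whether a reduction is needed, by passing dummy phase-change functions. A sketch of that usage (needs_reduction/1 is an illustrative name, not part of the source):

needs_reduction(State) ->
    NoOp2 = fun (_Quota, S) -> S end,   %% dummy alpha->beta and beta->gamma funs
    NoOp1 = fun (S) -> S end,           %% dummy beta->delta fun
    {Reduce, _State1} = reduce_memory_use(NoOp2, NoOp2, NoOp1, State),
    Reduce.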
+ +limit_ram_index(Quota, State = #vqstate { q2 = Q2, q3 = Q3, + index_state = IndexState, + ram_index_count = RamIndexCount }) -> + {Q2a, {Quota1, IndexState1}} = limit_ram_index( + fun bpqueue:map_fold_filter_r/4, + Q2, {Quota, IndexState}), + %% TODO: we shouldn't be writing index entries for messages that + %% can never end up in delta due them residing in the only segment + %% held by q3. + {Q3a, {Quota2, IndexState2}} = limit_ram_index( + fun bpqueue:map_fold_filter_r/4, + Q3, {Quota1, IndexState1}), + State #vqstate { q2 = Q2a, q3 = Q3a, + index_state = IndexState2, + ram_index_count = RamIndexCount - (Quota - Quota2) }. + +limit_ram_index(_MapFoldFilterFun, Q, {0, IndexState}) -> + {Q, {0, IndexState}}; +limit_ram_index(MapFoldFilterFun, Q, {Quota, IndexState}) -> + MapFoldFilterFun( + fun erlang:'not'/1, + fun (MsgStatus, {0, _IndexStateN}) -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + stop; + (MsgStatus, {N, IndexStateN}) when N > 0 -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + {MsgStatus1, IndexStateN1} = + maybe_write_index_to_disk(true, MsgStatus, IndexStateN), + {true, m(MsgStatus1), {N-1, IndexStateN1}} + end, {Quota, IndexState}, Q). + +permitted_ram_index_count(#vqstate { len = 0 }) -> + infinity; +permitted_ram_index_count(#vqstate { len = Len, + q2 = Q2, + q3 = Q3, + delta = #delta { count = DeltaCount } }) -> + BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), + BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). + +chunk_size(Current, Permitted) + when Permitted =:= infinity orelse Permitted >= Current -> + 0; +chunk_size(Current, Permitted) -> + lists:min([Current - Permitted, ?IO_BATCH_SIZE]). + +fetch_from_q3_to_q4(State = #vqstate { + q1 = Q1, + q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3, + q4 = Q4, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, + msg_store_clients = MSCState }) -> + case bpqueue:out(Q3) of + {empty, _Q3} -> + {empty, State}; + {{value, IndexOnDisk, MsgStatus = #msg_status { + msg = undefined, guid = Guid, + is_persistent = IsPersistent }}, Q3a} -> + {{ok, Msg = #basic_message {}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + Q4a = queue:in(m(MsgStatus #msg_status { msg = Msg }), Q4), + RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk), + true = RamIndexCount1 >= 0, %% ASSERTION + State1 = State #vqstate { q3 = Q3a, + q4 = Q4a, + ram_msg_count = RamMsgCount + 1, + ram_index_count = RamIndexCount1, + msg_store_clients = MSCState1 }, + State2 = + case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of + {true, true} -> + %% q3 is now empty, it wasn't before; delta is + %% still empty. So q2 must be empty, and q1 + %% can now be joined onto q4 + true = bpqueue:is_empty(Q2), %% ASSERTION + State1 #vqstate { q1 = queue:new(), + q4 = queue:join(Q4a, Q1) }; + {true, false} -> + maybe_deltas_to_betas(State1); + {false, _} -> + %% q3 still isn't empty, we've not touched + %% delta, so the invariants between q1, q2, + %% delta and q3 are maintained + State1 + end, + {loaded, State2} + end. 
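A worked example of the sizing arithmetic above, with made-up queue lengths:

%% Worked example (illustrative numbers only):
%%   len = 1000, delta count = 200                 => 800 msgs outside delta
%%   bpqueue:len(Q2) + bpqueue:len(Q3) = 400       => BetaLen = 400
%%   permitted_ram_index_count = 400 - trunc(400*400 / 800) = 200
%%   chunk_size(260, 200)      = lists:min([260 - 200, ?IO_BATCH_SIZE])
%% Note that reduce_memory_use/4 only invokes its beta->gamma fun when
%% chunk_size/2 returns exactly ?IO_BATCH_SIZE, i.e. once a full batch
%% of RAM index entries is waiting to be written out.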
+ +maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> + State; +maybe_deltas_to_betas(State = #vqstate { + q2 = Q2, + delta = Delta, + q3 = Q3, + index_state = IndexState, + target_ram_msg_count = TargetRamMsgCount, + transient_threshold = TransientThreshold }) -> + case bpqueue:is_empty(Q3) orelse (TargetRamMsgCount /= 0) of + false -> + State; + true -> + #delta { start_seq_id = DeltaSeqId, + count = DeltaCount, + end_seq_id = DeltaSeqIdEnd } = Delta, + DeltaSeqId1 = + lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId), + DeltaSeqIdEnd]), + {List, IndexState1} = + rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, IndexState), + {Q3a, IndexState2} = betas_from_index_entries( + List, TransientThreshold, IndexState1), + State1 = State #vqstate { index_state = IndexState2 }, + case bpqueue:len(Q3a) of + 0 -> + %% we ignored every message in the segment due to + %% it being transient and below the threshold + maybe_deltas_to_betas( + State #vqstate { + delta = Delta #delta { start_seq_id = DeltaSeqId1 }}); + Q3aLen -> + Q3b = bpqueue:join(Q3, Q3a), + case DeltaCount - Q3aLen of + 0 -> + %% delta is now empty, but it wasn't + %% before, so can now join q2 onto q3 + State1 #vqstate { q2 = bpqueue:new(), + delta = ?BLANK_DELTA, + q3 = bpqueue:join(Q3b, Q2) }; + N when N > 0 -> + Delta1 = #delta { start_seq_id = DeltaSeqId1, + count = N, + end_seq_id = DeltaSeqIdEnd }, + State1 #vqstate { delta = Delta1, + q3 = Q3b } + end + end + end. + +push_alphas_to_betas(Quota, State) -> + { Quota1, State1} = maybe_push_q1_to_betas(Quota, State), + {_Quota2, State2} = maybe_push_q4_to_betas(Quota1, State1), + State2. + +maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) -> + maybe_push_alphas_to_betas( + fun queue:out/1, + fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q3 = Q3, delta = #delta { count = 0 } }) -> + State1 #vqstate { q1 = Q1a, + q3 = bpqueue:in(IndexOnDisk, MsgStatus, Q3) }; + (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q2 = Q2 }) -> + State1 #vqstate { q1 = Q1a, + q2 = bpqueue:in(IndexOnDisk, MsgStatus, Q2) } + end, Quota, Q1, State). + +maybe_push_q4_to_betas(Quota, State = #vqstate { q4 = Q4 }) -> + maybe_push_alphas_to_betas( + fun queue:out_r/1, + fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q4a, State1 = #vqstate { q3 = Q3 }) -> + State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus, Q3), + q4 = Q4a } + end, Quota, Q4, State). + +maybe_push_alphas_to_betas(_Generator, _Consumer, Quota, _Q, + State = #vqstate { + ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) + when Quota =:= 0 orelse + TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> + {Quota, State}; +maybe_push_alphas_to_betas(Generator, Consumer, Quota, Q, State) -> + case Generator(Q) of + {empty, _Q} -> + {Quota, State}; + {{value, MsgStatus}, Qa} -> + {MsgStatus1 = #msg_status { msg_on_disk = true, + index_on_disk = IndexOnDisk }, + State1 = #vqstate { ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount }} = + maybe_write_to_disk(true, false, MsgStatus, State), + MsgStatus2 = m(MsgStatus1 #msg_status { msg = undefined }), + RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), + State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1, + ram_index_count = RamIndexCount1 }, + maybe_push_alphas_to_betas(Generator, Consumer, Quota - 1, Qa, + Consumer(MsgStatus2, Qa, State2)) + end. 
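q2 and q3 are bpqueues whose block prefix is the message's index_on_disk flag, which is why maybe_push_q1_to_betas and maybe_push_q4_to_betas above pass IndexOnDisk as the prefix. A hypothetical illustration of that encoding, with atoms standing in for #msg_status{} records:

beta_queue_demo() ->
    Q0 = bpqueue:new(),
    Q1 = bpqueue:in(false, msg_a, Q0),   %% index entry still in RAM only
    Q2 = bpqueue:in(false, msg_b, Q1),
    Q3 = bpqueue:in(true,  msg_c, Q2),   %% index entry already on disk
    %% consecutive entries with the same prefix share a block:
    [{false, [msg_a, msg_b]}, {true, [msg_c]}] = bpqueue:to_list(Q3),
    3 = bpqueue:len(Q3),
    ok.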
+ +push_betas_to_deltas(State = #vqstate { q2 = Q2, + delta = Delta, + q3 = Q3, + index_state = IndexState, + ram_index_count = RamIndexCount }) -> + {Delta2, Q2a, RamIndexCount2, IndexState2} = + push_betas_to_deltas(fun (Q2MinSeqId) -> Q2MinSeqId end, + fun bpqueue:out/1, Q2, + RamIndexCount, IndexState), + {Delta3, Q3a, RamIndexCount3, IndexState3} = + push_betas_to_deltas(fun rabbit_queue_index:next_segment_boundary/1, + fun bpqueue:out_r/1, Q3, + RamIndexCount2, IndexState2), + Delta4 = combine_deltas(Delta3, combine_deltas(Delta, Delta2)), + State #vqstate { q2 = Q2a, + delta = Delta4, + q3 = Q3a, + index_state = IndexState3, + ram_index_count = RamIndexCount3 }. + +push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) -> + case bpqueue:out(Q) of + {empty, _Q} -> + {?BLANK_DELTA, Q, RamIndexCount, IndexState}; + {{value, _IndexOnDisk1, #msg_status { seq_id = MinSeqId }}, _Qa} -> + {{value, _IndexOnDisk2, #msg_status { seq_id = MaxSeqId }}, _Qb} = + bpqueue:out_r(Q), + Limit = LimitFun(MinSeqId), + case MaxSeqId < Limit of + true -> {?BLANK_DELTA, Q, RamIndexCount, IndexState}; + false -> {Len, Qc, RamIndexCount1, IndexState1} = + push_betas_to_deltas(Generator, Limit, Q, 0, + RamIndexCount, IndexState), + {#delta { start_seq_id = Limit, + count = Len, + end_seq_id = MaxSeqId + 1 }, + Qc, RamIndexCount1, IndexState1} + end + end. + +push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> + case Generator(Q) of + {empty, _Q} -> + {Count, Q, RamIndexCount, IndexState}; + {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa} + when SeqId < Limit -> + {Count, Q, RamIndexCount, IndexState}; + {{value, IndexOnDisk, MsgStatus}, Qa} -> + {RamIndexCount1, IndexState1} = + case IndexOnDisk of + true -> {RamIndexCount, IndexState}; + false -> {#msg_status { index_on_disk = true }, + IndexState2} = + maybe_write_index_to_disk(true, MsgStatus, + IndexState), + {RamIndexCount - 1, IndexState2} + end, + push_betas_to_deltas( + Generator, Limit, Qa, Count + 1, RamIndexCount1, IndexState1) + end. diff --git a/src/rabbit_writer.erl b/src/rabbit_writer.erl index 8060203897..feb214c275 100644 --- a/src/rabbit_writer.erl +++ b/src/rabbit_writer.erl @@ -33,14 +33,14 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([start/3, start_link/3, shutdown/1, mainloop/1]). --export([send_command/2, send_command/3, send_command_and_signal_back/3, - send_command_and_signal_back/4, send_command_and_notify/5]). --export([internal_send_command/3, internal_send_command/5]). +-export([start/5, start_link/5, mainloop/2, mainloop1/2]). +-export([send_command/2, send_command/3, send_command_sync/2, + send_command_sync/3, send_command_and_notify/5]). +-export([internal_send_command/4, internal_send_command/6]). -import(gen_tcp). --record(wstate, {sock, channel, frame_max}). +-record(wstate, {sock, channel, frame_max, protocol}). -define(HIBERNATE_AFTER, 5000). @@ -48,97 +48,96 @@ -ifdef(use_specs). --spec(start/3 :: +-spec(start/5 :: (rabbit_net:socket(), rabbit_channel:channel_number(), - non_neg_integer()) - -> pid()). --spec(start_link/3 :: + non_neg_integer(), rabbit_types:protocol(), pid()) + -> rabbit_types:ok(pid())). +-spec(start_link/5 :: (rabbit_net:socket(), rabbit_channel:channel_number(), - non_neg_integer()) - -> pid()). + non_neg_integer(), rabbit_types:protocol(), pid()) + -> rabbit_types:ok(pid())). -spec(send_command/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). 
-spec(send_command/3 :: (pid(), rabbit_framing:amqp_method_record(), rabbit_types:content()) -> 'ok'). --spec(send_command_and_signal_back/3 :: - (pid(), rabbit_framing:amqp_method(), pid()) -> 'ok'). --spec(send_command_and_signal_back/4 :: - (pid(), rabbit_framing:amqp_method(), rabbit_types:content(), pid()) - -> 'ok'). +-spec(send_command_sync/2 :: + (pid(), rabbit_framing:amqp_method()) -> 'ok'). +-spec(send_command_sync/3 :: + (pid(), rabbit_framing:amqp_method(), rabbit_types:content()) -> 'ok'). -spec(send_command_and_notify/5 :: (pid(), pid(), pid(), rabbit_framing:amqp_method_record(), rabbit_types:content()) -> 'ok'). --spec(internal_send_command/3 :: +-spec(internal_send_command/4 :: (rabbit_net:socket(), rabbit_channel:channel_number(), - rabbit_framing:amqp_method_record()) + rabbit_framing:amqp_method_record(), rabbit_types:protocol()) -> 'ok'). --spec(internal_send_command/5 :: +-spec(internal_send_command/6 :: (rabbit_net:socket(), rabbit_channel:channel_number(), rabbit_framing:amqp_method_record(), rabbit_types:content(), - non_neg_integer()) + non_neg_integer(), rabbit_types:protocol()) -> 'ok'). -endif. %%---------------------------------------------------------------------------- -start(Sock, Channel, FrameMax) -> - spawn(?MODULE, mainloop, [#wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}]). - -start_link(Sock, Channel, FrameMax) -> - spawn_link(?MODULE, mainloop, [#wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}]). - -mainloop(State) -> +start(Sock, Channel, FrameMax, Protocol, ReaderPid) -> + {ok, + proc_lib:spawn(?MODULE, mainloop, [ReaderPid, + #wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol}])}. + +start_link(Sock, Channel, FrameMax, Protocol, ReaderPid) -> + {ok, + proc_lib:spawn_link(?MODULE, mainloop, [ReaderPid, + #wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol}])}. + +mainloop(ReaderPid, State) -> + try + mainloop1(ReaderPid, State) + catch + exit:Error -> ReaderPid ! {channel_exit, #wstate.channel, Error} + end, + done. + +mainloop1(ReaderPid, State) -> receive - Message -> ?MODULE:mainloop(handle_message(Message, State)) + Message -> ?MODULE:mainloop1(ReaderPid, handle_message(Message, State)) after ?HIBERNATE_AFTER -> - erlang:hibernate(?MODULE, mainloop, [State]) + erlang:hibernate(?MODULE, mainloop, [ReaderPid, State]) end. -handle_message({send_command, MethodRecord}, - State = #wstate{sock = Sock, channel = Channel}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord), +handle_message({send_command, MethodRecord}, State) -> + ok = internal_send_command_async(MethodRecord, State), State; -handle_message({send_command, MethodRecord, Content}, - State = #wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), +handle_message({send_command, MethodRecord, Content}, State) -> + ok = internal_send_command_async(MethodRecord, Content, State), State; -handle_message({send_command_and_signal_back, MethodRecord, Parent}, - State = #wstate{sock = Sock, channel = Channel}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord), - Parent ! 
rabbit_writer_send_command_signal, +handle_message({'$gen_call', From, {send_command_sync, MethodRecord}}, State) -> + ok = internal_send_command_async(MethodRecord, State), + gen_server:reply(From, ok), State; -handle_message({send_command_and_signal_back, MethodRecord, Content, Parent}, - State = #wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), - Parent ! rabbit_writer_send_command_signal, +handle_message({'$gen_call', From, {send_command_sync, MethodRecord, Content}}, + State) -> + ok = internal_send_command_async(MethodRecord, Content, State), + gen_server:reply(From, ok), State; handle_message({send_command_and_notify, QPid, ChPid, MethodRecord, Content}, - State = #wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), + State) -> + ok = internal_send_command_async(MethodRecord, Content, State), rabbit_amqqueue:notify_sent(QPid, ChPid), State; handle_message({inet_reply, _, ok}, State) -> State; handle_message({inet_reply, _, Status}, _State) -> exit({writer, send_failed, Status}); -handle_message(shutdown, _State) -> - exit(normal); handle_message(Message, _State) -> exit({writer, message_not_understood, Message}). @@ -152,49 +151,50 @@ send_command(W, MethodRecord, Content) -> W ! {send_command, MethodRecord, Content}, ok. -send_command_and_signal_back(W, MethodRecord, Parent) -> - W ! {send_command_and_signal_back, MethodRecord, Parent}, - ok. +send_command_sync(W, MethodRecord) -> + call(W, {send_command_sync, MethodRecord}). -send_command_and_signal_back(W, MethodRecord, Content, Parent) -> - W ! {send_command_and_signal_back, MethodRecord, Content, Parent}, - ok. +send_command_sync(W, MethodRecord, Content) -> + call(W, {send_command_sync, MethodRecord, Content}). send_command_and_notify(W, Q, ChPid, MethodRecord, Content) -> W ! {send_command_and_notify, Q, ChPid, MethodRecord, Content}, ok. -shutdown(W) -> - W ! shutdown, - rabbit_misc:unlink_and_capture_exit(W), - ok. +%--------------------------------------------------------------------------- + +call(Pid, Msg) -> + {ok, Res} = gen:call(Pid, '$gen_call', Msg, infinity), + Res. %--------------------------------------------------------------------------- -assemble_frames(Channel, MethodRecord) -> +assemble_frames(Channel, MethodRecord, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, none), - rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord). + rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord, + Protocol). -assemble_frames(Channel, MethodRecord, Content, FrameMax) -> +assemble_frames(Channel, MethodRecord, Content, FrameMax, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, Content), MethodName = rabbit_misc:method_record_type(MethodRecord), - true = rabbit_framing:method_has_content(MethodName), % assertion + true = Protocol:method_has_content(MethodName), % assertion MethodFrame = rabbit_binary_generator:build_simple_method_frame( - Channel, MethodRecord), + Channel, MethodRecord, Protocol), ContentFrames = rabbit_binary_generator:build_simple_content_frames( - Channel, Content, FrameMax), + Channel, Content, FrameMax, Protocol), [MethodFrame | ContentFrames]. tcp_send(Sock, Data) -> rabbit_misc:throw_on_error(inet_error, fun () -> rabbit_net:send(Sock, Data) end). 
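send_command_sync above is synchronous even though the writer is a plain process rather than a gen_server: call/2 speaks the raw '$gen_call' protocol via gen:call/4, and the mainloop answers with gen_server:reply/2. A minimal standalone sketch of the same pattern (module and function names are illustrative):

-module(raw_call_demo).
-export([start/0, ping/1]).

start() ->
    spawn(fun loop/0).

%% synchronous request to a plain process, using the gen call protocol
ping(Pid) ->
    {ok, Res} = gen:call(Pid, '$gen_call', ping, infinity),
    Res.

loop() ->
    receive
        {'$gen_call', From, ping} ->
            gen_server:reply(From, pong),
            loop();
        stop ->
            ok
    end.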
-internal_send_command(Sock, Channel, MethodRecord) -> - ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord)). +internal_send_command(Sock, Channel, MethodRecord, Protocol) -> + ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, Protocol)). -internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) -> +internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax, + Protocol) -> ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, - Content, FrameMax)). + Content, FrameMax, Protocol)). %% gen_tcp:send/2 does a selective receive of {inet_reply, Sock, %% Status} to obtain the result. That is bad when it is called from @@ -214,13 +214,20 @@ internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) -> %% Also note that the port has bounded buffers and port_command blocks %% when these are full. So the fact that we process the result %% asynchronously does not impact flow control. -internal_send_command_async(Sock, Channel, MethodRecord) -> - true = port_cmd(Sock, assemble_frames(Channel, MethodRecord)), +internal_send_command_async(MethodRecord, + #wstate{sock = Sock, + channel = Channel, + protocol = Protocol}) -> + true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, Protocol)), ok. -internal_send_command_async(Sock, Channel, MethodRecord, Content, FrameMax) -> +internal_send_command_async(MethodRecord, Content, + #wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol}) -> true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, - Content, FrameMax)), + Content, FrameMax, Protocol)), ok. port_cmd(Sock, Data) -> diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 03dc0f990f..4a1c5832b3 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -4,11 +4,39 @@ %% 1) the module name is supervisor2 %% %% 2) there is a new strategy called -%% simple_one_for_one_terminate. This is exactly the same as for -%% simple_one_for_one, except that children *are* explicitly -%% terminated as per the shutdown component of the child_spec. +%% simple_one_for_one_terminate. This is exactly the same as for +%% simple_one_for_one, except that children *are* explicitly +%% terminated as per the shutdown component of the child_spec. %% -%% All modifications are (C) 2010 LShift Ltd. +%% 3) child specifications can contain, as the restart type, a tuple +%% {permanent, Delay} | {transient, Delay} where Delay >= 0. The +%% delay, in seconds, indicates what should happen if a child, upon +%% being restarted, exceeds the MaxT and MaxR parameters. Thus, if +%% a child exits, it is restarted as normal. If it exits +%% sufficiently quickly and often to exceed the boundaries set by +%% the MaxT and MaxR parameters, and a Delay is specified, then +%% rather than stopping the supervisor, the supervisor instead +%% continues and tries to start up the child again, Delay seconds +%% later. +%% +%% Note that you can never restart more frequently than the MaxT +%% and MaxR parameters allow: i.e. you must wait until *both* the +%% Delay has passed *and* the MaxT and MaxR parameters allow the +%% child to be restarted. +%% +%% Also note that the Delay is a *minimum*. There is no guarantee +%% that the child will be restarted within that time, especially if +%% other processes are dying and being restarted at the same time - +%% essentially we have to wait for the delay to have passed and for +%% the MaxT and MaxR parameters to permit the child to be +%% restarted. This may require waiting for longer than Delay. 
+%% +%% 4) Added an 'intrinsic' restart type. Like the transient type, this +%% type means the child should only be restarted if the child exits +%% abnormally. Unlike the transient type, if the child exits +%% normally, the supervisor itself also exits normally. +%% +%% All modifications are (C) 2010 Rabbit Technologies Ltd. %% %% %CopyrightBegin% %% @@ -35,7 +63,7 @@ -export([start_link/2,start_link/3, start_child/2, restart_child/2, delete_child/2, terminate_child/2, - which_children/1, + which_children/1, find_child/2, check_childspecs/1]). -export([behaviour_info/1]). @@ -43,6 +71,7 @@ %% Internal exports -export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). -export([handle_cast/2]). +-export([delayed_restart/2]). -define(DICT, dict). @@ -109,6 +138,10 @@ terminate_child(Supervisor, Name) -> which_children(Supervisor) -> call(Supervisor, which_children). +find_child(Supervisor, Name) -> + [Pid || {Name1, Pid, _Type, _Modules} <- which_children(Supervisor), + Name1 =:= Name]. + call(Supervisor, Req) -> gen_server:call(Supervisor, Req, infinity). @@ -119,6 +152,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> end; check_childspecs(X) -> {error, {badarg, X}}. +delayed_restart(Supervisor, RestartDetails) -> + gen_server:cast(Supervisor, {delayed_restart, RestartDetails}). + %%% --------------------------------------------------- %%% %%% Initialize the supervisor. @@ -315,6 +351,20 @@ handle_call(which_children, _From, State) -> {reply, Resp, State}. +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when ?is_simple(State) -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when not (?is_simple(State)) -> + case get_child(Child#child.name, State) of + {value, Child} -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; + _ -> + {noreply, State} + end; + %%% Hopefully cause a function-clause as there is no API function %%% that utilizes cast. handle_cast(null, State) -> @@ -480,16 +530,29 @@ restart_child(Pid, Reason, State) -> {ok, State} end. 
+do_restart({RestartType, Delay}, Reason, Child, State) -> + case restart1(Child, State) of + {ok, NState} -> + {ok, NState}; + {terminate, NState} -> + {ok, _TRef} = timer:apply_after( + trunc(Delay*1000), ?MODULE, delayed_restart, + [self(), {{RestartType, Delay}, Reason, Child}]), + {ok, NState} + end; do_restart(permanent, Reason, Child, State) -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); +do_restart(intrinsic, normal, Child, State) -> + {shutdown, state_del_child(Child, State)}; do_restart(_, normal, Child, State) -> NState = state_del_child(Child, State), {ok, NState}; do_restart(_, shutdown, Child, State) -> NState = state_del_child(Child, State), {ok, NState}; -do_restart(transient, Reason, Child, State) -> +do_restart(Type, Reason, Child, State) when Type =:= transient orelse + Type =:= intrinsic -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); do_restart(temporary, Reason, Child, State) -> @@ -500,14 +563,27 @@ do_restart(temporary, Reason, Child, State) -> restart(Child, State) -> case add_restart(State) of {ok, NState} -> - restart(NState#state.strategy, Child, NState); + restart(NState#state.strategy, Child, NState, fun restart/2); {terminate, NState} -> report_error(shutdown, reached_max_restart_intensity, Child, State#state.name), - {shutdown, remove_child(Child, NState)} + {shutdown, state_del_child(Child, NState)} + end. + +restart1(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState, fun restart1/2); + {terminate, _NState} -> + %% we've reached the max restart intensity, but the + %% add_restart will have added to the restarts + %% field. Given we don't want to die here, we need to go + %% back to the old restarts field otherwise we'll never + %% attempt to restart later. + {terminate, State} end. 
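The {RestartType, Delay} and intrinsic clauses of do_restart/4 above correspond to the restart types described at the top of this file. A hypothetical child spec using them (my_worker and my_other_worker are made-up callback modules):

init([]) ->
    {ok, {{one_for_one, 3, 10},
          [%% restarted as usual, but once it exceeds MaxR (3) restarts in
           %% MaxT (10) seconds, retried no sooner than 30s later instead
           %% of taking the supervisor down
           {throttled_worker, {my_worker, start_link, []},
            {transient, 30}, 5000, worker, [my_worker]},
           %% restarted on abnormal exit; a normal exit also terminates
           %% the supervisor normally
           {essential_worker, {my_other_worker, start_link, []},
            intrinsic, 5000, worker, [my_other_worker]}]}}.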
-restart(Strategy, Child, State) +restart(Strategy, Child, State, Restart) when Strategy =:= simple_one_for_one orelse Strategy =:= simple_one_for_one_terminate -> #child{mfa = {M, F, A}} = Child, @@ -521,9 +597,9 @@ restart(Strategy, Child, State) {ok, NState}; {error, Error} -> report_error(start_error, Error, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(one_for_one, Child, State) -> +restart(one_for_one, Child, State, Restart) -> case do_start_child(State#state.name, Child) of {ok, Pid} -> NState = replace_child(Child#child{pid = Pid}, State), @@ -533,25 +609,25 @@ restart(one_for_one, Child, State) -> {ok, NState}; {error, Reason} -> report_error(start_error, Reason, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(rest_for_one, Child, State) -> +restart(rest_for_one, Child, State, Restart) -> {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), ChAfter2 = terminate_children(ChAfter, State#state.name), case start_children(ChAfter2, State#state.name) of {ok, ChAfter3} -> {ok, State#state{children = ChAfter3 ++ ChBefore}}; {error, ChAfter3} -> - restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + Restart(Child, State#state{children = ChAfter3 ++ ChBefore}) end; -restart(one_for_all, Child, State) -> +restart(one_for_all, Child, State, Restart) -> Children1 = del_child(Child#child.pid, State#state.children), Children2 = terminate_children(Children1, State#state.name), case start_children(Children2, State#state.name) of {ok, NChs} -> {ok, State#state{children = NChs}}; {error, NChs} -> - restart(Child, State#state{children = NChs}) + Restart(Child, State#state{children = NChs}) end. %%----------------------------------------------------------------- @@ -577,14 +653,22 @@ terminate_simple_children(Child, Dynamics, SupName) -> ok. do_terminate(Child, SupName) when Child#child.pid =/= undefined -> - case shutdown(Child#child.pid, - Child#child.shutdown) of - ok -> - Child#child{pid = undefined}; - {error, OtherReason} -> - report_error(shutdown_error, OtherReason, Child, SupName), - Child#child{pid = undefined} - end; + ReportError = fun (Reason) -> + report_error(shutdown_error, Reason, Child, SupName) + end, + case shutdown(Child#child.pid, Child#child.shutdown) of + ok -> + ok; + {error, normal} -> + case Child#child.restart_type of + permanent -> ReportError(normal); + {permanent, _Delay} -> ReportError(normal); + _ -> ok + end; + {error, OtherReason} -> + ReportError(OtherReason) + end, + Child#child{pid = undefined}; do_terminate(Child, _SupName) -> Child. @@ -769,7 +853,9 @@ supname(N,_) -> N. %%% {Name, Func, RestartType, Shutdown, ChildType, Modules} %%% where Name is an atom %%% Func is {Mod, Fun, Args} == {atom, atom, list} -%%% RestartType is permanent | temporary | transient +%%% RestartType is permanent | temporary | transient | +%%% intrinsic | {permanent, Delay} | +%%% {transient, Delay} where Delay >= 0 %%% Shutdown = integer() | infinity | brutal_kill %%% ChildType = supervisor | worker %%% Modules = [atom()] | dynamic @@ -815,10 +901,18 @@ validFunc({M, F, A}) when is_atom(M), is_list(A) -> true; validFunc(Func) -> throw({invalid_mfa, Func}). -validRestartType(permanent) -> true; -validRestartType(temporary) -> true; -validRestartType(transient) -> true; -validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}). 
+validRestartType(permanent) -> true; +validRestartType(temporary) -> true; +validRestartType(transient) -> true; +validRestartType(intrinsic) -> true; +validRestartType({permanent, Delay}) -> validDelay(Delay); +validRestartType({transient, Delay}) -> validDelay(Delay); +validRestartType(RestartType) -> throw({invalid_restart_type, + RestartType}). + +validDelay(Delay) when is_number(Delay), + Delay >= 0 -> true; +validDelay(What) -> throw({invalid_delay, What}). validShutdown(Shutdown, _) when is_integer(Shutdown), Shutdown > 0 -> true; diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl index cc4982c9cb..c9809ace61 100644 --- a/src/tcp_acceptor.erl +++ b/src/tcp_acceptor.erl @@ -55,6 +55,7 @@ handle_call(_Request, _From, State) -> {noreply, State}. handle_cast(accept, State) -> + ok = file_handle_cache:obtain(), accept(State); handle_cast(_Msg, State) -> @@ -83,7 +84,8 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}}, %% is drained. gen_event:which_handlers(error_logger), %% handle - file_handle_cache:release_on_death(apply(M, F, A ++ [Sock])) + file_handle_cache:transfer(apply(M, F, A ++ [Sock])), + ok = file_handle_cache:obtain() catch {inet_error, Reason} -> gen_tcp:close(Sock), error_logger:error_msg("unable to accept TCP connection: ~p~n", @@ -92,11 +94,13 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}}, %% accept more accept(State); + handle_info({inet_async, LSock, Ref, {error, closed}}, State=#state{sock=LSock, ref=Ref}) -> %% It would be wrong to attempt to restart the acceptor when we %% know this will fail. {stop, normal, State}; + handle_info(_Info, State) -> {noreply, State}. @@ -111,7 +115,6 @@ code_change(_OldVsn, State, _Extra) -> inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F). accept(State = #state{sock=LSock}) -> - ok = file_handle_cache:obtain(), case prim_inet:async_accept(LSock, -1) of {ok, Ref} -> {noreply, State#state{ref=Ref}}; Error -> {stop, {cannot_accept, Error}, State} diff --git a/src/tcp_client_sup.erl b/src/tcp_client_sup.erl index 1b78584384..02d7e0e40d 100644 --- a/src/tcp_client_sup.erl +++ b/src/tcp_client_sup.erl @@ -31,19 +31,19 @@ -module(tcp_client_sup). --behaviour(supervisor). +-behaviour(supervisor2). -export([start_link/1, start_link/2]). -export([init/1]). start_link(Callback) -> - supervisor:start_link(?MODULE, Callback). + supervisor2:start_link(?MODULE, Callback). start_link(SupName, Callback) -> - supervisor:start_link(SupName, ?MODULE, Callback). + supervisor2:start_link(SupName, ?MODULE, Callback). init({M,F,A}) -> - {ok, {{simple_one_for_one, 10, 10}, + {ok, {{simple_one_for_one_terminate, 10, 10}, [{tcp_client, {M,F,A}, - temporary, brutal_kill, worker, [M]}]}}. + temporary, infinity, supervisor, [M]}]}}. diff --git a/src/test_sup.erl b/src/test_sup.erl new file mode 100644 index 0000000000..f41793bc89 --- /dev/null +++ b/src/test_sup.erl @@ -0,0 +1,94 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(test_sup). + +-behaviour(supervisor2). + +-export([test_supervisor_delayed_restart/0, + init/1, start_child/0]). + +test_supervisor_delayed_restart() -> + passed = with_sup(simple_one_for_one_terminate, + fun (SupPid) -> + {ok, _ChildPid} = + supervisor2:start_child(SupPid, []), + test_supervisor_delayed_restart(SupPid) + end), + passed = with_sup(one_for_one, fun test_supervisor_delayed_restart/1). + +test_supervisor_delayed_restart(SupPid) -> + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + timeout = ping_child(SupPid), + timer:sleep(1010), + ok = ping_child(SupPid), + passed. + +with_sup(RestartStrategy, Fun) -> + {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]), + Res = Fun(SupPid), + exit(SupPid, shutdown), + rabbit_misc:unlink_and_capture_exit(SupPid), + Res. + +init([RestartStrategy]) -> + {ok, {{RestartStrategy, 1, 1}, + [{test, {test_sup, start_child, []}, {permanent, 1}, + 16#ffffffff, worker, [test_sup]}]}}. + +start_child() -> + {ok, proc_lib:spawn_link(fun run_child/0)}. + +ping_child(SupPid) -> + Ref = make_ref(), + get_child_pid(SupPid) ! {ping, Ref, self()}, + receive {pong, Ref} -> ok + after 1000 -> timeout + end. + +exit_child(SupPid) -> + true = exit(get_child_pid(SupPid), abnormal), + ok. + +get_child_pid(SupPid) -> + [{_Id, ChildPid, worker, [test_sup]}] = + supervisor2:which_children(SupPid), + ChildPid. + +run_child() -> + receive {ping, Ref, Pid} -> Pid ! {pong, Ref}, + run_child() + end. diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index bbc3a8c017..e658f005a3 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -72,13 +72,10 @@ -ifdef(use_specs). --spec(start_link/1 :: - (float()) -> 'ignore' | - rabbit_types:error(any()) | - rabbit_types:ok(pid())). +-spec(start_link/1 :: (float()) -> {'ok', pid()} | {'error', any()}). -spec(update/0 :: () -> 'ok'). -spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')). --spec(get_vm_limit/0 :: () -> (non_neg_integer() | 'unknown')). +-spec(get_vm_limit/0 :: () -> non_neg_integer()). -spec(get_memory_limit/0 :: () -> (non_neg_integer() | 'undefined')). -spec(get_check_interval/0 :: () -> non_neg_integer()). -spec(set_check_interval/1 :: (non_neg_integer()) -> 'ok'). diff --git a/src/worker_pool.erl b/src/worker_pool.erl index 01ce3535d8..595884e033 100644 --- a/src/worker_pool.erl +++ b/src/worker_pool.erl @@ -52,7 +52,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). -spec(submit/1 :: (fun (() -> A) | {atom(), atom(), [any()]}) -> A). -spec(submit_async/1 :: (fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok'). 
diff --git a/src/worker_pool_sup.erl b/src/worker_pool_sup.erl index afa21164be..177a14533b 100644 --- a/src/worker_pool_sup.erl +++ b/src/worker_pool_sup.erl @@ -41,9 +41,8 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). --spec(start_link/1 :: (non_neg_integer()) -> - 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(start_link/0 :: () -> {'ok', pid()} | {'error', any()}). +-spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()} | {'error', any()}). -endif. diff --git a/src/worker_pool_worker.erl b/src/worker_pool_worker.erl index a61e4cc372..42049d5068 100644 --- a/src/worker_pool_worker.erl +++ b/src/worker_pool_worker.erl @@ -44,8 +44,7 @@ -ifdef(use_specs). --spec(start_link/1 :: - (any()) -> {'ok', pid()} | 'ignore' | rabbit_types:error(any())). +-spec(start_link/1 :: (any()) -> {'ok', pid()} | {'error', any()}). -spec(submit/2 :: (pid(), fun (() -> A) | {atom(), atom(), [any()]}) -> A). -spec(submit_async/2 :: (pid(), fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok'). |
