diff options
| author | Emile Joubert <emile@rabbitmq.com> | 2012-10-25 13:00:09 +0100 |
|---|---|---|
| committer | Emile Joubert <emile@rabbitmq.com> | 2012-10-25 13:00:09 +0100 |
| commit | 54248e76629b351561d770208036ffbaedf962a6 (patch) | |
| tree | b0cc0604e0434f3a845fd7132505055dbb0c5e80 /src | |
| parent | 71044851c7fc988a8cb560fee98f523739705873 (diff) | |
| parent | 964876078c29a04b6dacd6f77e50e533d607ea39 (diff) | |
| download | rabbitmq-server-git-54248e76629b351561d770208036ffbaedf962a6.tar.gz | |
Merged bug25053 into default
Diffstat (limited to 'src')
66 files changed, 5356 insertions, 2794 deletions
diff --git a/src/app_utils.erl b/src/app_utils.erl index 4bef83a53e..fdf6ed410f 100644 --- a/src/app_utils.erl +++ b/src/app_utils.erl @@ -15,17 +15,21 @@ %% -module(app_utils). --export([load_applications/1, start_applications/1, - stop_applications/1, app_dependency_order/2, +-export([load_applications/1, start_applications/1, start_applications/2, + stop_applications/1, stop_applications/2, app_dependency_order/2, wait_for_applications/1]). -ifdef(use_specs). --spec load_applications([atom()]) -> 'ok'. --spec start_applications([atom()]) -> 'ok'. --spec stop_applications([atom()]) -> 'ok'. --spec wait_for_applications([atom()]) -> 'ok'. --spec app_dependency_order([atom()], boolean()) -> [digraph:vertex()]. +-type error_handler() :: fun((atom(), any()) -> 'ok'). + +-spec load_applications([atom()]) -> 'ok'. +-spec start_applications([atom()]) -> 'ok'. +-spec stop_applications([atom()]) -> 'ok'. +-spec start_applications([atom()], error_handler()) -> 'ok'. +-spec stop_applications([atom()], error_handler()) -> 'ok'. +-spec wait_for_applications([atom()]) -> 'ok'. +-spec app_dependency_order([atom()], boolean()) -> [digraph:vertex()]. -endif. @@ -37,21 +41,34 @@ load_applications(Apps) -> ok. start_applications(Apps) -> + start_applications( + Apps, fun (App, Reason) -> + throw({error, {cannot_start_application, App, Reason}}) + end). + +stop_applications(Apps) -> + stop_applications( + Apps, fun (App, Reason) -> + throw({error, {cannot_stop_application, App, Reason}}) + end). + +start_applications(Apps, ErrorHandler) -> manage_applications(fun lists:foldl/3, fun application:start/1, fun application:stop/1, already_started, - cannot_start_application, + ErrorHandler, Apps). -stop_applications(Apps) -> +stop_applications(Apps, ErrorHandler) -> manage_applications(fun lists:foldr/3, fun application:stop/1, fun application:start/1, not_started, - cannot_stop_application, + ErrorHandler, Apps). + wait_for_applications(Apps) -> [wait_for_application(App) || App <- Apps], ok. @@ -107,14 +124,14 @@ app_dependencies(App) -> {ok, Lst} -> Lst end. -manage_applications(Iterate, Do, Undo, SkipError, ErrorTag, Apps) -> +manage_applications(Iterate, Do, Undo, SkipError, ErrorHandler, Apps) -> Iterate(fun (App, Acc) -> case Do(App) of ok -> [App | Acc]; {error, {SkipError, _}} -> Acc; {error, Reason} -> lists:foreach(Undo, Acc), - throw({error, {ErrorTag, App, Reason}}) + ErrorHandler(App, Reason) end end, [], Apps), ok. diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index f3b4dbafa2..3260d36986 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -120,12 +120,12 @@ %% do not need to worry about their handles being closed by the server %% - reopening them when necessary is handled transparently. %% -%% The server also supports obtain, release and transfer. obtain/0 +%% The server also supports obtain, release and transfer. obtain/{0,1} %% blocks until a file descriptor is available, at which point the -%% requesting process is considered to 'own' one more -%% descriptor. release/0 is the inverse operation and releases a -%% previously obtained descriptor. transfer/1 transfers ownership of a -%% file descriptor between processes. It is non-blocking. Obtain has a +%% requesting process is considered to 'own' more descriptor(s). +%% release/{0,1} is the inverse operation and releases previously obtained +%% descriptor(s). transfer/{1,2} transfers ownership of file descriptor(s) +%% between processes. It is non-blocking. Obtain has a %% lower limit, set by the ?OBTAIN_LIMIT/1 macro. File handles can use %% the entire limit, but will be evicted by obtain calls up to the %% point at which no more obtain calls can be satisfied by the obtains @@ -136,8 +136,8 @@ %% as sockets can do so in such a way that the overall number of open %% file descriptors is managed. %% -%% The callers of register_callback/3, obtain/0, and the argument of -%% transfer/1 are monitored, reducing the count of handles in use +%% The callers of register_callback/3, obtain, and the argument of +%% transfer are monitored, reducing the count of handles in use %% appropriately when the processes terminate. -behaviour(gen_server2). @@ -146,12 +146,13 @@ -export([open/3, close/1, read/2, append/2, needs_sync/1, sync/1, position/2, truncate/1, current_virtual_offset/1, current_raw_offset/1, flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). --export([obtain/0, release/0, transfer/1, set_limit/1, get_limit/0, info_keys/0, +-export([obtain/0, obtain/1, release/0, release/1, transfer/1, transfer/2, + set_limit/1, get_limit/0, info_keys/0, info/0, info/1]). -export([ulimit/0]). --export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3, prioritise_cast/2]). +-export([start_link/0, start_link/2, init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2, code_change/3, prioritise_cast/2]). -define(SERVER, ?MODULE). -define(RESERVED_FOR_OTHERS, 100). @@ -195,7 +196,9 @@ obtain_count, obtain_pending, clients, - timer_ref + timer_ref, + alarm_set, + alarm_clear }). -record(cstate, @@ -249,8 +252,11 @@ -spec(clear/1 :: (ref()) -> ok_or_error()). -spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). -spec(obtain/0 :: () -> 'ok'). +-spec(obtain/1 :: (non_neg_integer()) -> 'ok'). -spec(release/0 :: () -> 'ok'). +-spec(release/1 :: (non_neg_integer()) -> 'ok'). -spec(transfer/1 :: (pid()) -> 'ok'). +-spec(transfer/2 :: (pid(), non_neg_integer()) -> 'ok'). -spec(set_limit/1 :: (non_neg_integer()) -> 'ok'). -spec(get_limit/0 :: () -> non_neg_integer()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). @@ -268,7 +274,11 @@ %%---------------------------------------------------------------------------- start_link() -> - gen_server2:start_link({local, ?SERVER}, ?MODULE, [], [{timeout, infinity}]). + start_link(fun alarm_handler:set_alarm/1, fun alarm_handler:clear_alarm/1). + +start_link(AlarmSet, AlarmClear) -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [AlarmSet, AlarmClear], + [{timeout, infinity}]). register_callback(M, F, A) when is_atom(M) andalso is_atom(F) andalso is_list(A) -> @@ -374,11 +384,11 @@ sync(Ref) -> end). needs_sync(Ref) -> - with_handles( - [Ref], - fun ([#handle { is_dirty = false, write_buffer = [] }]) -> false; - ([_Handle]) -> true - end). + %% This must *not* use with_handles/2; see bug 25052 + case get({Ref, fhc_handle}) of + #handle { is_dirty = false, write_buffer = [] } -> false; + #handle {} -> true + end. position(Ref, NewOffset) -> with_flushed_handles( @@ -479,18 +489,22 @@ set_maximum_since_use(MaximumAge) -> true -> ok end. -obtain() -> +obtain() -> obtain(1). +release() -> release(1). +transfer(Pid) -> transfer(Pid, 1). + +obtain(Count) when Count > 0 -> %% If the FHC isn't running, obtains succeed immediately. case whereis(?SERVER) of undefined -> ok; - _ -> gen_server2:call(?SERVER, {obtain, self()}, infinity) + _ -> gen_server2:call(?SERVER, {obtain, Count, self()}, infinity) end. -release() -> - gen_server2:cast(?SERVER, {release, self()}). +release(Count) when Count > 0 -> + gen_server2:cast(?SERVER, {release, Count, self()}). -transfer(Pid) -> - gen_server2:cast(?SERVER, {transfer, self(), Pid}). +transfer(Pid, Count) when Count > 0 -> + gen_server2:cast(?SERVER, {transfer, Count, self(), Pid}). set_limit(Limit) -> gen_server2:call(?SERVER, {set_limit, Limit}, infinity). @@ -806,7 +820,7 @@ i(Item, _) -> throw({bad_argument, Item}). %% gen_server2 callbacks %%---------------------------------------------------------------------------- -init([]) -> +init([AlarmSet, AlarmClear]) -> Limit = case application:get_env(file_handles_high_watermark) of {ok, Watermark} when (is_integer(Watermark) andalso Watermark > 0) -> @@ -830,11 +844,13 @@ init([]) -> obtain_count = 0, obtain_pending = pending_new(), clients = Clients, - timer_ref = undefined }}. + timer_ref = undefined, + alarm_set = AlarmSet, + alarm_clear = AlarmClear }}. prioritise_cast(Msg, _State) -> case Msg of - {release, _} -> 5; + {release, _, _} -> 5; _ -> 0 end. @@ -867,11 +883,12 @@ handle_call({open, Pid, Requested, EldestUnusedSince}, From, false -> {noreply, run_pending_item(Item, State)} end; -handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, - obtain_pending = Pending, - clients = Clients }) -> +handle_call({obtain, N, Pid}, From, State = #fhc_state { + obtain_count = Count, + obtain_pending = Pending, + clients = Clients }) -> ok = track_client(Pid, Clients), - Item = #pending { kind = obtain, pid = Pid, requested = 1, from = From }, + Item = #pending { kind = obtain, pid = Pid, requested = N, from = From }, Enqueue = fun () -> true = ets:update_element(Clients, Pid, {#cstate.blocked, true}), @@ -882,7 +899,7 @@ handle_call({obtain, Pid}, From, State = #fhc_state { obtain_count = Count, case obtain_limit_reached(State) of true -> Enqueue(); false -> case needs_reduce(State #fhc_state { - obtain_count = Count + 1 }) of + obtain_count = Count + N }) of true -> reduce(Enqueue()); false -> adjust_alarm( State, run_pending_item(Item, State)) @@ -917,9 +934,9 @@ handle_cast({update, Pid, EldestUnusedSince}, %% storm of messages {noreply, State}; -handle_cast({release, Pid}, State) -> +handle_cast({release, N, Pid}, State) -> {noreply, adjust_alarm(State, process_pending( - update_counts(obtain, Pid, -1, State)))}; + update_counts(obtain, Pid, -N, State)))}; handle_cast({close, Pid, EldestUnusedSince}, State = #fhc_state { elders = Elders, clients = Clients }) -> @@ -931,11 +948,11 @@ handle_cast({close, Pid, EldestUnusedSince}, {noreply, adjust_alarm(State, process_pending( update_counts(open, Pid, -1, State)))}; -handle_cast({transfer, FromPid, ToPid}, State) -> +handle_cast({transfer, N, FromPid, ToPid}, State) -> ok = track_client(ToPid, State#fhc_state.clients), {noreply, process_pending( - update_counts(obtain, ToPid, +1, - update_counts(obtain, FromPid, -1, State)))}. + update_counts(obtain, ToPid, +N, + update_counts(obtain, FromPid, -N, State)))}. handle_info(check_counts, State) -> {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}; @@ -1026,10 +1043,11 @@ obtain_limit_reached(#fhc_state { obtain_limit = Limit, obtain_count = Count}) -> Limit =/= infinity andalso Count >= Limit. -adjust_alarm(OldState, NewState) -> +adjust_alarm(OldState = #fhc_state { alarm_set = AlarmSet, + alarm_clear = AlarmClear }, NewState) -> case {obtain_limit_reached(OldState), obtain_limit_reached(NewState)} of - {false, true} -> alarm_handler:set_alarm({file_descriptor_limit, []}); - {true, false} -> alarm_handler:clear_alarm(file_descriptor_limit); + {false, true} -> AlarmSet({file_descriptor_limit, []}); + {true, false} -> AlarmClear(file_descriptor_limit); _ -> ok end, NewState. diff --git a/src/gatherer.erl b/src/gatherer.erl index 98b360389a..29d2d71366 100644 --- a/src/gatherer.erl +++ b/src/gatherer.erl @@ -18,7 +18,7 @@ -behaviour(gen_server2). --export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]). +-export([start_link/0, stop/1, fork/1, finish/1, in/2, sync_in/2, out/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -32,6 +32,7 @@ -spec(fork/1 :: (pid()) -> 'ok'). -spec(finish/1 :: (pid()) -> 'ok'). -spec(in/2 :: (pid(), any()) -> 'ok'). +-spec(sync_in/2 :: (pid(), any()) -> 'ok'). -spec(out/1 :: (pid()) -> {'value', any()} | 'empty'). -endif. @@ -62,6 +63,9 @@ finish(Pid) -> in(Pid, Value) -> gen_server2:cast(Pid, {in, Value}). +sync_in(Pid, Value) -> + gen_server2:call(Pid, {in, Value}, infinity). + out(Pid) -> gen_server2:call(Pid, out, infinity). @@ -78,19 +82,22 @@ handle_call(stop, _From, State) -> handle_call(fork, _From, State = #gstate { forks = Forks }) -> {reply, ok, State #gstate { forks = Forks + 1 }, hibernate}; +handle_call({in, Value}, From, State) -> + {noreply, in(Value, From, State), hibernate}; + handle_call(out, From, State = #gstate { forks = Forks, values = Values, blocked = Blocked }) -> case queue:out(Values) of + {empty, _} when Forks == 0 -> + {reply, empty, State, hibernate}; {empty, _} -> - case Forks of - 0 -> {reply, empty, State, hibernate}; - _ -> {noreply, - State #gstate { blocked = queue:in(From, Blocked) }, - hibernate} - end; - {{value, _Value} = V, NewValues} -> - {reply, V, State #gstate { values = NewValues }, hibernate} + {noreply, State #gstate { blocked = queue:in(From, Blocked) }, + hibernate}; + {{value, {PendingIn, Value}}, NewValues} -> + reply(PendingIn, ok), + {reply, {value, Value}, State #gstate { values = NewValues }, + hibernate} end; handle_call(Msg, _From, State) -> @@ -107,15 +114,8 @@ handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, hibernate}; -handle_cast({in, Value}, State = #gstate { values = Values, - blocked = Blocked }) -> - {noreply, case queue:out(Blocked) of - {empty, _} -> - State #gstate { values = queue:in(Value, Values) }; - {{value, From}, NewBlocked} -> - gen_server2:reply(From, {value, Value}), - State #gstate { blocked = NewBlocked } - end, hibernate}; +handle_cast({in, Value}, State) -> + {noreply, in(Value, undefined, State), hibernate}; handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}. @@ -128,3 +128,18 @@ code_change(_OldVsn, State, _Extra) -> terminate(_Reason, State) -> State. + +%%---------------------------------------------------------------------------- + +in(Value, From, State = #gstate { values = Values, blocked = Blocked }) -> + case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in({From, Value}, Values) }; + {{value, PendingOut}, NewBlocked} -> + reply(From, ok), + gen_server2:reply(PendingOut, {value, Value}), + State #gstate { blocked = NewBlocked } + end. + +reply(undefined, _Reply) -> ok; +reply(From, Reply) -> gen_server2:reply(From, Reply). diff --git a/src/gm.erl b/src/gm.erl index f88ed18fbf..4a95de0dd1 100644 --- a/src/gm.erl +++ b/src/gm.erl @@ -77,9 +77,13 @@ %% confirmed_broadcast/2 directly from the callback module otherwise %% you will deadlock the entire group. %% -%% group_members/1 -%% Provide the Pid. Returns a list of the current group members. +%% info/1 +%% Provide the Pid. Returns a proplist with various facts, including +%% the group name and the current group members. %% +%% forget_group/1 +%% Provide the group name. Removes its mnesia record. Makes no attempt +%% to ensure the group is empty. %% %% Implementation Overview %% ----------------------- @@ -372,8 +376,8 @@ -behaviour(gen_server2). --export([create_tables/0, start_link/3, leave/1, broadcast/2, - confirmed_broadcast/2, group_members/1]). +-export([create_tables/0, start_link/4, leave/1, broadcast/2, + confirmed_broadcast/2, info/1, forget_group/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, prioritise_info/2]). @@ -404,7 +408,8 @@ callback_args, confirms, broadcast_buffer, - broadcast_timer + broadcast_timer, + txn_executor }). -record(gm_group, { name, version, members }). @@ -424,14 +429,16 @@ -export_type([group_name/0]). -type(group_name() :: any()). +-type(txn_fun() :: fun((fun(() -> any())) -> any())). -spec(create_tables/0 :: () -> 'ok' | {'aborted', any()}). --spec(start_link/3 :: (group_name(), atom(), any()) -> +-spec(start_link/4 :: (group_name(), atom(), any(), txn_fun()) -> rabbit_types:ok_pid_or_error()). -spec(leave/1 :: (pid()) -> 'ok'). -spec(broadcast/2 :: (pid(), any()) -> 'ok'). -spec(confirmed_broadcast/2 :: (pid(), any()) -> 'ok'). --spec(group_members/1 :: (pid()) -> [pid()]). +-spec(info/1 :: (pid()) -> rabbit_types:infos()). +-spec(forget_group/1 :: (group_name()) -> 'ok'). %% The joined, members_changed and handle_msg callbacks can all return %% any of the following terms: @@ -502,8 +509,8 @@ table_definitions() -> {Name, Attributes} = ?TABLE, [{Name, [?TABLE_MATCH | Attributes]}]. -start_link(GroupName, Module, Args) -> - gen_server2:start_link(?MODULE, [GroupName, Module, Args], []). +start_link(GroupName, Module, Args, TxnFun) -> + gen_server2:start_link(?MODULE, [GroupName, Module, Args, TxnFun], []). leave(Server) -> gen_server2:cast(Server, leave). @@ -514,11 +521,17 @@ broadcast(Server, Msg) -> confirmed_broadcast(Server, Msg) -> gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity). -group_members(Server) -> - gen_server2:call(Server, group_members, infinity). +info(Server) -> + gen_server2:call(Server, info, infinity). +forget_group(GroupName) -> + {atomic, ok} = mnesia:sync_transaction( + fun () -> + mnesia:delete({?GROUP_TABLE, GroupName}) + end), + ok. -init([GroupName, Module, Args]) -> +init([GroupName, Module, Args, TxnFun]) -> {MegaSecs, Secs, MicroSecs} = now(), random:seed(MegaSecs, Secs, MicroSecs), Self = make_member(GroupName), @@ -534,7 +547,8 @@ init([GroupName, Module, Args]) -> callback_args = Args, confirms = queue:new(), broadcast_buffer = [], - broadcast_timer = undefined }, hibernate, + broadcast_timer = undefined, + txn_executor = TxnFun }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -553,12 +567,16 @@ handle_call({confirmed_broadcast, Msg}, _From, handle_call({confirmed_broadcast, Msg}, From, State) -> internal_broadcast(Msg, From, State); -handle_call(group_members, _From, +handle_call(info, _From, State = #state { members_state = undefined }) -> reply(not_joined, State); -handle_call(group_members, _From, State = #state { view = View }) -> - reply(get_pids(alive_view_members(View)), State); +handle_call(info, _From, State = #state { group_name = GroupName, + module = Module, + view = View }) -> + reply([{group_name, GroupName}, + {module, Module}, + {group_members, get_pids(alive_view_members(View))}], State); handle_call({add_on_right, _NewMember}, _From, State = #state { members_state = undefined }) -> @@ -570,7 +588,8 @@ handle_call({add_on_right, NewMember}, _From, view = View, members_state = MembersState, module = Module, - callback_args = Args }) -> + callback_args = Args, + txn_executor = TxnFun }) -> {MembersState1, Group} = record_new_member_in_group( GroupName, Self, NewMember, @@ -581,7 +600,7 @@ handle_call({add_on_right, NewMember}, _From, {catchup, Self, prepare_members_state(MembersState1)}), MembersState1 - end), + end, TxnFun), View2 = group_to_view(Group), State1 = check_neighbours(State #state { view = View2, members_state = MembersState1 }), @@ -627,8 +646,9 @@ handle_cast(join, State = #state { self = Self, group_name = GroupName, members_state = undefined, module = Module, - callback_args = Args }) -> - View = join_group(Self, GroupName), + callback_args = Args, + txn_executor = TxnFun }) -> + View = join_group(Self, GroupName, TxnFun), MembersState = case alive_view_members(View) of [Self] -> blank_member_state(); @@ -655,7 +675,8 @@ handle_info({'DOWN', MRef, process, _Pid, Reason}, view = View, module = Module, callback_args = Args, - confirms = Confirms }) -> + confirms = Confirms, + txn_executor = TxnFun }) -> Member = case {Left, Right} of {{Member1, MRef}, _} -> Member1; {_, {Member1, MRef}} -> Member1; @@ -668,7 +689,8 @@ handle_info({'DOWN', MRef, process, _Pid, Reason}, noreply(State); _ -> View1 = - group_to_view(record_dead_member_in_group(Member, GroupName)), + group_to_view(record_dead_member_in_group(Member, + GroupName, TxnFun)), {Result, State2} = case alive_view_members(View1) of [Self] -> @@ -970,14 +992,15 @@ ensure_alive_suffix1(MembersQ) -> %% View modification %% --------------------------------------------------------------------------- -join_group(Self, GroupName) -> - join_group(Self, GroupName, read_group(GroupName)). +join_group(Self, GroupName, TxnFun) -> + join_group(Self, GroupName, read_group(GroupName), TxnFun). -join_group(Self, GroupName, {error, not_found}) -> - join_group(Self, GroupName, prune_or_create_group(Self, GroupName)); -join_group(Self, _GroupName, #gm_group { members = [Self] } = Group) -> +join_group(Self, GroupName, {error, not_found}, TxnFun) -> + join_group(Self, GroupName, + prune_or_create_group(Self, GroupName, TxnFun), TxnFun); +join_group(Self, _GroupName, #gm_group { members = [Self] } = Group, _TxnFun) -> group_to_view(Group); -join_group(Self, GroupName, #gm_group { members = Members } = Group) -> +join_group(Self, GroupName, #gm_group { members = Members } = Group, TxnFun) -> case lists:member(Self, Members) of true -> group_to_view(Group); @@ -985,20 +1008,22 @@ join_group(Self, GroupName, #gm_group { members = Members } = Group) -> case lists:filter(fun is_member_alive/1, Members) of [] -> join_group(Self, GroupName, - prune_or_create_group(Self, GroupName)); + prune_or_create_group(Self, GroupName, TxnFun)); Alive -> Left = lists:nth(random:uniform(length(Alive)), Alive), Handler = fun () -> join_group( Self, GroupName, - record_dead_member_in_group(Left, GroupName)) + record_dead_member_in_group( + Left, GroupName, TxnFun), + TxnFun) end, try case gen_server2:call( get_pid(Left), {add_on_right, Self}, infinity) of {ok, Group1} -> group_to_view(Group1); - not_ready -> join_group(Self, GroupName) + not_ready -> join_group(Self, GroupName, TxnFun) end catch exit:{R, _} @@ -1017,29 +1042,29 @@ read_group(GroupName) -> [Group] -> Group end. -prune_or_create_group(Self, GroupName) -> - {atomic, Group} = - mnesia:sync_transaction( - fun () -> GroupNew = #gm_group { name = GroupName, - members = [Self], - version = ?VERSION_START }, - case mnesia:read({?GROUP_TABLE, GroupName}) of - [] -> - mnesia:write(GroupNew), - GroupNew; - [Group1 = #gm_group { members = Members }] -> - case lists:any(fun is_member_alive/1, Members) of - true -> Group1; - false -> mnesia:write(GroupNew), - GroupNew - end - end - end), +prune_or_create_group(Self, GroupName, TxnFun) -> + Group = TxnFun( + fun () -> + GroupNew = #gm_group { name = GroupName, + members = [Self], + version = ?VERSION_START }, + case mnesia:read({?GROUP_TABLE, GroupName}) of + [] -> + mnesia:write(GroupNew), + GroupNew; + [Group1 = #gm_group { members = Members }] -> + case lists:any(fun is_member_alive/1, Members) of + true -> Group1; + false -> mnesia:write(GroupNew), + GroupNew + end + end + end), Group. -record_dead_member_in_group(Member, GroupName) -> - {atomic, Group} = - mnesia:sync_transaction( +record_dead_member_in_group(Member, GroupName, TxnFun) -> + Group = + TxnFun( fun () -> [Group1 = #gm_group { members = Members, version = Ver }] = mnesia:read({?GROUP_TABLE, GroupName}), case lists:splitwith( @@ -1056,9 +1081,9 @@ record_dead_member_in_group(Member, GroupName) -> end), Group. -record_new_member_in_group(GroupName, Left, NewMember, Fun) -> - {atomic, {Result, Group}} = - mnesia:sync_transaction( +record_new_member_in_group(GroupName, Left, NewMember, Fun, TxnFun) -> + {Result, Group} = + TxnFun( fun () -> [#gm_group { members = Members, version = Ver } = Group1] = mnesia:read({?GROUP_TABLE, GroupName}), @@ -1073,10 +1098,10 @@ record_new_member_in_group(GroupName, Left, NewMember, Fun) -> end), {Result, Group}. -erase_members_in_group(Members, GroupName) -> +erase_members_in_group(Members, GroupName, TxnFun) -> DeadMembers = [{dead, Id} || Id <- Members], - {atomic, Group} = - mnesia:sync_transaction( + Group = + TxnFun( fun () -> [Group1 = #gm_group { members = [_|_] = Members1, version = Ver }] = @@ -1097,7 +1122,8 @@ maybe_erase_aliases(State = #state { self = Self, view = View0, members_state = MembersState, module = Module, - callback_args = Args }, View) -> + callback_args = Args, + txn_executor = TxnFun }, View) -> #view_member { aliases = Aliases } = fetch_view_member(Self, View), {Erasable, MembersState1} = ?SETS:fold( @@ -1114,7 +1140,7 @@ maybe_erase_aliases(State = #state { self = Self, case Erasable of [] -> {ok, State1 #state { view = View }}; _ -> View1 = group_to_view( - erase_members_in_group(Erasable, GroupName)), + erase_members_in_group(Erasable, GroupName, TxnFun)), {callback_view_changed(Args, Module, View0, View1), check_neighbours(State1 #state { view = View1 })} end. diff --git a/src/gm_soak_test.erl b/src/gm_soak_test.erl index 572175410d..5fbfc22371 100644 --- a/src/gm_soak_test.erl +++ b/src/gm_soak_test.erl @@ -105,7 +105,9 @@ spawn_member() -> random:seed(MegaSecs, Secs, MicroSecs), %% start up delay of no more than 10 seconds timer:sleep(random:uniform(10000)), - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, []), + {ok, Pid} = gm:start_link( + ?MODULE, ?MODULE, [], + fun rabbit_misc:execute_mnesia_transaction/1), Start = random:uniform(10000), send_loop(Pid, Start, Start + random:uniform(10000)), gm:leave(Pid), diff --git a/src/gm_speed_test.erl b/src/gm_speed_test.erl index dad75bd447..84d4ab2fb1 100644 --- a/src/gm_speed_test.erl +++ b/src/gm_speed_test.erl @@ -44,7 +44,8 @@ terminate(Owner, _Reason) -> %% other wile_e_coyote(Time, WriteUnit) -> - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), receive joined -> ok end, timer:sleep(1000), %% wait for all to join timer:send_after(Time, stop), diff --git a/src/gm_tests.erl b/src/gm_tests.erl index 0a2d420469..a9c0ba9035 100644 --- a/src/gm_tests.erl +++ b/src/gm_tests.erl @@ -76,7 +76,9 @@ test_confirmed_broadcast() -> test_member_death() -> with_two_members( fun (Pid, Pid2) -> - {ok, Pid3} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid3} = gm:start_link( + ?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid3, [Pid, Pid2, Pid3], timeout_joining_gm_group_3), passed = receive_birth(Pid, Pid3, timeout_waiting_for_birth_3_1), @@ -128,10 +130,12 @@ test_broadcast_fun(Fun) -> with_two_members(Fun) -> ok = gm:create_tables(), - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1), - {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self()), + {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self(), + fun rabbit_misc:execute_mnesia_transaction/1), passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2), passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2), diff --git a/src/mirrored_supervisor.erl b/src/mirrored_supervisor.erl index 4fc488b88a..24c3ebd008 100644 --- a/src/mirrored_supervisor.erl +++ b/src/mirrored_supervisor.erl @@ -174,7 +174,7 @@ -spec start_internal(Group, ChildSpecs) -> Result when Group :: group_name(), ChildSpecs :: [supervisor2:child_spec()], - Result :: supervisor2:startlink_ret(). + Result :: {'ok', pid()} | {'error', term()}. -spec create_tables() -> Result when Result :: 'ok'. diff --git a/src/mochijson2.erl b/src/mochijson2.erl new file mode 100644 index 0000000000..bddb52cc6f --- /dev/null +++ b/src/mochijson2.erl @@ -0,0 +1,893 @@ +%% This file is a copy of `mochijson2.erl' from mochiweb, revision +%% d541e9a0f36c00dcadc2e589f20e47fbf46fc76f. For the license, see +%% `LICENSE-MIT-Mochi'. + +%% @author Bob Ippolito <bob@mochimedia.com> +%% @copyright 2007 Mochi Media, Inc. + +%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works +%% with binaries as strings, arrays as lists (without an {array, _}) +%% wrapper and it only knows how to decode UTF-8 (and ASCII). +%% +%% JSON terms are decoded as follows (javascript -> erlang): +%% <ul> +%% <li>{"key": "value"} -> +%% {struct, [{<<"key">>, <<"value">>}]}</li> +%% <li>["array", 123, 12.34, true, false, null] -> +%% [<<"array">>, 123, 12.34, true, false, null] +%% </li> +%% </ul> +%% <ul> +%% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li> +%% <li>Objects decode to {struct, PropList}</li> +%% <li>Numbers decode to integer or float</li> +%% <li>true, false, null decode to their respective terms.</li> +%% </ul> +%% The encoder will accept the same format that the decoder will produce, +%% but will also allow additional cases for leniency: +%% <ul> +%% <li>atoms other than true, false, null will be considered UTF-8 +%% strings (even as a proplist key) +%% </li> +%% <li>{json, IoList} will insert IoList directly into the output +%% with no validation +%% </li> +%% <li>{array, Array} will be encoded as Array +%% (legacy mochijson style) +%% </li> +%% <li>A non-empty raw proplist will be encoded as an object as long +%% as the first pair does not have an atom key of json, struct, +%% or array +%% </li> +%% </ul> + +-module(mochijson2). +-author('bob@mochimedia.com'). +-export([encoder/1, encode/1]). +-export([decoder/1, decode/1, decode/2]). + +%% This is a macro to placate syntax highlighters.. +-define(Q, $\"). +-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, + column=N+S#decoder.column}). +-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, + column=1+S#decoder.column}). +-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, + column=1, + line=1+S#decoder.line}). +-define(INC_CHAR(S, C), + case C of + $\n -> + S#decoder{column=1, + line=1+S#decoder.line, + offset=1+S#decoder.offset}; + _ -> + S#decoder{column=1+S#decoder.column, + offset=1+S#decoder.offset} + end). +-define(IS_WHITESPACE(C), + (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). + +%% @type json_string() = atom | binary() +%% @type json_number() = integer() | float() +%% @type json_array() = [json_term()] +%% @type json_object() = {struct, [{json_string(), json_term()}]} +%% @type json_eep18_object() = {[{json_string(), json_term()}]} +%% @type json_iolist() = {json, iolist()} +%% @type json_term() = json_string() | json_number() | json_array() | +%% json_object() | json_eep18_object() | json_iolist() + +-record(encoder, {handler=null, + utf8=false}). + +-record(decoder, {object_hook=null, + offset=0, + line=1, + column=1, + state=null}). + +%% @spec encoder([encoder_option()]) -> function() +%% @doc Create an encoder/1 with the given options. +%% @type encoder_option() = handler_option() | utf8_option() +%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false) +encoder(Options) -> + State = parse_encoder_options(Options, #encoder{}), + fun (O) -> json_encode(O, State) end. + +%% @spec encode(json_term()) -> iolist() +%% @doc Encode the given as JSON to an iolist. +encode(Any) -> + json_encode(Any, #encoder{}). + +%% @spec decoder([decoder_option()]) -> function() +%% @doc Create a decoder/1 with the given options. +decoder(Options) -> + State = parse_decoder_options(Options, #decoder{}), + fun (O) -> json_decode(O, State) end. + +%% @spec decode(iolist(), [{format, proplist | eep18 | struct}]) -> json_term() +%% @doc Decode the given iolist to Erlang terms using the given object format +%% for decoding, where proplist returns JSON objects as [{binary(), json_term()}] +%% proplists, eep18 returns JSON objects as {[binary(), json_term()]}, and struct +%% returns them as-is. +decode(S, Options) -> + json_decode(S, parse_decoder_options(Options, #decoder{})). + +%% @spec decode(iolist()) -> json_term() +%% @doc Decode the given iolist to Erlang terms. +decode(S) -> + json_decode(S, #decoder{}). + +%% Internal API + +parse_encoder_options([], State) -> + State; +parse_encoder_options([{handler, Handler} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{handler=Handler}); +parse_encoder_options([{utf8, Switch} | Rest], State) -> + parse_encoder_options(Rest, State#encoder{utf8=Switch}). + +parse_decoder_options([], State) -> + State; +parse_decoder_options([{object_hook, Hook} | Rest], State) -> + parse_decoder_options(Rest, State#decoder{object_hook=Hook}); +parse_decoder_options([{format, Format} | Rest], State) + when Format =:= struct orelse Format =:= eep18 orelse Format =:= proplist -> + parse_decoder_options(Rest, State#decoder{object_hook=Format}). + +json_encode(true, _State) -> + <<"true">>; +json_encode(false, _State) -> + <<"false">>; +json_encode(null, _State) -> + <<"null">>; +json_encode(I, _State) when is_integer(I) -> + integer_to_list(I); +json_encode(F, _State) when is_float(F) -> + mochinum:digits(F); +json_encode(S, State) when is_binary(S); is_atom(S) -> + json_encode_string(S, State); +json_encode([{K, _}|_] = Props, State) when (K =/= struct andalso + K =/= array andalso + K =/= json) -> + json_encode_proplist(Props, State); +json_encode({struct, Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode({Props}, State) when is_list(Props) -> + json_encode_proplist(Props, State); +json_encode({}, State) -> + json_encode_proplist([], State); +json_encode(Array, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode({array, Array}, State) when is_list(Array) -> + json_encode_array(Array, State); +json_encode({json, IoList}, _State) -> + IoList; +json_encode(Bad, #encoder{handler=null}) -> + exit({json_encode, {bad_term, Bad}}); +json_encode(Bad, State=#encoder{handler=Handler}) -> + json_encode(Handler(Bad), State). + +json_encode_array([], _State) -> + <<"[]">>; +json_encode_array(L, State) -> + F = fun (O, Acc) -> + [$,, json_encode(O, State) | Acc] + end, + [$, | Acc1] = lists:foldl(F, "[", L), + lists:reverse([$\] | Acc1]). + +json_encode_proplist([], _State) -> + <<"{}">>; +json_encode_proplist(Props, State) -> + F = fun ({K, V}, Acc) -> + KS = json_encode_string(K, State), + VS = json_encode(V, State), + [$,, VS, $:, KS | Acc] + end, + [$, | Acc1] = lists:foldl(F, "{", Props), + lists:reverse([$\} | Acc1]). + +json_encode_string(A, State) when is_atom(A) -> + L = atom_to_list(A), + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q]) + end; +json_encode_string(B, State) when is_binary(B) -> + case json_bin_is_safe(B) of + true -> + [?Q, B, ?Q]; + false -> + json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q]) + end; +json_encode_string(I, _State) when is_integer(I) -> + [?Q, integer_to_list(I), ?Q]; +json_encode_string(L, State) when is_list(L) -> + case json_string_is_safe(L) of + true -> + [?Q, L, ?Q]; + false -> + json_encode_string_unicode(L, State, [?Q]) + end. + +json_string_is_safe([]) -> + true; +json_string_is_safe([C | Rest]) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> + false; + C when C < 16#7f -> + json_string_is_safe(Rest); + _ -> + false + end. + +json_bin_is_safe(<<>>) -> + true; +json_bin_is_safe(<<C, Rest/binary>>) -> + case C of + ?Q -> + false; + $\\ -> + false; + $\b -> + false; + $\f -> + false; + $\n -> + false; + $\r -> + false; + $\t -> + false; + C when C >= 0, C < $\s; C >= 16#7f -> + false; + C when C < 16#7f -> + json_bin_is_safe(Rest) + end. + +json_encode_string_unicode([], _State, Acc) -> + lists:reverse([$\" | Acc]); +json_encode_string_unicode([C | Cs], State, Acc) -> + Acc1 = case C of + ?Q -> + [?Q, $\\ | Acc]; + %% Escaping solidus is only useful when trying to protect + %% against "</script>" injection attacks which are only + %% possible when JSON is inserted into a HTML document + %% in-line. mochijson2 does not protect you from this, so + %% if you do insert directly into HTML then you need to + %% uncomment the following case or escape the output of encode. + %% + %% $/ -> + %% [$/, $\\ | Acc]; + %% + $\\ -> + [$\\, $\\ | Acc]; + $\b -> + [$b, $\\ | Acc]; + $\f -> + [$f, $\\ | Acc]; + $\n -> + [$n, $\\ | Acc]; + $\r -> + [$r, $\\ | Acc]; + $\t -> + [$t, $\\ | Acc]; + C when C >= 0, C < $\s -> + [unihex(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> + [xmerl_ucs:to_utf8(C) | Acc]; + C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> + [unihex(C) | Acc]; + C when C < 16#7f -> + [C | Acc]; + _ -> + exit({json_encode, {bad_char, C}}) + end, + json_encode_string_unicode(Cs, State, Acc1). + +hexdigit(C) when C >= 0, C =< 9 -> + C + $0; +hexdigit(C) when C =< 15 -> + C + $a - 10. + +unihex(C) when C < 16#10000 -> + <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>, + Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], + [$\\, $u | Digits]; +unihex(C) when C =< 16#10FFFF -> + N = C - 16#10000, + S1 = 16#d800 bor ((N bsr 10) band 16#3ff), + S2 = 16#dc00 bor (N band 16#3ff), + [unihex(S1), unihex(S2)]. + +json_decode(L, S) when is_list(L) -> + json_decode(iolist_to_binary(L), S); +json_decode(B, S) -> + {Res, S1} = decode1(B, S), + {eof, _} = tokenize(B, S1#decoder{state=trim}), + Res. + +decode1(B, S=#decoder{state=null}) -> + case tokenize(B, S#decoder{state=any}) of + {{const, C}, S1} -> + {C, S1}; + {start_array, S1} -> + decode_array(B, S1); + {start_object, S1} -> + decode_object(B, S1) + end. + +make_object(V, #decoder{object_hook=N}) when N =:= null orelse N =:= struct -> + V; +make_object({struct, P}, #decoder{object_hook=eep18}) -> + {P}; +make_object({struct, P}, #decoder{object_hook=proplist}) -> + P; +make_object(V, #decoder{object_hook=Hook}) -> + Hook(V). + +decode_object(B, S) -> + decode_object(B, S#decoder{state=key}, []). + +decode_object(B, S=#decoder{state=key}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {{const, K}, S1} -> + {colon, S2} = tokenize(B, S1), + {V, S3} = decode1(B, S2#decoder{state=null}), + decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc]) + end; +decode_object(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_object, S1} -> + V = make_object({struct, lists:reverse(Acc)}, S1), + {V, S1#decoder{state=null}}; + {comma, S1} -> + decode_object(B, S1#decoder{state=key}, Acc) + end. + +decode_array(B, S) -> + decode_array(B, S#decoder{state=any}, []). + +decode_array(B, S=#decoder{state=any}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {start_array, S1} -> + {Array, S2} = decode_array(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {start_object, S1} -> + {Array, S2} = decode_object(B, S1), + decode_array(B, S2#decoder{state=comma}, [Array | Acc]); + {{const, Const}, S1} -> + decode_array(B, S1#decoder{state=comma}, [Const | Acc]) + end; +decode_array(B, S=#decoder{state=comma}, Acc) -> + case tokenize(B, S) of + {end_array, S1} -> + {lists:reverse(Acc), S1#decoder{state=null}}; + {comma, S1} -> + decode_array(B, S1#decoder{state=any}, Acc) + end. + +tokenize_string(B, S=#decoder{offset=O}) -> + case tokenize_string_fast(B, O) of + {escape, O1} -> + Length = O1 - O, + S1 = ?ADV_COL(S, Length), + <<_:O/binary, Head:Length/binary, _/binary>> = B, + tokenize_string(B, S1, lists:reverse(binary_to_list(Head))); + O1 -> + Length = O1 - O, + <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B, + {{const, String}, ?ADV_COL(S, Length + 1)} + end. + +tokenize_string_fast(B, O) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + O; + <<_:O/binary, $\\, _/binary>> -> + {escape, O}; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string_fast(B, 1 + O); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string_fast(B, 2 + O); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string_fast(B, 3 + O); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string_fast(B, 4 + O); + _ -> + throw(invalid_utf8) + end. + +tokenize_string(B, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, ?Q, _/binary>> -> + {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)}; + <<_:O/binary, "\\\"", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]); + <<_:O/binary, "\\\\", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]); + <<_:O/binary, "\\/", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]); + <<_:O/binary, "\\b", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]); + <<_:O/binary, "\\f", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]); + <<_:O/binary, "\\n", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]); + <<_:O/binary, "\\r", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]); + <<_:O/binary, "\\t", _/binary>> -> + tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]); + <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> -> + C = erlang:list_to_integer([C3, C2, C1, C0], 16), + if C > 16#D7FF, C < 16#DC00 -> + %% coalesce UTF-16 surrogate pair + <<"\\u", D3, D2, D1, D0, _/binary>> = Rest, + D = erlang:list_to_integer([D3,D2,D1,D0], 16), + [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer, + D:16/big-unsigned-integer>>), + Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc), + tokenize_string(B, ?ADV_COL(S, 12), Acc1); + true -> + Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc), + tokenize_string(B, ?ADV_COL(S, 6), Acc1) + end; + <<_:O/binary, C1, _/binary>> when C1 < 128 -> + tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]); + <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, + C2 >= 128, C2 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]); + <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, + C2 >= 128, C2 =< 191, + C3 >= 128, C3 =< 191, + C4 >= 128, C4 =< 191 -> + tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]); + _ -> + throw(invalid_utf8) + end. + +tokenize_number(B, S) -> + case tokenize_number(B, sign, S, []) of + {{int, Int}, S1} -> + {{const, list_to_integer(Int)}, S1}; + {{float, Float}, S1} -> + {{const, list_to_float(Float)}, S1} + end. + +tokenize_number(B, sign, S=#decoder{offset=O}, []) -> + case B of + <<_:O/binary, $-, _/binary>> -> + tokenize_number(B, int, ?INC_COL(S), [$-]); + _ -> + tokenize_number(B, int, S, []) + end; +tokenize_number(B, int, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $0, _/binary>> -> + tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]); + <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, int1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, int1, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, frac, S, Acc) + end; +tokenize_number(B, frac, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 -> + tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]); + _ -> + {{int, lists:reverse(Acc)}, S} + end; +tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]); + <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> + tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end; +tokenize_number(B, esign, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ -> + tokenize_number(B, eint, ?INC_COL(S), [C | Acc]); + _ -> + tokenize_number(B, eint, S, Acc) + end; +tokenize_number(B, eint, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]) + end; +tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) -> + case B of + <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> + tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]); + _ -> + {{float, lists:reverse(Acc)}, S} + end. + +tokenize(B, S=#decoder{offset=O}) -> + case B of + <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> + tokenize(B, ?INC_CHAR(S, C)); + <<_:O/binary, "{", _/binary>> -> + {start_object, ?INC_COL(S)}; + <<_:O/binary, "}", _/binary>> -> + {end_object, ?INC_COL(S)}; + <<_:O/binary, "[", _/binary>> -> + {start_array, ?INC_COL(S)}; + <<_:O/binary, "]", _/binary>> -> + {end_array, ?INC_COL(S)}; + <<_:O/binary, ",", _/binary>> -> + {comma, ?INC_COL(S)}; + <<_:O/binary, ":", _/binary>> -> + {colon, ?INC_COL(S)}; + <<_:O/binary, "null", _/binary>> -> + {{const, null}, ?ADV_COL(S, 4)}; + <<_:O/binary, "true", _/binary>> -> + {{const, true}, ?ADV_COL(S, 4)}; + <<_:O/binary, "false", _/binary>> -> + {{const, false}, ?ADV_COL(S, 5)}; + <<_:O/binary, "\"", _/binary>> -> + tokenize_string(B, ?INC_COL(S)); + <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9) + orelse C =:= $- -> + tokenize_number(B, S); + <<_:O/binary>> -> + trim = S#decoder.state, + {eof, S} + end. +%% +%% Tests +%% +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + + +%% testing constructs borrowed from the Yaws JSON implementation. + +%% Create an object from a list of Key/Value pairs. + +obj_new() -> + {struct, []}. + +is_obj({struct, Props}) -> + F = fun ({K, _}) when is_binary(K) -> true end, + lists:all(F, Props). + +obj_from_list(Props) -> + Obj = {struct, Props}, + ?assert(is_obj(Obj)), + Obj. + +%% Test for equivalence of Erlang terms. +%% Due to arbitrary order of construction, equivalent objects might +%% compare unequal as erlang terms, so we need to carefully recurse +%% through aggregates (tuples and objects). + +equiv({struct, Props1}, {struct, Props2}) -> + equiv_object(Props1, Props2); +equiv(L1, L2) when is_list(L1), is_list(L2) -> + equiv_list(L1, L2); +equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; +equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; +equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true. + +%% Object representation and traversal order is unknown. +%% Use the sledgehammer and sort property lists. + +equiv_object(Props1, Props2) -> + L1 = lists:keysort(1, Props1), + L2 = lists:keysort(1, Props2), + Pairs = lists:zip(L1, L2), + true = lists:all(fun({{K1, V1}, {K2, V2}}) -> + equiv(K1, K2) and equiv(V1, V2) + end, Pairs). + +%% Recursively compare tuple elements for equivalence. + +equiv_list([], []) -> + true; +equiv_list([V1 | L1], [V2 | L2]) -> + equiv(V1, V2) andalso equiv_list(L1, L2). + +decode_test() -> + [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), + <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). + +e2j_vec_test() -> + test_one(e2j_test_vec(utf8), 1). + +test_one([], _N) -> + %% io:format("~p tests passed~n", [N-1]), + ok; +test_one([{E, J} | Rest], N) -> + %% io:format("[~p] ~p ~p~n", [N, E, J]), + true = equiv(E, decode(J)), + true = equiv(E, decode(encode(E))), + test_one(Rest, 1+N). + +e2j_test_vec(utf8) -> + [ + {1, "1"}, + {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes + {-1, "-1"}, + {-3.1416, "-3.14160"}, + {12.0e10, "1.20000e+11"}, + {1.234E+10, "1.23400e+10"}, + {-1.234E-10, "-1.23400e-10"}, + {10.0, "1.0e+01"}, + {123.456, "1.23456E+2"}, + {10.0, "1e1"}, + {<<"foo">>, "\"foo\""}, + {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""}, + {<<"">>, "\"\""}, + {<<"\n\n\n">>, "\"\\n\\n\\n\""}, + {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, + {obj_new(), "{}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"}, + {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]), + "{\"foo\":\"bar\",\"baz\":123}"}, + {[], "[]"}, + {[[]], "[[]]"}, + {[1, <<"foo">>], "[1,\"foo\"]"}, + + %% json array in a json object + {obj_from_list([{<<"foo">>, [123]}]), + "{\"foo\":[123]}"}, + + %% json object in a json object + {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]), + "{\"foo\":{\"bar\":true}}"}, + + %% fold evaluation order + {obj_from_list([{<<"foo">>, []}, + {<<"bar">>, obj_from_list([{<<"baz">>, true}])}, + {<<"alice">>, <<"bob">>}]), + "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, + + %% json object in a json array + {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], + "[-123,\"foo\",{\"bar\":[]},null]"} + ]. + +%% test utf8 encoding +encoder_utf8_test() -> + %% safe conversion case (default) + [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = + encode(<<1,"\321\202\320\265\321\201\321\202">>), + + %% raw utf8 output (optional) + Enc = mochijson2:encoder([{utf8, true}]), + [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = + Enc(<<1,"\321\202\320\265\321\201\321\202">>). + +input_validation_test() -> + Good = [ + {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound + {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro + {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius + ], + lists:foreach(fun({CodePoint, UTF8}) -> + Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)), + Expect = decode(UTF8) + end, Good), + + Bad = [ + %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte + <<?Q, 16#80, ?Q>>, + %% missing continuations, last byte in each should be 80-BF + <<?Q, 16#C2, 16#7F, ?Q>>, + <<?Q, 16#E0, 16#80,16#7F, ?Q>>, + <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, + %% we don't support code points > 10FFFF per RFC 3629 + <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>, + %% escape characters trigger a different code path + <<?Q, $\\, $\n, 16#80, ?Q>> + ], + lists:foreach( + fun(X) -> + ok = try decode(X) catch invalid_utf8 -> ok end, + %% could be {ucs,{bad_utf8_character_code}} or + %% {json_encode,{bad_char,_}} + {'EXIT', _} = (catch encode(X)) + end, Bad). + +inline_json_test() -> + ?assertEqual(<<"\"iodata iodata\"">>, + iolist_to_binary( + encode({json, [<<"\"iodata">>, " iodata\""]}))), + ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, + decode( + encode({struct, + [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), + ok. + +big_unicode_test() -> + UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(UTF8Seq))), + ?assertEqual( + UTF8Seq, + decode(iolist_to_binary(encode(UTF8Seq)))), + ok. + +custom_decoder_test() -> + ?assertEqual( + {struct, [{<<"key">>, <<"value">>}]}, + (decoder([]))("{\"key\": \"value\"}")), + F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, + ?assertEqual( + win, + (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), + ok. + +atom_test() -> + %% JSON native atoms + [begin + ?assertEqual(A, decode(atom_to_list(A))), + ?assertEqual(iolist_to_binary(atom_to_list(A)), + iolist_to_binary(encode(A))) + end || A <- [true, false, null]], + %% Atom to string + ?assertEqual( + <<"\"foo\"">>, + iolist_to_binary(encode(foo))), + ?assertEqual( + <<"\"\\ud834\\udd20\"">>, + iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))), + ok. + +key_encode_test() -> + %% Some forms are accepted as keys that would not be strings in other + %% cases + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{foo, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode({struct, [{"foo", 1}]}))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{foo, 1}]))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{<<"foo">>, 1}]))), + ?assertEqual( + <<"{\"foo\":1}">>, + iolist_to_binary(encode([{"foo", 1}]))), + ?assertEqual( + <<"{\"\\ud834\\udd20\":1}">>, + iolist_to_binary( + encode({struct, [{[16#0001d120], 1}]}))), + ?assertEqual( + <<"{\"1\":1}">>, + iolist_to_binary(encode({struct, [{1, 1}]}))), + ok. + +unsafe_chars_test() -> + Chars = "\"\\\b\f\n\r\t", + [begin + ?assertEqual(false, json_string_is_safe([C])), + ?assertEqual(false, json_bin_is_safe(<<C>>)), + ?assertEqual(<<C>>, decode(encode(<<C>>))) + end || C <- Chars], + ?assertEqual( + false, + json_string_is_safe([16#0001d120])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))), + ?assertEqual( + [16#0001d120], + xmerl_ucs:from_utf8( + binary_to_list( + decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))), + ?assertEqual( + false, + json_string_is_safe([16#110000])), + ?assertEqual( + false, + json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))), + %% solidus can be escaped but isn't unsafe by default + ?assertEqual( + <<"/">>, + decode(<<"\"\\/\"">>)), + ok. + +int_test() -> + ?assertEqual(0, decode("0")), + ?assertEqual(1, decode("1")), + ?assertEqual(11, decode("11")), + ok. + +large_int_test() -> + ?assertEqual(<<"-2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(-2147483649214748364921474836492147483649))), + ?assertEqual(<<"2147483649214748364921474836492147483649">>, + iolist_to_binary(encode(2147483649214748364921474836492147483649))), + ok. + +float_test() -> + ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))), + ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))), + ok. + +handler_test() -> + ?assertEqual( + {'EXIT',{json_encode,{bad_term,{x,y}}}}, + catch encode({x,y})), + F = fun ({x,y}) -> [] end, + ?assertEqual( + <<"[]">>, + iolist_to_binary((encoder([{handler, F}]))({x, y}))), + ok. + +encode_empty_test_() -> + [{A, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(B)))} + || {A, B} <- [{"eep18 {}", {}}, + {"eep18 {[]}", {[]}}, + {"{struct, []}", {struct, []}}]]. + +encode_test_() -> + P = [{<<"k">>, <<"v">>}], + JSON = iolist_to_binary(encode({struct, P})), + [{atom_to_list(F), + ?_assertEqual(JSON, iolist_to_binary(encode(decode(JSON, [{format, F}]))))} + || F <- [struct, eep18, proplist]]. + +format_test_() -> + P = [{<<"k">>, <<"v">>}], + JSON = iolist_to_binary(encode({struct, P})), + [{atom_to_list(F), + ?_assertEqual(A, decode(JSON, [{format, F}]))} + || {F, A} <- [{struct, {struct, P}}, + {eep18, {P}}, + {proplist, P}]]. + +-endif. diff --git a/src/mochinum.erl b/src/mochinum.erl new file mode 100644 index 0000000000..4ea7a22acf --- /dev/null +++ b/src/mochinum.erl @@ -0,0 +1,358 @@ +%% This file is a copy of `mochijson2.erl' from mochiweb, revision +%% d541e9a0f36c00dcadc2e589f20e47fbf46fc76f. For the license, see +%% `LICENSE-MIT-Mochi'. + +%% @copyright 2007 Mochi Media, Inc. +%% @author Bob Ippolito <bob@mochimedia.com> + +%% @doc Useful numeric algorithms for floats that cover some deficiencies +%% in the math module. More interesting is digits/1, which implements +%% the algorithm from: +%% http://www.cs.indiana.edu/~burger/fp/index.html +%% See also "Printing Floating-Point Numbers Quickly and Accurately" +%% in Proceedings of the SIGPLAN '96 Conference on Programming Language +%% Design and Implementation. + +-module(mochinum). +-author("Bob Ippolito <bob@mochimedia.com>"). +-export([digits/1, frexp/1, int_pow/2, int_ceil/1]). + +%% IEEE 754 Float exponent bias +-define(FLOAT_BIAS, 1022). +-define(MIN_EXP, -1074). +-define(BIG_POW, 4503599627370496). + +%% External API + +%% @spec digits(number()) -> string() +%% @doc Returns a string that accurately represents the given integer or float +%% using a conservative amount of digits. Great for generating +%% human-readable output, or compact ASCII serializations for floats. +digits(N) when is_integer(N) -> + integer_to_list(N); +digits(0.0) -> + "0.0"; +digits(Float) -> + {Frac1, Exp1} = frexp_int(Float), + [Place0 | Digits0] = digits1(Float, Exp1, Frac1), + {Place, Digits} = transform_digits(Place0, Digits0), + R = insert_decimal(Place, Digits), + case Float < 0 of + true -> + [$- | R]; + _ -> + R + end. + +%% @spec frexp(F::float()) -> {Frac::float(), Exp::float()} +%% @doc Return the fractional and exponent part of an IEEE 754 double, +%% equivalent to the libc function of the same name. +%% F = Frac * pow(2, Exp). +frexp(F) -> + frexp1(unpack(F)). + +%% @spec int_pow(X::integer(), N::integer()) -> Y::integer() +%% @doc Moderately efficient way to exponentiate integers. +%% int_pow(10, 2) = 100. +int_pow(_X, 0) -> + 1; +int_pow(X, N) when N > 0 -> + int_pow(X, N, 1). + +%% @spec int_ceil(F::float()) -> integer() +%% @doc Return the ceiling of F as an integer. The ceiling is defined as +%% F when F == trunc(F); +%% trunc(F) when F < 0; +%% trunc(F) + 1 when F > 0. +int_ceil(X) -> + T = trunc(X), + case (X - T) of + Pos when Pos > 0 -> T + 1; + _ -> T + end. + + +%% Internal API + +int_pow(X, N, R) when N < 2 -> + R * X; +int_pow(X, N, R) -> + int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end). + +insert_decimal(0, S) -> + "0." ++ S; +insert_decimal(Place, S) when Place > 0 -> + L = length(S), + case Place - L of + 0 -> + S ++ ".0"; + N when N < 0 -> + {S0, S1} = lists:split(L + N, S), + S0 ++ "." ++ S1; + N when N < 6 -> + %% More places than digits + S ++ lists:duplicate(N, $0) ++ ".0"; + _ -> + insert_decimal_exp(Place, S) + end; +insert_decimal(Place, S) when Place > -6 -> + "0." ++ lists:duplicate(abs(Place), $0) ++ S; +insert_decimal(Place, S) -> + insert_decimal_exp(Place, S). + +insert_decimal_exp(Place, S) -> + [C | S0] = S, + S1 = case S0 of + [] -> + "0"; + _ -> + S0 + end, + Exp = case Place < 0 of + true -> + "e-"; + false -> + "e+" + end, + [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)). + + +digits1(Float, Exp, Frac) -> + Round = ((Frac band 1) =:= 0), + case Exp >= 0 of + true -> + BExp = 1 bsl Exp, + case (Frac =/= ?BIG_POW) of + true -> + scale((Frac * BExp * 2), 2, BExp, BExp, + Round, Round, Float); + false -> + scale((Frac * BExp * 4), 4, (BExp * 2), BExp, + Round, Round, Float) + end; + false -> + case (Exp =:= ?MIN_EXP) orelse (Frac =/= ?BIG_POW) of + true -> + scale((Frac * 2), 1 bsl (1 - Exp), 1, 1, + Round, Round, Float); + false -> + scale((Frac * 4), 1 bsl (2 - Exp), 2, 1, + Round, Round, Float) + end + end. + +scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) -> + Est = int_ceil(math:log10(abs(Float)) - 1.0e-10), + %% Note that the scheme implementation uses a 326 element look-up table + %% for int_pow(10, N) where we do not. + case Est >= 0 of + true -> + fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est, + LowOk, HighOk); + false -> + Scale = int_pow(10, -Est), + fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est, + LowOk, HighOk) + end. + +fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) -> + TooLow = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TooLow of + true -> + [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)]; + false -> + [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)] + end. + +generate(R0, S, MPlus, MMinus, LowOk, HighOk) -> + D = R0 div S, + R = R0 rem S, + TC1 = case LowOk of + true -> + R =< MMinus; + false -> + R < MMinus + end, + TC2 = case HighOk of + true -> + (R + MPlus) >= S; + false -> + (R + MPlus) > S + end, + case TC1 of + false -> + case TC2 of + false -> + [D | generate(R * 10, S, MPlus * 10, MMinus * 10, + LowOk, HighOk)]; + true -> + [D + 1] + end; + true -> + case TC2 of + false -> + [D]; + true -> + case R * 2 < S of + true -> + [D]; + false -> + [D + 1] + end + end + end. + +unpack(Float) -> + <<Sign:1, Exp:11, Frac:52>> = <<Float:64/float>>, + {Sign, Exp, Frac}. + +frexp1({_Sign, 0, 0}) -> + {0.0, 0}; +frexp1({Sign, 0, Frac}) -> + Exp = log2floor(Frac), + <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, (Frac-1):52>>, + {Frac1, -(?FLOAT_BIAS) - 52 + Exp}; +frexp1({Sign, Exp, Frac}) -> + <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, Frac:52>>, + {Frac1, Exp - ?FLOAT_BIAS}. + +log2floor(Int) -> + log2floor(Int, 0). + +log2floor(0, N) -> + N; +log2floor(Int, N) -> + log2floor(Int bsr 1, 1 + N). + + +transform_digits(Place, [0 | Rest]) -> + transform_digits(Place, Rest); +transform_digits(Place, Digits) -> + {Place, [$0 + D || D <- Digits]}. + + +frexp_int(F) -> + case unpack(F) of + {_Sign, 0, Frac} -> + {Frac, ?MIN_EXP}; + {_Sign, Exp, Frac} -> + {Frac + (1 bsl 52), Exp - 53 - ?FLOAT_BIAS} + end. + +%% +%% Tests +%% +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +int_ceil_test() -> + ?assertEqual(1, int_ceil(0.0001)), + ?assertEqual(0, int_ceil(0.0)), + ?assertEqual(1, int_ceil(0.99)), + ?assertEqual(1, int_ceil(1.0)), + ?assertEqual(-1, int_ceil(-1.5)), + ?assertEqual(-2, int_ceil(-2.0)), + ok. + +int_pow_test() -> + ?assertEqual(1, int_pow(1, 1)), + ?assertEqual(1, int_pow(1, 0)), + ?assertEqual(1, int_pow(10, 0)), + ?assertEqual(10, int_pow(10, 1)), + ?assertEqual(100, int_pow(10, 2)), + ?assertEqual(1000, int_pow(10, 3)), + ok. + +digits_test() -> + ?assertEqual("0", + digits(0)), + ?assertEqual("0.0", + digits(0.0)), + ?assertEqual("1.0", + digits(1.0)), + ?assertEqual("-1.0", + digits(-1.0)), + ?assertEqual("0.1", + digits(0.1)), + ?assertEqual("0.01", + digits(0.01)), + ?assertEqual("0.001", + digits(0.001)), + ?assertEqual("1.0e+6", + digits(1000000.0)), + ?assertEqual("0.5", + digits(0.5)), + ?assertEqual("4503599627370496.0", + digits(4503599627370496.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 =:= 5.0e-324 + <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual("5.0e-324", + digits(SmallDenorm)), + ?assertEqual(SmallDenorm, + list_to_float(digits(SmallDenorm))), + %% large denormalized number + %% 2.22507385850720088902e-308 + <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual("2.225073858507201e-308", + digits(BigDenorm)), + ?assertEqual(BigDenorm, + list_to_float(digits(BigDenorm))), + %% small normalized number + %% 2.22507385850720138309e-308 + <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual("2.2250738585072014e-308", + digits(SmallNorm)), + ?assertEqual(SmallNorm, + list_to_float(digits(SmallNorm))), + %% large normalized number + %% 1.79769313486231570815e+308 + <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual("1.7976931348623157e+308", + digits(LargeNorm)), + ?assertEqual(LargeNorm, + list_to_float(digits(LargeNorm))), + %% issue #10 - mochinum:frexp(math:pow(2, -1074)). + ?assertEqual("5.0e-324", + digits(math:pow(2, -1074))), + ok. + +frexp_test() -> + %% zero + ?assertEqual({0.0, 0}, frexp(0.0)), + %% one + ?assertEqual({0.5, 1}, frexp(1.0)), + %% negative one + ?assertEqual({-0.5, 1}, frexp(-1.0)), + %% small denormalized number + %% 4.94065645841246544177e-324 + <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>, + ?assertEqual({0.5, -1073}, frexp(SmallDenorm)), + %% large denormalized number + %% 2.22507385850720088902e-308 + <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>, + ?assertEqual( + {0.99999999999999978, -1022}, + frexp(BigDenorm)), + %% small normalized number + %% 2.22507385850720138309e-308 + <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>, + ?assertEqual({0.5, -1021}, frexp(SmallNorm)), + %% large normalized number + %% 1.79769313486231570815e+308 + <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>, + ?assertEqual( + {0.99999999999999989, 1024}, + frexp(LargeNorm)), + %% issue #10 - mochinum:frexp(math:pow(2, -1074)). + ?assertEqual( + {0.5, -1073}, + frexp(math:pow(2, -1074))), + ok. + +-endif. diff --git a/src/pmon.erl b/src/pmon.erl index 457865774b..1aeebb72bc 100644 --- a/src/pmon.erl +++ b/src/pmon.erl @@ -27,37 +27,39 @@ -opaque(?MODULE() :: dict()). +-type(item() :: pid() | {atom(), node()}). + -spec(new/0 :: () -> ?MODULE()). --spec(monitor/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(monitor_all/2 :: ([pid()], ?MODULE()) -> ?MODULE()). --spec(demonitor/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(is_monitored/2 :: (pid(), ?MODULE()) -> boolean()). --spec(erase/2 :: (pid(), ?MODULE()) -> ?MODULE()). --spec(monitored/1 :: (?MODULE()) -> [pid()]). +-spec(monitor/2 :: (item(), ?MODULE()) -> ?MODULE()). +-spec(monitor_all/2 :: ([item()], ?MODULE()) -> ?MODULE()). +-spec(demonitor/2 :: (item(), ?MODULE()) -> ?MODULE()). +-spec(is_monitored/2 :: (item(), ?MODULE()) -> boolean()). +-spec(erase/2 :: (item(), ?MODULE()) -> ?MODULE()). +-spec(monitored/1 :: (?MODULE()) -> [item()]). -spec(is_empty/1 :: (?MODULE()) -> boolean()). -endif. new() -> dict:new(). -monitor(Pid, M) -> - case dict:is_key(Pid, M) of +monitor(Item, M) -> + case dict:is_key(Item, M) of true -> M; - false -> dict:store(Pid, erlang:monitor(process, Pid), M) + false -> dict:store(Item, erlang:monitor(process, Item), M) end. -monitor_all(Pids, M) -> lists:foldl(fun monitor/2, M, Pids). +monitor_all(Items, M) -> lists:foldl(fun monitor/2, M, Items). -demonitor(Pid, M) -> - case dict:find(Pid, M) of +demonitor(Item, M) -> + case dict:find(Item, M) of {ok, MRef} -> erlang:demonitor(MRef), - dict:erase(Pid, M); + dict:erase(Item, M); error -> M end. -is_monitored(Pid, M) -> dict:is_key(Pid, M). +is_monitored(Item, M) -> dict:is_key(Item, M). -erase(Pid, M) -> dict:erase(Pid, M). +erase(Item, M) -> dict:erase(Item, M). monitored(M) -> dict:fetch_keys(M). diff --git a/src/rabbit.erl b/src/rabbit.erl index fda489fe61..69f77824f6 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -20,7 +20,8 @@ -export([start/0, boot/0, stop/0, stop_and_halt/0, await_startup/0, status/0, is_running/0, - is_running/1, environment/0, rotate_logs/1, force_event_refresh/0]). + is_running/1, environment/0, rotate_logs/1, force_event_refresh/0, + start_fhc/0]). -export([start/2, stop/1]). @@ -53,8 +54,7 @@ -rabbit_boot_step({file_handle_cache, [{description, "file handle cache server"}, - {mfa, {rabbit_sup, start_restartable_child, - [file_handle_cache]}}, + {mfa, {rabbit, start_fhc, []}}, {requires, pre_boot}, {enables, worker_pool}]}). @@ -176,7 +176,7 @@ -rabbit_boot_step({notify_cluster, [{description, "notify cluster nodes"}, - {mfa, {rabbit_node_monitor, notify_cluster, []}}, + {mfa, {rabbit_node_monitor, notify_node_up, []}}, {requires, networking}]}). %%--------------------------------------------------------------------------- @@ -301,7 +301,10 @@ start() -> %% mnesia after just restarting the app ok = ensure_application_loaded(), ok = ensure_working_log_handlers(), - ok = app_utils:start_applications(app_startup_order()), + rabbit_node_monitor:prepare_cluster_status_files(), + rabbit_mnesia:check_cluster_consistency(), + ok = app_utils:start_applications( + app_startup_order(), fun handle_app_error/2), ok = print_plugin_info(rabbit_plugins:active()) end). @@ -310,26 +313,43 @@ boot() -> ok = ensure_application_loaded(), maybe_hipe_compile(), ok = ensure_working_log_handlers(), + rabbit_node_monitor:prepare_cluster_status_files(), ok = rabbit_upgrade:maybe_upgrade_mnesia(), + %% It's important that the consistency check happens after + %% the upgrade, since if we are a secondary node the + %% primary node will have forgotten us + rabbit_mnesia:check_cluster_consistency(), Plugins = rabbit_plugins:setup(), ToBeLoaded = Plugins ++ ?APPS, ok = app_utils:load_applications(ToBeLoaded), StartupApps = app_utils:app_dependency_order(ToBeLoaded, false), - ok = app_utils:start_applications(StartupApps), + ok = app_utils:start_applications( + StartupApps, fun handle_app_error/2), ok = print_plugin_info(Plugins) end). +handle_app_error(App, {bad_return, {_MFA, {'EXIT', {Reason, _}}}}) -> + throw({could_not_start, App, Reason}); + +handle_app_error(App, Reason) -> + throw({could_not_start, App, Reason}). + start_it(StartFun) -> try StartFun() + catch + throw:{could_not_start, _App, _Reason}=Err -> + boot_error(Err, not_available); + _:Reason -> + boot_error(Reason, erlang:get_stacktrace()) after %% give the error loggers some time to catch up timer:sleep(100) end. stop() -> - rabbit_log:info("Stopping Rabbit~n"), + rabbit_log:info("Stopping RabbitMQ~n"), ok = app_utils:stop_applications(app_shutdown_order()). stop_and_halt() -> @@ -349,7 +369,7 @@ status() -> {running_applications, application:which_applications(infinity)}, {os, os:type()}, {erlang_version, erlang:system_info(system_version)}, - {memory, erlang:memory()}], + {memory, rabbit_vm:memory()}], S2 = rabbit_misc:filter_exit_map( fun ({Key, {M, F, A}}) -> {Key, erlang:apply(M, F, A)} end, [{vm_memory_high_watermark, {vm_memory_monitor, @@ -397,6 +417,9 @@ rotate_logs(BinarySuffix) -> start(normal, []) -> case erts_version_check() of ok -> + {ok, Vsn} = application:get_key(rabbit, vsn), + error_logger:info_msg("Starting RabbitMQ ~s on Erlang ~s~n", + [Vsn, erlang:system_info(otp_release)]), {ok, SupPid} = rabbit_sup:start_link(), true = register(rabbit, self()), print_banner(), @@ -408,12 +431,11 @@ start(normal, []) -> end. stop(_State) -> - ok = rabbit_mnesia:record_running_nodes(), terminated_ok = error_logger:delete_report_handler(rabbit_error_logger), ok = rabbit_alarm:stop(), ok = case rabbit_mnesia:is_clustered() of true -> rabbit_amqqueue:on_node_down(node()); - false -> rabbit_mnesia:empty_ram_only_tables() + false -> rabbit_table:clear_ram_only_tables() end, ok. @@ -444,7 +466,7 @@ run_boot_step({StepName, Attributes}) -> [try apply(M,F,A) catch - _:Reason -> boot_step_error(Reason, erlang:get_stacktrace()) + _:Reason -> boot_error(Reason, erlang:get_stacktrace()) end || {M,F,A} <- MFAs], io:format("done~n"), ok @@ -483,14 +505,17 @@ sort_boot_steps(UnsortedSteps) -> {mfa, {M,F,A}} <- Attributes, not erlang:function_exported(M, F, length(A))] of [] -> SortedSteps; - MissingFunctions -> boot_error( + MissingFunctions -> basic_boot_error( + {missing_functions, MissingFunctions}, "Boot step functions not exported: ~p~n", [MissingFunctions]) end; {error, {vertex, duplicate, StepName}} -> - boot_error("Duplicate boot step name: ~w~n", [StepName]); + basic_boot_error({duplicate_boot_step, StepName}, + "Duplicate boot step name: ~w~n", [StepName]); {error, {edge, Reason, From, To}} -> - boot_error( + basic_boot_error( + {invalid_boot_step_dependency, From, To}, "Could not add boot step dependency of ~w on ~w:~n~s", [To, From, case Reason of @@ -504,30 +529,38 @@ sort_boot_steps(UnsortedSteps) -> end]) end. -boot_step_error({error, {timeout_waiting_for_tables, _}}, _Stacktrace) -> +boot_error(Term={error, {timeout_waiting_for_tables, _}}, _Stacktrace) -> + AllNodes = rabbit_mnesia:cluster_nodes(all), {Err, Nodes} = - case rabbit_mnesia:read_previously_running_nodes() of + case AllNodes -- [node()] of [] -> {"Timeout contacting cluster nodes. Since RabbitMQ was" " shut down forcefully~nit cannot determine which nodes" - " are timing out. Details on all nodes will~nfollow.~n", - rabbit_mnesia:all_clustered_nodes() -- [node()]}; + " are timing out.~n", []}; Ns -> {rabbit_misc:format( "Timeout contacting cluster nodes: ~p.~n", [Ns]), Ns} end, - boot_error(Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); - -boot_step_error(Reason, Stacktrace) -> - boot_error("Error description:~n ~p~n~n" - "Log files (may contain more information):~n ~s~n ~s~n~n" - "Stack trace:~n ~p~n~n", - [Reason, log_location(kernel), log_location(sasl), Stacktrace]). + basic_boot_error(Term, + Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); +boot_error(Reason, Stacktrace) -> + Fmt = "Error description:~n ~p~n~n" ++ + "Log files (may contain more information):~n ~s~n ~s~n~n", + Args = [Reason, log_location(kernel), log_location(sasl)], + boot_error(Reason, Fmt, Args, Stacktrace). + +boot_error(Reason, Fmt, Args, Stacktrace) -> + case Stacktrace of + not_available -> basic_boot_error(Reason, Fmt, Args); + _ -> basic_boot_error(Reason, Fmt ++ + "Stack trace:~n ~p~n~n", + Args ++ [Stacktrace]) + end. -boot_error(Format, Args) -> +basic_boot_error(Reason, Format, Args) -> io:format("~n~nBOOT FAILED~n===========~n~n" ++ Format, Args), - error_logger:error_msg(Format, Args), + rabbit_misc:local_info_msg(Format, Args), timer:sleep(1000), - exit({?MODULE, failure_during_boot}). + exit({?MODULE, failure_during_boot, Reason}). %%--------------------------------------------------------------------------- %% boot step functions @@ -540,7 +573,7 @@ recover() -> rabbit_binding:recover(rabbit_exchange:recover(), rabbit_amqqueue:start()). maybe_insert_default_data() -> - case rabbit_mnesia:is_db_empty() of + case rabbit_table:is_empty() of true -> insert_default_data(); false -> ok end. @@ -730,3 +763,10 @@ config_files() -> [File] <- Files]; error -> [] end. + +%% We don't want this in fhc since it references rabbit stuff. And we can't put +%% this in the bootstep directly. +start_fhc() -> + rabbit_sup:start_restartable_child( + file_handle_cache, + [fun rabbit_alarm:set_alarm/1, fun rabbit_alarm:clear_alarm/1]). diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl index d16d90a45d..e6625b2b90 100644 --- a/src/rabbit_alarm.erl +++ b/src/rabbit_alarm.erl @@ -18,22 +18,28 @@ -behaviour(gen_event). --export([start/0, stop/0, register/2, on_node_up/1, on_node_down/1]). +-export([start_link/0, start/0, stop/0, register/2, set_alarm/1, + clear_alarm/1, get_alarms/0, on_node_up/1, on_node_down/1]). -export([init/1, handle_call/2, handle_event/2, handle_info/2, terminate/2, code_change/3]). -export([remote_conserve_resources/3]). %% Internal use only --record(alarms, {alertees, alarmed_nodes}). +-define(SERVER, ?MODULE). + +-record(alarms, {alertees, alarmed_nodes, alarms}). %%---------------------------------------------------------------------------- -ifdef(use_specs). +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(register/2 :: (pid(), rabbit_types:mfargs()) -> boolean()). +-spec(set_alarm/1 :: (any()) -> 'ok'). +-spec(clear_alarm/1 :: (any()) -> 'ok'). -spec(on_node_up/1 :: (node()) -> 'ok'). -spec(on_node_down/1 :: (node()) -> 'ok'). @@ -41,59 +47,70 @@ %%---------------------------------------------------------------------------- +start_link() -> + gen_event:start_link({local, ?SERVER}). + start() -> - ok = alarm_handler:add_alarm_handler(?MODULE, []), + ok = rabbit_sup:start_restartable_child(?MODULE), + ok = gen_event:add_handler(?SERVER, ?MODULE, []), {ok, MemoryWatermark} = application:get_env(vm_memory_high_watermark), - rabbit_sup:start_restartable_child(vm_memory_monitor, [MemoryWatermark]), - + rabbit_sup:start_restartable_child( + vm_memory_monitor, [MemoryWatermark, fun rabbit_alarm:set_alarm/1, + fun rabbit_alarm:clear_alarm/1]), {ok, DiskLimit} = application:get_env(disk_free_limit), rabbit_sup:start_restartable_child(rabbit_disk_monitor, [DiskLimit]), ok. -stop() -> - ok = alarm_handler:delete_alarm_handler(?MODULE). +stop() -> ok. register(Pid, HighMemMFA) -> - gen_event:call(alarm_handler, ?MODULE, - {register, Pid, HighMemMFA}, + gen_event:call(?SERVER, ?MODULE, {register, Pid, HighMemMFA}, infinity). -on_node_up(Node) -> gen_event:notify(alarm_handler, {node_up, Node}). +set_alarm(Alarm) -> gen_event:notify(?SERVER, {set_alarm, Alarm}). +clear_alarm(Alarm) -> gen_event:notify(?SERVER, {clear_alarm, Alarm}). + +get_alarms() -> gen_event:call(?SERVER, ?MODULE, get_alarms, infinity). -on_node_down(Node) -> gen_event:notify(alarm_handler, {node_down, Node}). +on_node_up(Node) -> gen_event:notify(?SERVER, {node_up, Node}). +on_node_down(Node) -> gen_event:notify(?SERVER, {node_down, Node}). -%% Can't use alarm_handler:{set,clear}_alarm because that doesn't -%% permit notifying a remote node. remote_conserve_resources(Pid, Source, true) -> - gen_event:notify({alarm_handler, node(Pid)}, + gen_event:notify({?SERVER, node(Pid)}, {set_alarm, {{resource_limit, Source, node()}, []}}); remote_conserve_resources(Pid, Source, false) -> - gen_event:notify({alarm_handler, node(Pid)}, + gen_event:notify({?SERVER, node(Pid)}, {clear_alarm, {resource_limit, Source, node()}}). + %%---------------------------------------------------------------------------- init([]) -> {ok, #alarms{alertees = dict:new(), - alarmed_nodes = dict:new()}}. + alarmed_nodes = dict:new(), + alarms = []}}. handle_call({register, Pid, HighMemMFA}, State) -> {ok, 0 < dict:size(State#alarms.alarmed_nodes), internal_register(Pid, HighMemMFA, State)}; +handle_call(get_alarms, State = #alarms{alarms = Alarms}) -> + {ok, Alarms, State}; + handle_call(_Request, State) -> {ok, not_understood, State}. -handle_event({set_alarm, {{resource_limit, Source, Node}, []}}, State) -> - {ok, maybe_alert(fun dict:append/3, Node, Source, State)}; +handle_event({set_alarm, Alarm}, State = #alarms{alarms = Alarms}) -> + handle_set_alarm(Alarm, State#alarms{alarms = [Alarm|Alarms]}); -handle_event({clear_alarm, {resource_limit, Source, Node}}, State) -> - {ok, maybe_alert(fun dict_unappend/3, Node, Source, State)}; +handle_event({clear_alarm, Alarm}, State = #alarms{alarms = Alarms}) -> + handle_clear_alarm(Alarm, State#alarms{alarms = lists:keydelete(Alarm, 1, + Alarms)}); handle_event({node_up, Node}, State) -> %% Must do this via notify and not call to avoid possible deadlock. ok = gen_event:notify( - {alarm_handler, Node}, + {?SERVER, Node}, {register, self(), {?MODULE, remote_conserve_resources, []}}), {ok, State}; @@ -186,3 +203,25 @@ internal_register(Pid, {M, F, A} = HighMemMFA, end, NewAlertees = dict:store(Pid, HighMemMFA, Alertees), State#alarms{alertees = NewAlertees}. + +handle_set_alarm({{resource_limit, Source, Node}, []}, State) -> + rabbit_log:warning("~s resource limit alarm set on node ~p~n", + [Source, Node]), + {ok, maybe_alert(fun dict:append/3, Node, Source, State)}; +handle_set_alarm({file_descriptor_limit, []}, State) -> + rabbit_log:warning("file descriptor limit alarm set~n"), + {ok, State}; +handle_set_alarm(Alarm, State) -> + rabbit_log:warning("alarm '~p' set~n", [Alarm]), + {ok, State}. + +handle_clear_alarm({resource_limit, Source, Node}, State) -> + rabbit_log:warning("~s resource limit alarm cleared on node ~p~n", + [Source, Node]), + {ok, maybe_alert(fun dict_unappend/3, Node, Source, State)}; +handle_clear_alarm(file_descriptor_limit, State) -> + rabbit_log:warning("file descriptor limit alarm cleared~n"), + {ok, State}; +handle_clear_alarm(Alarm, State) -> + rabbit_log:warning("alarm '~p' cleared~n", [Alarm]), + {ok, State}. diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index afbaea651b..6ad85b24f5 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -22,14 +22,14 @@ check_exclusive_access/2, with_exclusive_access_or_die/3, stat/1, deliver/2, deliver_flow/2, requeue/3, ack/3, reject/4]). -export([list/0, list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). --export([force_event_refresh/0]). +-export([force_event_refresh/0, wake_up/1]). -export([consumers/1, consumers_all/1, consumer_info_keys/0]). -export([basic_get/3, basic_consume/7, basic_cancel/4]). -export([notify_sent/2, notify_sent_queue_down/1, unblock/2, flush_all/2]). -export([notify_down_all/2, limit_all/3]). -export([on_node_down/1]). -export([update/2, store_queue/1, policy_changed/2]). - +-export([start_mirroring/1, stop_mirroring/1]). %% internal -export([internal_declare/2, internal_delete/2, run_backing_queue/3, @@ -40,6 +40,8 @@ -define(INTEGER_ARG_TYPES, [byte, short, signedint, long]). +-define(MAX_EXPIRY_TIMER, 4294967295). + -define(MORE_CONSUMER_CREDIT_AFTER, 50). -define(FAILOVER_WAIT_MILLIS, 100). @@ -58,7 +60,7 @@ -type(msg_id() :: non_neg_integer()). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). --type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). +-type(routing_result() :: 'routed' | 'unroutable'). -type(queue_or_not_found() :: rabbit_types:amqqueue() | 'not_found'). -spec(start/0 :: () -> [name()]). @@ -102,6 +104,7 @@ -spec(info_all/2 :: (rabbit_types:vhost(), rabbit_types:info_keys()) -> [rabbit_types:infos()]). -spec(force_event_refresh/0 :: () -> 'ok'). +-spec(wake_up/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(consumers/1 :: (rabbit_types:amqqueue()) -> [{pid(), rabbit_types:ctag(), boolean()}]). @@ -162,6 +165,8 @@ -spec(store_queue/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(policy_changed/2 :: (rabbit_types:amqqueue(), rabbit_types:amqqueue()) -> 'ok'). +-spec(start_mirroring/1 :: (pid()) -> 'ok'). +-spec(stop_mirroring/1 :: (pid()) -> 'ok'). -endif. @@ -207,18 +212,20 @@ recover_durable_queues(DurableQueues) -> declare(QueueName, Durable, AutoDelete, Args, Owner) -> ok = check_declare_arguments(QueueName, Args), - {Node, MNodes} = determine_queue_nodes(Args), - Q = start_queue_process(Node, #amqqueue{name = QueueName, - durable = Durable, - auto_delete = AutoDelete, - arguments = Args, - exclusive_owner = Owner, - pid = none, - slave_pids = [], - mirror_nodes = MNodes}), - case gen_server2:call(Q#amqqueue.pid, {init, false}, infinity) of + Q0 = rabbit_policy:set(#amqqueue{name = QueueName, + durable = Durable, + auto_delete = AutoDelete, + arguments = Args, + exclusive_owner = Owner, + pid = none, + slave_pids = [], + sync_slave_pids = [], + gm_pids = []}), + {Node, _MNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q0), + Q1 = start_queue_process(Node, Q0), + case gen_server2:call(Q1#amqqueue.pid, {init, false}, infinity) of not_found -> rabbit_misc:not_found(QueueName); - Q1 -> Q1 + Q2 -> Q2 end. internal_declare(Q, true) -> @@ -267,24 +274,8 @@ store_queue(Q = #amqqueue{durable = false}) -> ok = mnesia:write(rabbit_queue, Q, write), ok. -policy_changed(_Q1, _Q2) -> - ok. - -determine_queue_nodes(Args) -> - Policy = rabbit_misc:table_lookup(Args, <<"x-ha-policy">>), - PolicyParams = rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>), - case {Policy, PolicyParams} of - {{_Type, <<"nodes">>}, {array, Nodes}} -> - case [list_to_atom(binary_to_list(Node)) || - {longstr, Node} <- Nodes] of - [Node] -> {Node, undefined}; - [First | Rest] -> {First, [First | Rest]} - end; - {{_Type, <<"all">>}, _} -> - {node(), all}; - _ -> - {node(), undefined} - end. +policy_changed(Q1, Q2) -> + rabbit_mirror_queue_misc:update_mirrors(Q1, Q2). start_queue_process(Node, Q) -> {ok, Pid} = rabbit_amqqueue_sup:start_child(Node, [Q]), @@ -307,10 +298,17 @@ lookup(Name) -> with(Name, F, E) -> case lookup(Name) of - {ok, Q = #amqqueue{slave_pids = []}} -> - rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); - {ok, Q} -> - E1 = fun () -> timer:sleep(25), with(Name, F, E) end, + {ok, Q = #amqqueue{pid = QPid}} -> + %% We check is_process_alive(QPid) in case we receive a + %% nodedown (for example) in F() that has nothing to do + %% with the QPid. + E1 = fun () -> + case rabbit_misc:is_process_alive(QPid) of + true -> E(); + false -> timer:sleep(25), + with(Name, F, E) + end + end, rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end); {error, not_found} -> E() @@ -351,13 +349,11 @@ with_exclusive_access_or_die(Name, ReaderPid, F) -> assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, RequiredArgs) -> rabbit_misc:assert_args_equivalence( - Args, RequiredArgs, QueueName, - [<<"x-expires">>, <<"x-message-ttl">>, <<"x-ha-policy">>]). + Args, RequiredArgs, QueueName, [<<"x-expires">>, <<"x-message-ttl">>]). check_declare_arguments(QueueName, Args) -> Checks = [{<<"x-expires">>, fun check_positive_int_arg/2}, {<<"x-message-ttl">>, fun check_non_neg_int_arg/2}, - {<<"x-ha-policy">>, fun check_ha_policy_arg/2}, {<<"x-dead-letter-exchange">>, fun check_string_arg/2}, {<<"x-dead-letter-routing-key">>, fun check_dlxrk_arg/2}], [case rabbit_misc:table_lookup(Args, Key) of @@ -386,16 +382,18 @@ check_int_arg({Type, _}, _) -> check_positive_int_arg({Type, Val}, Args) -> case check_int_arg({Type, Val}, Args) of - ok when Val > 0 -> ok; - ok -> {error, {value_zero_or_less, Val}}; - Error -> Error + ok when Val > ?MAX_EXPIRY_TIMER -> {error, {value_too_big, Val}}; + ok when Val > 0 -> ok; + ok -> {error, {value_zero_or_less, Val}}; + Error -> Error end. check_non_neg_int_arg({Type, Val}, Args) -> case check_int_arg({Type, Val}, Args) of - ok when Val >= 0 -> ok; - ok -> {error, {value_less_than_zero, Val}}; - Error -> Error + ok when Val > ?MAX_EXPIRY_TIMER -> {error, {value_too_big, Val}}; + ok when Val >= 0 -> ok; + ok -> {error, {value_less_than_zero, Val}}; + Error -> Error end. check_dlxrk_arg({longstr, _}, Args) -> @@ -406,29 +404,6 @@ check_dlxrk_arg({longstr, _}, Args) -> check_dlxrk_arg({Type, _}, _Args) -> {error, {unacceptable_type, Type}}. -check_ha_policy_arg({longstr, <<"all">>}, _Args) -> - ok; -check_ha_policy_arg({longstr, <<"nodes">>}, Args) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>) of - undefined -> - {error, {require, 'x-ha-policy-params'}}; - {array, []} -> - {error, {require_non_empty_list_of_nodes_for_ha}}; - {array, Ary} -> - case lists:all(fun ({longstr, _Node}) -> true; - (_ ) -> false - end, Ary) of - true -> ok; - false -> {error, {require_node_list_as_longstrs_for_ha, Ary}} - end; - {Type, _} -> - {error, {ha_nodes_policy_params_not_array_of_longstr, Type}} - end; -check_ha_policy_arg({longstr, Policy}, _Args) -> - {error, {invalid_ha_policy, Policy}}; -check_ha_policy_arg({Type, _}, _Args) -> - {error, {unacceptable_type, Type}}. - list() -> mnesia:dirty_match_object(rabbit_queue, #amqqueue{_ = '_'}). @@ -475,6 +450,8 @@ force_event_refresh(QNames) -> force_event_refresh(Failed) end. +wake_up(#amqqueue{pid = QPid}) -> gen_server2:cast(QPid, wake_up). + consumers(#amqqueue{ pid = QPid }) -> delegate_call(QPid, consumers). @@ -561,7 +538,12 @@ flush_all(QPids, ChPid) -> internal_delete1(QueueName) -> ok = mnesia:delete({rabbit_queue, QueueName}), - ok = mnesia:delete({rabbit_durable_queue, QueueName}), + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:wread({rabbit_durable_queue, QueueName}) of + [] -> ok; + [_] -> ok = mnesia:delete({rabbit_durable_queue, QueueName}) + end, %% we want to execute some things, as decided by rabbit_exchange, %% after the transaction. rabbit_binding:remove_for_destination(QueueName). @@ -591,6 +573,9 @@ set_ram_duration_target(QPid, Duration) -> set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). +start_mirroring(QPid) -> ok = delegate_call(QPid, start_mirroring). +stop_mirroring(QPid) -> ok = delegate_call(QPid, stop_mirroring). + on_node_down(Node) -> rabbit_misc:execute_mnesia_tx_with_tail( fun () -> QsDels = @@ -599,7 +584,7 @@ on_node_down(Node) -> slave_pids = []} <- mnesia:table(rabbit_queue), node(Pid) == Node andalso - not is_process_alive(Pid)])), + not rabbit_misc:is_process_alive(Pid)])), {Qs, Dels} = lists:unzip(QsDels), T = rabbit_binding:process_deletions( lists:foldl(fun rabbit_binding:combine_deletions/2, @@ -625,60 +610,76 @@ pseudo_queue(QueueName, Pid) -> auto_delete = false, arguments = [], pid = Pid, - slave_pids = [], - mirror_nodes = undefined}. + slave_pids = []}. -deliver([], #delivery{mandatory = false, immediate = false}, _Flow) -> +deliver([], #delivery{mandatory = false}, _Flow) -> %% /dev/null optimisation {routed, []}; -deliver(Qs, Delivery = #delivery{mandatory = false, immediate = false}, Flow) -> - %% optimisation: when Mandatory = false and Immediate = false, - %% rabbit_amqqueue:deliver will deliver the message to the queue - %% process asynchronously, and return true, which means all the - %% QPids will always be returned. It is therefore safe to use a - %% fire-and-forget cast here and return the QPids - the semantics - %% is preserved. This scales much better than the non-immediate - %% case below. - QPids = qpids(Qs), +deliver(Qs, Delivery = #delivery{mandatory = false}, Flow) -> + %% optimisation: when Mandatory = false, rabbit_amqqueue:deliver + %% will deliver the message to the queue process asynchronously, + %% and return true, which means all the QPids will always be + %% returned. It is therefore safe to use a fire-and-forget cast + %% here and return the QPids - the semantics is preserved. This + %% scales much better than the case below. + {MPids, SPids} = qpids(Qs), + QPids = MPids ++ SPids, case Flow of flow -> [credit_flow:send(QPid) || QPid <- QPids]; noflow -> ok end, - delegate:invoke_no_result( - QPids, fun (QPid) -> - gen_server2:cast(QPid, {deliver, Delivery, Flow}) - end), + + %% We let slaves know that they were being addressed as slaves at + %% the time - if they receive such a message from the channel + %% after they have become master they should mark the message as + %% 'delivered' since they do not know what the master may have + %% done with it. + MMsg = {deliver, Delivery, false, Flow}, + SMsg = {deliver, Delivery, true, Flow}, + delegate:invoke_no_result(MPids, + fun (QPid) -> gen_server2:cast(QPid, MMsg) end), + delegate:invoke_no_result(SPids, + fun (QPid) -> gen_server2:cast(QPid, SMsg) end), {routed, QPids}; -deliver(Qs, Delivery = #delivery{mandatory = Mandatory, immediate = Immediate}, - _Flow) -> - QPids = qpids(Qs), - {Success, _} = - delegate:invoke( - QPids, fun (QPid) -> - gen_server2:call(QPid, {deliver, Delivery}, infinity) - end), - case {Mandatory, Immediate, - lists:foldl(fun ({QPid, true}, {_, H}) -> {true, [QPid | H]}; - ({_, false}, {_, H}) -> {true, H} - end, {false, []}, Success)} of - {true, _ , {false, []}} -> {unroutable, []}; - {_ , true, {_ , []}} -> {not_delivered, []}; - {_ , _ , {_ , R}} -> {routed, R} +deliver(Qs, Delivery, _Flow) -> + {MPids, SPids} = qpids(Qs), + %% see comment above + MMsg = {deliver, Delivery, false}, + SMsg = {deliver, Delivery, true}, + {MRouted, _} = delegate:invoke( + MPids, fun (QPid) -> + ok = gen_server2:call(QPid, MMsg, infinity) + end), + {SRouted, _} = delegate:invoke( + SPids, fun (QPid) -> + ok = gen_server2:call(QPid, SMsg, infinity) + end), + case MRouted ++ SRouted of + [] -> {unroutable, []}; + R -> {routed, [QPid || {QPid, ok} <- R]} end. -qpids(Qs) -> lists:append([[QPid | SPids] || - #amqqueue{pid = QPid, slave_pids = SPids} <- Qs]). +qpids(Qs) -> + {MPids, SPids} = lists:foldl(fun (#amqqueue{pid = QPid, slave_pids = SPids}, + {MPidAcc, SPidAcc}) -> + {[QPid | MPidAcc], [SPids | SPidAcc]} + end, {[], []}, Qs), + {MPids, lists:append(SPids)}. safe_delegate_call_ok(F, Pids) -> - case delegate:invoke(Pids, fun (Pid) -> - rabbit_misc:with_exit_handler( - fun () -> ok end, - fun () -> F(Pid) end) - end) of - {_, []} -> ok; - {_, Bad} -> {error, Bad} + {_, Bads} = delegate:invoke(Pids, fun (Pid) -> + rabbit_misc:with_exit_handler( + fun () -> ok end, + fun () -> F(Pid) end) + end), + case lists:filter( + fun ({_Pid, {exit, {R, _}, _}}) -> rabbit_misc:is_abnormal_exit(R); + ({_Pid, _}) -> false + end, Bads) of + [] -> ok; + Bads1 -> {error, Bads1} end. delegate_call(Pid, Msg) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b42d1aea75..8d05a78c57 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -26,7 +26,7 @@ -export([start_link/1, info_keys/0]). --export([init_with_backing_queue_state/8]). +-export([init_with_backing_queue_state/7]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1, prioritise_call/3, @@ -47,6 +47,7 @@ msg_id_to_channel, ttl, ttl_timer_ref, + ttl_timer_expiry, senders, publish_seqno, unconfirmed, @@ -75,8 +76,8 @@ -spec(start_link/1 :: (rabbit_types:amqqueue()) -> rabbit_types:ok_pid_or_error()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). --spec(init_with_backing_queue_state/8 :: - (rabbit_types:amqqueue(), atom(), tuple(), any(), [any()], +-spec(init_with_backing_queue_state/7 :: + (rabbit_types:amqqueue(), atom(), tuple(), any(), [rabbit_types:delivery()], pmon:pmon(), dict()) -> #q{}). -endif. @@ -85,14 +86,17 @@ -define(STATISTICS_KEYS, [pid, + policy, exclusive_consumer_pid, exclusive_consumer_tag, messages_ready, messages_unacknowledged, messages, consumers, + active_consumers, memory, slave_pids, + synchronised_slave_pids, backing_queue_status ]). @@ -102,13 +106,11 @@ durable, auto_delete, arguments, - owner_pid, - slave_pids, - synchronised_slave_pids + owner_pid ]). -define(INFO_KEYS, - ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid, slave_pids]). + ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). %%---------------------------------------------------------------------------- @@ -144,7 +146,7 @@ init(Q) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, - RateTRef, AckTags, Deliveries, Senders, MTC) -> + RateTRef, Deliveries, Senders, MTC) -> case Owner of none -> ok; _ -> erlang:monitor(process, Owner) @@ -166,12 +168,10 @@ init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, delayed_stop = undefined, queue_monitors = pmon:new(), msg_id_to_channel = MTC}, - State1 = requeue_and_run(AckTags, process_args( - rabbit_event:init_stats_timer( - State, #q.stats_timer))), - lists:foldl( - fun (Delivery, StateN) -> deliver_or_enqueue(Delivery, StateN) end, - State1, Deliveries). + State1 = process_args(rabbit_event:init_stats_timer(State, #q.stats_timer)), + lists:foldl(fun (Delivery, StateN) -> + deliver_or_enqueue(Delivery, true, StateN) + end, State1, Deliveries). terminate(shutdown = R, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); @@ -179,7 +179,6 @@ terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); terminate(Reason, State = #q{q = #amqqueue{name = QName}, backing_queue = BQ}) -> - %% FIXME: How do we cancel active subscriptions? terminate_shutdown( fun (BQS) -> BQS1 = BQ:delete_and_terminate(Reason, BQS), @@ -230,8 +229,7 @@ matches(false, Q1, Q2) -> Q1#amqqueue.exclusive_owner =:= Q2#amqqueue.exclusive_owner andalso Q1#amqqueue.arguments =:= Q2#amqqueue.arguments andalso Q1#amqqueue.pid =:= Q2#amqqueue.pid andalso - Q1#amqqueue.slave_pids =:= Q2#amqqueue.slave_pids andalso - Q1#amqqueue.mirror_nodes =:= Q2#amqqueue.mirror_nodes. + Q1#amqqueue.slave_pids =:= Q2#amqqueue.slave_pids. bq_init(BQ, Q, Recover) -> Self = self(), @@ -296,11 +294,11 @@ next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> timed -> {ensure_sync_timer(State1), 0 } end. -backing_queue_module(#amqqueue{arguments = Args}) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of - undefined -> {ok, BQM} = application:get_env(backing_queue_module), - BQM; - _Policy -> rabbit_mirror_queue_master +backing_queue_module(Q) -> + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> {ok, BQM} = application:get_env(backing_queue_module), + BQM; + true -> rabbit_mirror_queue_master end. ensure_sync_timer(State = #q{sync_timer_ref = undefined}) -> @@ -499,32 +497,29 @@ confirm_messages(MsgIds, State = #q{msg_id_to_channel = MTC}) -> rabbit_misc:gb_trees_foreach(fun rabbit_misc:confirm_to_sender/2, CMs), State#q{msg_id_to_channel = MTC1}. -should_confirm_message(#delivery{msg_seq_no = undefined}, _State) -> - never; -should_confirm_message(#delivery{sender = SenderPid, +send_or_record_confirm(#delivery{msg_seq_no = undefined}, State) -> + {never, State}; +send_or_record_confirm(#delivery{sender = SenderPid, msg_seq_no = MsgSeqNo, message = #basic_message { is_persistent = true, id = MsgId}}, - #q{q = #amqqueue{durable = true}}) -> - {eventually, SenderPid, MsgSeqNo, MsgId}; -should_confirm_message(#delivery{sender = SenderPid, - msg_seq_no = MsgSeqNo}, - _State) -> - {immediately, SenderPid, MsgSeqNo}. - -needs_confirming({eventually, _, _, _}) -> true; -needs_confirming(_) -> false. - -maybe_record_confirm_message({eventually, SenderPid, MsgSeqNo, MsgId}, - State = #q{msg_id_to_channel = MTC}) -> - State#q{msg_id_to_channel = - gb_trees:insert(MsgId, {SenderPid, MsgSeqNo}, MTC)}; -maybe_record_confirm_message({immediately, SenderPid, MsgSeqNo}, State) -> + State = #q{q = #amqqueue{durable = true}, + msg_id_to_channel = MTC}) -> + MTC1 = gb_trees:insert(MsgId, {SenderPid, MsgSeqNo}, MTC), + {eventually, State#q{msg_id_to_channel = MTC1}}; +send_or_record_confirm(#delivery{sender = SenderPid, + msg_seq_no = MsgSeqNo}, State) -> rabbit_misc:confirm_to_sender(SenderPid, [MsgSeqNo]), - State; -maybe_record_confirm_message(_Confirm, State) -> - State. + {immediately, State}. + +discard(#delivery{sender = SenderPid, message = #basic_message{id = MsgId}}, + State) -> + %% fake an 'eventual' confirm from BQ; noop if not needed + State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = + confirm_messages([MsgId], State), + BQS1 = BQ:discard(MsgId, SenderPid, BQS), + State1#q{backing_queue_state = BQS1}. run_message_queue(State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = @@ -534,60 +529,50 @@ run_message_queue(State) -> BQ:is_empty(BQS), State1), State2. -attempt_delivery(#delivery{sender = SenderPid, message = Message}, Confirm, +attempt_delivery(Delivery = #delivery{sender = SenderPid, message = Message}, + Props = #message_properties{delivered = Delivered}, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> case BQ:is_duplicate(Message, BQS) of {false, BQS1} -> deliver_msgs_to_consumers( - fun (AckRequired, State1 = #q{backing_queue_state = BQS2}) -> - Props = message_properties(Confirm, State1), + fun (true, State1 = #q{backing_queue_state = BQS2}) -> {AckTag, BQS3} = BQ:publish_delivered( - AckRequired, Message, Props, - SenderPid, BQS2), - {{Message, false, AckTag}, true, - State1#q{backing_queue_state = BQS3}} + Message, Props, SenderPid, BQS2), + {{Message, Delivered, AckTag}, + true, State1#q{backing_queue_state = BQS3}}; + (false, State1) -> + {{Message, Delivered, undefined}, + true, discard(Delivery, State1)} end, false, State#q{backing_queue_state = BQS1}); - {Duplicate, BQS1} -> - %% if the message has previously been seen by the BQ then - %% it must have been seen under the same circumstances as - %% now: i.e. if it is now a deliver_immediately then it - %% must have been before. - {case Duplicate of - published -> true; - discarded -> false - end, - State#q{backing_queue_state = BQS1}} + {published, BQS1} -> + {true, State#q{backing_queue_state = BQS1}}; + {discarded, BQS1} -> + {false, State#q{backing_queue_state = BQS1}} end. -deliver_or_enqueue(Delivery = #delivery{message = Message, - msg_seq_no = MsgSeqNo, - sender = SenderPid}, State) -> - Confirm = should_confirm_message(Delivery, State), - case attempt_delivery(Delivery, Confirm, State) of - {true, State1} -> - maybe_record_confirm_message(Confirm, State1); - %% the next two are optimisations - {false, State1 = #q{ttl = 0, dlx = undefined}} when Confirm == never -> - discard_delivery(Delivery, State1); - {false, State1 = #q{ttl = 0, dlx = undefined}} -> - rabbit_misc:confirm_to_sender(SenderPid, [MsgSeqNo]), - discard_delivery(Delivery, State1); - {false, State1} -> - State2 = #q{backing_queue = BQ, backing_queue_state = BQS} = - maybe_record_confirm_message(Confirm, State1), - Props = message_properties(Confirm, State2), +deliver_or_enqueue(Delivery = #delivery{message = Message, sender = SenderPid}, + Delivered, State) -> + {Confirm, State1} = send_or_record_confirm(Delivery, State), + Props = message_properties(Confirm, Delivered, State), + case attempt_delivery(Delivery, Props, State1) of + {true, State2} -> + State2; + %% The next one is an optimisation + {false, State2 = #q{ttl = 0, dlx = undefined}} -> + discard(Delivery, State2); + {false, State2 = #q{backing_queue = BQ, backing_queue_state = BQS}} -> BQS1 = BQ:publish(Message, Props, SenderPid, BQS), - ensure_ttl_timer(State2#q{backing_queue_state = BQS1}) + ensure_ttl_timer(Props#message_properties.expiry, + State2#q{backing_queue_state = BQS1}) end. -requeue_and_run(AckTags, State = #q{backing_queue = BQ}) -> - run_backing_queue(BQ, fun (M, BQS) -> - {_MsgIds, BQS1} = M:requeue(AckTags, BQS), - BQS1 - end, State). +requeue_and_run(AckTags, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + run_message_queue(State#q{backing_queue_state = BQS1}). -fetch(AckRequired, State = #q{backing_queue_state = BQS, - backing_queue = BQ}) -> +fetch(AckRequired, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), {Result, State#q{backing_queue_state = BQS1}}. @@ -681,12 +666,9 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. -backing_queue_timeout(State = #q{backing_queue = BQ}) -> - run_backing_queue(BQ, fun (M, BQS) -> M:timeout(BQS) end, State). - -run_backing_queue(Mod, Fun, State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - run_message_queue(State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)}). +backing_queue_timeout(State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + State#q{backing_queue_state = BQ:timeout(BQS)}. subtract_acks(ChPid, AckTags, State, Fun) -> case lookup_ch(ChPid) of @@ -698,15 +680,10 @@ subtract_acks(ChPid, AckTags, State, Fun) -> Fun(State) end. -discard_delivery(#delivery{sender = SenderPid, - message = Message}, - State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - State#q{backing_queue_state = BQ:discard(Message, SenderPid, BQS)}. - -message_properties(Confirm, #q{ttl = TTL}) -> +message_properties(Confirm, Delivered, #q{ttl = TTL}) -> #message_properties{expiry = calculate_msg_expiry(TTL), - needs_confirming = needs_confirming(Confirm)}. + needs_confirming = Confirm == eventually, + delivered = Delivered}. calculate_msg_expiry(undefined) -> undefined; calculate_msg_expiry(TTL) -> now_micros() + (TTL * 1000). @@ -717,28 +694,40 @@ drop_expired_messages(State = #q{backing_queue_state = BQS, backing_queue = BQ }) -> Now = now_micros(), DLXFun = dead_letter_fun(expired, State), - ExpirePred = fun (#message_properties{expiry = Expiry}) -> Now > Expiry end, - case DLXFun of - undefined -> {undefined, BQS1} = BQ:dropwhile(ExpirePred, false, BQS), - BQS1; - _ -> {Msgs, BQS1} = BQ:dropwhile(ExpirePred, true, BQS), - lists:foreach( - fun({Msg, AckTag}) -> DLXFun(Msg, AckTag) end, Msgs), - BQS1 - end, - ensure_ttl_timer(State#q{backing_queue_state = BQS1}). - -ensure_ttl_timer(State = #q{backing_queue = BQ, - backing_queue_state = BQS, - ttl = TTL, - ttl_timer_ref = undefined}) - when TTL =/= undefined -> - case BQ:is_empty(BQS) of - true -> State; - false -> TRef = erlang:send_after(TTL, self(), drop_expired), - State#q{ttl_timer_ref = TRef} + ExpirePred = fun (#message_properties{expiry = Exp}) -> Now >= Exp end, + {Props, BQS1} = case DLXFun of + undefined -> {Next, undefined, BQS2} = + BQ:dropwhile(ExpirePred, false, BQS), + {Next, BQS2}; + _ -> {Next, Msgs, BQS2} = + BQ:dropwhile(ExpirePred, true, BQS), + DLXFun(Msgs), + {Next, BQS2} + end, + ensure_ttl_timer(case Props of + undefined -> undefined; + #message_properties{expiry = Exp} -> Exp + end, State#q{backing_queue_state = BQS1}). + +ensure_ttl_timer(undefined, State) -> + State; +ensure_ttl_timer(_Expiry, State = #q{ttl = undefined}) -> + State; +ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = undefined}) -> + After = (case Expiry - now_micros() of + V when V > 0 -> V + 999; %% always fire later + _ -> 0 + end) div 1000, + TRef = erlang:send_after(After, self(), drop_expired), + State#q{ttl_timer_ref = TRef, ttl_timer_expiry = Expiry}; +ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = TRef, + ttl_timer_expiry = TExpiry}) + when Expiry + 1000 < TExpiry -> + case erlang:cancel_timer(TRef) of + false -> State; + _ -> ensure_ttl_timer(Expiry, State#q{ttl_timer_ref = undefined}) end; -ensure_ttl_timer(State) -> +ensure_ttl_timer(_Expiry, State) -> State. ack_if_no_dlx(AckTags, State = #q{dlx = undefined, @@ -752,43 +741,23 @@ ack_if_no_dlx(_AckTags, State) -> dead_letter_fun(_Reason, #q{dlx = undefined}) -> undefined; dead_letter_fun(Reason, _State) -> - fun(Msg, AckTag) -> - gen_server2:cast(self(), {dead_letter, {Msg, AckTag}, Reason}) - end. - -dead_letter_publish(Msg, Reason, State = #q{publish_seqno = MsgSeqNo}) -> - DLMsg = #basic_message{exchange_name = XName} = - make_dead_letter_msg(Reason, Msg, State), - case rabbit_exchange:lookup(XName) of - {ok, X} -> - Delivery = rabbit_basic:delivery(false, false, DLMsg, MsgSeqNo), - {Queues, Cycles} = detect_dead_letter_cycles( - DLMsg, rabbit_exchange:route(X, Delivery)), - lists:foreach(fun log_cycle_once/1, Cycles), - QPids = rabbit_amqqueue:lookup(Queues), - {_, DeliveredQPids} = rabbit_amqqueue:deliver(QPids, Delivery), - DeliveredQPids; - {error, not_found} -> - [] - end. - -dead_letter_msg(Msg, AckTag, Reason, State = #q{publish_seqno = MsgSeqNo, - unconfirmed = UC}) -> - QPids = dead_letter_publish(Msg, Reason, State), - State1 = State#q{queue_monitors = pmon:monitor_all( - QPids, State#q.queue_monitors), - publish_seqno = MsgSeqNo + 1}, - case QPids of - [] -> cleanup_after_confirm([AckTag], State1); - _ -> UC1 = dtree:insert(MsgSeqNo, QPids, AckTag, UC), - noreply(State1#q{unconfirmed = UC1}) - end. + fun(Msgs) -> gen_server2:cast(self(), {dead_letter, Msgs, Reason}) end. + +dead_letter_publish(Msg, Reason, X, State = #q{publish_seqno = MsgSeqNo}) -> + DLMsg = make_dead_letter_msg(Reason, Msg, State), + Delivery = rabbit_basic:delivery(false, DLMsg, MsgSeqNo), + {Queues, Cycles} = detect_dead_letter_cycles( + DLMsg, rabbit_exchange:route(X, Delivery)), + lists:foreach(fun log_cycle_once/1, Cycles), + QPids = rabbit_amqqueue:lookup(Queues), + {_, DeliveredQPids} = rabbit_amqqueue:deliver(QPids, Delivery), + DeliveredQPids. handle_queue_down(QPid, Reason, State = #q{queue_monitors = QMons, unconfirmed = UC}) -> case pmon:is_monitored(QPid, QMons) of false -> noreply(State); - true -> case rabbit_misc:is_abnormal_termination(Reason) of + true -> case rabbit_misc:is_abnormal_exit(Reason) of true -> {Lost, _UC1} = dtree:take_all(QPid, UC), QNameS = rabbit_misc:rs(qname(State)), rabbit_log:warning("DLQ ~p for ~s died with " @@ -893,37 +862,7 @@ make_dead_letter_msg(Reason, now_micros() -> timer:now_diff(now(), {0,0,0}). -infos(Items, State) -> - {Prefix, Items1} = - case lists:member(synchronised_slave_pids, Items) of - true -> Prefix1 = slaves_status(State), - case lists:member(slave_pids, Items) of - true -> {Prefix1, Items -- [slave_pids]}; - false -> {proplists:delete(slave_pids, Prefix1), Items} - end; - false -> {[], Items} - end, - Prefix ++ [{Item, i(Item, State)} - || Item <- (Items1 -- [synchronised_slave_pids])]. - -slaves_status(#q{q = #amqqueue{name = Name}}) -> - case rabbit_amqqueue:lookup(Name) of - {ok, #amqqueue{mirror_nodes = undefined}} -> - [{slave_pids, ''}, {synchronised_slave_pids, ''}]; - {ok, #amqqueue{slave_pids = SPids}} -> - {Results, _Bad} = - delegate:invoke(SPids, fun rabbit_mirror_queue_slave:info/1), - {SPids1, SSPids} = - lists:foldl( - fun ({Pid, Infos}, {SPidsN, SSPidsN}) -> - {[Pid | SPidsN], - case proplists:get_bool(is_synchronised, Infos) of - true -> [Pid | SSPidsN]; - false -> SSPidsN - end} - end, {[], []}, Results), - [{slave_pids, SPids1}, {synchronised_slave_pids, SSPids}] - end. +infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. i(name, #q{q = #amqqueue{name = Name}}) -> Name; i(durable, #q{q = #amqqueue{durable = Durable}}) -> Durable; @@ -935,6 +874,12 @@ i(owner_pid, #q{q = #amqqueue{exclusive_owner = none}}) -> ''; i(owner_pid, #q{q = #amqqueue{exclusive_owner = ExclusiveOwner}}) -> ExclusiveOwner; +i(policy, #q{q = #amqqueue{name = Name}}) -> + {ok, Q} = rabbit_amqqueue:lookup(Name), + case rabbit_policy:name(Q) of + none -> ''; + Policy -> Policy + end; i(exclusive_consumer_pid, #q{exclusive_consumer = none}) -> ''; i(exclusive_consumer_pid, #q{exclusive_consumer = {ChPid, _ConsumerTag}}) -> @@ -952,13 +897,24 @@ i(messages, State) -> messages_unacknowledged]]); i(consumers, _) -> consumer_count(); +i(active_consumers, _) -> + active_consumer_count(); i(memory, _) -> {memory, M} = process_info(self(), memory), M; i(slave_pids, #q{q = #amqqueue{name = Name}}) -> - case rabbit_amqqueue:lookup(Name) of - {ok, #amqqueue{mirror_nodes = undefined}} -> []; - {ok, #amqqueue{slave_pids = SPids}} -> SPids + {ok, Q = #amqqueue{slave_pids = SPids}} = + rabbit_amqqueue:lookup(Name), + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> SPids + end; +i(synchronised_slave_pids, #q{q = #amqqueue{name = Name}}) -> + {ok, Q = #amqqueue{sync_slave_pids = SSPids}} = + rabbit_amqqueue:lookup(Name), + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> SSPids end; i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:status(BQS); @@ -1063,28 +1019,10 @@ handle_call({info, Items}, _From, State) -> handle_call(consumers, _From, State) -> reply(consumers(State), State); -handle_call({deliver, Delivery = #delivery{immediate = true}}, _From, State) -> - %% FIXME: Is this correct semantics? - %% - %% I'm worried in particular about the case where an exchange has - %% two queues against a particular routing key, and a message is - %% sent in immediate mode through the binding. In non-immediate - %% mode, both queues get the message, saving it for later if - %% there's noone ready to receive it just now. In immediate mode, - %% should both queues still get the message, somehow, or should - %% just all ready-to-consume queues get the message, with unready - %% queues discarding the message? - %% - Confirm = should_confirm_message(Delivery, State), - {Delivered, State1} = attempt_delivery(Delivery, Confirm, State), - reply(Delivered, case Delivered of - true -> maybe_record_confirm_message(Confirm, State1); - false -> discard_delivery(Delivery, State1) - end); - -handle_call({deliver, Delivery = #delivery{mandatory = true}}, From, State) -> - gen_server2:reply(From, true), - noreply(deliver_or_enqueue(Delivery, State)); +handle_call({deliver, Delivery, Delivered}, From, State) -> + %% Synchronous, "mandatory" deliver mode. + gen_server2:reply(From, ok), + noreply(deliver_or_enqueue(Delivery, Delivered, State)); handle_call({notify_down, ChPid}, From, State) -> %% we want to do this synchronously, so that auto_deleted queues @@ -1200,6 +1138,23 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> ChPid, AckTags, State, fun (State1) -> requeue_and_run(AckTags, State1) end)); +handle_call(start_mirroring, _From, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + %% lookup again to get policy for init_with_existing_bq + {ok, Q} = rabbit_amqqueue:lookup(qname(State)), + true = BQ =/= rabbit_mirror_queue_master, %% assertion + BQ1 = rabbit_mirror_queue_master, + BQS1 = BQ1:init_with_existing_bq(Q, BQ, BQS), + reply(ok, State#q{backing_queue = BQ1, + backing_queue_state = BQS1}); + +handle_call(stop_mirroring, _From, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + BQ = rabbit_mirror_queue_master, %% assertion + {BQ1, BQS1} = BQ:stop_mirroring(BQS), + reply(ok, State#q{backing_queue = BQ1, + backing_queue_state = BQS1}); + handle_call(force_event_refresh, _From, State = #q{exclusive_consumer = Exclusive}) -> rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State)), @@ -1224,19 +1179,21 @@ handle_cast({confirm, MsgSeqNos, QPid}, State = #q{unconfirmed = UC}) -> handle_cast(_, State = #q{delayed_stop = DS}) when DS =/= undefined -> noreply(State); -handle_cast({run_backing_queue, Mod, Fun}, State) -> - noreply(run_backing_queue(Mod, Fun, State)); +handle_cast({run_backing_queue, Mod, Fun}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + noreply(run_message_queue( + State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)})); -handle_cast({deliver, Delivery = #delivery{sender = Sender}, Flow}, +handle_cast({deliver, Delivery = #delivery{sender = Sender}, Delivered, Flow}, State = #q{senders = Senders}) -> - %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. + %% Asynchronous, non-"mandatory" deliver mode. Senders1 = case Flow of flow -> credit_flow:ack(Sender), pmon:monitor(Sender, Senders); noflow -> Senders end, State1 = State#q{senders = Senders1}, - noreply(deliver_or_enqueue(Delivery, State1)); + noreply(deliver_or_enqueue(Delivery, Delivered, State1)); handle_cast({ack, AckTags, ChPid}, State) -> noreply(subtract_acks( @@ -1254,7 +1211,12 @@ handle_cast({reject, AckTags, Requeue, ChPid}, State) -> true -> fun (State1) -> requeue_and_run(AckTags, State1) end; false -> fun (State1 = #q{backing_queue = BQ, backing_queue_state = BQS}) -> - Fun = dead_letter_fun(rejected, State1), + Fun = + case dead_letter_fun(rejected, State1) of + undefined -> undefined; + F -> fun(M, A) -> F([{M, A}]) + end + end, BQS1 = BQ:fold(Fun, BQS, AckTags), ack_if_no_dlx( AckTags, @@ -1306,8 +1268,27 @@ handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), noreply(State); -handle_cast({dead_letter, {Msg, AckTag}, Reason}, State) -> - dead_letter_msg(Msg, AckTag, Reason, State). +handle_cast({dead_letter, Msgs, Reason}, State = #q{dlx = XName}) -> + case rabbit_exchange:lookup(XName) of + {ok, X} -> + noreply(lists:foldl( + fun({Msg, AckTag}, State1 = #q{publish_seqno = SeqNo, + unconfirmed = UC, + queue_monitors = QMon}) -> + QPids = dead_letter_publish(Msg, Reason, X, + State1), + UC1 = dtree:insert(SeqNo, QPids, AckTag, UC), + QMons = pmon:monitor_all(QPids, QMon), + State1#q{queue_monitors = QMons, + publish_seqno = SeqNo + 1, + unconfirmed = UC1} + end, State, Msgs)); + {error, not_found} -> + cleanup_after_confirm([AckTag || {_, AckTag} <- Msgs], State) + end; + +handle_cast(wake_up, State) -> + noreply(State). %% We need to not ignore this as we need to remove outstanding %% confirms due to queue death. diff --git a/src/rabbit_auth_backend.erl b/src/rabbit_auth_backend.erl index e89951e701..c9475efd27 100644 --- a/src/rabbit_auth_backend.erl +++ b/src/rabbit_auth_backend.erl @@ -20,7 +20,7 @@ %% A description proplist as with auth mechanisms, %% exchanges. Currently unused. --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% Check a user can log in, given a username and a proplist of %% authentication information (e.g. [{password, Password}]). diff --git a/src/rabbit_auth_mechanism.erl b/src/rabbit_auth_mechanism.erl index eda6a74305..c7d74dc372 100644 --- a/src/rabbit_auth_mechanism.erl +++ b/src/rabbit_auth_mechanism.erl @@ -19,7 +19,7 @@ -ifdef(use_specs). %% A description. --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% If this mechanism is enabled, should it be offered for a given socket? %% (primarily so EXTERNAL can be SSL-only) diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index dc144a0e53..af660c60a0 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -18,17 +18,21 @@ -ifdef(use_specs). +-export_type([async_callback/0]). + %% We can't specify a per-queue ack/state with callback signatures -type(ack() :: any()). -type(state() :: any()). +-type(msg_ids() :: [rabbit_types:msg_id()]). -type(fetch_result(Ack) :: ('empty' | %% Message, IsDelivered, AckTag, Remaining_Len {rabbit_types:basic_message(), boolean(), Ack, non_neg_integer()})). -type(attempt_recovery() :: boolean()). -type(purged_msg_count() :: non_neg_integer()). --type(async_callback() :: fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok')). +-type(async_callback() :: + fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok')). -type(duration() :: ('undefined' | 'infinity' | number())). -type(msg_fun() :: fun((rabbit_types:basic_message(), ack()) -> 'ok') | @@ -80,12 +84,16 @@ %% Called for messages which have already been passed straight %% out to a client. The queue will be empty for these calls %% (i.e. saves the round trip through the backing queue). --callback publish_delivered(true, rabbit_types:basic_message(), - rabbit_types:message_properties(), pid(), state()) - -> {ack(), state()}; - (false, rabbit_types:basic_message(), +-callback publish_delivered(rabbit_types:basic_message(), rabbit_types:message_properties(), pid(), state()) - -> {undefined, state()}. + -> {ack(), state()}. + +%% Called to inform the BQ about messages which have reached the +%% queue, but are not going to be further passed to BQ for some +%% reason. Note that this may be invoked for messages for which +%% BQ:is_duplicate/2 has already returned {'published' | 'discarded', +%% BQS}. +-callback discard(rabbit_types:msg_id(), pid(), state()) -> state(). %% Return ids of messages which have been confirmed since the last %% invocation of this function (or initialisation). @@ -114,16 +122,18 @@ %% first time the message id appears in the result of %% drain_confirmed. All subsequent appearances of that message id will %% be ignored. --callback drain_confirmed(state()) -> {[rabbit_guid:guid()], state()}. +-callback drain_confirmed(state()) -> {msg_ids(), state()}. %% Drop messages from the head of the queue while the supplied predicate returns %% true. Also accepts a boolean parameter that determines whether the messages %% necessitate an ack or not. If they do, the function returns a list of %% messages with the respective acktags. -callback dropwhile(msg_pred(), true, state()) - -> {[{rabbit_types:basic_message(), ack()}], state()}; + -> {rabbit_types:message_properties() | undefined, + [{rabbit_types:basic_message(), ack()}], state()}; (msg_pred(), false, state()) - -> {undefined, state()}. + -> {rabbit_types:message_properties() | undefined, + undefined, state()}. %% Produce the next message. -callback fetch(true, state()) -> {fetch_result(ack()), state()}; @@ -131,7 +141,7 @@ %% Acktags supplied are for messages which can now be forgotten %% about. Must return 1 msg_id per Ack, in the same order as Acks. --callback ack([ack()], state()) -> {[rabbit_guid:guid()], state()}. +-callback ack([ack()], state()) -> {msg_ids(), state()}. %% Acktags supplied are for messages which should be processed. The %% provided callback function is called with each message. @@ -139,7 +149,7 @@ %% Reinsert messages into the queue which have already been delivered %% and were pending acknowledgement. --callback requeue([ack()], state()) -> {[rabbit_guid:guid()], state()}. +-callback requeue([ack()], state()) -> {msg_ids(), state()}. %% How long is my queue? -callback len(state()) -> non_neg_integer(). @@ -147,6 +157,9 @@ %% Is my queue empty? -callback is_empty(state()) -> boolean(). +%% What's the queue depth, where depth = length + number of pending acks +-callback depth(state()) -> non_neg_integer(). + %% For the next three functions, the assumption is that you're %% monitoring something like the ingress and egress rates of the %% queue. The RAM duration is thus the length of time represented by @@ -191,13 +204,6 @@ -callback is_duplicate(rabbit_types:basic_message(), state()) -> {'false'|'published'|'discarded', state()}. -%% Called to inform the BQ about messages which have reached the -%% queue, but are not going to be further passed to BQ for some -%% reason. Note that this is may be invoked for messages for which -%% BQ:is_duplicate/2 has already returned {'published' | 'discarded', -%% BQS}. --callback discard(rabbit_types:basic_message(), pid(), state()) -> state(). - -else. -export([behaviour_info/1]). @@ -205,11 +211,11 @@ behaviour_info(callbacks) -> [{start, 1}, {stop, 0}, {init, 3}, {terminate, 2}, {delete_and_terminate, 2}, {purge, 1}, {publish, 4}, - {publish_delivered, 5}, {drain_confirmed, 1}, {dropwhile, 3}, + {publish_delivered, 4}, {discard, 3}, {drain_confirmed, 1}, {dropwhile, 3}, {fetch, 2}, {ack, 2}, {fold, 3}, {requeue, 2}, {len, 1}, - {is_empty, 1}, {set_ram_duration_target, 2}, {ram_duration, 1}, - {needs_timeout, 1}, {timeout, 1}, {handle_pre_hibernate, 1}, - {status, 1}, {invoke, 3}, {is_duplicate, 2}, {discard, 3}]; + {is_empty, 1}, {depth, 1}, {set_ram_duration_target, 2}, + {ram_duration, 1}, {needs_timeout, 1}, {timeout, 1}, + {handle_pre_hibernate, 1}, {status, 1}, {invoke, 3}, {is_duplicate, 2}] ; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_backing_queue_qc.erl b/src/rabbit_backing_queue_qc.erl index a84800c0ec..b37fbb29e2 100644 --- a/src/rabbit_backing_queue_qc.erl +++ b/src/rabbit_backing_queue_qc.erl @@ -119,7 +119,7 @@ qc_publish_multiple(#state{}) -> qc_publish_delivered(#state{bqstate = BQ}) -> {call, ?BQMOD, publish_delivered, - [boolean(), qc_message(), #message_properties{}, self(), BQ]}. + [qc_message(), #message_properties{}, self(), BQ]}. qc_fetch(#state{bqstate = BQ}) -> {call, ?BQMOD, fetch, [boolean(), BQ]}. @@ -199,7 +199,7 @@ next_state(S, _BQ, {call, ?MODULE, publish_multiple, [PublishCount]}) -> next_state(S, Res, {call, ?BQMOD, publish_delivered, - [AckReq, Msg, MsgProps, _Pid, _BQ]}) -> + [Msg, MsgProps, _Pid, _BQ]}) -> #state{confirms = Confirms, acks = Acks, next_seq_id = NextSeq} = S, AckTag = {call, erlang, element, [1, Res]}, BQ1 = {call, erlang, element, [2, Res]}, @@ -213,10 +213,7 @@ next_state(S, Res, true -> gb_sets:add(MsgId, Confirms); _ -> Confirms end, - acks = case AckReq of - true -> [{AckTag, {NextSeq, {MsgProps, Msg}}}|Acks]; - false -> Acks - end + acks = [{AckTag, {NextSeq, {MsgProps, Msg}}}|Acks] }; next_state(S, Res, {call, ?BQMOD, fetch, [AckReq, _BQ]}) -> @@ -268,7 +265,7 @@ next_state(S, Res, {call, ?BQMOD, drain_confirmed, _Args}) -> S#state{bqstate = BQ1}; next_state(S, Res, {call, ?BQMOD, dropwhile, _Args}) -> - BQ = {call, erlang, element, [2, Res]}, + BQ = {call, erlang, element, [3, Res]}, #state{messages = Messages} = S, Msgs1 = drop_messages(Messages), S#state{bqstate = BQ, len = gb_trees:size(Msgs1), messages = Msgs1}; @@ -391,4 +388,13 @@ drop_messages(Messages) -> end end. +-else. + +-export([prop_disabled/0]). + +prop_disabled() -> + exit({compiled_without_proper, + "PropEr was not present during compilation of the test module. " + "Hence all tests are disabled."}). + -endif. diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 734456d35f..db2b7e9570 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -18,9 +18,9 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([publish/4, publish/6, publish/1, +-export([publish/4, publish/5, publish/1, message/3, message/4, properties/1, append_table_header/3, - extract_headers/1, map_headers/2, delivery/4, header_routes/1]). + extract_headers/1, map_headers/2, delivery/3, header_routes/1]). -export([build_content/2, from_content/1]). %%---------------------------------------------------------------------------- @@ -40,13 +40,13 @@ -spec(publish/4 :: (exchange_input(), rabbit_router:routing_key(), properties_input(), body_input()) -> publish_result()). --spec(publish/6 :: - (exchange_input(), rabbit_router:routing_key(), boolean(), boolean(), +-spec(publish/5 :: + (exchange_input(), rabbit_router:routing_key(), boolean(), properties_input(), body_input()) -> publish_result()). -spec(publish/1 :: (rabbit_types:delivery()) -> publish_result()). --spec(delivery/4 :: - (boolean(), boolean(), rabbit_types:message(), undefined | integer()) -> +-spec(delivery/3 :: + (boolean(), rabbit_types:message(), undefined | integer()) -> rabbit_types:delivery()). -spec(message/4 :: (rabbit_exchange:name(), rabbit_router:routing_key(), @@ -80,18 +80,16 @@ %% Convenience function, for avoiding round-trips in calls across the %% erlang distributed network. publish(Exchange, RoutingKeyBin, Properties, Body) -> - publish(Exchange, RoutingKeyBin, false, false, Properties, Body). + publish(Exchange, RoutingKeyBin, false, Properties, Body). %% Convenience function, for avoiding round-trips in calls across the %% erlang distributed network. -publish(X = #exchange{name = XName}, RKey, Mandatory, Immediate, Props, Body) -> - publish(X, delivery(Mandatory, Immediate, - message(XName, RKey, properties(Props), Body), - undefined)); -publish(XName, RKey, Mandatory, Immediate, Props, Body) -> - publish(delivery(Mandatory, Immediate, - message(XName, RKey, properties(Props), Body), - undefined)). +publish(X = #exchange{name = XName}, RKey, Mandatory, Props, Body) -> + Message = message(XName, RKey, properties(Props), Body), + publish(X, delivery(Mandatory, Message, undefined)); +publish(XName, RKey, Mandatory, Props, Body) -> + Message = message(XName, RKey, properties(Props), Body), + publish(delivery(Mandatory, Message, undefined)). publish(Delivery = #delivery{ message = #basic_message{exchange_name = XName}}) -> @@ -105,8 +103,8 @@ publish(X, Delivery) -> {RoutingRes, DeliveredQPids} = rabbit_amqqueue:deliver(Qs, Delivery), {ok, RoutingRes, DeliveredQPids}. -delivery(Mandatory, Immediate, Message, MsgSeqNo) -> - #delivery{mandatory = Mandatory, immediate = Immediate, sender = self(), +delivery(Mandatory, Message, MsgSeqNo) -> + #delivery{mandatory = Mandatory, sender = self(), message = Message, msg_seq_no = MsgSeqNo}. build_content(Properties, BodyBin) when is_binary(BodyBin) -> diff --git a/src/rabbit_binding.erl b/src/rabbit_binding.erl index f0ea514dcf..0d23f716af 100644 --- a/src/rabbit_binding.erl +++ b/src/rabbit_binding.erl @@ -169,9 +169,9 @@ add(Binding, InnerFun) -> add(Src, Dst, B) -> [SrcDurable, DstDurable] = [durable(E) || E <- [Src, Dst]], - case (not (SrcDurable andalso DstDurable) orelse - mnesia:read({rabbit_durable_route, B}) =:= []) of - true -> ok = sync_route(#route{binding = B}, SrcDurable, DstDurable, + case (SrcDurable andalso DstDurable andalso + mnesia:read({rabbit_durable_route, B}) =/= []) of + false -> ok = sync_route(#route{binding = B}, SrcDurable, DstDurable, fun mnesia:write/3), x_callback(transaction, Src, add_binding, B), Serial = rabbit_exchange:serial(Src), @@ -179,7 +179,7 @@ add(Src, Dst, B) -> x_callback(Serial, Src, add_binding, B), ok = rabbit_event:notify(binding_created, info(B)) end; - false -> rabbit_misc:const({error, binding_not_found}) + true -> rabbit_misc:const({error, binding_not_found}) end. remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end). @@ -277,21 +277,15 @@ has_for_source(SrcName) -> remove_for_source(SrcName) -> lock_route_tables(), Match = #route{binding = #binding{source = SrcName, _ = '_'}}, - Routes = lists:usort( - mnesia:match_object(rabbit_route, Match, write) ++ - mnesia:match_object(rabbit_durable_route, Match, write)), - [begin - sync_route(Route, fun mnesia:delete_object/3), - Route#route.binding - end || Route <- Routes]. + remove_routes( + lists:usort(mnesia:match_object(rabbit_route, Match, write) ++ + mnesia:match_object(rabbit_durable_route, Match, write))). -remove_for_destination(Dst) -> - remove_for_destination( - Dst, fun (R) -> sync_route(R, fun mnesia:delete_object/3) end). +remove_for_destination(DstName) -> + remove_for_destination(DstName, fun remove_routes/1). -remove_transient_for_destination(Dst) -> - remove_for_destination( - Dst, fun (R) -> sync_transient_route(R, fun mnesia:delete_object/3) end). +remove_transient_for_destination(DstName) -> + remove_for_destination(DstName, fun remove_transient_routes/1). %%---------------------------------------------------------------------------- @@ -308,6 +302,14 @@ binding_action(Binding = #binding{source = SrcName, Fun(Src, Dst, Binding#binding{args = SortedArgs}) end). +delete_object(Tab, Record, LockKind) -> + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:match_object(Tab, Record, LockKind) of + [] -> ok; + [_] -> mnesia:delete_object(Tab, Record, LockKind) + end. + sync_route(R, Fun) -> sync_route(R, true, true, Fun). sync_route(Route, true, true, Fun) -> @@ -370,16 +372,32 @@ lock_route_tables() -> rabbit_semi_durable_route, rabbit_durable_route]]. -remove_for_destination(DstName, DeleteFun) -> +remove_routes(Routes) -> + %% This partitioning allows us to suppress unnecessary delete + %% operations on disk tables, which require an fsync. + {TransientRoutes, DurableRoutes} = + lists:partition(fun (R) -> mnesia:match_object( + rabbit_durable_route, R, write) == [] end, + Routes), + [ok = sync_transient_route(R, fun mnesia:delete_object/3) || + R <- TransientRoutes], + [ok = sync_route(R, fun mnesia:delete_object/3) || + R <- DurableRoutes], + [R#route.binding || R <- Routes]. + +remove_transient_routes(Routes) -> + [begin + ok = sync_transient_route(R, fun delete_object/3), + R#route.binding + end || R <- Routes]. + +remove_for_destination(DstName, Fun) -> lock_route_tables(), Match = reverse_route( #route{binding = #binding{destination = DstName, _ = '_'}}), - ReverseRoutes = mnesia:match_object(rabbit_reverse_route, Match, write), - Bindings = [begin - Route = reverse_route(ReverseRoute), - ok = DeleteFun(Route), - Route#route.binding - end || ReverseRoute <- ReverseRoutes], + Routes = [reverse_route(R) || R <- mnesia:match_object( + rabbit_reverse_route, Match, write)], + Bindings = Fun(Routes), group_bindings_fold(fun maybe_auto_delete/3, new_deletions(), lists:keysort(#binding.source, Bindings)). diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 22c6a22361..0d13312b0b 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -136,7 +136,7 @@ flushed(Pid, QPid) -> gen_server2:cast(Pid, {flushed, QPid}). list() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_channel, list_local, []). list_local() -> @@ -267,7 +267,7 @@ handle_cast({method, Method, Content, Flow}, catch exit:Reason = #amqp_error{} -> MethodName = rabbit_misc:method_record_type(Method), - send_exception(Reason#amqp_error{method = MethodName}, State); + handle_exception(Reason#amqp_error{method = MethodName}, State); _:Reason -> {stop, {Reason, erlang:get_stacktrace()}, State} end; @@ -400,24 +400,29 @@ return_ok(State, false, Msg) -> {reply, Msg, State}. ok_msg(true, _Msg) -> undefined; ok_msg(false, Msg) -> Msg. -send_exception(Reason, State = #ch{protocol = Protocol, - channel = Channel, - writer_pid = WriterPid, - reader_pid = ReaderPid, - conn_pid = ConnPid}) -> - {CloseChannel, CloseMethod} = - rabbit_binary_generator:map_exception(Channel, Reason, Protocol), - rabbit_log:error("connection ~p, channel ~p - error:~n~p~n", - [ConnPid, Channel, Reason]), +handle_exception(Reason, State = #ch{protocol = Protocol, + channel = Channel, + writer_pid = WriterPid, + reader_pid = ReaderPid, + conn_pid = ConnPid}) -> %% something bad's happened: notify_queues may not be 'ok' {_Result, State1} = notify_queues(State), - case CloseChannel of - Channel -> ok = rabbit_writer:send_command(WriterPid, CloseMethod), - {noreply, State1}; - _ -> ReaderPid ! {channel_exit, Channel, Reason}, - {stop, normal, State1} + case rabbit_binary_generator:map_exception(Channel, Reason, Protocol) of + {Channel, CloseMethod} -> + rabbit_log:error("connection ~p, channel ~p - soft error:~n~p~n", + [ConnPid, Channel, Reason]), + ok = rabbit_writer:send_command(WriterPid, CloseMethod), + {noreply, State1}; + {0, _} -> + ReaderPid ! {channel_exit, Channel, Reason}, + {stop, normal, State1} end. +precondition_failed(Format) -> precondition_failed(Format, []). + +precondition_failed(Format, Params) -> + rabbit_misc:protocol_error(precondition_failed, Format, Params). + return_queue_declare_ok(#resource{name = ActualName}, NoWait, MessageCount, ConsumerCount, State) -> return_ok(State#ch{most_recently_declared_queue = ActualName}, NoWait, @@ -460,10 +465,14 @@ check_user_id_header(#'P_basic'{user_id = Username}, #ch{user = #user{username = Username}}) -> ok; check_user_id_header(#'P_basic'{user_id = Claimed}, - #ch{user = #user{username = Actual}}) -> - rabbit_misc:protocol_error( - precondition_failed, "user_id property set to '~s' but " - "authenticated user was '~s'", [Claimed, Actual]). + #ch{user = #user{username = Actual, + tags = Tags}}) -> + case lists:member(impersonator, Tags) of + true -> ok; + false -> precondition_failed( + "user_id property set to '~s' but authenticated user was " + "'~s'", [Claimed, Actual]) + end. check_internal_exchange(#exchange{name = Name, internal = true}) -> rabbit_misc:protocol_error(access_refused, @@ -589,10 +598,12 @@ handle_method(_Method, _, #ch{tx_status = TxStatus}) handle_method(#'access.request'{},_, State) -> {reply, #'access.request_ok'{ticket = 1}, State}; +handle_method(#'basic.publish'{immediate = true}, _Content, _State) -> + rabbit_misc:protocol_error(not_implemented, "immediate=true", []); + handle_method(#'basic.publish'{exchange = ExchangeNameBin, routing_key = RoutingKey, - mandatory = Mandatory, - immediate = Immediate}, + mandatory = Mandatory}, Content, State = #ch{virtual_host = VHostPath, tx_status = TxStatus, confirm_enabled = ConfirmEnabled, @@ -614,8 +625,7 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of {ok, Message} -> rabbit_trace:tap_trace_in(Message, TraceState), - Delivery = rabbit_basic:delivery(Mandatory, Immediate, Message, - MsgSeqNo), + Delivery = rabbit_basic:delivery(Mandatory, Message, MsgSeqNo), QNames = rabbit_exchange:route(Exchange, Delivery), {noreply, case TxStatus of @@ -625,8 +635,7 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, State1#ch{uncommitted_message_q = NewTMQ} end}; {error, Reason} -> - rabbit_misc:protocol_error(precondition_failed, - "invalid message: ~p", [Reason]) + precondition_failed("invalid message: ~p", [Reason]) end; handle_method(#'basic.nack'{delivery_tag = DeliveryTag, @@ -881,8 +890,7 @@ handle_method(#'exchange.delete'{exchange = ExchangeNameBin, {error, not_found} -> rabbit_misc:not_found(ExchangeName); {error, in_use} -> - rabbit_misc:protocol_error( - precondition_failed, "~s in use", [rabbit_misc:rs(ExchangeName)]); + precondition_failed("~s in use", [rabbit_misc:rs(ExchangeName)]); ok -> return_ok(State, NoWait, #'exchange.delete_ok'{}) end; @@ -980,11 +988,9 @@ handle_method(#'queue.delete'{queue = QueueNameBin, QueueName, ConnPid, fun (Q) -> rabbit_amqqueue:delete(Q, IfUnused, IfEmpty) end) of {error, in_use} -> - rabbit_misc:protocol_error( - precondition_failed, "~s in use", [rabbit_misc:rs(QueueName)]); + precondition_failed("~s in use", [rabbit_misc:rs(QueueName)]); {error, not_empty} -> - rabbit_misc:protocol_error( - precondition_failed, "~s not empty", [rabbit_misc:rs(QueueName)]); + precondition_failed("~s not empty", [rabbit_misc:rs(QueueName)]); {ok, PurgedMessageCount} -> return_ok(State, NoWait, #'queue.delete_ok'{message_count = PurgedMessageCount}) @@ -1019,15 +1025,13 @@ handle_method(#'queue.purge'{queue = QueueNameBin, #'queue.purge_ok'{message_count = PurgedMessageCount}); handle_method(#'tx.select'{}, _, #ch{confirm_enabled = true}) -> - rabbit_misc:protocol_error( - precondition_failed, "cannot switch from confirm to tx mode", []); + precondition_failed("cannot switch from confirm to tx mode"); handle_method(#'tx.select'{}, _, State) -> {reply, #'tx.select_ok'{}, State#ch{tx_status = in_progress}}; handle_method(#'tx.commit'{}, _, #ch{tx_status = none}) -> - rabbit_misc:protocol_error( - precondition_failed, "channel is not transactional", []); + precondition_failed("channel is not transactional"); handle_method(#'tx.commit'{}, _, State = #ch{uncommitted_message_q = TMQ, @@ -1041,8 +1045,7 @@ handle_method(#'tx.commit'{}, _, {noreply, maybe_complete_tx(new_tx(State1#ch{tx_status = committing}))}; handle_method(#'tx.rollback'{}, _, #ch{tx_status = none}) -> - rabbit_misc:protocol_error( - precondition_failed, "channel is not transactional", []); + precondition_failed("channel is not transactional"); handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q = UAMQ, uncommitted_acks = TAL, @@ -1052,8 +1055,7 @@ handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q = UAMQ, {reply, #'tx.rollback_ok'{}, new_tx(State#ch{unacked_message_q = UAMQ1})}; handle_method(#'confirm.select'{}, _, #ch{tx_status = in_progress}) -> - rabbit_misc:protocol_error( - precondition_failed, "cannot switch from tx to confirm mode", []); + precondition_failed("cannot switch from tx to confirm mode"); handle_method(#'confirm.select'{nowait = NoWait}, _, State) -> return_ok(State#ch{confirm_enabled = true}, @@ -1119,7 +1121,7 @@ monitor_delivering_queue(false, QPid, State = #ch{queue_monitors = QMons, delivering_queues = sets:add_element(QPid, DQ)}. handle_publishing_queue_down(QPid, Reason, State = #ch{unconfirmed = UC}) -> - case rabbit_misc:is_abnormal_termination(Reason) of + case rabbit_misc:is_abnormal_exit(Reason) of true -> {MXs, UC1} = dtree:take_all(QPid, UC), send_nacks(MXs, State#ch{unconfirmed = UC1}); false -> {MXs, UC1} = dtree:take(QPid, UC), @@ -1263,8 +1265,7 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) -> QTail, DeliveryTag, Multiple) end; {empty, _} -> - rabbit_misc:protocol_error( - precondition_failed, "unknown delivery tag ~w", [DeliveryTag]) + precondition_failed("unknown delivery tag ~w", [DeliveryTag]) end. ack(Acked, State) -> @@ -1342,20 +1343,16 @@ deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{ QPid <- DeliveredQPids]], publish, State2), State2. -process_routing_result(unroutable, _, XName, MsgSeqNo, Msg, State) -> +process_routing_result(unroutable, _, XName, MsgSeqNo, Msg, State) -> ok = basic_return(Msg, State, no_route), maybe_incr_stats([{Msg#basic_message.exchange_name, 1}], return_unroutable, State), record_confirm(MsgSeqNo, XName, State); -process_routing_result(not_delivered, _, XName, MsgSeqNo, Msg, State) -> - ok = basic_return(Msg, State, no_consumers), - maybe_incr_stats([{XName, 1}], return_not_delivered, State), - record_confirm(MsgSeqNo, XName, State); -process_routing_result(routed, [], XName, MsgSeqNo, _, State) -> +process_routing_result(routed, [], XName, MsgSeqNo, _, State) -> record_confirm(MsgSeqNo, XName, State); -process_routing_result(routed, _, _, undefined, _, State) -> +process_routing_result(routed, _, _, undefined, _, State) -> State; -process_routing_result(routed, QPids, XName, MsgSeqNo, _, State) -> +process_routing_result(routed, QPids, XName, MsgSeqNo, _, State) -> State#ch{unconfirmed = dtree:insert(MsgSeqNo, QPids, XName, State#ch.unconfirmed)}. @@ -1423,7 +1420,7 @@ complete_tx(State = #ch{tx_status = committing}) -> ok = rabbit_writer:send_command(State#ch.writer_pid, #'tx.commit_ok'{}), State#ch{tx_status = in_progress}; complete_tx(State = #ch{tx_status = failed}) -> - {noreply, State1} = send_exception( + {noreply, State1} = handle_exception( rabbit_misc:amqp_error( precondition_failed, "partial tx completion", [], 'tx.commit'), diff --git a/src/rabbit_control_main.erl b/src/rabbit_control_main.erl index b23088cc7a..25f7d758c6 100644 --- a/src/rabbit_control_main.erl +++ b/src/rabbit_control_main.erl @@ -25,10 +25,14 @@ -define(QUIET_OPT, "-q"). -define(NODE_OPT, "-n"). -define(VHOST_OPT, "-p"). +-define(RAM_OPT, "--ram"). +-define(OFFLINE_OPT, "--offline"). -define(QUIET_DEF, {?QUIET_OPT, flag}). -define(NODE_DEF(Node), {?NODE_OPT, {option, Node}}). -define(VHOST_DEF, {?VHOST_OPT, {option, "/"}}). +-define(RAM_DEF, {?RAM_OPT, flag}). +-define(OFFLINE_DEF, {?OFFLINE_OPT, flag}). -define(GLOBAL_DEFS(Node), [?QUIET_DEF, ?NODE_DEF(Node)]). @@ -41,8 +45,10 @@ force_reset, rotate_logs, - cluster, - force_cluster, + {join_cluster, [?RAM_DEF]}, + change_cluster_node_type, + update_cluster_nodes, + {forget_cluster_node, [?OFFLINE_DEF]}, cluster_status, add_user, @@ -60,9 +66,13 @@ {list_permissions, [?VHOST_DEF]}, list_user_permissions, - set_parameter, - clear_parameter, - list_parameters, + {set_parameter, [?VHOST_DEF]}, + {clear_parameter, [?VHOST_DEF]}, + {list_parameters, [?VHOST_DEF]}, + + {set_policy, [?VHOST_DEF]}, + {clear_policy, [?VHOST_DEF]}, + {list_policies, [?VHOST_DEF]}, {list_queues, [?VHOST_DEF]}, {list_exchanges, [?VHOST_DEF]}, @@ -92,7 +102,9 @@ {"Bindings", rabbit_binding, info_all, info_keys}, {"Consumers", rabbit_amqqueue, consumers_all, consumer_info_keys}, {"Permissions", rabbit_auth_backend_internal, list_vhost_permissions, - vhost_perms_info_keys}]). + vhost_perms_info_keys}, + {"Policies", rabbit_policy, list_formatted, info_keys}, + {"Parameters", rabbit_runtime_parameters, list_formatted, info_keys}]). %%---------------------------------------------------------------------------- @@ -190,11 +202,11 @@ print_report(Node, {Descr, Module, InfoFun, KeysFun}, VHostArg) -> print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg). print_report0(Node, {Module, InfoFun, KeysFun}, VHostArg) -> - case Results = rpc_call(Node, Module, InfoFun, VHostArg) of - [_|_] -> InfoItems = rpc_call(Node, Module, KeysFun, []), - display_row([atom_to_list(I) || I <- InfoItems]), - display_info_list(Results, InfoItems); - _ -> ok + case rpc_call(Node, Module, InfoFun, VHostArg) of + [_|_] = Results -> InfoItems = rpc_call(Node, Module, KeysFun, []), + display_row([atom_to_list(I) || I <- InfoItems]), + display_info_list(Results, InfoItems); + _ -> ok end, io:nl(). @@ -239,17 +251,34 @@ action(force_reset, Node, [], _Opts, Inform) -> Inform("Forcefully resetting node ~p", [Node]), call(Node, {rabbit_mnesia, force_reset, []}); -action(cluster, Node, ClusterNodeSs, _Opts, Inform) -> - ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), - Inform("Clustering node ~p with ~p", - [Node, ClusterNodes]), - rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); - -action(force_cluster, Node, ClusterNodeSs, _Opts, Inform) -> - ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), - Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", - [Node, ClusterNodes]), - rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); +action(join_cluster, Node, [ClusterNodeS], Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + NodeType = case proplists:get_bool(?RAM_OPT, Opts) of + true -> ram; + false -> disc + end, + Inform("Clustering node ~p with ~p", [Node, ClusterNode]), + rpc_call(Node, rabbit_mnesia, join_cluster, [ClusterNode, NodeType]); + +action(change_cluster_node_type, Node, ["ram"], _Opts, Inform) -> + Inform("Turning ~p into a ram node", [Node]), + rpc_call(Node, rabbit_mnesia, change_cluster_node_type, [ram]); +action(change_cluster_node_type, Node, [Type], _Opts, Inform) + when Type =:= "disc" orelse Type =:= "disk" -> + Inform("Turning ~p into a disc node", [Node]), + rpc_call(Node, rabbit_mnesia, change_cluster_node_type, [disc]); + +action(update_cluster_nodes, Node, [ClusterNodeS], _Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + Inform("Updating cluster nodes for ~p from ~p", [Node, ClusterNode]), + rpc_call(Node, rabbit_mnesia, update_cluster_nodes, [ClusterNode]); + +action(forget_cluster_node, Node, [ClusterNodeS], Opts, Inform) -> + ClusterNode = list_to_atom(ClusterNodeS), + RemoveWhenOffline = proplists:get_bool(?OFFLINE_OPT, Opts), + Inform("Removing node ~p from cluster", [ClusterNode]), + rpc_call(Node, rabbit_mnesia, forget_cluster_node, + [ClusterNode, RemoveWhenOffline]); action(wait, Node, [PidFile], _Opts, Inform) -> Inform("Waiting for ~p", [Node]), @@ -414,50 +443,76 @@ action(list_permissions, Node, [], Opts, Inform) -> list_vhost_permissions, [VHost]}), rabbit_auth_backend_internal:vhost_perms_info_keys()); -action(set_parameter, Node, [Component, Key, Value], _Opts, Inform) -> +action(set_parameter, Node, [Component, Key, Value], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Setting runtime parameter ~p for component ~p to ~p", [Key, Component, Value]), rpc_call(Node, rabbit_runtime_parameters, parse_set, - [list_to_binary(Component), list_to_binary(Key), Value]); + [VHostArg, list_to_binary(Component), list_to_binary(Key), Value]); -action(clear_parameter, Node, [Component, Key], _Opts, Inform) -> +action(clear_parameter, Node, [Component, Key], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Clearing runtime parameter ~p for component ~p", [Key, Component]), - rpc_call(Node, rabbit_runtime_parameters, clear, [list_to_binary(Component), + rpc_call(Node, rabbit_runtime_parameters, clear, [VHostArg, + list_to_binary(Component), list_to_binary(Key)]); -action(list_parameters, Node, Args = [], _Opts, Inform) -> +action(list_parameters, Node, [], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), Inform("Listing runtime parameters", []), display_info_list( - rpc_call(Node, rabbit_runtime_parameters, list_formatted, Args), + rpc_call(Node, rabbit_runtime_parameters, list_formatted, [VHostArg]), rabbit_runtime_parameters:info_keys()); +action(set_policy, Node, [Key, Pattern, Defn | Prio], Opts, Inform) + when Prio == [] orelse length(Prio) == 1 -> + Msg = "Setting policy ~p for pattern ~p to ~p", + {InformMsg, Prio1} = case Prio of [] -> {Msg, undefined}; + [P] -> {Msg ++ " with priority ~s", P} + end, + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + Inform(InformMsg, [Key, Pattern, Defn] ++ Prio), + rpc_call(Node, rabbit_policy, parse_set, + [VHostArg, list_to_binary(Key), Pattern, Defn, Prio1]); + +action(clear_policy, Node, [Key], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + Inform("Clearing policy ~p", [Key]), + rpc_call(Node, rabbit_policy, delete, [VHostArg, list_to_binary(Key)]); + +action(list_policies, Node, [], Opts, Inform) -> + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + Inform("Listing policies", []), + display_info_list(rpc_call(Node, rabbit_policy, list_formatted, [VHostArg]), + rabbit_policy:info_keys()); + action(report, Node, _Args, _Opts, Inform) -> - io:format("Reporting server status on ~p~n~n", [erlang:universaltime()]), + Inform("Reporting server status on ~p~n~n", [erlang:universaltime()]), [begin ok = action(Action, N, [], [], Inform), io:nl() end || - N <- unsafe_rpc(Node, rabbit_mnesia, running_clustered_nodes, []), + N <- unsafe_rpc(Node, rabbit_mnesia, cluster_nodes, [running]), Action <- [status, cluster_status, environment]], VHosts = unsafe_rpc(Node, rabbit_vhost, list, []), [print_report(Node, Q) || Q <- ?GLOBAL_QUERIES], [print_report(Node, Q, [V]) || Q <- ?VHOST_QUERIES, V <- VHosts], - io:format("End of server status report~n"), ok; action(eval, Node, [Expr], _Opts, _Inform) -> case erl_scan:string(Expr) of {ok, Scanned, _} -> case erl_parse:parse_exprs(Scanned) of - {ok, Parsed} -> - {value, Value, _} = unsafe_rpc( - Node, erl_eval, exprs, [Parsed, []]), - io:format("~p~n", [Value]), - ok; - {error, E} -> - {error_string, format_parse_error(E)} + {ok, Parsed} -> {value, Value, _} = + unsafe_rpc( + Node, erl_eval, exprs, [Parsed, []]), + io:format("~p~n", [Value]), + ok; + {error, E} -> {error_string, format_parse_error(E)} end; {error, E, _} -> {error_string, format_parse_error(E)} end. +format_parse_error({_Line, Mod, Err}) -> lists:flatten(Mod:format_error(Err)). + %%---------------------------------------------------------------------------- wait_for_application(Node, PidFile, Application, Inform) -> @@ -544,9 +599,6 @@ exit_loop(Port) -> {Port, _} -> exit_loop(Port) end. -format_parse_error({_Line, Mod, Err}) -> - lists:flatten(Mod:format_error(Err)). - %%---------------------------------------------------------------------------- default_if_empty(List, Default) when is_list(List) -> diff --git a/src/rabbit_direct.erl b/src/rabbit_direct.erl index c07ad832f0..689e5d839c 100644 --- a/src/rabbit_direct.erl +++ b/src/rabbit_direct.erl @@ -31,8 +31,9 @@ -spec(force_event_refresh/0 :: () -> 'ok'). -spec(list/0 :: () -> [pid()]). -spec(list_local/0 :: () -> [pid()]). --spec(connect/5 :: (rabbit_types:username(), rabbit_types:vhost(), - rabbit_types:protocol(), pid(), +-spec(connect/5 :: ((rabbit_types:username() | rabbit_types:user() | + {rabbit_types:username(), rabbit_types:password()}), + rabbit_types:vhost(), rabbit_types:protocol(), pid(), rabbit_event:event_props()) -> {'ok', {rabbit_types:user(), rabbit_framing:amqp_table()}}). @@ -40,7 +41,6 @@ (rabbit_channel:channel_number(), pid(), pid(), string(), rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), pid()) -> {'ok', pid()}). - -spec(disconnect/2 :: (pid(), rabbit_event:event_props()) -> 'ok'). -endif. @@ -60,32 +60,40 @@ list_local() -> pg_local:get_members(rabbit_direct). list() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_direct, list_local, []). %%---------------------------------------------------------------------------- +connect(User = #user{}, VHost, Protocol, Pid, Infos) -> + try rabbit_access_control:check_vhost_access(User, VHost) of + ok -> ok = pg_local:join(rabbit_direct, Pid), + rabbit_event:notify(connection_created, Infos), + {ok, {User, rabbit_reader:server_properties(Protocol)}} + catch + exit:#amqp_error{name = access_refused} -> + {error, access_refused} + end; + +connect({Username, Password}, VHost, Protocol, Pid, Infos) -> + connect0(check_user_pass_login, Username, Password, VHost, Protocol, Pid, + Infos); + connect(Username, VHost, Protocol, Pid, Infos) -> + connect0(check_user_login, Username, [], VHost, Protocol, Pid, Infos). + +connect0(FunctionName, U, P, VHost, Protocol, Pid, Infos) -> case rabbit:is_running() of true -> - case rabbit_access_control:check_user_login(Username, []) of - {ok, User} -> - try rabbit_access_control:check_vhost_access(User, VHost) of - ok -> ok = pg_local:join(rabbit_direct, Pid), - rabbit_event:notify(connection_created, Infos), - {ok, {User, - rabbit_reader:server_properties(Protocol)}} - catch - exit:#amqp_error{name = access_refused} -> - {error, access_refused} - end; - {refused, _Msg, _Args} -> - {error, auth_failure} + case rabbit_access_control:FunctionName(U, P) of + {ok, User} -> connect(User, VHost, Protocol, Pid, Infos); + {refused, _M, _A} -> {error, auth_failure} end; false -> {error, broker_not_found_on_node} end. + start_channel(Number, ClientChannelPid, ConnPid, ConnName, Protocol, User, VHost, Capabilities, Collector) -> {ok, _, {ChannelPid, _}} = diff --git a/src/rabbit_disk_monitor.erl b/src/rabbit_disk_monitor.erl index 58375abb45..6330d555fe 100644 --- a/src/rabbit_disk_monitor.erl +++ b/src/rabbit_disk_monitor.erl @@ -137,7 +137,7 @@ dir() -> rabbit_mnesia:dir(). set_disk_limits(State, Limit) -> State1 = State#state { limit = Limit }, rabbit_log:info("Disk free limit set to ~pMB~n", - [trunc(interpret_limit(Limit) / 1048576)]), + [trunc(interpret_limit(Limit) / 1000000)]), internal_update(State1). internal_update(State = #state { limit = Limit, @@ -148,11 +148,11 @@ internal_update(State = #state { limit = Limit, NewAlarmed = CurrentFreeBytes < LimitBytes, case {Alarmed, NewAlarmed} of {false, true} -> - emit_update_info("exceeded", CurrentFreeBytes, LimitBytes), - alarm_handler:set_alarm({{resource_limit, disk, node()}, []}); + emit_update_info("insufficient", CurrentFreeBytes, LimitBytes), + rabbit_alarm:set_alarm({{resource_limit, disk, node()}, []}); {true, false} -> - emit_update_info("below limit", CurrentFreeBytes, LimitBytes), - alarm_handler:clear_alarm({resource_limit, disk, node()}); + emit_update_info("sufficient", CurrentFreeBytes, LimitBytes), + rabbit_alarm:clear_alarm({resource_limit, disk, node()}); _ -> ok end, @@ -187,10 +187,10 @@ interpret_limit({mem_relative, R}) -> interpret_limit(L) -> L. -emit_update_info(State, CurrentFree, Limit) -> +emit_update_info(StateStr, CurrentFree, Limit) -> rabbit_log:info( - "Disk free space limit now ~s. Free bytes:~p Limit:~p~n", - [State, CurrentFree, Limit]). + "Disk free space ~s. Free bytes:~p Limit:~p~n", + [StateStr, CurrentFree, Limit]). start_timer(Timeout) -> {ok, TRef} = timer:send_interval(Timeout, update), diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl index f1672f4e7b..a9af2d8a38 100644 --- a/src/rabbit_error_logger.erl +++ b/src/rabbit_error_logger.erl @@ -81,7 +81,7 @@ publish1(RoutingKey, Format, Data, LogExch) -> %% second resolution, not millisecond. Timestamp = rabbit_misc:now_ms() div 1000, {ok, _RoutingRes, _DeliveredQPids} = - rabbit_basic:publish(LogExch, RoutingKey, false, false, + rabbit_basic:publish(LogExch, RoutingKey, #'P_basic'{content_type = <<"text/plain">>, timestamp = Timestamp}, list_to_binary(io_lib:format(Format, Data))), diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl index 57c571f1ff..a205b23d0b 100644 --- a/src/rabbit_exchange.erl +++ b/src/rabbit_exchange.erl @@ -298,7 +298,10 @@ i(durable, #exchange{durable = Durable}) -> Durable; i(auto_delete, #exchange{auto_delete = AutoDelete}) -> AutoDelete; i(internal, #exchange{internal = Internal}) -> Internal; i(arguments, #exchange{arguments = Arguments}) -> Arguments; -i(policy, X) -> rabbit_policy:name(X); +i(policy, X) -> case rabbit_policy:name(X) of + none -> ''; + Policy -> Policy + end; i(Item, _) -> throw({bad_argument, Item}). info(X = #exchange{}) -> infos(?INFO_KEYS, X). @@ -402,7 +405,12 @@ conditional_delete(X = #exchange{name = XName}) -> end. unconditional_delete(X = #exchange{name = XName}) -> - ok = mnesia:delete({rabbit_durable_exchange, XName}), + %% this 'guarded' delete prevents unnecessary writes to the mnesia + %% disk log + case mnesia:wread({rabbit_durable_exchange, XName}) of + [] -> ok; + [_] -> ok = mnesia:delete({rabbit_durable_exchange, XName}) + end, ok = mnesia:delete({rabbit_exchange, XName}), ok = mnesia:delete({rabbit_exchange_serial, XName}), Bindings = rabbit_binding:remove_for_source(XName), diff --git a/src/rabbit_exchange_decorator.erl b/src/rabbit_exchange_decorator.erl index b40ceda918..0881942717 100644 --- a/src/rabbit_exchange_decorator.erl +++ b/src/rabbit_exchange_decorator.erl @@ -31,7 +31,7 @@ -type(tx() :: 'transaction' | 'none'). -type(serial() :: pos_integer() | tx()). --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% Should Rabbit ensure that all binding events that are %% delivered to an individual exchange can be serialised? (they diff --git a/src/rabbit_exchange_type.erl b/src/rabbit_exchange_type.erl index e6470b721e..c5583ffdee 100644 --- a/src/rabbit_exchange_type.erl +++ b/src/rabbit_exchange_type.erl @@ -21,7 +21,7 @@ -type(tx() :: 'transaction' | 'none'). -type(serial() :: pos_integer() | tx()). --callback description() -> [proplist:property()]. +-callback description() -> [proplists:property()]. %% Should Rabbit ensure that all binding events that are %% delivered to an individual exchange can be serialised? (they @@ -54,13 +54,13 @@ %% called when comparing exchanges for equivalence - should return ok or %% exit with #amqp_error{} --callback assert_args_equivalence (rabbit_types:exchange(), - rabbit_framing:amqp_table()) -> +-callback assert_args_equivalence(rabbit_types:exchange(), + rabbit_framing:amqp_table()) -> 'ok' | rabbit_types:connection_exit(). %% called when the policy attached to this exchange changes. --callback policy_changed ( - serial(), rabbit_types:exchange(), rabbit_types:exchange()) -> 'ok'. +-callback policy_changed(serial(), rabbit_types:exchange(), + rabbit_types:exchange()) -> 'ok'. -else. diff --git a/src/rabbit_file.erl b/src/rabbit_file.erl index a95f8f269d..26f7479612 100644 --- a/src/rabbit_file.erl +++ b/src/rabbit_file.erl @@ -105,9 +105,9 @@ with_fhc_handle(Fun) -> with_fhc_handle(1, Fun). with_fhc_handle(N, Fun) -> - [ ok = file_handle_cache:obtain() || _ <- lists:seq(1, N)], + ok = file_handle_cache:obtain(N), try Fun() - after [ ok = file_handle_cache:release() || _ <- lists:seq(1, N)] + after ok = file_handle_cache:release(N) end. read_term_file(File) -> diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index ba0cb04f71..cedbbdb380 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -144,11 +144,7 @@ gen_secure() -> %% employs base64url encoding, which is safer in more contexts than %% plain base64. string(G, Prefix) -> - Prefix ++ "-" ++ lists:foldl(fun ($\+, Acc) -> [$\- | Acc]; - ($\/, Acc) -> [$\_ | Acc]; - ($\=, Acc) -> Acc; - (Chr, Acc) -> [Chr | Acc] - end, [], base64:encode_to_string(G)). + Prefix ++ "-" ++ rabbit_misc:base64url(G). binary(G, Prefix) -> list_to_binary(string(G, Prefix)). diff --git a/src/rabbit_heartbeat.erl b/src/rabbit_heartbeat.erl index 80b4e768a3..05aad8c903 100644 --- a/src/rabbit_heartbeat.erl +++ b/src/rabbit_heartbeat.erl @@ -59,21 +59,15 @@ start_heartbeat_sender(Sock, TimeoutSec, SendFun) -> %% the 'div 2' is there so that we don't end up waiting for nearly %% 2 * TimeoutSec before sending a heartbeat in the boundary case %% where the last message was sent just after a heartbeat. - heartbeater( - {Sock, TimeoutSec * 1000 div 2, send_oct, 0, - fun () -> - SendFun(), - continue - end}). + heartbeater({Sock, TimeoutSec * 1000 div 2, send_oct, 0, + fun () -> SendFun(), continue end}). start_heartbeat_receiver(Sock, TimeoutSec, ReceiveFun) -> %% we check for incoming data every interval, and time out after %% two checks with no change. As a result we will time out between %% 2 and 3 intervals after the last data has been received. - heartbeater({Sock, TimeoutSec * 1000, recv_oct, 1, fun () -> - ReceiveFun(), - stop - end}). + heartbeater({Sock, TimeoutSec * 1000, recv_oct, 1, + fun () -> ReceiveFun(), stop end}). start_heartbeat_fun(SupPid) -> fun (Sock, SendTimeoutSec, SendFun, ReceiveTimeoutSec, ReceiveFun) -> @@ -88,17 +82,11 @@ start_heartbeat_fun(SupPid) -> {Sender, Receiver} end. -pause_monitor({_Sender, none}) -> - ok; -pause_monitor({_Sender, Receiver}) -> - Receiver ! pause, - ok. +pause_monitor({_Sender, none}) -> ok; +pause_monitor({_Sender, Receiver}) -> Receiver ! pause, ok. -resume_monitor({_Sender, none}) -> - ok; -resume_monitor({_Sender, Receiver}) -> - Receiver ! resume, - ok. +resume_monitor({_Sender, none}) -> ok; +resume_monitor({_Sender, Receiver}) -> Receiver ! resume, ok. %%---------------------------------------------------------------------------- start_heartbeater(0, _SupPid, _Sock, _TimeoutFun, _Name, _Callback) -> @@ -106,8 +94,7 @@ start_heartbeater(0, _SupPid, _Sock, _TimeoutFun, _Name, _Callback) -> start_heartbeater(TimeoutSec, SupPid, Sock, TimeoutFun, Name, Callback) -> supervisor2:start_child( SupPid, {Name, - {rabbit_heartbeat, Callback, - [Sock, TimeoutSec, TimeoutFun]}, + {rabbit_heartbeat, Callback, [Sock, TimeoutSec, TimeoutFun]}, transient, ?MAX_WAIT, worker, [rabbit_heartbeat]}). heartbeater(Params) -> @@ -117,15 +104,11 @@ heartbeater({Sock, TimeoutMillisec, StatName, Threshold, Handler} = Params, {StatVal, SameCount}) -> Recurse = fun (V) -> heartbeater(Params, V) end, receive - pause -> - receive - resume -> - Recurse({0, 0}); - Other -> - exit({unexpected_message, Other}) - end; - Other -> - exit({unexpected_message, Other}) + pause -> receive + resume -> Recurse({0, 0}); + Other -> exit({unexpected_message, Other}) + end; + Other -> exit({unexpected_message, Other}) after TimeoutMillisec -> case rabbit_net:getstat(Sock, [StatName]) of {ok, [{StatName, NewStatVal}]} -> diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl index a6b4eeb05f..8dfa89d319 100644 --- a/src/rabbit_log.erl +++ b/src/rabbit_log.erl @@ -40,18 +40,20 @@ -spec(log/3 :: (category(), level(), string()) -> 'ok'). -spec(log/4 :: (category(), level(), string(), [any()]) -> 'ok'). --spec(info/1 :: (string()) -> 'ok'). --spec(info/2 :: (string(), [any()]) -> 'ok'). + +-spec(info/1 :: (string()) -> 'ok'). +-spec(info/2 :: (string(), [any()]) -> 'ok'). -spec(warning/1 :: (string()) -> 'ok'). -spec(warning/2 :: (string(), [any()]) -> 'ok'). --spec(error/1 :: (string()) -> 'ok'). --spec(error/2 :: (string(), [any()]) -> 'ok'). +-spec(error/1 :: (string()) -> 'ok'). +-spec(error/2 :: (string(), [any()]) -> 'ok'). -endif. %%---------------------------------------------------------------------------- start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + log(Category, Level, Fmt) -> log(Category, Level, Fmt, []). log(Category, Level, Fmt, Args) when is_list(Args) -> diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 3e058793ec..e1a21cf786 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -33,16 +33,14 @@ gm, monitors, death_fun, - length_fun + depth_fun }). --define(ONE_SECOND, 1000). - -ifdef(use_specs). -spec(start_link/4 :: (rabbit_types:amqqueue(), pid() | 'undefined', rabbit_mirror_queue_master:death_fun(), - rabbit_mirror_queue_master:length_fun()) -> + rabbit_mirror_queue_master:depth_fun()) -> rabbit_types:ok_pid_or_error()). -spec(get_gm/1 :: (pid()) -> pid()). -spec(ensure_monitoring/2 :: (pid(), [pid()]) -> 'ok'). @@ -103,19 +101,25 @@ %% channel during a publish, only some of the mirrors may receive that %% publish. As a result of this problem, the messages broadcast over %% the gm contain published content, and thus slaves can operate -%% successfully on messages that they only receive via the gm. The key -%% purpose of also sending messages directly from the channels to the -%% slaves is that without this, in the event of the death of the -%% master, messages could be lost until a suitable slave is promoted. +%% successfully on messages that they only receive via the gm. +%% +%% The key purpose of also sending messages directly from the channels +%% to the slaves is that without this, in the event of the death of +%% the master, messages could be lost until a suitable slave is +%% promoted. However, that is not the only reason. A slave cannot send +%% confirms for a message until it has seen it from the +%% channel. Otherwise, it might send a confirm to a channel for a +%% message that it might *never* receive from that channel. This can +%% happen because new slaves join the gm ring (and thus receive +%% messages from the master) before inserting themselves in the +%% queue's mnesia record (which is what channels look at for routing). +%% As it turns out, channels will simply ignore such bogus confirms, +%% but relying on that would introduce a dangerously tight coupling. %% -%% However, that is not the only reason. For example, if confirms are -%% in use, then there is no guarantee that every slave will see the -%% delivery with the same msg_seq_no. As a result, the slaves have to -%% wait until they've seen both the publish via gm, and the publish -%% via the channel before they have enough information to be able to -%% perform the publish to their own bq, and subsequently issue the -%% confirm, if necessary. Either form of publish can arrive first, and -%% a slave can be upgraded to the master at any point during this +%% Hence the slaves have to wait until they've seen both the publish +%% via gm, and the publish via the channel before they issue the +%% confirm. Either form of publish can arrive first, and a slave can +%% be upgraded to the master at any point during this %% process. Confirms continue to be issued correctly, however. %% %% Because the slave is a full process, it impersonates parts of the @@ -134,25 +138,31 @@ %% gm should be processed as normal, but fetches which are for %% messages the slave has never seen should be ignored. Similarly, %% acks for messages the slave never fetched should be -%% ignored. Eventually, as the master is consumed from, the messages -%% at the head of the queue which were there before the slave joined -%% will disappear, and the slave will become fully synced with the -%% state of the master. The detection of the sync-status of a slave is -%% done entirely based on length: if the slave and the master both -%% agree on the length of the queue after the fetch of the head of the -%% queue (or a 'set_length' results in a slave having to drop some -%% messages from the head of its queue), then the queues must be in -%% sync. The only other possibility is that the slave's queue is -%% shorter, and thus the fetch should be ignored. In case slaves are -%% joined to an empty queue which only goes on to receive publishes, -%% they start by asking the master to broadcast its length. This is -%% enough for slaves to always be able to work out when their head -%% does not differ from the master (and is much simpler and cheaper -%% than getting the master to hang on to the guid of the msg at the -%% head of its queue). When a slave is promoted to a master, it -%% unilaterally broadcasts its length, in order to solve the problem -%% of length requests from new slaves being unanswered by a dead -%% master. +%% ignored. Similarly, we don't republish rejected messages that we +%% haven't seen. Eventually, as the master is consumed from, the +%% messages at the head of the queue which were there before the slave +%% joined will disappear, and the slave will become fully synced with +%% the state of the master. +%% +%% The detection of the sync-status is based on the depth of the BQs, +%% where the depth is defined as the sum of the length of the BQ (as +%% per BQ:len) and the messages pending an acknowledgement. When the +%% depth of the slave is equal to the master's, then the slave is +%% synchronised. We only store the difference between the two for +%% simplicity. Comparing the length is not enough since we need to +%% take into account rejected messages which will make it back into +%% the master queue but can't go back in the slave, since we don't +%% want "holes" in the slave queue. Note that the depth, and the +%% length likewise, must always be shorter on the slave - we assert +%% that in various places. In case slaves are joined to an empty queue +%% which only goes on to receive publishes, they start by asking the +%% master to broadcast its depth. This is enough for slaves to always +%% be able to work out when their head does not differ from the master +%% (and is much simpler and cheaper than getting the master to hang on +%% to the guid of the msg at the head of its queue). When a slave is +%% promoted to a master, it unilaterally broadcasts its depth, in +%% order to solve the problem of depth requests from new slaves being +%% unanswered by a dead master. %% %% Obviously, due to the async nature of communication across gm, the %% slaves can fall behind. This does not matter from a sync pov: if @@ -293,15 +303,15 @@ %% if they have no mirrored content at all. This is not surprising: to %% achieve anything more sophisticated would require the master and %% recovering slave to be able to check to see whether they agree on -%% the last seen state of the queue: checking length alone is not +%% the last seen state of the queue: checking depth alone is not %% sufficient in this case. %% %% For more documentation see the comments in bug 23554. %% %%---------------------------------------------------------------------------- -start_link(Queue, GM, DeathFun, LengthFun) -> - gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, LengthFun], []). +start_link(Queue, GM, DeathFun, DepthFun) -> + gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, DepthFun], []). get_gm(CPid) -> gen_server2:call(CPid, get_gm, infinity). @@ -313,10 +323,12 @@ ensure_monitoring(CPid, Pids) -> %% gen_server %% --------------------------------------------------------------------------- -init([#amqqueue { name = QueueName } = Q, GM, DeathFun, LengthFun]) -> +init([#amqqueue { name = QueueName } = Q, GM, DeathFun, DepthFun]) -> GM1 = case GM of undefined -> - {ok, GM2} = gm:start_link(QueueName, ?MODULE, [self()]), + {ok, GM2} = gm:start_link( + QueueName, ?MODULE, [self()], + fun rabbit_misc:execute_mnesia_transaction/1), receive {joined, GM2, _Members} -> ok end, @@ -325,12 +337,11 @@ init([#amqqueue { name = QueueName } = Q, GM, DeathFun, LengthFun]) -> true = link(GM), GM end, - ensure_gm_heartbeat(), {ok, #state { q = Q, gm = GM1, monitors = pmon:new(), death_fun = DeathFun, - length_fun = LengthFun }, + depth_fun = DepthFun }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -340,7 +351,7 @@ handle_call(get_gm, _From, State = #state { gm = GM }) -> handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName, pid = MPid } }) when node(MPid) =:= node() -> - case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + case rabbit_mirror_queue_misc:remove_from_queue(QueueName, MPid, Deaths) of {ok, MPid, DeadPids} -> rabbit_mirror_queue_misc:report_deaths(MPid, true, QueueName, DeadPids), @@ -349,8 +360,8 @@ handle_cast({gm_deaths, Deaths}, {stop, normal, State} end; -handle_cast(request_length, State = #state { length_fun = LengthFun }) -> - ok = LengthFun(), +handle_cast(request_depth, State = #state { depth_fun = DepthFun }) -> + ok = DepthFun(), noreply(State); handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Mons }) -> @@ -359,11 +370,6 @@ handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Mons }) -> handle_cast({delete_and_terminate, Reason}, State) -> {stop, Reason, State}. -handle_info(send_gm_heartbeat, State = #state { gm = GM }) -> - gm:broadcast(GM, heartbeat), - ensure_gm_heartbeat(), - noreply(State); - handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, State = #state { monitors = Mons, death_fun = DeathFun }) -> @@ -399,9 +405,7 @@ members_changed([_CPid], _Births, []) -> members_changed([CPid], _Births, Deaths) -> ok = gen_server2:cast(CPid, {gm_deaths, Deaths}). -handle_msg([_CPid], _From, heartbeat) -> - ok; -handle_msg([CPid], _From, request_length = Msg) -> +handle_msg([CPid], _From, request_depth = Msg) -> ok = gen_server2:cast(CPid, Msg); handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) -> ok = gen_server2:cast(CPid, Msg); @@ -420,6 +424,3 @@ noreply(State) -> reply(Reply, State) -> {reply, Reply, State, hibernate}. - -ensure_gm_heartbeat() -> - erlang:send_after(?ONE_SECOND, self(), send_gm_heartbeat). diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 750bcd56e2..cce19c907a 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -17,15 +17,17 @@ -module(rabbit_mirror_queue_master). -export([init/3, terminate/2, delete_and_terminate/2, - purge/1, publish/4, publish_delivered/5, fetch/2, ack/2, - requeue/2, len/1, is_empty/1, drain_confirmed/1, dropwhile/3, - set_ram_duration_target/2, ram_duration/1, + purge/1, publish/4, publish_delivered/4, discard/3, fetch/2, ack/2, + requeue/2, len/1, is_empty/1, depth/1, drain_confirmed/1, + dropwhile/3, set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, is_duplicate/2, discard/3, fold/3]). + status/1, invoke/3, is_duplicate/2, fold/3]). -export([start/1, stop/0]). --export([promote_backing_queue_state/6, sender_death_fun/0, length_fun/0]). +-export([promote_backing_queue_state/7, sender_death_fun/0, depth_fun/0]). + +-export([init_with_existing_bq/3, stop_mirroring/1]). -behaviour(rabbit_backing_queue). @@ -44,10 +46,10 @@ -ifdef(use_specs). --export_type([death_fun/0, length_fun/0]). +-export_type([death_fun/0, depth_fun/0]). -type(death_fun() :: fun ((pid()) -> 'ok')). --type(length_fun() :: fun (() -> 'ok')). +-type(depth_fun() :: fun (() -> 'ok')). -type(master_state() :: #state { gm :: pid(), coordinator :: pid(), backing_queue :: atom(), @@ -59,10 +61,14 @@ known_senders :: set() }). --spec(promote_backing_queue_state/6 :: - (pid(), atom(), any(), pid(), dict(), [pid()]) -> master_state()). +-spec(promote_backing_queue_state/7 :: + (pid(), atom(), any(), pid(), [any()], dict(), [pid()]) -> + master_state()). -spec(sender_death_fun/0 :: () -> death_fun()). --spec(length_fun/0 :: () -> length_fun()). +-spec(depth_fun/0 :: () -> depth_fun()). +-spec(init_with_existing_bq/3 :: (rabbit_types:amqqueue(), atom(), any()) -> + master_state()). +-spec(stop_mirroring/1 :: (master_state()) -> {atom(), any()}). -endif. @@ -82,21 +88,27 @@ stop() -> %% Same as start/1. exit({not_valid_for_generic_backing_queue, ?MODULE}). -init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover, - AsyncCallback) -> - {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( - Q, undefined, sender_death_fun(), length_fun()), - GM = rabbit_mirror_queue_coordinator:get_gm(CPid), - MNodes1 = - (case MNodes of - all -> rabbit_mnesia:all_clustered_nodes(); - undefined -> []; - _ -> MNodes - end) -- [node()], - [rabbit_mirror_queue_misc:add_mirror(QName, Node) || Node <- MNodes1], +init(Q = #amqqueue{name = QName}, Recover, AsyncCallback) -> {ok, BQ} = application:get_env(backing_queue_module), BQS = BQ:init(Q, Recover, AsyncCallback), - ok = gm:broadcast(GM, {length, BQ:len(BQS)}), + State = #state{gm = GM} = init_with_existing_bq(Q, BQ, BQS), + {_MNode, SNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q), + rabbit_mirror_queue_misc:add_mirrors(QName, SNodes), + ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), + State. + +init_with_existing_bq(Q = #amqqueue{name = QName}, BQ, BQS) -> + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( + Q, undefined, sender_death_fun(), depth_fun()), + GM = rabbit_mirror_queue_coordinator:get_gm(CPid), + Self = self(), + ok = rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q1 = #amqqueue{gm_pids = GMPids}] + = mnesia:read({rabbit_queue, QName}), + ok = rabbit_amqqueue:store_queue( + Q1#amqqueue{gm_pids = [{GM, Self} | GMPids]}) + end), #state { gm = GM, coordinator = CPid, backing_queue = BQ, @@ -107,8 +119,16 @@ init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover, ack_msg_id = dict:new(), known_senders = sets:new() }. +stop_mirroring(State = #state { coordinator = CPid, + backing_queue = BQ, + backing_queue_state = BQS }) -> + unlink(CPid), + stop_all_slaves(shutdown, State), + {BQ, BQS}. + terminate({shutdown, dropped} = Reason, - State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> %% Backing queue termination - this node has been explicitly %% dropped. Normally, non-durable queues would be tidied up on %% startup, but there's a possibility that we will be added back @@ -124,28 +144,35 @@ terminate(Reason, %% node. Thus just let some other slave take over. State #state { backing_queue_state = BQ:terminate(Reason, BQS) }. -delete_and_terminate(Reason, State = #state { gm = GM, - backing_queue = BQ, +delete_and_terminate(Reason, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - Slaves = [Pid || Pid <- gm:group_members(GM), node(Pid) =/= node()], - MRefs = [erlang:monitor(process, S) || S <- Slaves], - ok = gm:broadcast(GM, {delete_and_terminate, Reason}), - monitor_wait(MRefs), + stop_all_slaves(Reason, State), State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), set_delivered = 0 }. -monitor_wait([]) -> - ok; -monitor_wait([MRef | MRefs]) -> - receive({'DOWN', MRef, process, _Pid, _Info}) -> - ok - end, - monitor_wait(MRefs). +stop_all_slaves(Reason, #state{gm = GM}) -> + Info = gm:info(GM), + Slaves = [Pid || Pid <- proplists:get_value(group_members, Info), + node(Pid) =/= node()], + MRefs = [erlang:monitor(process, S) || S <- Slaves], + ok = gm:broadcast(GM, {delete_and_terminate, Reason}), + [receive {'DOWN', MRef, process, _Pid, _Info} -> ok end || MRef <- MRefs], + %% Normally when we remove a slave another slave or master will + %% notice and update Mnesia. But we just removed them all, and + %% have stopped listening ourselves. So manually clean up. + QName = proplists:get_value(group_name, Info), + rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q] = mnesia:read({rabbit_queue, QName}), + rabbit_mirror_queue_misc:store_updated_slaves( + Q #amqqueue { gm_pids = [], slave_pids = [] }) + end), + ok = gm:forget_group(QName). purge(State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {set_length, 0, false}), + ok = gm:broadcast(GM, {drop, 0, BQ:len(BQS), false}), {Count, BQS1} = BQ:purge(BQS), {Count, State #state { backing_queue_state = BQS1, set_delivered = 0 }}. @@ -156,28 +183,42 @@ publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, backing_queue = BQ, backing_queue_state = BQS }) -> false = dict:is_key(MsgId, SS), %% ASSERTION - ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), + ok = gm:broadcast(GM, {publish, ChPid, MsgProps, Msg}), BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). -publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, +publish_delivered(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, State = #state { gm = GM, seen_status = SS, backing_queue = BQ, backing_queue_state = BQS, ack_msg_id = AM }) -> false = dict:is_key(MsgId, SS), %% ASSERTION - %% Must use confirmed_broadcast here in order to guarantee that - %% all slaves are forced to interpret this publish_delivered at - %% the same point, especially if we die and a slave is promoted. - ok = gm:confirmed_broadcast( - GM, {publish, {true, AckRequired}, ChPid, MsgProps, Msg}), - {AckTag, BQS1} = - BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), + ok = gm:broadcast(GM, {publish_delivered, ChPid, MsgProps, Msg}), + {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, BQS), AM1 = maybe_store_acktag(AckTag, MsgId, AM), - {AckTag, - ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1, - ack_msg_id = AM1 })}. + State1 = State #state { backing_queue_state = BQS1, ack_msg_id = AM1 }, + {AckTag, ensure_monitoring(ChPid, State1)}. + +discard(MsgId, ChPid, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + seen_status = SS }) -> + %% It's a massive error if we get told to discard something that's + %% already been published or published-and-confirmed. To do that + %% would require non FIFO access. Hence we should not find + %% 'published' or 'confirmed' in this dict:find. + case dict:find(MsgId, SS) of + error -> + ok = gm:broadcast(GM, {discard, ChPid, MsgId}), + BQS1 = BQ:discard(MsgId, ChPid, BQS), + ensure_monitoring( + ChPid, State #state { + backing_queue_state = BQS1, + seen_status = dict:erase(MsgId, SS) }); + {ok, discarded} -> + State + end. dropwhile(Pred, AckRequired, State = #state{gm = GM, @@ -185,13 +226,13 @@ dropwhile(Pred, AckRequired, set_delivered = SetDelivered, backing_queue_state = BQS }) -> Len = BQ:len(BQS), - {Msgs, BQS1} = BQ:dropwhile(Pred, AckRequired, BQS), + {Next, Msgs, BQS1} = BQ:dropwhile(Pred, AckRequired, BQS), Len1 = BQ:len(BQS1), - ok = gm:broadcast(GM, {set_length, Len1, AckRequired}), Dropped = Len - Len1, + ok = gm:broadcast(GM, {drop, Len1, Dropped, AckRequired}), SetDelivered1 = lists:max([0, SetDelivered - Dropped]), - {Msgs, State #state { backing_queue_state = BQS1, - set_delivered = SetDelivered1 } }. + {Next, Msgs, State #state { backing_queue_state = BQS1, + set_delivered = SetDelivered1 } }. drain_confirmed(State = #state { backing_queue = BQ, backing_queue_state = BQS, @@ -274,6 +315,9 @@ len(#state { backing_queue = BQ, backing_queue_state = BQS }) -> is_empty(#state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:is_empty(BQS). +depth(#state { backing_queue = BQ, backing_queue_state = BQS }) -> + BQ:depth(BQS). + set_ram_duration_target(Target, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = @@ -348,35 +392,20 @@ is_duplicate(Message = #basic_message { id = MsgId }, {discarded, State} end. -discard(Msg = #basic_message { id = MsgId }, ChPid, - State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - seen_status = SS }) -> - %% It's a massive error if we get told to discard something that's - %% already been published or published-and-confirmed. To do that - %% would require non FIFO access. Hence we should not find - %% 'published' or 'confirmed' in this dict:find. - case dict:find(MsgId, SS) of - error -> - ok = gm:broadcast(GM, {discard, ChPid, Msg}), - State #state { backing_queue_state = BQ:discard(Msg, ChPid, BQS), - seen_status = dict:erase(MsgId, SS) }; - {ok, discarded} -> - State - end. - %% --------------------------------------------------------------------------- %% Other exported functions %% --------------------------------------------------------------------------- -promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) -> - Len = BQ:len(BQS), - ok = gm:broadcast(GM, {length, Len}), +promote_backing_queue_state(CPid, BQ, BQS, GM, AckTags, SeenStatus, KS) -> + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + Len = BQ:len(BQS1), + Depth = BQ:depth(BQS1), + true = Len == Depth, %% ASSERTION: everything must have been requeued + ok = gm:broadcast(GM, {depth, Depth}), #state { gm = GM, coordinator = CPid, backing_queue = BQ, - backing_queue_state = BQS, + backing_queue_state = BQS1, set_delivered = Len, seen_status = SeenStatus, confirmed = [], @@ -395,7 +424,7 @@ sender_death_fun() -> end) end. -length_fun() -> +depth_fun() -> Self = self(), fun () -> rabbit_amqqueue:run_backing_queue( @@ -403,15 +432,13 @@ length_fun() -> fun (?MODULE, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {length, BQ:len(BQS)}), + ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), State end) end. -maybe_store_acktag(undefined, _MsgId, AM) -> - AM; -maybe_store_acktag(AckTag, MsgId, AM) -> - dict:store(AckTag, MsgId, AM). +maybe_store_acktag(undefined, _MsgId, AM) -> AM; +maybe_store_acktag(AckTag, MsgId, AM) -> dict:store(AckTag, MsgId, AM). ensure_monitoring(ChPid, State = #state { coordinator = CPid, known_senders = KS }) -> diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 180677fe55..4a00846e4d 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -15,28 +15,45 @@ %% -module(rabbit_mirror_queue_misc). +-behaviour(rabbit_policy_validator). --export([remove_from_queue/2, on_node_up/0, - drop_mirror/2, drop_mirror/3, add_mirror/2, add_mirror/3, - report_deaths/4]). +-export([remove_from_queue/3, on_node_up/0, add_mirrors/2, add_mirror/2, + report_deaths/4, store_updated_slaves/1, suggested_queue_nodes/1, + is_mirrored/1, update_mirrors/2, validate_policy/1]). + +%% for testing only +-export([suggested_queue_nodes/4]). -include("rabbit.hrl"). +-rabbit_boot_step({?MODULE, + [{description, "HA policy validation"}, + {mfa, {rabbit_registry, register, + [policy_validator, <<"ha-mode">>, ?MODULE]}}, + {mfa, {rabbit_registry, register, + [policy_validator, <<"ha-params">>, ?MODULE]}}, + {requires, rabbit_registry}, + {enables, recovery}]}). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(remove_from_queue/2 :: - (rabbit_amqqueue:name(), [pid()]) +-spec(remove_from_queue/3 :: + (rabbit_amqqueue:name(), pid(), [pid()]) -> {'ok', pid(), [pid()]} | {'error', 'not_found'}). -spec(on_node_up/0 :: () -> 'ok'). --spec(drop_mirror/2 :: - (rabbit_amqqueue:name(), node()) -> rabbit_types:ok_or_error(any())). +-spec(add_mirrors/2 :: (rabbit_amqqueue:name(), [node()]) -> 'ok'). -spec(add_mirror/2 :: - (rabbit_amqqueue:name(), node()) -> rabbit_types:ok_or_error(any())). --spec(add_mirror/3 :: - (rabbit_types:vhost(), binary(), atom()) - -> rabbit_types:ok_or_error(any())). + (rabbit_amqqueue:name(), node()) -> + {'ok', atom()} | rabbit_types:error(any())). +-spec(store_updated_slaves/1 :: (rabbit_types:amqqueue()) -> + rabbit_types:amqqueue()). +-spec(suggested_queue_nodes/1 :: (rabbit_types:amqqueue()) -> + {node(), [node()]}). +-spec(is_mirrored/1 :: (rabbit_types:amqqueue()) -> boolean()). +-spec(update_mirrors/2 :: + (rabbit_types:amqqueue(), rabbit_types:amqqueue()) -> 'ok'). -endif. @@ -50,29 +67,35 @@ %% slave (now master) receives messages it's not ready for (for %% example, new consumers). %% Returns {ok, NewMPid, DeadPids} -remove_from_queue(QueueName, DeadPids) -> - DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], +remove_from_queue(QueueName, Self, DeadGMPids) -> rabbit_misc:execute_mnesia_transaction( fun () -> %% Someone else could have deleted the queue before we %% get here. case mnesia:read({rabbit_queue, QueueName}) of [] -> {error, not_found}; - [Q = #amqqueue { pid = QPid, - slave_pids = SPids }] -> - [QPid1 | SPids1] = Alive = - [Pid || Pid <- [QPid | SPids], - not lists:member(node(Pid), DeadNodes)], + [Q = #amqqueue { pid = QPid, + slave_pids = SPids, + gm_pids = GMPids }] -> + {Dead, GMPids1} = lists:partition( + fun ({GM, _}) -> + lists:member(GM, DeadGMPids) + end, GMPids), + DeadPids = [Pid || {_GM, Pid} <- Dead], + Alive = [QPid | SPids] -- DeadPids, + {QPid1, SPids1} = promote_slave(Alive), case {{QPid, SPids}, {QPid1, SPids1}} of {Same, Same} -> + GMPids = GMPids1, %% ASSERTION {ok, QPid1, []}; - _ when QPid =:= QPid1 orelse node(QPid1) =:= node() -> + _ when QPid =:= QPid1 orelse QPid1 =:= Self -> %% Either master hasn't changed, so %% we're ok to update mnesia; or we have %% become the master. - Q1 = Q #amqqueue { pid = QPid1, - slave_pids = SPids1 }, - ok = rabbit_amqqueue:store_queue(Q1), + store_updated_slaves( + Q #amqqueue { pid = QPid1, + slave_pids = SPids1, + gm_pids = GMPids1 }), {ok, QPid1, [QPid | SPids] -- Alive}; _ -> %% Master has changed, and we're not it, @@ -85,32 +108,41 @@ remove_from_queue(QueueName, DeadPids) -> end). on_node_up() -> - Qs = + QNames = rabbit_misc:execute_mnesia_transaction( fun () -> mnesia:foldl( - fun (#amqqueue { mirror_nodes = undefined }, QsN) -> - QsN; - (#amqqueue { name = QName, - mirror_nodes = all }, QsN) -> - [QName | QsN]; - (#amqqueue { name = QName, - mirror_nodes = MNodes }, QsN) -> - case lists:member(node(), MNodes) of - true -> [QName | QsN]; - false -> QsN + fun (Q = #amqqueue{name = QName, + pid = Pid, + slave_pids = SPids}, QNames0) -> + %% We don't want to pass in the whole + %% cluster - we don't want a situation + %% where starting one node causes us to + %% decide to start a mirror on another + PossibleNodes0 = [node(P) || P <- [Pid | SPids]], + PossibleNodes = + case lists:member(node(), PossibleNodes0) of + true -> PossibleNodes0; + false -> [node() | PossibleNodes0] + end, + {_MNode, SNodes} = suggested_queue_nodes( + Q, PossibleNodes), + case lists:member(node(), SNodes) of + true -> [QName | QNames0]; + false -> QNames0 end end, [], rabbit_queue) end), - [add_mirror(Q, node()) || Q <- Qs], + [{ok, _} = add_mirror(QName, node()) || QName <- QNames], ok. -drop_mirror(VHostPath, QueueName, MirrorNode) -> - drop_mirror(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). +drop_mirrors(QName, Nodes) -> + [ok = drop_mirror(QName, Node) || Node <- Nodes], + ok. -drop_mirror(Queue, MirrorNode) -> +drop_mirror(QName, MirrorNode) -> if_mirrored_queue( - Queue, + QName, fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids }) -> case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of [] -> @@ -126,38 +158,61 @@ drop_mirror(Queue, MirrorNode) -> end end). -add_mirror(VHostPath, QueueName, MirrorNode) -> - add_mirror(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). +add_mirrors(QName, Nodes) -> + [{ok, _} = add_mirror(QName, Node) || Node <- Nodes], + ok. -add_mirror(Queue, MirrorNode) -> +add_mirror(QName, MirrorNode) -> if_mirrored_queue( - Queue, + QName, fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids } = Q) -> case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of - [] -> case rabbit_mirror_queue_slave_sup:start_child( - MirrorNode, [Q]) of - {ok, undefined} -> %% Already running - ok; - {ok, SPid} -> - rabbit_log:info( - "Adding mirror of ~s on node ~p: ~p~n", - [rabbit_misc:rs(Name), MirrorNode, SPid]), - ok; - Other -> - Other - end; - [_] -> {error, {queue_already_mirrored_on_node, MirrorNode}} + [] -> + start_child(Name, MirrorNode, Q); + [SPid] -> + case rabbit_misc:is_process_alive(SPid) of + true -> {ok, already_mirrored}; + false -> start_child(Name, MirrorNode, Q) + end end end). -if_mirrored_queue(Queue, Fun) -> - rabbit_amqqueue:with( - Queue, fun (#amqqueue { arguments = Args } = Q) -> - case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of - undefined -> ok; - _ -> Fun(Q) - end - end). +start_child(Name, MirrorNode, Q) -> + case rabbit_misc:with_exit_handler( + rabbit_misc:const({ok, down}), + fun () -> + rabbit_mirror_queue_slave_sup:start_child(MirrorNode, [Q]) + end) of + {ok, undefined} -> + %% this means the mirror process was + %% already running on the given node. + {ok, already_mirrored}; + {ok, down} -> + %% Node went down between us deciding to start a mirror + %% and actually starting it. Which is fine. + {ok, node_down}; + {ok, SPid} -> + rabbit_log:info("Adding mirror of ~s on node ~p: ~p~n", + [rabbit_misc:rs(Name), MirrorNode, SPid]), + {ok, started}; + {error, {{stale_master_pid, StalePid}, _}} -> + rabbit_log:warning("Detected stale HA master while adding " + "mirror of ~s on node ~p: ~p~n", + [rabbit_misc:rs(Name), MirrorNode, StalePid]), + {ok, stale_master}; + {error, {{duplicate_live_master, _}=Err, _}} -> + Err; + Other -> + Other + end. + +if_mirrored_queue(QName, Fun) -> + rabbit_amqqueue:with(QName, fun (Q) -> + case is_mirrored(Q) of + false -> ok; + true -> Fun(Q) + end + end). report_deaths(_MirrorPid, _IsMaster, _QueueName, []) -> ok; @@ -172,3 +227,146 @@ report_deaths(MirrorPid, IsMaster, QueueName, DeadPids) -> end, rabbit_misc:pid_to_string(MirrorPid), [[rabbit_misc:pid_to_string(P), $ ] || P <- DeadPids]]). + +store_updated_slaves(Q = #amqqueue{slave_pids = SPids, + sync_slave_pids = SSPids}) -> + SSPids1 = [SSPid || SSPid <- SSPids, lists:member(SSPid, SPids)], + Q1 = Q#amqqueue{sync_slave_pids = SSPids1}, + ok = rabbit_amqqueue:store_queue(Q1), + %% Wake it up so that we emit a stats event + rabbit_amqqueue:wake_up(Q1), + Q1. + +%%---------------------------------------------------------------------------- + +promote_slave([SPid | SPids]) -> + %% The slave pids are maintained in descending order of age, so + %% the one to promote is the oldest. + {SPid, SPids}. + +suggested_queue_nodes(Q) -> + suggested_queue_nodes(Q, rabbit_mnesia:cluster_nodes(running)). + +%% This variant exists so we can pull a call to +%% rabbit_mnesia:cluster_nodes(running) out of a loop or +%% transaction or both. +suggested_queue_nodes(Q, PossibleNodes) -> + {MNode0, SNodes} = actual_queue_nodes(Q), + MNode = case MNode0 of + none -> node(); + _ -> MNode0 + end, + suggested_queue_nodes(policy(<<"ha-mode">>, Q), policy(<<"ha-params">>, Q), + {MNode, SNodes}, PossibleNodes). + +policy(Policy, Q) -> + case rabbit_policy:get(Policy, Q) of + {ok, P} -> P; + _ -> none + end. + +suggested_queue_nodes(<<"all">>, _Params, {MNode, _SNodes}, Possible) -> + {MNode, Possible -- [MNode]}; +suggested_queue_nodes(<<"nodes">>, Nodes0, {MNode, _SNodes}, Possible) -> + Nodes = [list_to_atom(binary_to_list(Node)) || Node <- Nodes0], + Unavailable = Nodes -- Possible, + Available = Nodes -- Unavailable, + case Available of + [] -> %% We have never heard of anything? Not much we can do but + %% keep the master alive. + {MNode, []}; + _ -> case lists:member(MNode, Available) of + true -> {MNode, Available -- [MNode]}; + false -> promote_slave(Available) + end + end; +%% When we need to add nodes, we randomise our candidate list as a +%% crude form of load-balancing. TODO it would also be nice to +%% randomise the list of ones to remove when we have too many - but +%% that would fail to take account of synchronisation... +suggested_queue_nodes(<<"exactly">>, Count, {MNode, SNodes}, Possible) -> + SCount = Count - 1, + {MNode, case SCount > length(SNodes) of + true -> Cand = shuffle((Possible -- [MNode]) -- SNodes), + SNodes ++ lists:sublist(Cand, SCount - length(SNodes)); + false -> lists:sublist(SNodes, SCount) + end}; +suggested_queue_nodes(_, _, {MNode, _}, _) -> + {MNode, []}. + +shuffle(L) -> + {A1,A2,A3} = now(), + random:seed(A1, A2, A3), + {_, L1} = lists:unzip(lists:keysort(1, [{random:uniform(), N} || N <- L])), + L1. + +actual_queue_nodes(#amqqueue{pid = MPid, slave_pids = SPids}) -> + {case MPid of + none -> none; + _ -> node(MPid) + end, [node(Pid) || Pid <- SPids]}. + +is_mirrored(Q) -> + case policy(<<"ha-mode">>, Q) of + <<"all">> -> true; + <<"nodes">> -> true; + <<"exactly">> -> true; + _ -> false + end. + + +%% [1] - rabbit_amqqueue:start_mirroring/1 will turn unmirrored to +%% master and start any needed slaves. However, if node(QPid) is not +%% in the nodes for the policy, it won't switch it. So this is for the +%% case where we kill the existing queue and restart elsewhere. TODO: +%% is this TRTTD? All alternatives seem ugly. +update_mirrors(OldQ = #amqqueue{pid = QPid}, + NewQ = #amqqueue{pid = QPid}) -> + case {is_mirrored(OldQ), is_mirrored(NewQ)} of + {false, false} -> ok; + {true, false} -> rabbit_amqqueue:stop_mirroring(QPid); + {false, true} -> rabbit_amqqueue:start_mirroring(QPid), + update_mirrors0(OldQ, NewQ); %% [1] + {true, true} -> update_mirrors0(OldQ, NewQ) + end. + +update_mirrors0(OldQ = #amqqueue{name = QName}, + NewQ = #amqqueue{name = QName}) -> + All = fun ({A,B}) -> [A|B] end, + OldNodes = All(actual_queue_nodes(OldQ)), + NewNodes = All(suggested_queue_nodes(NewQ)), + add_mirrors(QName, NewNodes -- OldNodes), + drop_mirrors(QName, OldNodes -- NewNodes), + ok. + +%%---------------------------------------------------------------------------- + +validate_policy(KeyList) -> + validate_policy( + proplists:get_value(<<"ha-mode">>, KeyList), + proplists:get_value(<<"ha-params">>, KeyList, none)). + +validate_policy(<<"all">>, none) -> + ok; +validate_policy(<<"all">>, _Params) -> + {error, "ha-mode=\"all\" does not take parameters", []}; + +validate_policy(<<"nodes">>, []) -> + {error, "ha-mode=\"nodes\" list must be non-empty", []}; +validate_policy(<<"nodes">>, Nodes) when is_list(Nodes) -> + case [I || I <- Nodes, not is_binary(I)] of + [] -> ok; + Invalid -> {error, "ha-mode=\"nodes\" takes a list of strings, " + "~p was not a string", [Invalid]} + end; +validate_policy(<<"nodes">>, Params) -> + {error, "ha-mode=\"nodes\" takes a list, ~p given", [Params]}; + +validate_policy(<<"exactly">>, N) when is_integer(N) andalso N > 0 -> + ok; +validate_policy(<<"exactly">>, Params) -> + {error, "ha-mode=\"exactly\" takes an integer, ~p given", [Params]}; + +validate_policy(Mode, _Params) -> + {error, "~p is not a valid ha-mode value", [Mode]}. + diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 03fafc3e5a..1ba1420f42 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -19,17 +19,8 @@ %% For general documentation of HA design, see %% rabbit_mirror_queue_coordinator %% -%% We join the GM group before we add ourselves to the amqqueue -%% record. As a result: -%% 1. We can receive msgs from GM that correspond to messages we will -%% never receive from publishers. -%% 2. When we receive a message from publishers, we must receive a -%% message from the GM group for it. -%% 3. However, that instruction from the GM group can arrive either -%% before or after the actual message. We need to be able to -%% distinguish between GM instructions arriving early, and case (1) -%% above. -%% +%% We receive messages from GM and from publishers, and the gm +%% messages can arrive either before or after the 'actual' message. %% All instructions from the GM group must be processed in the order %% in which they're received. @@ -73,63 +64,59 @@ -record(state, { q, gm, - master_pid, backing_queue, backing_queue_state, sync_timer_ref, rate_timer_ref, - sender_queues, %% :: Pid -> {Q {Msg, Bool}, Set MsgId} + sender_queues, %% :: Pid -> {Q Msg, Set MsgId} msg_id_ack, %% :: MsgId -> AckTag - ack_num, msg_id_status, known_senders, - synchronised + %% Master depth - local depth + depth_delta }). -start_link(Q) -> - gen_server2:start_link(?MODULE, Q, []). +start_link(Q) -> gen_server2:start_link(?MODULE, Q, []). set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). -info(QPid) -> - gen_server2:call(QPid, info, infinity). - -init(#amqqueue { name = QueueName } = Q) -> +info(QPid) -> gen_server2:call(QPid, info, infinity). + +init(Q = #amqqueue { name = QName }) -> + %% We join the GM group before we add ourselves to the amqqueue + %% record. As a result: + %% 1. We can receive msgs from GM that correspond to messages we will + %% never receive from publishers. + %% 2. When we receive a message from publishers, we must receive a + %% message from the GM group for it. + %% 3. However, that instruction from the GM group can arrive either + %% before or after the actual message. We need to be able to + %% distinguish between GM instructions arriving early, and case (1) + %% above. + %% + process_flag(trap_exit, true), %% amqqueue_process traps exits too. + {ok, GM} = gm:start_link(QName, ?MODULE, [self()], + fun rabbit_misc:execute_mnesia_transaction/1), + receive {joined, GM} -> ok end, Self = self(), Node = node(), case rabbit_misc:execute_mnesia_transaction( - fun () -> - [Q1 = #amqqueue { pid = QPid, slave_pids = MPids }] = - mnesia:read({rabbit_queue, QueueName}), - case [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node] of - [] -> MPids1 = MPids ++ [Self], - ok = rabbit_amqqueue:store_queue( - Q1 #amqqueue { slave_pids = MPids1 }), - {new, QPid}; - [SPid] -> true = rabbit_misc:is_process_alive(SPid), - existing - end - end) of - {new, MPid} -> - process_flag(trap_exit, true), %% amqqueue_process traps exits too. - {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]), - receive {joined, GM} -> - ok - end, - erlang:monitor(process, MPid), + fun() -> init_it(Self, GM, Node, QName) end) of + {new, QPid} -> + erlang:monitor(process, QPid), ok = file_handle_cache:register_callback( rabbit_amqqueue, set_maximum_since_use, [Self]), ok = rabbit_memory_monitor:register( Self, {rabbit_amqqueue, set_ram_duration_target, [Self]}), {ok, BQ} = application:get_env(backing_queue_module), - BQS = bq_init(BQ, Q, false), - State = #state { q = Q, + Q1 = Q #amqqueue { pid = QPid }, + BQS = bq_init(BQ, Q1, false), + State = #state { q = Q1, gm = GM, - master_pid = MPid, backing_queue = BQ, backing_queue_state = BQS, rate_timer_ref = undefined, @@ -137,70 +124,83 @@ init(#amqqueue { name = QueueName } = Q) -> sender_queues = dict:new(), msg_id_ack = dict:new(), - ack_num = 0, msg_id_status = dict:new(), known_senders = pmon:new(), - synchronised = false + depth_delta = undefined }, rabbit_event:notify(queue_slave_created, infos(?CREATION_EVENT_KEYS, State)), - ok = gm:broadcast(GM, request_length), + ok = gm:broadcast(GM, request_depth), {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}; + {stale, StalePid} -> + {stop, {stale_master_pid, StalePid}}; + duplicate_live_master -> + {stop, {duplicate_live_master, Node}}; existing -> + gm:leave(GM), ignore end. -handle_call({deliver, Delivery = #delivery { immediate = true }}, - From, State) -> - %% It is safe to reply 'false' here even if a) we've not seen the - %% msg via gm, or b) the master dies before we receive the msg via - %% gm. In the case of (a), we will eventually receive the msg via - %% gm, and it's only the master's result to the channel that is - %% important. In the case of (b), if the master does die and we do - %% get promoted then at that point we have no consumers, thus - %% 'false' is precisely the correct answer. However, we must be - %% careful to _not_ enqueue the message in this case. - - %% Note this is distinct from the case where we receive the msg - %% via gm first, then we're promoted to master, and only then do - %% we receive the msg from the channel. - gen_server2:reply(From, false), %% master may deliver it, not us - noreply(maybe_enqueue_message(Delivery, false, State)); - -handle_call({deliver, Delivery = #delivery { mandatory = true }}, - From, State) -> - gen_server2:reply(From, true), %% amqqueue throws away the result anyway - noreply(maybe_enqueue_message(Delivery, true, State)); +init_it(Self, GM, Node, QName) -> + [Q = #amqqueue { pid = QPid, slave_pids = SPids, gm_pids = GMPids }] = + mnesia:read({rabbit_queue, QName}), + case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of + [] -> add_slave(Q, Self, GM), + {new, QPid}; + [QPid] -> case rabbit_misc:is_process_alive(QPid) of + true -> duplicate_live_master; + false -> {stale, QPid} + end; + [SPid] -> case rabbit_misc:is_process_alive(SPid) of + true -> existing; + false -> Q1 = Q#amqqueue { + slave_pids = SPids -- [SPid], + gm_pids = [T || T = {_, S} <- GMPids, + S =/= SPid] }, + add_slave(Q1, Self, GM), + {new, QPid} + end + end. + +%% Add to the end, so they are in descending order of age, see +%% rabbit_mirror_queue_misc:promote_slave/1 +add_slave(Q = #amqqueue { slave_pids = SPids, gm_pids = GMPids }, New, GM) -> + rabbit_mirror_queue_misc:store_updated_slaves( + Q#amqqueue{slave_pids = SPids ++ [New], gm_pids = [{GM, New} | GMPids]}). + +handle_call({deliver, Delivery, true}, From, State) -> + %% Synchronous, "mandatory" deliver mode. + gen_server2:reply(From, ok), + noreply(maybe_enqueue_message(Delivery, State)); handle_call({gm_deaths, Deaths}, From, - State = #state { q = #amqqueue { name = QueueName }, - gm = GM, - master_pid = MPid }) -> - %% The GM has told us about deaths, which means we're not going to - %% receive any more messages from GM - case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + State = #state { q = Q = #amqqueue { name = QName, pid = MPid }}) -> + Self = self(), + case rabbit_mirror_queue_misc:remove_from_queue(QName, Self, Deaths) of {error, not_found} -> gen_server2:reply(From, ok), {stop, normal, State}; {ok, Pid, DeadPids} -> - rabbit_mirror_queue_misc:report_deaths(self(), false, QueueName, + rabbit_mirror_queue_misc:report_deaths(Self, false, QName, DeadPids), - if node(Pid) =:= node(MPid) -> + case Pid of + MPid -> %% master hasn't changed - reply(ok, State); - node(Pid) =:= node() -> + gen_server2:reply(From, ok), + noreply(State); + Self -> %% we've become master - promote_me(From, State); - true -> - %% master has changed to not us. + QueueState = promote_me(From, State), + {become, rabbit_amqqueue_process, QueueState, hibernate}; + _ -> + %% master has changed to not us gen_server2:reply(From, ok), erlang:monitor(process, Pid), - ok = gm:broadcast(GM, heartbeat), - noreply(State #state { master_pid = Pid }) + noreply(State #state { q = Q #amqqueue { pid = Pid } }) end end; @@ -213,13 +213,14 @@ handle_cast({run_backing_queue, Mod, Fun}, State) -> handle_cast({gm, Instruction}, State) -> handle_process_result(process_instruction(Instruction, State)); -handle_cast({deliver, Delivery = #delivery{sender = Sender}, Flow}, State) -> - %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. +handle_cast({deliver, Delivery = #delivery{sender = Sender}, true, Flow}, + State) -> + %% Asynchronous, non-"mandatory", deliver mode. case Flow of flow -> credit_flow:ack(Sender); noflow -> ok end, - noreply(maybe_enqueue_message(Delivery, true, State)); + noreply(maybe_enqueue_message(Delivery, State)); handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -249,8 +250,8 @@ handle_info(timeout, State) -> noreply(backing_queue_timeout(State)); handle_info({'DOWN', _MonitorRef, process, MPid, _Reason}, - State = #state { gm = GM, master_pid = MPid }) -> - ok = gm:broadcast(GM, {process_death, MPid}), + State = #state { gm = GM, q = #amqqueue { pid = MPid } }) -> + ok = gm:broadcast(GM, process_death), noreply(State); handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) -> @@ -286,7 +287,7 @@ terminate(Reason, #state { q = Q, rate_timer_ref = RateTRef }) -> ok = gm:leave(GM), QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( - Q, BQ, BQS, RateTRef, [], [], pmon:new(), dict:new()), + Q, BQ, BQS, RateTRef, [], pmon:new(), dict:new()), rabbit_amqqueue_process:terminate(Reason, QueueState); terminate([_SPid], _Reason) -> %% gm case @@ -332,25 +333,26 @@ prioritise_info(Msg, _State) -> %% GM %% --------------------------------------------------------------------------- -joined([SPid], _Members) -> - SPid ! {joined, self()}, - ok. +joined([SPid], _Members) -> SPid ! {joined, self()}, ok. -members_changed([_SPid], _Births, []) -> - ok; -members_changed([SPid], _Births, Deaths) -> - inform_deaths(SPid, Deaths). +members_changed([_SPid], _Births, []) -> ok; +members_changed([ SPid], _Births, Deaths) -> inform_deaths(SPid, Deaths). -handle_msg([_SPid], _From, heartbeat) -> - ok; -handle_msg([_SPid], _From, request_length) -> +handle_msg([_SPid], _From, request_depth) -> %% This is only of value to the master ok; handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) -> %% This is only of value to the master ok; -handle_msg([SPid], _From, {process_death, Pid}) -> - inform_deaths(SPid, [Pid]); +handle_msg([_SPid], _From, process_death) -> + %% Since GM is by nature lazy we need to make sure there is some + %% traffic when a master dies, to make sure we get informed of the + %% death. That's all process_death does, create some traffic. We + %% must not take any notice of the master death here since it + %% comes without ordering guarantees - there could still be + %% messages from the master we have yet to receive. When we get + %% members_changed, then there will be no more messages. + ok; handle_msg([CPid], _From, {delete_and_terminate, _Reason} = Msg) -> ok = gen_server2:cast(CPid, {gm, Msg}), {stop, {shutdown, ring_shutdown}}; @@ -371,8 +373,8 @@ infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. i(pid, _State) -> self(); i(name, #state { q = #amqqueue { name = Name } }) -> Name; -i(master_pid, #state { master_pid = MPid }) -> MPid; -i(is_synchronised, #state { synchronised = Synchronised }) -> Synchronised; +i(master_pid, #state { q = #amqqueue { pid = MPid } }) -> MPid; +i(is_synchronised, #state { depth_delta = DD }) -> DD =:= 0; i(Item, _State) -> throw({bad_argument, Item}). bq_init(BQ, Q, Recover) -> @@ -390,14 +392,20 @@ run_backing_queue(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. -needs_confirming(#delivery{ msg_seq_no = undefined }, _State) -> - never; -needs_confirming(#delivery { message = #basic_message { - is_persistent = true } }, - #state { q = #amqqueue { durable = true } }) -> - eventually; -needs_confirming(_Delivery, _State) -> - immediately. +send_or_record_confirm(_, #delivery{ msg_seq_no = undefined }, MS, _State) -> + MS; +send_or_record_confirm(published, #delivery { sender = ChPid, + msg_seq_no = MsgSeqNo, + message = #basic_message { + id = MsgId, + is_persistent = true } }, + MS, #state { q = #amqqueue { durable = true } }) -> + dict:store(MsgId, {published, ChPid, MsgSeqNo} , MS); +send_or_record_confirm(_Status, #delivery { sender = ChPid, + msg_seq_no = MsgSeqNo }, + MS, _State) -> + ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), + MS. confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> {CMs, MS1} = @@ -409,16 +417,16 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> %% If it needed confirming, it'll have %% already been done. Acc; - {ok, {published, ChPid}} -> + {ok, published} -> %% Still not seen it from the channel, just %% record that it's been confirmed. - {CMsN, dict:store(MsgId, {confirmed, ChPid}, MSN)}; + {CMsN, dict:store(MsgId, confirmed, MSN)}; {ok, {published, ChPid, MsgSeqNo}} -> %% Seen from both GM and Channel. Can now %% confirm. {rabbit_misc:gb_trees_cons(ChPid, MsgSeqNo, CMsN), dict:erase(MsgId, MSN)}; - {ok, {confirmed, _ChPid}} -> + {ok, confirmed} -> %% It's already been confirmed. This is %% probably it's been both sync'd to disk %% and then delivered and ack'd before we've @@ -442,17 +450,14 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, msg_id_ack = MA, msg_id_status = MS, known_senders = KS }) -> - rabbit_event:notify(queue_slave_promoted, [{pid, self()}, - {name, QName}]), rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n", [rabbit_misc:rs(QName), rabbit_misc:pid_to_string(self())]), Q1 = Q #amqqueue { pid = self() }, {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( Q1, GM, rabbit_mirror_queue_master:sender_death_fun(), - rabbit_mirror_queue_master:length_fun()), + rabbit_mirror_queue_master:depth_fun()), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), - ok = gm:confirmed_broadcast(GM, heartbeat), %% Everything that we're monitoring, we need to ensure our new %% coordinator is monitoring. @@ -460,8 +465,7 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, ok = rabbit_mirror_queue_coordinator:ensure_monitoring(CPid, MPids), %% We find all the messages that we've received from channels but - %% not from gm, and if they're due to be enqueued on promotion - %% then we pass them to the + %% not from gm, and pass them to the %% queue_process:init_with_backing_queue_state to be enqueued. %% %% We also have to requeue messages which are pending acks: the @@ -489,18 +493,18 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, %% %% MS contains the following three entry types: %% - %% a) {published, ChPid}: + %% a) published: %% published via gm only; pending arrival of publication from %% channel, maybe pending confirm. %% %% b) {published, ChPid, MsgSeqNo}: %% published via gm and channel; pending confirm. %% - %% c) {confirmed, ChPid}: + %% c) confirmed: %% published via gm only, and confirmed; pending publication %% from channel. %% - %% d) discarded + %% d) discarded: %% seen via gm only as discarded. Pending publication from %% channel %% @@ -517,34 +521,24 @@ promote_me(From, #state { q = Q = #amqqueue { name = QName }, %% those messages are then requeued. However, as discussed above, %% this does not affect MS, nor which bits go through to SS in %% Master, or MTC in queue_process. - %% - %% Everything that's in MA gets requeued. Consequently the new - %% master should start with a fresh AM as there are no messages - %% pending acks. - MSList = dict:to_list(MS), - SS = dict:from_list( - [E || E = {_MsgId, discarded} <- MSList] ++ - [{MsgId, Status} - || {MsgId, {Status, _ChPid}} <- MSList, - Status =:= published orelse Status =:= confirmed]), + St = [published, confirmed, discarded], + SS = dict:filter(fun (_MsgId, Status) -> lists:member(Status, St) end, MS), + AckTags = [AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, SS, MPids), - - MTC = lists:foldl(fun ({MsgId, {published, ChPid, MsgSeqNo}}, MTC0) -> - gb_trees:insert(MsgId, {ChPid, MsgSeqNo}, MTC0); - (_, MTC0) -> - MTC0 - end, gb_trees:empty(), MSList), - NumAckTags = [NumAckTag || {_MsgId, NumAckTag} <- dict:to_list(MA)], - AckTags = [AckTag || {_Num, AckTag} <- lists:sort(NumAckTags)], + CPid, BQ, BQS, GM, AckTags, SS, MPids), + + MTC = dict:fold(fun (MsgId, {published, ChPid, MsgSeqNo}, MTC0) -> + gb_trees:insert(MsgId, {ChPid, MsgSeqNo}, MTC0); + (_Msgid, _Status, MTC0) -> + MTC0 + end, gb_trees:empty(), MS), Deliveries = [Delivery || {_ChPid, {PubQ, _PendCh}} <- dict:to_list(SQ), - {Delivery, true} <- queue:to_list(PubQ)], - QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( - Q1, rabbit_mirror_queue_master, MasterState, RateTRef, - AckTags, Deliveries, KS, MTC), - {become, rabbit_amqqueue_process, QueueState, hibernate}. + Delivery <- queue:to_list(PubQ)], + rabbit_amqqueue_process:init_with_backing_queue_state( + Q1, rabbit_mirror_queue_master, MasterState, RateTRef, Deliveries, KS, + MTC). noreply(State) -> {NewState, Timeout} = next_state(State), @@ -560,9 +554,9 @@ next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) -> confirm_messages(MsgIds, State #state { backing_queue_state = BQS1 })), case BQ:needs_timeout(BQS1) of - false -> {stop_sync_timer(State1), hibernate}; - idle -> {stop_sync_timer(State1), 0 }; - timed -> {ensure_sync_timer(State1), 0 } + false -> {stop_sync_timer(State1), hibernate }; + idle -> {stop_sync_timer(State1), ?SYNC_INTERVAL}; + timed -> {ensure_sync_timer(State1), 0 } end. backing_queue_timeout(State = #state { backing_queue = BQ }) -> @@ -638,49 +632,22 @@ confirm_sender_death(Pid) -> ok. maybe_enqueue_message( - Delivery = #delivery { message = #basic_message { id = MsgId }, - msg_seq_no = MsgSeqNo, - sender = ChPid }, - EnqueueOnPromotion, + Delivery = #delivery { message = #basic_message { id = MsgId }, + sender = ChPid }, State = #state { sender_queues = SQ, msg_id_status = MS }) -> State1 = ensure_monitoring(ChPid, State), %% We will never see {published, ChPid, MsgSeqNo} here. case dict:find(MsgId, MS) of error -> {MQ, PendingCh} = get_sender_queue(ChPid, SQ), - MQ1 = queue:in({Delivery, EnqueueOnPromotion}, MQ), + MQ1 = queue:in(Delivery, MQ), SQ1 = dict:store(ChPid, {MQ1, PendingCh}, SQ), State1 #state { sender_queues = SQ1 }; - {ok, {confirmed, ChPid}} -> - %% BQ has confirmed it but we didn't know what the - %% msg_seq_no was at the time. We do now! - ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), - SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { sender_queues = SQ1, - msg_id_status = dict:erase(MsgId, MS) }; - {ok, {published, ChPid}} -> - %% It was published to the BQ and we didn't know the - %% msg_seq_no so couldn't confirm it at the time. - case needs_confirming(Delivery, State1) of - never -> - SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { msg_id_status = dict:erase(MsgId, MS), - sender_queues = SQ1 }; - eventually -> - State1 #state { - msg_id_status = - dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS) }; - immediately -> - ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), - SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { msg_id_status = dict:erase(MsgId, MS), - sender_queues = SQ1 } - end; - {ok, discarded} -> - %% We've already heard from GM that the msg is to be - %% discarded. We won't see this again. + {ok, Status} -> + MS1 = send_or_record_confirm( + Status, Delivery, dict:erase(MsgId, MS), State1), SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { msg_id_status = dict:erase(MsgId, MS), + State1 #state { msg_id_status = MS1, sender_queues = SQ1 } end. @@ -698,45 +665,27 @@ remove_from_pending_ch(MsgId, ChPid, SQ) -> dict:store(ChPid, {MQ, sets:del_element(MsgId, PendingCh)}, SQ) end. -process_instruction( - {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { id = MsgId }}, - State = #state { sender_queues = SQ, - backing_queue = BQ, - backing_queue_state = BQS, - msg_id_status = MS }) -> - - %% We really are going to do the publish right now, even though we - %% may not have seen it directly from the channel. As a result, we - %% may know that it needs confirming without knowing its - %% msg_seq_no, which means that we can see the confirmation come - %% back from the backing queue without knowing the msg_seq_no, - %% which means that we're going to have to hang on to the fact - %% that we've seen the msg_id confirmed until we can associate it - %% with a msg_seq_no. +publish_or_discard(Status, ChPid, MsgId, + State = #state { sender_queues = SQ, msg_id_status = MS }) -> + %% We really are going to do the publish/discard right now, even + %% though we may not have seen it directly from the channel. But + %% we cannot issues confirms until the latter has happened. So we + %% need to keep track of the MsgId and its confirmation status in + %% the meantime. State1 = ensure_monitoring(ChPid, State), {MQ, PendingCh} = get_sender_queue(ChPid, SQ), {MQ1, PendingCh1, MS1} = case queue:out(MQ) of {empty, _MQ2} -> {MQ, sets:add_element(MsgId, PendingCh), - dict:store(MsgId, {published, ChPid}, MS)}; - {{value, {Delivery = #delivery { - msg_seq_no = MsgSeqNo, - message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, MQ2} -> - %% We received the msg from the channel first. Thus we - %% need to deal with confirms here. - case needs_confirming(Delivery, State1) of - never -> - {MQ2, PendingCh, MS}; - eventually -> - {MQ2, PendingCh, - dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS)}; - immediately -> - ok = rabbit_misc:confirm_to_sender(ChPid, [MsgSeqNo]), - {MQ2, PendingCh, MS} - end; - {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> + dict:store(MsgId, Status, MS)}; + {{value, Delivery = #delivery { + message = #basic_message { id = MsgId } }}, MQ2} -> + {MQ2, PendingCh, + %% We received the msg from the channel first. Thus + %% we need to deal with confirms here. + send_or_record_confirm(Status, Delivery, MS, State1)}; + {{value, #delivery {}}, _MQ2} -> %% The instruction was sent to us before we were %% within the slave_pids within the #amqqueue{} %% record. We'll never receive the message directly @@ -744,73 +693,48 @@ process_instruction( %% expecting any confirms from us. {MQ, PendingCh, MS} end, - SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), - State2 = State1 #state { sender_queues = SQ1, msg_id_status = MS1 }, - - {ok, - case Deliver of - false -> - BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State2 #state { backing_queue_state = BQS1 }; - {true, AckRequired} -> - {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, - ChPid, BQS), - maybe_store_ack(AckRequired, MsgId, AckTag, - State2 #state { backing_queue_state = BQS1 }) - end}; -process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, - State = #state { sender_queues = SQ, - backing_queue = BQ, - backing_queue_state = BQS, - msg_id_status = MS }) -> - %% Many of the comments around the publish head above apply here - %% too. - State1 = ensure_monitoring(ChPid, State), - {MQ, PendingCh} = get_sender_queue(ChPid, SQ), - {MQ1, PendingCh1, MS1} = - case queue:out(MQ) of - {empty, _MQ} -> - {MQ, sets:add_element(MsgId, PendingCh), - dict:store(MsgId, discarded, MS)}; - {{value, {#delivery { message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, MQ2} -> - %% We've already seen it from the channel, we're not - %% going to see this again, so don't add it to MS - {MQ2, PendingCh, MS}; - {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> - %% The instruction was sent to us before we were - %% within the slave_pids within the #amqqueue{} - %% record. We'll never receive the message directly - %% from the channel. - {MQ, PendingCh, MS} - end, - SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), - BQS1 = BQ:discard(Msg, ChPid, BQS), - {ok, State1 #state { sender_queues = SQ1, - msg_id_status = MS1, - backing_queue_state = BQS1 }}; -process_instruction({set_length, Length, AckRequired}, + State1 #state { sender_queues = SQ1, msg_id_status = MS1 }. + + +process_instruction({publish, ChPid, MsgProps, + Msg = #basic_message { id = MsgId }}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(published, ChPid, MsgId, State), + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + {ok, State1 #state { backing_queue_state = BQS1 }}; +process_instruction({publish_delivered, ChPid, MsgProps, + Msg = #basic_message { id = MsgId }}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(published, ChPid, MsgId, State), + {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, BQS), + {ok, maybe_store_ack(true, MsgId, AckTag, + State1 #state { backing_queue_state = BQS1 })}; +process_instruction({discard, ChPid, MsgId}, State) -> + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + publish_or_discard(discarded, ChPid, MsgId, State), + BQS1 = BQ:discard(MsgId, ChPid, BQS), + {ok, State1 #state { backing_queue_state = BQS1 }}; +process_instruction({drop, Length, Dropped, AckRequired}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> QLen = BQ:len(BQS), - ToDrop = QLen - Length, - {ok, - case ToDrop >= 0 of - true -> - State1 = - lists:foldl( - fun (const, StateN = #state {backing_queue_state = BQSN}) -> - {{#basic_message{id = MsgId}, _IsDelivered, AckTag, - _Remaining}, BQSN1} = BQ:fetch(AckRequired, BQSN), - maybe_store_ack( - AckRequired, MsgId, AckTag, - StateN #state { backing_queue_state = BQSN1 }) - end, State, lists:duplicate(ToDrop, const)), - set_synchronised(true, State1); - false -> - State - end}; + ToDrop = case QLen - Length of + N when N > 0 -> N; + _ -> 0 + end, + State1 = lists:foldl( + fun (const, StateN = #state{backing_queue_state = BQSN}) -> + {{#basic_message{id = MsgId}, _, AckTag, _}, BQSN1} = + BQ:fetch(AckRequired, BQSN), + maybe_store_ack( + AckRequired, MsgId, AckTag, + StateN #state { backing_queue_state = BQSN1 }) + end, State, lists:duplicate(ToDrop, const)), + {ok, case AckRequired of + true -> State1; + false -> update_delta(ToDrop - Dropped, State1) + end}; process_instruction({fetch, AckRequired, MsgId, Remaining}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> @@ -821,11 +745,10 @@ process_instruction({fetch, AckRequired, MsgId, Remaining}, AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS), maybe_store_ack(AckRequired, MsgId, AckTag, State #state { backing_queue_state = BQS1 }); - Other when Other + 1 =:= Remaining -> - set_synchronised(true, State); - Other when Other < Remaining -> - %% we must be shorter than the master - State + _ when QLen =< Remaining andalso AckRequired -> + State; + _ when QLen =< Remaining -> + update_delta(-1, State) end}; process_instruction({ack, MsgIds}, State = #state { backing_queue = BQ, @@ -834,27 +757,17 @@ process_instruction({ack, MsgIds}, {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), {MsgIds1, BQS1} = BQ:ack(AckTags, BQS), [] = MsgIds1 -- MsgIds, %% ASSERTION - {ok, State #state { msg_id_ack = MA1, - backing_queue_state = BQS1 }}; + {ok, update_delta(length(MsgIds1) - length(MsgIds), + State #state { msg_id_ack = MA1, + backing_queue_state = BQS1 })}; process_instruction({requeue, MsgIds}, State = #state { backing_queue = BQ, backing_queue_state = BQS, msg_id_ack = MA }) -> {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), - {ok, case length(AckTags) =:= length(MsgIds) of - true -> - {MsgIds, BQS1} = BQ:requeue(AckTags, BQS), - State #state { msg_id_ack = MA1, - backing_queue_state = BQS1 }; - false -> - %% The only thing we can safely do is nuke out our BQ - %% and MA. The interaction between this and confirms - %% doesn't really bear thinking about... - {_Count, BQS1} = BQ:purge(BQS), - {_MsgIds, BQS2} = ack_all(BQ, MA, BQS1), - State #state { msg_id_ack = dict:new(), - backing_queue_state = BQS2 } - end}; + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + {ok, State #state { msg_id_ack = MA1, + backing_queue_state = BQS1 }}; process_instruction({sender_death, ChPid}, State = #state { sender_queues = SQ, msg_id_status = MS, @@ -872,10 +785,11 @@ process_instruction({sender_death, ChPid}, msg_id_status = MS1, known_senders = pmon:demonitor(ChPid, KS) } end}; -process_instruction({length, Length}, - State = #state { backing_queue = BQ, +process_instruction({depth, Depth}, + State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - {ok, set_synchronised(Length =:= BQ:len(BQS), State)}; + {ok, set_delta(Depth - BQ:depth(BQS), State)}; + process_instruction({delete_and_terminate, Reason}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> @@ -887,31 +801,45 @@ msg_ids_to_acktags(MsgIds, MA) -> lists:foldl( fun (MsgId, {Acc, MAN}) -> case dict:find(MsgId, MA) of - error -> {Acc, MAN}; - {ok, {_Num, AckTag}} -> {[AckTag | Acc], - dict:erase(MsgId, MAN)} + error -> {Acc, MAN}; + {ok, AckTag} -> {[AckTag | Acc], dict:erase(MsgId, MAN)} end end, {[], MA}, MsgIds), {lists:reverse(AckTags), MA1}. -ack_all(BQ, MA, BQS) -> - BQ:ack([AckTag || {_MsgId, {_Num, AckTag}} <- dict:to_list(MA)], BQS). - maybe_store_ack(false, _MsgId, _AckTag, State) -> State; -maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA, - ack_num = Num }) -> - State #state { msg_id_ack = dict:store(MsgId, {Num, AckTag}, MA), - ack_num = Num + 1 }. - -%% We intentionally leave out the head where a slave becomes -%% unsynchronised: we assert that can never happen. -set_synchronised(true, State = #state { q = #amqqueue { name = QName }, - synchronised = false }) -> - rabbit_event:notify(queue_slave_synchronised, [{pid, self()}, - {name, QName}]), - State #state { synchronised = true }; -set_synchronised(true, State) -> +maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA }) -> + State #state { msg_id_ack = dict:store(MsgId, AckTag, MA) }. + +set_delta(0, State = #state { depth_delta = undefined }) -> + ok = record_synchronised(State#state.q), + State #state { depth_delta = 0 }; +set_delta(NewDelta, State = #state { depth_delta = undefined }) -> + true = NewDelta > 0, %% assertion + State #state { depth_delta = NewDelta }; +set_delta(NewDelta, State = #state { depth_delta = Delta }) -> + update_delta(NewDelta - Delta, State). + +update_delta(_DeltaChange, State = #state { depth_delta = undefined }) -> State; -set_synchronised(false, State = #state { synchronised = false }) -> - State. +update_delta( DeltaChange, State = #state { depth_delta = 0 }) -> + 0 = DeltaChange, %% assertion: we cannot become unsync'ed + State; +update_delta( DeltaChange, State = #state { depth_delta = Delta }) -> + true = DeltaChange =< 0, %% assertion: we cannot become 'less' sync'ed + set_delta(Delta + DeltaChange, State #state { depth_delta = undefined }). + +record_synchronised(#amqqueue { name = QName }) -> + Self = self(), + rabbit_misc:execute_mnesia_transaction( + fun () -> + case mnesia:read({rabbit_queue, QName}) of + [] -> + ok; + [Q = #amqqueue { sync_slave_pids = SSPids }] -> + rabbit_mirror_queue_misc:store_updated_slaves( + Q #amqqueue { sync_slave_pids = [Self | SSPids] }), + ok + end + end). diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index d41aa09b15..ab9a9cebd4 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -19,7 +19,7 @@ -include("rabbit_framing.hrl"). -export([method_record_type/1, polite_pause/0, polite_pause/1]). --export([die/1, frame_error/2, amqp_error/4, quit/1, quit/2, +-export([die/1, frame_error/2, amqp_error/4, quit/1, protocol_error/3, protocol_error/4, protocol_error/1]). -export([not_found/1, assert_args_equivalence/4]). -export([dirty_read/1]). @@ -29,14 +29,14 @@ -export([enable_cover/1, report_cover/1]). -export([start_cover/1]). -export([confirm_to_sender/2]). --export([throw_on_error/2, with_exit_handler/2, filter_exit_map/2]). --export([is_abnormal_termination/1]). +-export([throw_on_error/2, with_exit_handler/2, is_abnormal_exit/1, + filter_exit_map/2]). -export([with_user/2, with_user_and_vhost/3]). -export([execute_mnesia_transaction/1]). -export([execute_mnesia_transaction/2]). -export([execute_mnesia_tx_with_tail/1]). -export([ensure_ok/2]). --export([tcp_name/3]). +-export([tcp_name/3, format_inet_error/1]). -export([upmap/2, map_in_order/2]). -export([table_filter/3]). -export([dirty_read_all/1, dirty_foreach_key/2, dirty_dump_log/1]). @@ -60,6 +60,15 @@ -export([multi_call/2]). -export([os_cmd/1]). -export([gb_sets_difference/2]). +-export([version/0]). +-export([sequence_error/1]). +-export([json_encode/1, json_decode/1, json_to_term/1, term_to_json/1]). +-export([base64url/1]). + +%% Horrible macro to use in guards +-define(IS_BENIGN_EXIT(R), + R =:= noproc; R =:= noconnection; R =:= nodedown; R =:= normal; + R =:= shutdown). %%---------------------------------------------------------------------------- @@ -87,7 +96,6 @@ (rabbit_framing:amqp_exception()) -> channel_or_connection_exit()). -spec(quit/1 :: (integer()) -> no_return()). --spec(quit/2 :: (string(), [term()]) -> no_return()). -spec(frame_error/2 :: (rabbit_framing:amqp_method_name(), binary()) -> rabbit_types:connection_exit()). @@ -137,8 +145,8 @@ -spec(throw_on_error/2 :: (atom(), thunk(rabbit_types:error(any()) | {ok, A} | A)) -> A). -spec(with_exit_handler/2 :: (thunk(A), thunk(A)) -> A). +-spec(is_abnormal_exit/1 :: (any()) -> boolean()). -spec(filter_exit_map/2 :: (fun ((A) -> B), [A]) -> [B]). --spec(is_abnormal_termination/1 :: (any()) -> boolean()). -spec(with_user/2 :: (rabbit_types:username(), thunk(A)) -> A). -spec(with_user_and_vhost/3 :: (rabbit_types:username(), rabbit_types:vhost(), thunk(A)) @@ -152,6 +160,7 @@ -spec(tcp_name/3 :: (atom(), inet:ip_address(), rabbit_networking:ip_port()) -> atom()). +-spec(format_inet_error/1 :: (atom()) -> string()). -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(table_filter/3:: (fun ((A) -> boolean()), fun ((A, boolean()) -> 'ok'), @@ -212,6 +221,14 @@ ([pid()], any()) -> {[{pid(), any()}], [{pid(), any()}]}). -spec(os_cmd/1 :: (string()) -> string()). -spec(gb_sets_difference/2 :: (gb_set(), gb_set()) -> gb_set()). +-spec(version/0 :: () -> string()). +-spec(sequence_error/1 :: ([({'error', any()} | any())]) + -> {'error', any()} | any()). +-spec(json_encode/1 :: (any()) -> {'ok', string()} | {'error', any()}). +-spec(json_decode/1 :: (string()) -> {'ok', any()} | 'error'). +-spec(json_to_term/1 :: (any()) -> any()). +-spec(term_to_json/1 :: (any()) -> any()). +-spec(base64url/1 :: (binary()) -> string()). -endif. @@ -390,19 +407,9 @@ report_coverage_percentage(File, Cov, NotCov, Mod) -> confirm_to_sender(Pid, MsgSeqNos) -> gen_server2:cast(Pid, {confirm, MsgSeqNos, self()}). -%% -%% @doc Halts the emulator after printing out an error message io-formatted with -%% the supplied arguments. The exit status of the beam process will be set to 1. -%% -quit(Fmt, Args) -> - io:format("ERROR: " ++ Fmt ++ "~n", Args), - quit(1). - -%% %% @doc Halts the emulator returning the given status code to the os. %% On Windows this function will block indefinitely so as to give the io %% subsystem time to flush stdout completely. -%% quit(Status) -> case os:type() of {unix, _} -> halt(Status); @@ -423,13 +430,14 @@ with_exit_handler(Handler, Thunk) -> try Thunk() catch - exit:{R, _} when R =:= noproc; R =:= nodedown; - R =:= normal; R =:= shutdown -> - Handler(); - exit:{{R, _}, _} when R =:= nodedown; R =:= shutdown -> - Handler() + exit:{R, _} when ?IS_BENIGN_EXIT(R) -> Handler(); + exit:{{R, _}, _} when ?IS_BENIGN_EXIT(R) -> Handler() end. +is_abnormal_exit(R) when ?IS_BENIGN_EXIT(R) -> false; +is_abnormal_exit({R, _}) when ?IS_BENIGN_EXIT(R) -> false; +is_abnormal_exit(_) -> true. + filter_exit_map(F, L) -> Ref = make_ref(), lists:filter(fun (R) -> R =/= Ref end, @@ -437,11 +445,6 @@ filter_exit_map(F, L) -> fun () -> Ref end, fun () -> F(I) end) || I <- L]). -is_abnormal_termination(Reason) - when Reason =:= noproc; Reason =:= noconnection; - Reason =:= normal; Reason =:= shutdown -> false; -is_abnormal_termination({shutdown, _}) -> false; -is_abnormal_termination(_) -> true. with_user(Username, Thunk) -> fun () -> @@ -510,6 +513,10 @@ tcp_name(Prefix, IPAddress, Port) list_to_atom( format("~w_~s:~w", [Prefix, inet_parse:ntoa(IPAddress), Port])). +format_inet_error(address) -> "cannot connect to host/port"; +format_inet_error(timeout) -> "timed out"; +format_inet_error(Error) -> inet:format_error(Error). + %% This is a modified version of Luke Gorrie's pmap - %% http://lukego.livejournal.com/6753.html - that doesn't care about %% the order in which results are received. @@ -939,3 +946,53 @@ os_cmd(Command) -> gb_sets_difference(S1, S2) -> gb_sets:fold(fun gb_sets:delete_any/2, S1, S2). + +version() -> + {ok, VSN} = application:get_key(rabbit, vsn), + VSN. + +sequence_error([T]) -> T; +sequence_error([{error, _} = Error | _]) -> Error; +sequence_error([_ | Rest]) -> sequence_error(Rest). + +json_encode(Term) -> + try + {ok, mochijson2:encode(Term)} + catch + exit:{json_encode, E} -> + {error, E} + end. + +json_decode(Term) -> + try + {ok, mochijson2:decode(Term)} + catch + %% Sadly `mochijson2:decode/1' does not offer a nice way to catch + %% decoding errors... + error:_ -> error + end. + +json_to_term({struct, L}) -> + [{K, json_to_term(V)} || {K, V} <- L]; +json_to_term(L) when is_list(L) -> + [json_to_term(I) || I <- L]; +json_to_term(V) when is_binary(V) orelse is_number(V) orelse V =:= null orelse + V =:= true orelse V =:= false -> + V. + +%% This has the flaw that empty lists will never be JSON objects, so use with +%% care. +term_to_json([{_, _}|_] = L) -> + {struct, [{K, term_to_json(V)} || {K, V} <- L]}; +term_to_json(L) when is_list(L) -> + [term_to_json(I) || I <- L]; +term_to_json(V) when is_binary(V) orelse is_number(V) orelse V =:= null orelse + V =:= true orelse V =:= false -> + V. + +base64url(In) -> + lists:reverse(lists:foldl(fun ($\+, Acc) -> [$\- | Acc]; + ($\/, Acc) -> [$\_ | Acc]; + ($\=, Acc) -> Acc; + (Chr, Acc) -> [Chr | Acc] + end, [], base64:encode_to_string(In))). diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 7e9346f903..d6c6f360d1 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -14,23 +14,37 @@ %% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. %% - -module(rabbit_mnesia). --export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, - cluster/1, force_cluster/1, reset/0, force_reset/0, init_db/3, - is_clustered/0, running_clustered_nodes/0, all_clustered_nodes/0, - empty_ram_only_tables/0, copy_db/1, wait_for_tables/1, - create_cluster_nodes_config/1, read_cluster_nodes_config/0, - record_running_nodes/0, read_previously_running_nodes/0, - running_nodes_filename/0, is_disc_node/0, on_node_down/1, - on_node_up/1]). - --export([table_names/0]). - -%% create_tables/0 exported for helping embed RabbitMQ in or alongside -%% other mnesia-using Erlang applications, such as ejabberd --export([create_tables/0]). +-export([init/0, + join_cluster/2, + reset/0, + force_reset/0, + update_cluster_nodes/1, + change_cluster_node_type/1, + forget_cluster_node/2, + + status/0, + is_clustered/0, + cluster_nodes/1, + node_type/0, + dir/0, + cluster_status_from_mnesia/0, + + init_db_unchecked/2, + copy_db/1, + check_cluster_consistency/0, + ensure_mnesia_dir/0, + + on_node_up/1, + on_node_down/1 + ]). + +%% Used internally in rpc calls +-export([node_info/0, + remove_node_if_mnesia_running/1, + is_running_remote/0 + ]). -include("rabbit.hrl"). @@ -38,314 +52,422 @@ -ifdef(use_specs). --export_type([node_type/0]). +-export_type([node_type/0, cluster_status/0]). --type(node_type() :: disc_only | disc | ram | unknown). --spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | - {'running_nodes', [node()]}]). --spec(dir/0 :: () -> file:filename()). --spec(ensure_mnesia_dir/0 :: () -> 'ok'). +-type(node_type() :: disc | ram). +-type(cluster_status() :: {[node()], [node()], [node()]}). + +%% Main interface -spec(init/0 :: () -> 'ok'). --spec(init_db/3 :: ([node()], boolean(), rabbit_misc:thunk('ok')) -> 'ok'). --spec(is_db_empty/0 :: () -> boolean()). --spec(cluster/1 :: ([node()]) -> 'ok'). --spec(force_cluster/1 :: ([node()]) -> 'ok'). --spec(cluster/2 :: ([node()], boolean()) -> 'ok'). +-spec(join_cluster/2 :: (node(), node_type()) -> 'ok'). -spec(reset/0 :: () -> 'ok'). -spec(force_reset/0 :: () -> 'ok'). +-spec(update_cluster_nodes/1 :: (node()) -> 'ok'). +-spec(change_cluster_node_type/1 :: (node_type()) -> 'ok'). +-spec(forget_cluster_node/2 :: (node(), boolean()) -> 'ok'). + +%% Various queries to get the status of the db +-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | + {'running_nodes', [node()]}]). -spec(is_clustered/0 :: () -> boolean()). --spec(running_clustered_nodes/0 :: () -> [node()]). --spec(all_clustered_nodes/0 :: () -> [node()]). --spec(empty_ram_only_tables/0 :: () -> 'ok'). --spec(create_tables/0 :: () -> 'ok'). +-spec(cluster_nodes/1 :: ('all' | 'disc' | 'ram' | 'running') -> [node()]). +-spec(node_type/0 :: () -> node_type()). +-spec(dir/0 :: () -> file:filename()). +-spec(cluster_status_from_mnesia/0 :: () -> rabbit_types:ok_or_error2( + cluster_status(), any())). + +%% Operations on the db and utils, mainly used in `rabbit_upgrade' and `rabbit' +-spec(init_db_unchecked/2 :: ([node()], node_type()) -> 'ok'). -spec(copy_db/1 :: (file:filename()) -> rabbit_types:ok_or_error(any())). --spec(wait_for_tables/1 :: ([atom()]) -> 'ok'). --spec(create_cluster_nodes_config/1 :: ([node()]) -> 'ok'). --spec(read_cluster_nodes_config/0 :: () -> [node()]). --spec(record_running_nodes/0 :: () -> 'ok'). --spec(read_previously_running_nodes/0 :: () -> [node()]). --spec(running_nodes_filename/0 :: () -> file:filename()). --spec(is_disc_node/0 :: () -> boolean()). +-spec(check_cluster_consistency/0 :: () -> 'ok'). +-spec(ensure_mnesia_dir/0 :: () -> 'ok'). + +%% Hooks used in `rabbit_node_monitor' -spec(on_node_up/1 :: (node()) -> 'ok'). -spec(on_node_down/1 :: (node()) -> 'ok'). --spec(table_names/0 :: () -> [atom()]). - -endif. %%---------------------------------------------------------------------------- - -status() -> - [{nodes, case mnesia:system_info(is_running) of - yes -> [{Key, Nodes} || - {Key, CopyType} <- [{disc_only, disc_only_copies}, - {disc, disc_copies}, - {ram, ram_copies}], - begin - Nodes = nodes_of_type(CopyType), - Nodes =/= [] - end]; - no -> case all_clustered_nodes() of - [] -> []; - Nodes -> [{unknown, Nodes}] - end; - Reason when Reason =:= starting; Reason =:= stopping -> - exit({rabbit_busy, try_again_later}) - end}, - {running_nodes, running_clustered_nodes()}]. +%% Main interface +%%---------------------------------------------------------------------------- init() -> ensure_mnesia_running(), ensure_mnesia_dir(), - Nodes = read_cluster_nodes_config(), - ok = init_db(Nodes, should_be_disc_node(Nodes)), + case is_virgin_node() of + true -> init_from_config(); + false -> NodeType = node_type(), + init_db_and_upgrade(cluster_nodes(all), NodeType, + NodeType =:= ram) + end, %% We intuitively expect the global name server to be synced when - %% Mnesia is up. In fact that's not guaranteed to be the case - let's - %% make it so. + %% Mnesia is up. In fact that's not guaranteed to be the case - + %% let's make it so. ok = global:sync(), - ok = delete_previously_running_nodes(), ok. -is_db_empty() -> - lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, - table_names()). - -cluster(ClusterNodes) -> - cluster(ClusterNodes, false). -force_cluster(ClusterNodes) -> - cluster(ClusterNodes, true). - -%% Alter which disk nodes this node is clustered with. This can be a -%% subset of all the disk nodes in the cluster but can (and should) -%% include the node itself if it is to be a disk rather than a ram -%% node. If Force is false, only connections to online nodes are -%% allowed. -cluster(ClusterNodes, Force) -> - rabbit_misc:local_info_msg("Clustering with ~p~s~n", - [ClusterNodes, if Force -> " forcefully"; - true -> "" - end]), +init_from_config() -> + {ok, {TryNodes, NodeType}} = + application:get_env(rabbit, cluster_nodes), + case find_good_node(nodes_excl_me(TryNodes)) of + {ok, Node} -> + rabbit_log:info("Node '~p' selected for clustering from " + "configuration~n", [Node]), + {ok, {_, DiscNodes, _}} = discover_cluster(Node), + init_db_and_upgrade(DiscNodes, NodeType, true), + rabbit_node_monitor:notify_joined_cluster(); + none -> + rabbit_log:warning("Could not find any suitable node amongst the " + "ones provided in the configuration: ~p~n", + [TryNodes]), + init_db_and_upgrade([node()], disc, false) + end. + +%% Make the node join a cluster. The node will be reset automatically +%% before we actually cluster it. The nodes provided will be used to +%% find out about the nodes in the cluster. +%% +%% This function will fail if: +%% +%% * The node is currently the only disc node of its cluster +%% * We can't connect to any of the nodes provided +%% * The node is currently already clustered with the cluster of the nodes +%% provided +%% +%% Note that we make no attempt to verify that the nodes provided are +%% all in the same cluster, we simply pick the first online node and +%% we cluster to its cluster. +join_cluster(DiscoveryNode, NodeType) -> ensure_mnesia_not_running(), ensure_mnesia_dir(), - - case not Force andalso is_clustered() andalso - is_only_disc_node(node(), false) andalso - not should_be_disc_node(ClusterNodes) - of - true -> log_both("last running disc node leaving cluster"); - _ -> ok + case is_only_clustered_disc_node() of + true -> e(clustering_only_disc_node); + false -> ok end, - - %% Wipe mnesia if we're changing type from disc to ram - case {is_disc_node(), should_be_disc_node(ClusterNodes)} of - {true, false} -> rabbit_misc:with_local_io( - fun () -> error_logger:warning_msg( - "changing node type; wiping " - "mnesia...~n~n") - end), - rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), - cannot_delete_schema); - _ -> ok + {ClusterNodes, _, _} = case discover_cluster(DiscoveryNode) of + {ok, Res} -> Res; + {error, _} = E -> throw(E) + end, + case me_in_nodes(ClusterNodes) of + true -> e(already_clustered); + false -> ok end, - %% Pre-emptively leave the cluster - %% - %% We're trying to handle the following two cases: - %% 1. We have a two-node cluster, where both nodes are disc nodes. - %% One node is re-clustered as a ram node. When it tries to - %% re-join the cluster, but before it has time to update its - %% tables definitions, the other node will order it to re-create - %% its disc tables. So, we need to leave the cluster before we - %% can join it again. - %% 2. We have a two-node cluster, where both nodes are disc nodes. - %% One node is forcefully reset (so, the other node thinks its - %% still a part of the cluster). The reset node is re-clustered - %% as a ram node. Same as above, we need to leave the cluster - %% before we can join it. But, since we don't know if we're in a - %% cluster or not, we just pre-emptively leave it before joining. - ProperClusterNodes = ClusterNodes -- [node()], - try - ok = leave_cluster(ProperClusterNodes, ProperClusterNodes) - catch - {error, {no_running_cluster_nodes, _, _}} when Force -> - ok - end, + %% reset the node. this simplifies things and it will be needed in + %% this case - we're joining a new cluster with new nodes which + %% are not in synch with the current node. I also lifts the burden + %% of reseting the node from the user. + reset(false), %% Join the cluster - start_mnesia(), - try - ok = init_db(ClusterNodes, Force), - ok = create_cluster_nodes_config(ClusterNodes) - after - stop_mnesia() - end, + rabbit_misc:local_info_msg("Clustering with ~p as ~p node~n", + [ClusterNodes, NodeType]), + ok = init_db_with_mnesia(ClusterNodes, NodeType, true, true), + rabbit_node_monitor:notify_joined_cluster(), ok. %% return node to its virgin state, where it is not member of any %% cluster, has no cluster configuration, no local database, and no %% persisted messages -reset() -> reset(false). -force_reset() -> reset(true). - -is_clustered() -> - RunningNodes = running_clustered_nodes(), - [node()] /= RunningNodes andalso [] /= RunningNodes. - -all_clustered_nodes() -> - mnesia:system_info(db_nodes). - -running_clustered_nodes() -> - mnesia:system_info(running_db_nodes). - -empty_ram_only_tables() -> - Node = node(), - lists:foreach( - fun (TabName) -> - case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of - true -> {atomic, ok} = mnesia:clear_table(TabName); - false -> ok - end - end, table_names()), +reset() -> + rabbit_misc:local_info_msg("Resetting Rabbit~n", []), + reset(false). + +force_reset() -> + rabbit_misc:local_info_msg("Resetting Rabbit forcefully~n", []), + reset(true). + +reset(Force) -> + ensure_mnesia_not_running(), + Nodes = case Force of + true -> + nodes(); + false -> + AllNodes = cluster_nodes(all), + %% Reconnecting so that we will get an up to date + %% nodes. We don't need to check for consistency + %% because we are resetting. Force=true here so + %% that reset still works when clustered with a + %% node which is down. + init_db_with_mnesia(AllNodes, node_type(), false, false), + case is_only_clustered_disc_node() of + true -> e(resetting_only_disc_node); + false -> ok + end, + leave_cluster(), + rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), + cannot_delete_schema), + cluster_nodes(all) + end, + %% We need to make sure that we don't end up in a distributed + %% Erlang system with nodes while not being in an Mnesia cluster + %% with them. We don't handle that well. + [erlang:disconnect_node(N) || N <- Nodes], + %% remove persisted messages and any other garbage we find + ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")), + ok = rabbit_node_monitor:reset_cluster_status(), ok. -%%-------------------------------------------------------------------- +change_cluster_node_type(Type) -> + ensure_mnesia_not_running(), + ensure_mnesia_dir(), + case is_clustered() of + false -> e(not_clustered); + true -> ok + end, + {_, _, RunningNodes} = case discover_cluster(cluster_nodes(all)) of + {ok, Status} -> Status; + {error, _Reason} -> e(cannot_connect_to_cluster) + end, + %% We might still be marked as running by a remote node since the + %% information of us going down might not have propagated yet. + Node = case RunningNodes -- [node()] of + [] -> e(no_online_cluster_nodes); + [Node0|_] -> Node0 + end, + ok = reset(), + ok = join_cluster(Node, Type). -nodes_of_type(Type) -> - %% This function should return the nodes of a certain type (ram, - %% disc or disc_only) in the current cluster. The type of nodes - %% is determined when the cluster is initially configured. - mnesia:table_info(schema, Type). - -%% The tables aren't supposed to be on disk on a ram node -table_definitions(disc) -> - table_definitions(); -table_definitions(ram) -> - [{Tab, copy_type_to_ram(TabDef)} || {Tab, TabDef} <- table_definitions()]. - -table_definitions() -> - [{rabbit_user, - [{record_name, internal_user}, - {attributes, record_info(fields, internal_user)}, - {disc_copies, [node()]}, - {match, #internal_user{_='_'}}]}, - {rabbit_user_permission, - [{record_name, user_permission}, - {attributes, record_info(fields, user_permission)}, - {disc_copies, [node()]}, - {match, #user_permission{user_vhost = #user_vhost{_='_'}, - permission = #permission{_='_'}, - _='_'}}]}, - {rabbit_vhost, - [{record_name, vhost}, - {attributes, record_info(fields, vhost)}, - {disc_copies, [node()]}, - {match, #vhost{_='_'}}]}, - {rabbit_listener, - [{record_name, listener}, - {attributes, record_info(fields, listener)}, - {type, bag}, - {match, #listener{_='_'}}]}, - {rabbit_durable_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {disc_copies, [node()]}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_semi_durable_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {type, ordered_set}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_route, - [{record_name, route}, - {attributes, record_info(fields, route)}, - {type, ordered_set}, - {match, #route{binding = binding_match(), _='_'}}]}, - {rabbit_reverse_route, - [{record_name, reverse_route}, - {attributes, record_info(fields, reverse_route)}, - {type, ordered_set}, - {match, #reverse_route{reverse_binding = reverse_binding_match(), - _='_'}}]}, - {rabbit_topic_trie_node, - [{record_name, topic_trie_node}, - {attributes, record_info(fields, topic_trie_node)}, - {type, ordered_set}, - {match, #topic_trie_node{trie_node = trie_node_match(), _='_'}}]}, - {rabbit_topic_trie_edge, - [{record_name, topic_trie_edge}, - {attributes, record_info(fields, topic_trie_edge)}, - {type, ordered_set}, - {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]}, - {rabbit_topic_trie_binding, - [{record_name, topic_trie_binding}, - {attributes, record_info(fields, topic_trie_binding)}, - {type, ordered_set}, - {match, #topic_trie_binding{trie_binding = trie_binding_match(), - _='_'}}]}, - {rabbit_durable_exchange, - [{record_name, exchange}, - {attributes, record_info(fields, exchange)}, - {disc_copies, [node()]}, - {match, #exchange{name = exchange_name_match(), _='_'}}]}, - {rabbit_exchange, - [{record_name, exchange}, - {attributes, record_info(fields, exchange)}, - {match, #exchange{name = exchange_name_match(), _='_'}}]}, - {rabbit_exchange_serial, - [{record_name, exchange_serial}, - {attributes, record_info(fields, exchange_serial)}, - {match, #exchange_serial{name = exchange_name_match(), _='_'}}]}, - {rabbit_runtime_parameters, - [{record_name, runtime_parameters}, - {attributes, record_info(fields, runtime_parameters)}, - {disc_copies, [node()]}, - {match, #runtime_parameters{_='_'}}]}, - {rabbit_durable_queue, - [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}, - {disc_copies, [node()]}, - {match, #amqqueue{name = queue_name_match(), _='_'}}]}, - {rabbit_queue, - [{record_name, amqqueue}, - {attributes, record_info(fields, amqqueue)}, - {match, #amqqueue{name = queue_name_match(), _='_'}}]}] - ++ gm:table_definitions() - ++ mirrored_supervisor:table_definitions(). - -binding_match() -> - #binding{source = exchange_name_match(), - destination = binding_destination_match(), - _='_'}. -reverse_binding_match() -> - #reverse_binding{destination = binding_destination_match(), - source = exchange_name_match(), - _='_'}. -binding_destination_match() -> - resource_match('_'). -trie_node_match() -> - #trie_node{ exchange_name = exchange_name_match(), _='_'}. -trie_edge_match() -> - #trie_edge{ exchange_name = exchange_name_match(), _='_'}. -trie_binding_match() -> - #trie_binding{exchange_name = exchange_name_match(), _='_'}. -exchange_name_match() -> - resource_match(exchange). -queue_name_match() -> - resource_match(queue). -resource_match(Kind) -> - #resource{kind = Kind, _='_'}. - -table_names() -> - [Tab || {Tab, _} <- table_definitions()]. - -replicated_table_names() -> - [Tab || {Tab, TabDef} <- table_definitions(), - not lists:member({local_content, true}, TabDef) - ]. +update_cluster_nodes(DiscoveryNode) -> + ensure_mnesia_not_running(), + ensure_mnesia_dir(), + Status = {AllNodes, _, _} = + case discover_cluster(DiscoveryNode) of + {ok, Status0} -> Status0; + {error, _Reason} -> e(cannot_connect_to_node) + end, + case me_in_nodes(AllNodes) of + true -> + %% As in `check_consistency/0', we can safely delete the + %% schema here, since it'll be replicated from the other + %% nodes + mnesia:delete_schema([node()]), + rabbit_node_monitor:write_cluster_status(Status), + rabbit_misc:local_info_msg("Updating cluster nodes from ~p~n", + [DiscoveryNode]), + init_db_with_mnesia(AllNodes, node_type(), true, true); + false -> + e(inconsistent_cluster) + end, + ok. + +%% We proceed like this: try to remove the node locally. If the node +%% is offline, we remove the node if: +%% * This node is a disc node +%% * All other nodes are offline +%% * This node was, at the best of our knowledge (see comment below) +%% the last or second to last after the node we're removing to go +%% down +forget_cluster_node(Node, RemoveWhenOffline) -> + case lists:member(Node, cluster_nodes(all)) of + true -> ok; + false -> e(not_a_cluster_node) + end, + case {RemoveWhenOffline, mnesia:system_info(is_running)} of + {true, no} -> remove_node_offline_node(Node); + {true, yes} -> e(online_node_offline_flag); + {false, no} -> e(offline_node_no_offline_flag); + {false, yes} -> rabbit_misc:local_info_msg( + "Removing node ~p from cluster~n", [Node]), + case remove_node_if_mnesia_running(Node) of + ok -> ok; + {error, _} = Err -> throw(Err) + end + end. + +remove_node_offline_node(Node) -> + %% Here `mnesia:system_info(running_db_nodes)' will RPC, but that's what we + %% want - we need to know the running nodes *now*. If the current node is a + %% RAM node it will return bogus results, but we don't care since we only do + %% this operation from disc nodes. + case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of + {[], disc} -> + %% Note that while we check if the nodes was the last to go down, + %% apart from the node we're removing from, this is still unsafe. + %% Consider the situation in which A and B are clustered. A goes + %% down, and records B as the running node. Then B gets clustered + %% with C, C goes down and B goes down. In this case, C is the + %% second-to-last, but we don't know that and we'll remove B from A + %% anyway, even if that will lead to bad things. + case cluster_nodes(running) -- [node(), Node] of + [] -> start_mnesia(), + try + %% What we want to do here is replace the last node to + %% go down with the current node. The way we do this + %% is by force loading the table, and making sure that + %% they are loaded. + rabbit_table:force_load(), + rabbit_table:wait_for_replicated(), + forget_cluster_node(Node, false) + after + stop_mnesia() + end; + _ -> e(not_last_node_to_go_down) + end; + {_, _} -> + e(removing_node_from_offline_node) + end. + + +%%---------------------------------------------------------------------------- +%% Queries +%%---------------------------------------------------------------------------- + +status() -> + IfNonEmpty = fun (_, []) -> []; + (Type, Nodes) -> [{Type, Nodes}] + end, + [{nodes, (IfNonEmpty(disc, cluster_nodes(disc)) ++ + IfNonEmpty(ram, cluster_nodes(ram)))}] ++ + case mnesia:system_info(is_running) of + yes -> RunningNodes = cluster_nodes(running), + [{running_nodes, cluster_nodes(running)}, + {partitions, mnesia_partitions(RunningNodes)}]; + no -> [] + end. + +mnesia_partitions(Nodes) -> + {Replies, _BadNodes} = rpc:multicall( + Nodes, rabbit_node_monitor, partitions, []), + [Reply || Reply = {_, R} <- Replies, R =/= []]. + +is_clustered() -> AllNodes = cluster_nodes(all), + AllNodes =/= [] andalso AllNodes =/= [node()]. + +cluster_nodes(WhichNodes) -> cluster_status(WhichNodes). + +%% This function is the actual source of information, since it gets +%% the data from mnesia. Obviously it'll work only when mnesia is +%% running. +cluster_status_from_mnesia() -> + case mnesia:system_info(is_running) of + no -> + {error, mnesia_not_running}; + yes -> + %% If the tables are not present, it means that + %% `init_db/3' hasn't been run yet. In other words, either + %% we are a virgin node or a restarted RAM node. In both + %% cases we're not interested in what mnesia has to say. + NodeType = case mnesia:system_info(use_dir) of + true -> disc; + false -> ram + end, + case rabbit_table:is_present() of + true -> AllNodes = mnesia:system_info(db_nodes), + DiscCopies = mnesia:table_info(schema, disc_copies), + DiscNodes = case NodeType of + disc -> nodes_incl_me(DiscCopies); + ram -> DiscCopies + end, + %% `mnesia:system_info(running_db_nodes)' is safe since + %% we know that mnesia is running + RunningNodes = mnesia:system_info(running_db_nodes), + {ok, {AllNodes, DiscNodes, RunningNodes}}; + false -> {error, tables_not_present} + end + end. + +cluster_status(WhichNodes) -> + {AllNodes, DiscNodes, RunningNodes} = Nodes = + case cluster_status_from_mnesia() of + {ok, Nodes0} -> + Nodes0; + {error, _Reason} -> + {AllNodes0, DiscNodes0, RunningNodes0} = + rabbit_node_monitor:read_cluster_status(), + %% The cluster status file records the status when the node is + %% online, but we know for sure that the node is offline now, so + %% we can remove it from the list of running nodes. + {AllNodes0, DiscNodes0, nodes_excl_me(RunningNodes0)} + end, + case WhichNodes of + status -> Nodes; + all -> AllNodes; + disc -> DiscNodes; + ram -> AllNodes -- DiscNodes; + running -> RunningNodes + end. + +node_info() -> + {erlang:system_info(otp_release), rabbit_misc:version(), + cluster_status_from_mnesia()}. + +node_type() -> + DiscNodes = cluster_nodes(disc), + case DiscNodes =:= [] orelse me_in_nodes(DiscNodes) of + true -> disc; + false -> ram + end. dir() -> mnesia:system_info(directory). +%%---------------------------------------------------------------------------- +%% Operations on the db +%%---------------------------------------------------------------------------- + +%% Adds the provided nodes to the mnesia cluster, creating a new +%% schema if there is the need to and catching up if there are other +%% nodes in the cluster already. It also updates the cluster status +%% file. +init_db(ClusterNodes, NodeType, CheckOtherNodes) -> + Nodes = change_extra_db_nodes(ClusterNodes, CheckOtherNodes), + %% Note that we use `system_info' here and not the cluster status + %% since when we start rabbit for the first time the cluster + %% status will say we are a disc node but the tables won't be + %% present yet. + WasDiscNode = mnesia:system_info(use_dir), + case {Nodes, WasDiscNode, NodeType} of + {[], _, ram} -> + %% Standalone ram node, we don't want that + throw({error, cannot_create_standalone_ram_node}); + {[], false, disc} -> + %% RAM -> disc, starting from scratch + ok = create_schema(); + {[], true, disc} -> + %% First disc node up + ok; + {[AnotherNode | _], _, _} -> + %% Subsequent node in cluster, catch up + ensure_version_ok( + rpc:call(AnotherNode, rabbit_version, recorded, [])), + ok = rabbit_table:wait_for_replicated(), + ok = rabbit_table:create_local_copy(NodeType) + end, + ensure_schema_integrity(), + rabbit_node_monitor:update_cluster_status(), + ok. + +init_db_unchecked(ClusterNodes, NodeType) -> + init_db(ClusterNodes, NodeType, false). + +init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes) -> + ok = init_db(ClusterNodes, NodeType, CheckOtherNodes), + ok = case rabbit_upgrade:maybe_upgrade_local() of + ok -> ok; + starting_from_scratch -> rabbit_version:record_desired(); + version_not_available -> schema_ok_or_move() + end, + %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget + %% about the cluster + case NodeType of + ram -> start_mnesia(), + change_extra_db_nodes(ClusterNodes, false), + rabbit_table:wait_for_replicated(); + disc -> ok + end, + ok. + +init_db_with_mnesia(ClusterNodes, NodeType, + CheckOtherNodes, CheckConsistency) -> + start_mnesia(CheckConsistency), + try + init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes) + after + stop_mnesia() + end. + ensure_mnesia_dir() -> MnesiaDir = dir() ++ "/", case filelib:ensure_dir(MnesiaDir) of @@ -378,210 +500,111 @@ ensure_mnesia_not_running() -> end. ensure_schema_integrity() -> - case check_schema_integrity() of + case rabbit_table:check_schema_integrity() of ok -> ok; {error, Reason} -> throw({error, {schema_integrity_check_failed, Reason}}) end. -check_schema_integrity() -> - Tables = mnesia:system_info(tables), - case check_tables(fun (Tab, TabDef) -> - case lists:member(Tab, Tables) of - false -> {error, {table_missing, Tab}}; - true -> check_table_attributes(Tab, TabDef) - end - end) of - ok -> ok = wait_for_tables(), - check_tables(fun check_table_content/2); - Other -> Other - end. - -check_table_attributes(Tab, TabDef) -> - {_, ExpAttrs} = proplists:lookup(attributes, TabDef), - case mnesia:table_info(Tab, attributes) of - ExpAttrs -> ok; - Attrs -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}} - end. +copy_db(Destination) -> + ok = ensure_mnesia_not_running(), + rabbit_file:recursive_copy(dir(), Destination). -check_table_content(Tab, TabDef) -> - {_, Match} = proplists:lookup(match, TabDef), - case mnesia:dirty_first(Tab) of - '$end_of_table' -> +%% This does not guarantee us much, but it avoids some situations that +%% will definitely end up badly +check_cluster_consistency() -> + %% We want to find 0 or 1 consistent nodes. + case lists:foldl( + fun (Node, {error, _}) -> check_cluster_consistency(Node); + (_Node, {ok, Status}) -> {ok, Status} + end, {error, not_found}, nodes_excl_me(cluster_nodes(all))) + of + {ok, Status = {RemoteAllNodes, _, _}} -> + case ordsets:is_subset(ordsets:from_list(cluster_nodes(all)), + ordsets:from_list(RemoteAllNodes)) of + true -> + ok; + false -> + %% We delete the schema here since we think we are + %% clustered with nodes that are no longer in the + %% cluster and there is no other way to remove + %% them from our schema. On the other hand, we are + %% sure that there is another online node that we + %% can use to sync the tables with. There is a + %% race here: if between this check and the + %% `init_db' invocation the cluster gets + %% disbanded, we're left with a node with no + %% mnesia data that will try to connect to offline + %% nodes. + mnesia:delete_schema([node()]) + end, + rabbit_node_monitor:write_cluster_status(Status); + {error, not_found} -> ok; - Key -> - ObjList = mnesia:dirty_read(Tab, Key), - MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), - case ets:match_spec_run(ObjList, MatchComp) of - ObjList -> ok; - _ -> {error, {table_content_invalid, Tab, Match, ObjList}} - end + {error, _} = E -> + throw(E) end. -check_tables(Fun) -> - case [Error || {Tab, TabDef} <- table_definitions( - case is_disc_node() of - true -> disc; - false -> ram - end), - case Fun(Tab, TabDef) of - ok -> Error = none, false; - {error, Error} -> true - end] of - [] -> ok; - Errors -> {error, Errors} +check_cluster_consistency(Node) -> + case rpc:call(Node, rabbit_mnesia, node_info, []) of + {badrpc, _Reason} -> + {error, not_found}; + {_OTP, _Rabbit, {error, _}} -> + {error, not_found}; + {OTP, Rabbit, {ok, Status}} -> + case check_consistency(OTP, Rabbit, Node, Status) of + {error, _} = E -> E; + {ok, Res} -> {ok, Res} + end end. -%% The cluster node config file contains some or all of the disk nodes -%% that are members of the cluster this node is / should be a part of. -%% -%% If the file is absent, the list is empty, or only contains the -%% current node, then the current node is a standalone (disk) -%% node. Otherwise it is a node that is part of a cluster as either a -%% disk node, if it appears in the cluster node config, or ram node if -%% it doesn't. - -cluster_nodes_config_filename() -> - dir() ++ "/cluster_nodes.config". - -create_cluster_nodes_config(ClusterNodes) -> - FileName = cluster_nodes_config_filename(), - case rabbit_file:write_term_file(FileName, [ClusterNodes]) of - ok -> ok; - {error, Reason} -> - throw({error, {cannot_create_cluster_nodes_config, - FileName, Reason}}) - end. +%%-------------------------------------------------------------------- +%% Hooks for `rabbit_node_monitor' +%%-------------------------------------------------------------------- -read_cluster_nodes_config() -> - FileName = cluster_nodes_config_filename(), - case rabbit_file:read_term_file(FileName) of - {ok, [ClusterNodes]} -> ClusterNodes; - {error, enoent} -> - {ok, ClusterNodes} = application:get_env(rabbit, cluster_nodes), - ClusterNodes; - {error, Reason} -> - throw({error, {cannot_read_cluster_nodes_config, - FileName, Reason}}) +on_node_up(Node) -> + case running_disc_nodes() of + [Node] -> rabbit_log:info("cluster contains disc nodes again~n"); + _ -> ok end. -delete_cluster_nodes_config() -> - FileName = cluster_nodes_config_filename(), - case file:delete(FileName) of - ok -> ok; - {error, enoent} -> ok; - {error, Reason} -> - throw({error, {cannot_delete_cluster_nodes_config, - FileName, Reason}}) +on_node_down(_Node) -> + case running_disc_nodes() of + [] -> rabbit_log:info("only running disc node went down~n"); + _ -> ok end. -running_nodes_filename() -> - filename:join(dir(), "nodes_running_at_shutdown"). - -record_running_nodes() -> - FileName = running_nodes_filename(), - Nodes = running_clustered_nodes() -- [node()], - %% Don't check the result: we're shutting down anyway and this is - %% a best-effort-basis. - rabbit_file:write_term_file(FileName, [Nodes]), - ok. - -read_previously_running_nodes() -> - FileName = running_nodes_filename(), - case rabbit_file:read_term_file(FileName) of - {ok, [Nodes]} -> Nodes; - {error, enoent} -> []; - {error, Reason} -> throw({error, {cannot_read_previous_nodes_file, - FileName, Reason}}) - end. +running_disc_nodes() -> + {_AllNodes, DiscNodes, RunningNodes} = cluster_status(status), + ordsets:to_list(ordsets:intersection(ordsets:from_list(DiscNodes), + ordsets:from_list(RunningNodes))). -delete_previously_running_nodes() -> - FileName = running_nodes_filename(), - case file:delete(FileName) of - ok -> ok; - {error, enoent} -> ok; - {error, Reason} -> throw({error, {cannot_delete_previous_nodes_file, - FileName, Reason}}) - end. - -init_db(ClusterNodes, Force) -> - init_db( - ClusterNodes, Force, - fun () -> - case rabbit_upgrade:maybe_upgrade_local() of - ok -> ok; - %% If we're just starting up a new node we won't have a - %% version - starting_from_scratch -> ok = rabbit_version:record_desired() - end - end). - -%% Take a cluster node config and create the right kind of node - a -%% standalone disk node, or disk or ram node connected to the -%% specified cluster nodes. If Force is false, don't allow -%% connections to offline nodes. -init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) -> - UClusterNodes = lists:usort(ClusterNodes), - ProperClusterNodes = UClusterNodes -- [node()], - case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of - {ok, []} when not Force andalso ProperClusterNodes =/= [] -> - throw({error, {failed_to_cluster_with, ProperClusterNodes, - "Mnesia could not connect to any disc nodes."}}); - {ok, Nodes} -> - WasDiscNode = is_disc_node(), - WantDiscNode = should_be_disc_node(ClusterNodes), - %% We create a new db (on disk, or in ram) in the first - %% two cases and attempt to upgrade the in the other two - case {Nodes, WasDiscNode, WantDiscNode} of - {[], _, false} -> - %% New ram node; start from scratch - ok = create_schema(ram); - {[], false, true} -> - %% Nothing there at all, start from scratch - ok = create_schema(disc); - {[], true, true} -> - %% We're the first node up - case rabbit_upgrade:maybe_upgrade_local() of - ok -> ensure_schema_integrity(); - version_not_available -> ok = schema_ok_or_move() - end; - {[AnotherNode|_], _, _} -> - %% Subsequent node in cluster, catch up - ensure_version_ok( - rpc:call(AnotherNode, rabbit_version, recorded, [])), - {CopyType, CopyTypeAlt} = - case WantDiscNode of - true -> {disc, disc_copies}; - false -> {ram, ram_copies} - end, - ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, CopyTypeAlt), - ok = create_local_table_copies(CopyType), - - ok = SecondaryPostMnesiaFun(), - %% We've taken down mnesia, so ram nodes will need - %% to re-sync - case is_disc_node() of - false -> start_mnesia(), - mnesia:change_config(extra_db_nodes, - ProperClusterNodes), - wait_for_replicated_tables(); - true -> ok - end, +%%-------------------------------------------------------------------- +%% Internal helpers +%%-------------------------------------------------------------------- - ensure_schema_integrity(), - ok - end; - {error, Reason} -> - %% one reason we may end up here is if we try to join - %% nodes together that are currently running standalone or - %% are members of a different cluster - throw({error, {unable_to_join_cluster, ClusterNodes, Reason}}) +discover_cluster(Nodes) when is_list(Nodes) -> + lists:foldl(fun (_, {ok, Res}) -> {ok, Res}; + (Node, {error, _}) -> discover_cluster(Node) + end, {error, no_nodes_provided}, Nodes); +discover_cluster(Node) -> + OfflineError = + {error, {cannot_discover_cluster, + "The nodes provided are either offline or not running"}}, + case node() of + Node -> {error, {cannot_discover_cluster, + "Cannot cluster node with itself"}}; + _ -> case rpc:call(Node, + rabbit_mnesia, cluster_status_from_mnesia, []) of + {badrpc, _Reason} -> OfflineError; + {error, mnesia_not_running} -> OfflineError; + {ok, Res} -> {ok, Res} + end end. schema_ok_or_move() -> - case check_schema_integrity() of + case rabbit_table:check_schema_integrity() of ok -> ok; {error, Reason} -> @@ -592,7 +615,7 @@ schema_ok_or_move() -> "and recreating schema from scratch~n", [Reason]), ok = move_db(), - ok = create_schema(disc) + ok = create_schema() end. ensure_version_ok({ok, DiscVersion}) -> @@ -604,25 +627,16 @@ ensure_version_ok({ok, DiscVersion}) -> ensure_version_ok({error, _}) -> ok = rabbit_version:record_desired(). -create_schema(Type) -> +%% We only care about disc nodes since ram nodes are supposed to catch +%% up only +create_schema() -> stop_mnesia(), - case Type of - disc -> rabbit_misc:ensure_ok(mnesia:create_schema([node()]), - cannot_create_schema); - ram -> %% remove the disc schema since this is a ram node - rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), - cannot_delete_schema) - end, + rabbit_misc:ensure_ok(mnesia:create_schema([node()]), cannot_create_schema), start_mnesia(), - ok = create_tables(Type), + ok = rabbit_table:create(), ensure_schema_integrity(), ok = rabbit_version:record_desired(). -is_disc_node() -> mnesia:system_info(use_dir). - -should_be_disc_node(ClusterNodes) -> - ClusterNodes == [] orelse lists:member(node(), ClusterNodes). - move_db() -> stop_mnesia(), MnesiaDir = filename:dirname(dir() ++ "/"), @@ -644,186 +658,187 @@ move_db() -> start_mnesia(), ok. -copy_db(Destination) -> - ok = ensure_mnesia_not_running(), - rabbit_file:recursive_copy(dir(), Destination). - -create_tables() -> create_tables(disc). - -create_tables(Type) -> - lists:foreach(fun ({Tab, TabDef}) -> - TabDef1 = proplists:delete(match, TabDef), - case mnesia:create_table(Tab, TabDef1) of - {atomic, ok} -> ok; - {aborted, Reason} -> - throw({error, {table_creation_failed, - Tab, TabDef1, Reason}}) - end - end, - table_definitions(Type)), - ok. - -copy_type_to_ram(TabDef) -> - [{disc_copies, []}, {ram_copies, [node()]} - | proplists:delete(ram_copies, proplists:delete(disc_copies, TabDef))]. - -table_has_copy_type(TabDef, DiscType) -> - lists:member(node(), proplists:get_value(DiscType, TabDef, [])). - -create_local_table_copies(Type) -> - lists:foreach( - fun ({Tab, TabDef}) -> - HasDiscCopies = table_has_copy_type(TabDef, disc_copies), - HasDiscOnlyCopies = table_has_copy_type(TabDef, disc_only_copies), - LocalTab = proplists:get_bool(local_content, TabDef), - StorageType = - if - Type =:= disc orelse LocalTab -> - if - HasDiscCopies -> disc_copies; - HasDiscOnlyCopies -> disc_only_copies; - true -> ram_copies - end; -%%% unused code - commented out to keep dialyzer happy -%%% Type =:= disc_only -> -%%% if -%%% HasDiscCopies or HasDiscOnlyCopies -> -%%% disc_only_copies; -%%% true -> ram_copies -%%% end; - Type =:= ram -> - ram_copies - end, - ok = create_local_table_copy(Tab, StorageType) - end, - table_definitions(Type)), - ok. - -create_local_table_copy(Tab, Type) -> - StorageType = mnesia:table_info(Tab, storage_type), - {atomic, ok} = - if - StorageType == unknown -> - mnesia:add_table_copy(Tab, node(), Type); - StorageType /= Type -> - mnesia:change_table_copy_type(Tab, node(), Type); - true -> {atomic, ok} - end, - ok. - -wait_for_replicated_tables() -> wait_for_tables(replicated_table_names()). - -wait_for_tables() -> wait_for_tables(table_names()). - -wait_for_tables(TableNames) -> - case mnesia:wait_for_tables(TableNames, 30000) of - ok -> - ok; - {timeout, BadTabs} -> - throw({error, {timeout_waiting_for_tables, BadTabs}}); - {error, Reason} -> - throw({error, {failed_waiting_for_tables, Reason}}) +remove_node_if_mnesia_running(Node) -> + case mnesia:system_info(is_running) of + yes -> + %% Deleting the the schema copy of the node will result in + %% the node being removed from the cluster, with that + %% change being propagated to all nodes + case mnesia:del_table_copy(schema, Node) of + {atomic, ok} -> + rabbit_node_monitor:notify_left_cluster(Node), + ok; + {aborted, Reason} -> + {error, {failed_to_remove_node, Node, Reason}} + end; + no -> + {error, mnesia_not_running} end. -reset(Force) -> - rabbit_misc:local_info_msg("Resetting Rabbit~s~n", [if Force -> " forcefully"; - true -> "" - end]), - ensure_mnesia_not_running(), - case not Force andalso is_clustered() andalso - is_only_disc_node(node(), false) - of - true -> log_both("no other disc nodes running"); - false -> ok - end, - Node = node(), - Nodes = all_clustered_nodes() -- [Node], - case Force of - true -> ok; - false -> - ensure_mnesia_dir(), - start_mnesia(), - RunningNodes = - try - %% Force=true here so that reset still works when clustered - %% with a node which is down - ok = init_db(read_cluster_nodes_config(), true), - running_clustered_nodes() -- [Node] - after - stop_mnesia() - end, - leave_cluster(Nodes, RunningNodes), - rabbit_misc:ensure_ok(mnesia:delete_schema([Node]), - cannot_delete_schema) - end, - %% We need to make sure that we don't end up in a distributed - %% Erlang system with nodes while not being in an Mnesia cluster - %% with them. We don't handle that well. - [erlang:disconnect_node(N) || N <- Nodes], - ok = delete_cluster_nodes_config(), - %% remove persisted messages and any other garbage we find - ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")), - ok. +leave_cluster() -> + case nodes_excl_me(cluster_nodes(all)) of + [] -> ok; + AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of + true -> ok; + false -> e(no_running_cluster_nodes) + end + end. -leave_cluster([], _) -> ok; -leave_cluster(Nodes, RunningNodes) -> - %% find at least one running cluster node and instruct it to - %% remove our schema copy which will in turn result in our node - %% being removed as a cluster node from the schema, with that - %% change being propagated to all nodes - case lists:any( - fun (Node) -> - case rpc:call(Node, mnesia, del_table_copy, - [schema, node()]) of - {atomic, ok} -> true; - {badrpc, nodedown} -> false; - {aborted, {node_not_running, _}} -> false; - {aborted, Reason} -> - throw({error, {failed_to_leave_cluster, - Nodes, RunningNodes, Reason}}) - end - end, - RunningNodes) of - true -> ok; - false -> throw({error, {no_running_cluster_nodes, - Nodes, RunningNodes}}) +leave_cluster(Node) -> + case rpc:call(Node, + rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of + ok -> true; + {error, mnesia_not_running} -> false; + {error, Reason} -> throw({error, Reason}); + {badrpc, nodedown} -> false end. wait_for(Condition) -> error_logger:info_msg("Waiting for ~p...~n", [Condition]), timer:sleep(1000). -on_node_up(Node) -> - case is_only_disc_node(Node, true) of - true -> rabbit_log:info("cluster contains disc nodes again~n"); - false -> ok - end. - -on_node_down(Node) -> - case is_only_disc_node(Node, true) of - true -> rabbit_log:info("only running disc node went down~n"); +start_mnesia(CheckConsistency) -> + case CheckConsistency of + true -> check_cluster_consistency(); false -> ok - end. - -is_only_disc_node(Node, _MnesiaRunning = true) -> - RunningSet = sets:from_list(running_clustered_nodes()), - DiscSet = sets:from_list(nodes_of_type(disc_copies)), - [Node] =:= sets:to_list(sets:intersection(RunningSet, DiscSet)); -is_only_disc_node(Node, false) -> - start_mnesia(), - Res = is_only_disc_node(Node, true), - stop_mnesia(), - Res. - -log_both(Warning) -> - io:format("Warning: ~s~n", [Warning]), - rabbit_misc:with_local_io( - fun () -> error_logger:warning_msg("~s~n", [Warning]) end). - -start_mnesia() -> + end, rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), ensure_mnesia_running(). +start_mnesia() -> + start_mnesia(true). + stop_mnesia() -> stopped = mnesia:stop(), ensure_mnesia_not_running(). + +change_extra_db_nodes(ClusterNodes0, CheckOtherNodes) -> + ClusterNodes = nodes_excl_me(ClusterNodes0), + case {mnesia:change_config(extra_db_nodes, ClusterNodes), ClusterNodes} of + {{ok, []}, [_|_]} when CheckOtherNodes -> + throw({error, {failed_to_cluster_with, ClusterNodes, + "Mnesia could not connect to any nodes."}}); + {{ok, Nodes}, _} -> + Nodes + end. + +is_running_remote() -> {mnesia:system_info(is_running) =:= yes, node()}. + +check_consistency(OTP, Rabbit) -> + rabbit_misc:sequence_error( + [check_otp_consistency(OTP), check_rabbit_consistency(Rabbit)]). + +check_consistency(OTP, Rabbit, Node, Status) -> + rabbit_misc:sequence_error( + [check_otp_consistency(OTP), + check_rabbit_consistency(Rabbit), + check_nodes_consistency(Node, Status)]). + +check_nodes_consistency(Node, RemoteStatus = {RemoteAllNodes, _, _}) -> + case me_in_nodes(RemoteAllNodes) of + true -> + {ok, RemoteStatus}; + false -> + {error, {inconsistent_cluster, + rabbit_misc:format("Node ~p thinks it's clustered " + "with node ~p, but ~p disagrees", + [node(), Node, Node])}} + end. + +check_version_consistency(This, Remote, _) when This =:= Remote -> + ok; +check_version_consistency(This, Remote, Name) -> + {error, {inconsistent_cluster, + rabbit_misc:format("~s version mismatch: local node is ~s, " + "remote node ~s", [Name, This, Remote])}}. + +check_otp_consistency(Remote) -> + check_version_consistency(erlang:system_info(otp_release), Remote, "OTP"). + +check_rabbit_consistency(Remote) -> + check_version_consistency(rabbit_misc:version(), Remote, "Rabbit"). + +%% This is fairly tricky. We want to know if the node is in the state +%% that a `reset' would leave it in. We cannot simply check if the +%% mnesia tables aren't there because restarted RAM nodes won't have +%% tables while still being non-virgin. What we do instead is to +%% check if the mnesia directory is non existant or empty, with the +%% exception of the cluster status files, which will be there thanks to +%% `rabbit_node_monitor:prepare_cluster_status_file/0'. +is_virgin_node() -> + case rabbit_file:list_dir(dir()) of + {error, enoent} -> + true; + {ok, []} -> + true; + {ok, [File1, File2]} -> + lists:usort([dir() ++ "/" ++ File1, dir() ++ "/" ++ File2]) =:= + lists:usort([rabbit_node_monitor:cluster_status_filename(), + rabbit_node_monitor:running_nodes_filename()]); + {ok, _} -> + false + end. + +find_good_node([]) -> + none; +find_good_node([Node | Nodes]) -> + case rpc:call(Node, rabbit_mnesia, node_info, []) of + {badrpc, _Reason} -> find_good_node(Nodes); + {OTP, Rabbit, _} -> case check_consistency(OTP, Rabbit) of + {error, _} -> find_good_node(Nodes); + ok -> {ok, Node} + end + end. + +is_only_clustered_disc_node() -> + node_type() =:= disc andalso is_clustered() andalso + cluster_nodes(disc) =:= [node()]. + +me_in_nodes(Nodes) -> lists:member(node(), Nodes). + +nodes_incl_me(Nodes) -> lists:usort([node()|Nodes]). + +nodes_excl_me(Nodes) -> Nodes -- [node()]. + +e(Tag) -> throw({error, {Tag, error_description(Tag)}}). + +error_description(clustering_only_disc_node) -> + "You cannot cluster a node if it is the only disc node in its existing " + " cluster. If new nodes joined while this node was offline, use " + "\"update_cluster_nodes\" to add them manually."; +error_description(resetting_only_disc_node) -> + "You cannot reset a node when it is the only disc node in a cluster. " + "Please convert another node of the cluster to a disc node first."; +error_description(already_clustered) -> + "You are already clustered with the nodes you have selected."; +error_description(not_clustered) -> + "Non-clustered nodes can only be disc nodes."; +error_description(cannot_connect_to_cluster) -> + "Could not connect to the cluster nodes present in this node's " + "status file. If the cluster has changed, you can use the " + "\"update_cluster_nodes\" command to point to the new cluster nodes."; +error_description(no_online_cluster_nodes) -> + "Could not find any online cluster nodes. If the cluster has changed, " + "you can use the 'recluster' command."; +error_description(cannot_connect_to_node) -> + "Could not connect to the cluster node provided."; +error_description(inconsistent_cluster) -> + "The nodes provided do not have this node as part of the cluster."; +error_description(not_a_cluster_node) -> + "The node selected is not in the cluster."; +error_description(online_node_offline_flag) -> + "You set the --offline flag, which is used to remove nodes remotely from " + "offline nodes, but this node is online."; +error_description(offline_node_no_offline_flag) -> + "You are trying to remove a node from an offline node. That is dangerous, " + "but can be done with the --offline flag. Please consult the manual " + "for rabbitmqctl for more information."; +error_description(not_last_node_to_go_down) -> + "The node you're trying to remove from was not the last to go down " + "(excluding the node you are removing). Please use the the last node " + "to go down to remove nodes when the cluster is offline."; +error_description(removing_node_from_offline_node) -> + "To remove a node remotely from an offline node, the node you're removing " + "from must be a disc node and all the other nodes must be offline."; +error_description(no_running_cluster_nodes) -> + "You cannot leave a cluster if no online nodes are present.". diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index d69dad1f8b..c2e55022d9 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -1394,7 +1394,7 @@ filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). -list_sorted_file_names(Dir, Ext) -> +list_sorted_filenames(Dir, Ext) -> lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, filelib:wildcard("*" ++ Ext, Dir)). @@ -1531,8 +1531,8 @@ count_msg_refs(Gen, Seed, State) -> end. recover_crashed_compactions(Dir) -> - FileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION), - TmpFileNames = list_sorted_file_names(Dir, ?FILE_EXTENSION_TMP), + FileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION), + TmpFileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION_TMP), lists:foreach( fun (TmpFileName) -> NonTmpRelatedFileName = @@ -1609,7 +1609,7 @@ build_index(false, {MsgRefDeltaGen, MsgRefDeltaGenInit}, ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State), {ok, Pid} = gatherer:start_link(), case [filename_to_num(FileName) || - FileName <- list_sorted_file_names(Dir, ?FILE_EXTENSION)] of + FileName <- list_sorted_filenames(Dir, ?FILE_EXTENSION)] of [] -> build_index(Pid, undefined, [State #msstate.current_file], State); Files -> {Offset, State1} = build_index(Pid, undefined, Files, State), @@ -2023,7 +2023,7 @@ transform_dir(BaseDir, Store, TransformFun) -> CopyFile = fun (Src, Dst) -> {ok, _Bytes} = file:copy(Src, Dst), ok end, case filelib:is_dir(TmpDir) of true -> throw({error, transform_failed_previously}); - false -> FileList = list_sorted_file_names(Dir, ?FILE_EXTENSION), + false -> FileList = list_sorted_filenames(Dir, ?FILE_EXTENSION), foreach_file(Dir, TmpDir, TransformFile, FileList), foreach_file(Dir, fun file:delete/1, FileList), foreach_file(TmpDir, Dir, CopyFile, FileList), diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl index bedf5142da..038154c36e 100644 --- a/src/rabbit_net.erl +++ b/src/rabbit_net.erl @@ -19,7 +19,7 @@ -export([is_ssl/1, ssl_info/1, controlling_process/2, getstat/2, recv/1, async_recv/3, port_command/2, getopts/2, setopts/2, send/2, - close/1, maybe_fast_close/1, sockname/1, peername/1, peercert/1, + close/1, fast_close/1, sockname/1, peername/1, peercert/1, tune_buffer_size/1, connection_string/2]). %%--------------------------------------------------------------------------- @@ -59,7 +59,7 @@ -spec(setopts/2 :: (socket(), opts()) -> ok_or_any_error()). -spec(send/2 :: (socket(), binary() | iolist()) -> ok_or_any_error()). -spec(close/1 :: (socket()) -> ok_or_any_error()). --spec(maybe_fast_close/1 :: (socket()) -> ok_or_any_error()). +-spec(fast_close/1 :: (socket()) -> ok_or_any_error()). -spec(sockname/1 :: (socket()) -> ok_val_or_error({inet:ip_address(), rabbit_networking:ip_port()})). @@ -77,6 +77,8 @@ %%--------------------------------------------------------------------------- +-define(SSL_CLOSE_TIMEOUT, 5000). + -define(IS_SSL(Sock), is_record(Sock, ssl_socket)). is_ssl(Sock) -> ?IS_SSL(Sock). @@ -148,8 +150,31 @@ send(Sock, Data) when is_port(Sock) -> gen_tcp:send(Sock, Data). close(Sock) when ?IS_SSL(Sock) -> ssl:close(Sock#ssl_socket.ssl); close(Sock) when is_port(Sock) -> gen_tcp:close(Sock). -maybe_fast_close(Sock) when ?IS_SSL(Sock) -> ok; -maybe_fast_close(Sock) when is_port(Sock) -> erlang:port_close(Sock), ok. +fast_close(Sock) when ?IS_SSL(Sock) -> + %% We cannot simply port_close the underlying tcp socket since the + %% TLS protocol is quite insistent that a proper closing handshake + %% should take place (see RFC 5245 s7.2.1). So we call ssl:close + %% instead, but that can block for a very long time, e.g. when + %% there is lots of pending output and there is tcp backpressure, + %% or the ssl_connection process has entered the the + %% workaround_transport_delivery_problems function during + %% termination, which, inexplicably, does a gen_tcp:recv(Socket, + %% 0), which may never return if the client doesn't send a FIN or + %% that gets swallowed by the network. Since there is no timeout + %% variant of ssl:close, we construct our own. + {Pid, MRef} = spawn_monitor(fun () -> ssl:close(Sock#ssl_socket.ssl) end), + erlang:send_after(?SSL_CLOSE_TIMEOUT, self(), {Pid, ssl_close_timeout}), + receive + {Pid, ssl_close_timeout} -> + erlang:demonitor(MRef, [flush]), + exit(Pid, kill); + {'DOWN', MRef, process, Pid, _Reason} -> + ok + end, + catch port_close(Sock#ssl_socket.tcp), + ok; +fast_close(Sock) when is_port(Sock) -> + catch port_close(Sock), ok. sockname(Sock) when ?IS_SSL(Sock) -> ssl:sockname(Sock#ssl_socket.ssl); sockname(Sock) when is_port(Sock) -> inet:sockname(Sock). diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index 94a5a2b79d..5cf8d1aef6 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -160,7 +160,19 @@ ssl_transform_fun(SslOpts) -> case catch ssl:ssl_accept(Sock, SslOpts, ?SSL_TIMEOUT * 1000) of {ok, SslSock} -> {ok, #ssl_socket{tcp = Sock, ssl = SslSock}}; + {error, timeout} -> + {error, {ssl_upgrade_error, timeout}}; {error, Reason} -> + %% We have no idea what state the ssl_connection + %% process is in - it could still be happily + %% going, it might be stuck, or it could be just + %% about to fail. There is little that our caller + %% can do but close the TCP socket, but this could + %% cause ssl alerts to get dropped (which is bad + %% form, according to the TLS spec). So we give + %% the ssl_connection a little bit of time to send + %% such alerts. + timer:sleep(?SSL_TIMEOUT * 1000), {error, {ssl_upgrade_error, Reason}}; {'EXIT', Reason} -> {error, {ssl_upgrade_failure, Reason}} @@ -283,7 +295,7 @@ start_ssl_client(SslOpts, Sock) -> start_client(Sock, ssl_transform_fun(SslOpts)). connections() -> - rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:cluster_nodes(running), rabbit_networking, connections_local, []). connections_local() -> diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 323cf0ce9e..b11c9d049a 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -19,68 +19,232 @@ -behaviour(gen_server). -export([start_link/0]). +-export([running_nodes_filename/0, + cluster_status_filename/0, prepare_cluster_status_files/0, + write_cluster_status/1, read_cluster_status/0, + update_cluster_status/0, reset_cluster_status/0]). +-export([notify_node_up/0, notify_joined_cluster/0, notify_left_cluster/1]). +-export([partitions/0]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). --export([notify_cluster/0, rabbit_running_on/1]). +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). -define(SERVER, ?MODULE). -define(RABBIT_UP_RPC_TIMEOUT, 2000). +-record(state, {monitors, partitions}). + %%---------------------------------------------------------------------------- -ifdef(use_specs). -spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). --spec(rabbit_running_on/1 :: (node()) -> 'ok'). --spec(notify_cluster/0 :: () -> 'ok'). + +-spec(running_nodes_filename/0 :: () -> string()). +-spec(cluster_status_filename/0 :: () -> string()). +-spec(prepare_cluster_status_files/0 :: () -> 'ok'). +-spec(write_cluster_status/1 :: (rabbit_mnesia:cluster_status()) -> 'ok'). +-spec(read_cluster_status/0 :: () -> rabbit_mnesia:cluster_status()). +-spec(update_cluster_status/0 :: () -> 'ok'). +-spec(reset_cluster_status/0 :: () -> 'ok'). + +-spec(notify_node_up/0 :: () -> 'ok'). +-spec(notify_joined_cluster/0 :: () -> 'ok'). +-spec(notify_left_cluster/1 :: (node()) -> 'ok'). + +-spec(partitions/0 :: () -> {node(), [{atom(), node()}]}). -endif. -%%-------------------------------------------------------------------- +%%---------------------------------------------------------------------------- +%% Start +%%---------------------------------------------------------------------------- + +start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +%%---------------------------------------------------------------------------- +%% Cluster file operations +%%---------------------------------------------------------------------------- + +%% The cluster file information is kept in two files. The "cluster +%% status file" contains all the clustered nodes and the disc nodes. +%% The "running nodes file" contains the currently running nodes or +%% the running nodes at shutdown when the node is down. +%% +%% We strive to keep the files up to date and we rely on this +%% assumption in various situations. Obviously when mnesia is offline +%% the information we have will be outdated, but it cannot be +%% otherwise. + +running_nodes_filename() -> + filename:join(rabbit_mnesia:dir(), "nodes_running_at_shutdown"). + +cluster_status_filename() -> + rabbit_mnesia:dir() ++ "/cluster_nodes.config". + +prepare_cluster_status_files() -> + rabbit_mnesia:ensure_mnesia_dir(), + CorruptFiles = fun () -> throw({error, corrupt_cluster_status_files}) end, + RunningNodes1 = case try_read_file(running_nodes_filename()) of + {ok, [Nodes]} when is_list(Nodes) -> Nodes; + {ok, _ } -> CorruptFiles(); + {error, enoent} -> [] + end, + ThisNode = [node()], + %% The running nodes file might contain a set or a list, in case + %% of the legacy file + RunningNodes2 = lists:usort(ThisNode ++ RunningNodes1), + {AllNodes1, WantDiscNode} = + case try_read_file(cluster_status_filename()) of + {ok, [{AllNodes, DiscNodes0}]} -> + {AllNodes, lists:member(node(), DiscNodes0)}; + {ok, [AllNodes0]} when is_list(AllNodes0) -> + {legacy_cluster_nodes(AllNodes0), + legacy_should_be_disc_node(AllNodes0)}; + {ok, _} -> + CorruptFiles(); + {error, enoent} -> + {legacy_cluster_nodes([]), true} + end, + AllNodes2 = lists:usort(AllNodes1 ++ RunningNodes2), + DiscNodes = case WantDiscNode of + true -> ThisNode; + false -> [] + end, + ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). + +write_cluster_status({All, Disc, Running}) -> + ClusterStatusFN = cluster_status_filename(), + Res = case rabbit_file:write_term_file(ClusterStatusFN, [{All, Disc}]) of + ok -> + RunningNodesFN = running_nodes_filename(), + {RunningNodesFN, + rabbit_file:write_term_file(RunningNodesFN, [Running])}; + E1 = {error, _} -> + {ClusterStatusFN, E1} + end, + case Res of + {_, ok} -> ok; + {FN, {error, E2}} -> throw({error, {could_not_write_file, FN, E2}}) + end. + +read_cluster_status() -> + case {try_read_file(cluster_status_filename()), + try_read_file(running_nodes_filename())} of + {{ok, [{All, Disc}]}, {ok, [Running]}} when is_list(Running) -> + {All, Disc, Running}; + {_, _} -> + throw({error, corrupt_or_missing_cluster_files}) + end. -start_link() -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). - -rabbit_running_on(Node) -> - gen_server:cast(rabbit_node_monitor, {rabbit_running_on, Node}). - -notify_cluster() -> - Node = node(), - Nodes = rabbit_mnesia:running_clustered_nodes() -- [Node], - %% notify other rabbits of this rabbit - case rpc:multicall(Nodes, rabbit_node_monitor, rabbit_running_on, - [Node], ?RABBIT_UP_RPC_TIMEOUT) of - {_, [] } -> ok; - {_, Bad} -> rabbit_log:info("failed to contact nodes ~p~n", [Bad]) - end, +update_cluster_status() -> + {ok, Status} = rabbit_mnesia:cluster_status_from_mnesia(), + write_cluster_status(Status). + +reset_cluster_status() -> + write_cluster_status({[node()], [node()], [node()]}). + +%%---------------------------------------------------------------------------- +%% Cluster notifications +%%---------------------------------------------------------------------------- + +notify_node_up() -> + Nodes = rabbit_mnesia:cluster_nodes(running) -- [node()], + gen_server:abcast(Nodes, ?SERVER, + {node_up, node(), rabbit_mnesia:node_type()}), %% register other active rabbits with this rabbit - [ rabbit_running_on(N) || N <- Nodes ], + DiskNodes = rabbit_mnesia:cluster_nodes(disc), + [gen_server:cast(?SERVER, {node_up, N, case lists:member(N, DiskNodes) of + true -> disc; + false -> ram + end}) || N <- Nodes], ok. -%%-------------------------------------------------------------------- +notify_joined_cluster() -> + Nodes = rabbit_mnesia:cluster_nodes(running) -- [node()], + gen_server:abcast(Nodes, ?SERVER, + {joined_cluster, node(), rabbit_mnesia:node_type()}), + ok. + +notify_left_cluster(Node) -> + Nodes = rabbit_mnesia:cluster_nodes(running), + gen_server:abcast(Nodes, ?SERVER, {left_cluster, Node}), + ok. + +%%---------------------------------------------------------------------------- +%% Server calls +%%---------------------------------------------------------------------------- + +partitions() -> + gen_server:call(?SERVER, partitions, infinity). + +%%---------------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------------- init([]) -> - {ok, ordsets:new()}. + {ok, _} = mnesia:subscribe(system), + {ok, #state{monitors = pmon:new(), + partitions = []}}. + +handle_call(partitions, _From, State = #state{partitions = Partitions}) -> + {reply, {node(), Partitions}, State}; handle_call(_Request, _From, State) -> {noreply, State}. -handle_cast({rabbit_running_on, Node}, Nodes) -> - case ordsets:is_element(Node, Nodes) of - true -> {noreply, Nodes}; +%% Note: when updating the status file, we can't simply write the +%% mnesia information since the message can (and will) overtake the +%% mnesia propagation. +handle_cast({node_up, Node, NodeType}, + State = #state{monitors = Monitors}) -> + case pmon:is_monitored({rabbit, Node}, Monitors) of + true -> {noreply, State}; false -> rabbit_log:info("rabbit on node ~p up~n", [Node]), - erlang:monitor(process, {rabbit, Node}), + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + add_node(Node, RunningNodes)}), ok = handle_live_rabbit(Node), - {noreply, ordsets:add_element(Node, Nodes)} + {noreply, State#state{ + monitors = pmon:monitor({rabbit, Node}, Monitors)}} end; +handle_cast({joined_cluster, Node, NodeType}, State) -> + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({add_node(Node, AllNodes), + case NodeType of + disc -> add_node(Node, DiscNodes); + ram -> DiscNodes + end, + RunningNodes}), + {noreply, State}; +handle_cast({left_cluster, Node}, State) -> + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({del_node(Node, AllNodes), del_node(Node, DiscNodes), + del_node(Node, RunningNodes)}), + {noreply, State}; handle_cast(_Msg, State) -> {noreply, State}. -handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, Nodes) -> +handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, + State = #state{monitors = Monitors}) -> rabbit_log:info("rabbit on node ~p down~n", [Node]), + {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(), + write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}), ok = handle_dead_rabbit(Node), - {noreply, ordsets:del_element(Node, Nodes)}; + {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}}; + +handle_info({mnesia_system_event, + {inconsistent_database, running_partitioned_network, Node}}, + State = #state{partitions = Partitions}) -> + Partitions1 = ordsets:to_list( + ordsets:add_element(Node, ordsets:from_list(Partitions))), + {noreply, State#state{partitions = Partitions1}}; + handle_info(_Info, State) -> {noreply, State}. @@ -90,7 +254,9 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. -%%-------------------------------------------------------------------- +%%---------------------------------------------------------------------------- +%% Functions that call the module specific hooks when nodes go up/down +%%---------------------------------------------------------------------------- %% TODO: This may turn out to be a performance hog when there are lots %% of nodes. We really only need to execute some of these statements @@ -104,3 +270,27 @@ handle_dead_rabbit(Node) -> handle_live_rabbit(Node) -> ok = rabbit_alarm:on_node_up(Node), ok = rabbit_mnesia:on_node_up(Node). + +%%-------------------------------------------------------------------- +%% Internal utils +%%-------------------------------------------------------------------- + +try_read_file(FileName) -> + case rabbit_file:read_term_file(FileName) of + {ok, Term} -> {ok, Term}; + {error, enoent} -> {error, enoent}; + {error, E} -> throw({error, {cannot_read_file, FileName, E}}) + end. + +legacy_cluster_nodes(Nodes) -> + %% We get all the info that we can, including the nodes from + %% mnesia, which will be there if the node is a disc node (empty + %% list otherwise) + lists:usort(Nodes ++ mnesia:system_info(db_nodes)). + +legacy_should_be_disc_node(DiscNodes) -> + DiscNodes == [] orelse lists:member(node(), DiscNodes). + +add_node(Node, Nodes) -> lists:usort([Node | Nodes]). + +del_node(Node, Nodes) -> Nodes -- [Node]. diff --git a/src/rabbit_nodes.erl b/src/rabbit_nodes.erl index 1c23632d52..c8d77b0f87 100644 --- a/src/rabbit_nodes.erl +++ b/src/rabbit_nodes.erl @@ -70,8 +70,8 @@ diagnostics0() -> diagnostics_host(Host) -> case names(Host) of {error, EpmdReason} -> - {"- unable to connect to epmd on ~s: ~w", - [Host, EpmdReason]}; + {"- unable to connect to epmd on ~s: ~w (~s)", + [Host, EpmdReason, rabbit_misc:format_inet_error(EpmdReason)]}; {ok, NamePorts} -> {"- ~s: ~p", [Host, [{list_to_atom(Name), Port} || diff --git a/src/rabbit_parameter_validation.erl b/src/rabbit_parameter_validation.erl index af940dde97..24762a733f 100644 --- a/src/rabbit_parameter_validation.erl +++ b/src/rabbit_parameter_validation.erl @@ -16,7 +16,7 @@ -module(rabbit_parameter_validation). --export([number/2, binary/2, list/2, proplist/3]). +-export([number/2, binary/2, boolean/2, list/2, regex/2, proplist/3]). number(_Name, Term) when is_number(Term) -> ok; @@ -30,12 +30,26 @@ binary(_Name, Term) when is_binary(Term) -> binary(Name, Term) -> {error, "~s should be binary, actually was ~p", [Name, Term]}. +boolean(_Name, Term) when is_boolean(Term) -> + ok; +boolean(Name, Term) -> + {error, "~s should be boolean, actually was ~p", [Name, Term]}. + list(_Name, Term) when is_list(Term) -> ok; list(Name, Term) -> {error, "~s should be list, actually was ~p", [Name, Term]}. +regex(Name, Term) when is_binary(Term) -> + case re:compile(Term) of + {ok, _} -> ok; + {error, Reason} -> {error, "~s should be regular expression " + "but is invalid: ~p", [Name, Reason]} + end; +regex(Name, Term) -> + {error, "~s should be a binary but was ~p", [Name, Term]}. + proplist(Name, Constraints, Term) when is_list(Term) -> {Results, Remainder} = lists:foldl( diff --git a/src/rabbit_plugins.erl b/src/rabbit_plugins.erl index 7cf6eea945..ecb1961126 100644 --- a/src/rabbit_plugins.erl +++ b/src/rabbit_plugins.erl @@ -17,8 +17,7 @@ -module(rabbit_plugins). -include("rabbit.hrl"). --export([setup/0, active/0, read_enabled/1, - list/1, dependencies/3]). +-export([setup/0, active/0, read_enabled/1, list/1, dependencies/3]). -define(VERBOSE_DEF, {?VERBOSE_OPT, flag}). -define(MINIMAL_DEF, {?MINIMAL_OPT, flag}). @@ -36,28 +35,25 @@ -ifdef(use_specs). --spec(setup/0 :: () -> [atom()]). --spec(active/0 :: () -> [atom()]). +-type(plugin_name() :: atom()). + +-spec(setup/0 :: () -> [plugin_name()]). +-spec(active/0 :: () -> [plugin_name()]). -spec(list/1 :: (string()) -> [#plugin{}]). --spec(read_enabled/1 :: (file:filename()) -> [atom()]). --spec(dependencies/3 :: - (boolean(), [atom()], [#plugin{}]) -> [atom()]). +-spec(read_enabled/1 :: (file:filename()) -> [plugin_name()]). +-spec(dependencies/3 :: (boolean(), [plugin_name()], [#plugin{}]) -> + [plugin_name()]). -endif. %%---------------------------------------------------------------------------- -%% %% @doc Prepares the file system and installs all enabled plugins. -%% setup() -> - {ok, PluginDir} = application:get_env(rabbit, plugins_dir), - {ok, ExpandDir} = application:get_env(rabbit, plugins_expand_dir), - {ok, EnabledPluginsFile} = application:get_env(rabbit, - enabled_plugins_file), - prepare_plugins(EnabledPluginsFile, PluginDir, ExpandDir), - [prepare_dir_plugin(PluginName) || - PluginName <- filelib:wildcard(ExpandDir ++ "/*/ebin/*.app")]. + {ok, PluginDir} = application:get_env(rabbit, plugins_dir), + {ok, ExpandDir} = application:get_env(rabbit, plugins_expand_dir), + {ok, EnabledFile} = application:get_env(rabbit, enabled_plugins_file), + prepare_plugins(EnabledFile, PluginDir, ExpandDir). %% @doc Lists the plugins which are currently running. active() -> @@ -77,8 +73,7 @@ list(PluginsDir) -> (Plugin = #plugin{}, {Plugins1, Problems1}) -> {[Plugin|Plugins1], Problems1} end, {[], []}, - [get_plugin_info(PluginsDir, Plug) || - Plug <- EZs ++ FreeApps]), + [plugin_info(PluginsDir, Plug) || Plug <- EZs ++ FreeApps]), case Problems of [] -> ok; _ -> io:format("Warning: Problem reading some plugins: ~p~n", @@ -98,11 +93,9 @@ read_enabled(PluginsFile) -> PluginsFile, Reason}}) end. -%% %% @doc Calculate the dependency graph from <i>Sources</i>. %% When Reverse =:= true the bottom/leaf level applications are returned in %% the resulting list, otherwise they're skipped. -%% dependencies(Reverse, Sources, AllPlugins) -> {ok, G} = rabbit_misc:build_acyclic_graph( fun (App, _Deps) -> [{App, App}] end, @@ -118,42 +111,38 @@ dependencies(Reverse, Sources, AllPlugins) -> %%---------------------------------------------------------------------------- -prepare_plugins(EnabledPluginsFile, PluginsDistDir, DestDir) -> +prepare_plugins(EnabledFile, PluginsDistDir, ExpandDir) -> AllPlugins = list(PluginsDistDir), - Enabled = read_enabled(EnabledPluginsFile), + Enabled = read_enabled(EnabledFile), ToUnpack = dependencies(false, Enabled, AllPlugins), ToUnpackPlugins = lookup_plugins(ToUnpack, AllPlugins), - Missing = Enabled -- plugin_names(ToUnpackPlugins), - case Missing of - [] -> ok; - _ -> io:format("Warning: the following enabled plugins were " - "not found: ~p~n", [Missing]) + case Enabled -- plugin_names(ToUnpackPlugins) of + [] -> ok; + Missing -> io:format("Warning: the following enabled plugins were " + "not found: ~p~n", [Missing]) end, %% Eliminate the contents of the destination directory - case delete_recursively(DestDir) of - ok -> ok; - {error, E} -> rabbit_misc:quit("Could not delete dir ~s (~p)", - [DestDir, E]) + case delete_recursively(ExpandDir) of + ok -> ok; + {error, E1} -> throw({error, {cannot_delete_plugins_expand_dir, + [ExpandDir, E1]}}) end, - case filelib:ensure_dir(DestDir ++ "/") of + case filelib:ensure_dir(ExpandDir ++ "/") of ok -> ok; - {error, E2} -> rabbit_misc:quit("Could not create dir ~s (~p)", - [DestDir, E2]) + {error, E2} -> throw({error, {cannot_create_plugins_expand_dir, + [ExpandDir, E2]}}) end, - [prepare_plugin(Plugin, DestDir) || Plugin <- ToUnpackPlugins]. + [prepare_plugin(Plugin, ExpandDir) || Plugin <- ToUnpackPlugins], -prepare_dir_plugin(PluginAppDescFn) -> - %% Add the plugin ebin directory to the load path - PluginEBinDirN = filename:dirname(PluginAppDescFn), - code:add_path(PluginEBinDirN), + [prepare_dir_plugin(PluginAppDescPath) || + PluginAppDescPath <- filelib:wildcard(ExpandDir ++ "/*/ebin/*.app")]. - %% We want the second-last token - NameTokens = string:tokens(PluginAppDescFn,"/."), - PluginNameString = lists:nth(length(NameTokens) - 1, NameTokens), - list_to_atom(PluginNameString). +prepare_dir_plugin(PluginAppDescPath) -> + code:add_path(filename:dirname(PluginAppDescPath)), + list_to_atom(filename:basename(PluginAppDescPath, ".app")). %%---------------------------------------------------------------------------- @@ -164,22 +153,19 @@ delete_recursively(Fn) -> Error -> Error end. -prepare_plugin(#plugin{type = ez, location = Location}, PluginDestDir) -> - zip:unzip(Location, [{cwd, PluginDestDir}]); +prepare_plugin(#plugin{type = ez, location = Location}, ExpandDir) -> + zip:unzip(Location, [{cwd, ExpandDir}]); prepare_plugin(#plugin{type = dir, name = Name, location = Location}, - PluginsDestDir) -> - rabbit_file:recursive_copy(Location, - filename:join([PluginsDestDir, Name])). + ExpandDir) -> + rabbit_file:recursive_copy(Location, filename:join([ExpandDir, Name])). -%% Get the #plugin{} from an .ez. -get_plugin_info(Base, {ez, EZ0}) -> +plugin_info(Base, {ez, EZ0}) -> EZ = filename:join([Base, EZ0]), case read_app_file(EZ) of {application, Name, Props} -> mkplugin(Name, Props, ez, EZ); {error, Reason} -> {error, EZ, Reason} end; -%% Get the #plugin{} from an .app. -get_plugin_info(Base, {app, App0}) -> +plugin_info(Base, {app, App0}) -> App = filename:join([Base, App0]), case rabbit_file:read_term_file(App) of {ok, [{application, Name, Props}]} -> @@ -198,7 +184,6 @@ mkplugin(Name, Props, Type, Location) -> #plugin{name = Name, version = Version, description = Description, dependencies = Dependencies, location = Location, type = Type}. -%% Read the .app file from an ez. read_app_file(EZ) -> case zip:list_dir(EZ) of {ok, [_|ZippedFiles]} -> @@ -214,13 +199,11 @@ read_app_file(EZ) -> {error, {invalid_ez, Reason}} end. -%% Return the path of the .app files in ebin/. find_app_files(ZippedFiles) -> {ok, RE} = re:compile("^.*/ebin/.*.app$"), [Path || {zip_file, Path, _, _, _, _} <- ZippedFiles, re:run(Path, RE, [{capture, none}]) =:= match]. -%% Parse a binary into a term. parse_binary(Bin) -> try {ok, Ts, _} = erl_scan:string(binary_to_list(Bin)), @@ -230,13 +213,10 @@ parse_binary(Bin) -> Err -> {error, {invalid_app, Err}} end. -%% Filter out applications that can be loaded *right now*. filter_applications(Applications) -> [Application || Application <- Applications, not is_available_app(Application)]. -%% Return whether is application is already available (and hence -%% doesn't need enabling). is_available_app(Application) -> case application:load(Application) of {error, {already_loaded, _}} -> true; @@ -245,10 +225,8 @@ is_available_app(Application) -> _ -> false end. -%% Return the names of the given plugins. plugin_names(Plugins) -> [Name || #plugin{name = Name} <- Plugins]. -%% Find plugins by name in a list of plugins. lookup_plugins(Names, AllPlugins) -> [P || P = #plugin{name = Name} <- AllPlugins, lists:member(Name, Names)]. diff --git a/src/rabbit_policy.erl b/src/rabbit_policy.erl index 1551795f7c..2717cc9217 100644 --- a/src/rabbit_policy.erl +++ b/src/rabbit_policy.erl @@ -26,7 +26,9 @@ -export([register/0]). -export([name/1, get/2, set/1]). --export([validate/3, validate_clear/2, notify/3, notify_clear/2]). +-export([validate/4, validate_clear/3, notify/4, notify_clear/3]). +-export([parse_set/5, set/5, delete/2, lookup/2, list/0, list/1, + list_formatted/1, info_keys/0]). -rabbit_boot_step({?MODULE, [{description, "policy parameters"}, @@ -41,20 +43,21 @@ name(#amqqueue{policy = Policy}) -> name0(Policy); name(#exchange{policy = Policy}) -> name0(Policy). name0(undefined) -> none; -name0(Policy) -> pget(<<"name">>, Policy). +name0(Policy) -> pget(name, Policy). set(Q = #amqqueue{name = Name}) -> Q#amqqueue{policy = set0(Name)}; set(X = #exchange{name = Name}) -> X#exchange{policy = set0(Name)}. -set0(Name) -> match(Name, list()). +set0(Name = #resource{virtual_host = VHost}) -> match(Name, list(VHost)). get(Name, #amqqueue{policy = Policy}) -> get0(Name, Policy); get(Name, #exchange{policy = Policy}) -> get0(Name, Policy); %% Caution - SLOW. -get(Name, EntityName = #resource{}) -> get0(Name, match(EntityName, list())). +get(Name, EntityName = #resource{virtual_host = VHost}) -> + get0(Name, match(EntityName, list(VHost))). get0(_Name, undefined) -> {error, not_found}; -get0(Name, List) -> case pget(<<"policy">>, List) of +get0(Name, List) -> case pget(definition, List) of undefined -> {error, not_found}; Policy -> case pget(Name, Policy) of undefined -> {error, not_found}; @@ -64,54 +67,121 @@ get0(Name, List) -> case pget(<<"policy">>, List) of %%---------------------------------------------------------------------------- -validate(<<"policy">>, Name, Term) -> +parse_set(VHost, Name, Pattern, Definition, undefined) -> + parse_set0(VHost, Name, Pattern, Definition, 0); +parse_set(VHost, Name, Pattern, Definition, Priority) -> + try list_to_integer(Priority) of + Num -> parse_set0(VHost, Name, Pattern, Definition, Num) + catch + error:badarg -> {error, "~p priority must be a number", [Priority]} + end. + +parse_set0(VHost, Name, Pattern, Defn, Priority) -> + case rabbit_misc:json_decode(Defn) of + {ok, JSON} -> + set0(VHost, Name, + [{<<"pattern">>, list_to_binary(Pattern)}, + {<<"definition">>, rabbit_misc:json_to_term(JSON)}, + {<<"priority">>, Priority}]); + error -> + {error_string, "JSON decoding error"} + end. + +set(VHost, Name, Pattern, Definition, Priority) -> + PolicyProps = [{<<"pattern">>, Pattern}, + {<<"definition">>, Definition}, + {<<"priority">>, case Priority of + undefined -> 0; + _ -> Priority + end}], + set0(VHost, Name, PolicyProps). + +set0(VHost, Name, Term) -> + rabbit_runtime_parameters:set_any(VHost, <<"policy">>, Name, Term). + +delete(VHost, Name) -> + rabbit_runtime_parameters:clear_any(VHost, <<"policy">>, Name). + +lookup(VHost, Name) -> + case rabbit_runtime_parameters:lookup(VHost, <<"policy">>, Name) of + not_found -> not_found; + P -> p(P, fun ident/1) + end. + +list() -> + list('_'). + +list(VHost) -> + list0(VHost, fun ident/1). + +list_formatted(VHost) -> + order_policies(list0(VHost, fun format/1)). + +list0(VHost, DefnFun) -> + [p(P, DefnFun) || P <- rabbit_runtime_parameters:list(VHost, <<"policy">>)]. + +order_policies(PropList) -> + lists:sort(fun (A, B) -> pget(priority, A) < pget(priority, B) end, + PropList). + +p(Parameter, DefnFun) -> + Value = pget(value, Parameter), + [{vhost, pget(vhost, Parameter)}, + {name, pget(name, Parameter)}, + {pattern, pget(<<"pattern">>, Value)}, + {definition, DefnFun(pget(<<"definition">>, Value))}, + {priority, pget(<<"priority">>, Value)}]. + +format(Term) -> + {ok, JSON} = rabbit_misc:json_encode(rabbit_misc:term_to_json(Term)), + list_to_binary(JSON). + +ident(X) -> X. + +info_keys() -> [vhost, name, pattern, definition, priority]. + +%%---------------------------------------------------------------------------- + +validate(_VHost, <<"policy">>, Name, Term) -> rabbit_parameter_validation:proplist( Name, policy_validation(), Term). -validate_clear(<<"policy">>, _Name) -> +validate_clear(_VHost, <<"policy">>, _Name) -> ok. -notify(<<"policy">>, _Name, _Term) -> - update_policies(). +notify(VHost, <<"policy">>, _Name, _Term) -> + update_policies(VHost). -notify_clear(<<"policy">>, _Name) -> - update_policies(). +notify_clear(VHost, <<"policy">>, _Name) -> + update_policies(VHost). %%---------------------------------------------------------------------------- -list() -> - [[{<<"name">>, pget(key, P)} | pget(value, P)] - || P <- rabbit_runtime_parameters:list(<<"policy">>)]. - -update_policies() -> - Policies = list(), +update_policies(VHost) -> + Policies = list(VHost), {Xs, Qs} = rabbit_misc:execute_mnesia_transaction( fun() -> {[update_exchange(X, Policies) || - VHost <- rabbit_vhost:list(), - X <- rabbit_exchange:list(VHost)], + X <- rabbit_exchange:list(VHost)], [update_queue(Q, Policies) || - VHost <- rabbit_vhost:list(), - Q <- rabbit_amqqueue:list(VHost)]} + Q <- rabbit_amqqueue:list(VHost)]} end), [notify(X) || X <- Xs], [notify(Q) || Q <- Qs], ok. update_exchange(X = #exchange{name = XName, policy = OldPolicy}, Policies) -> - NewPolicy = match(XName, Policies), - case NewPolicy of + case match(XName, Policies) of OldPolicy -> no_change; - _ -> rabbit_exchange:update( + NewPolicy -> rabbit_exchange:update( XName, fun(X1) -> X1#exchange{policy = NewPolicy} end), {X, X#exchange{policy = NewPolicy}} end. update_queue(Q = #amqqueue{name = QName, policy = OldPolicy}, Policies) -> - NewPolicy = match(QName, Policies), - case NewPolicy of + case match(QName, Policies) of OldPolicy -> no_change; - _ -> rabbit_amqqueue:update( + NewPolicy -> rabbit_amqqueue:update( QName, fun(Q1) -> Q1#amqqueue{policy = NewPolicy} end), {Q, Q#amqqueue{policy = NewPolicy}} end. @@ -129,28 +199,53 @@ match(Name, Policies) -> [Policy | _Rest] -> Policy end. -matches(#resource{name = Name, virtual_host = VHost}, Policy) -> - Prefix = pget(<<"prefix">>, Policy), - case pget(<<"vhost">>, Policy) of - undefined -> prefix(Prefix, Name); - VHost -> prefix(Prefix, Name); - _ -> false - end. - -prefix(A, B) -> lists:prefix(binary_to_list(A), binary_to_list(B)). +matches(#resource{name = Name}, Policy) -> + match =:= re:run(Name, pget(pattern, Policy), [{capture, none}]). -sort_pred(A, B) -> - R = size(pget(<<"prefix">>, A)) >= size(pget(<<"prefix">>, B)), - case {pget(<<"vhost">>, A), pget(<<"vhost">>, B)} of - {undefined, undefined} -> R; - {undefined, _} -> true; - {_, undefined} -> false; - _ -> R - end. +sort_pred(A, B) -> pget(priority, A) >= pget(priority, B). %%---------------------------------------------------------------------------- policy_validation() -> - [{<<"vhost">>, fun rabbit_parameter_validation:binary/2, optional}, - {<<"prefix">>, fun rabbit_parameter_validation:binary/2, mandatory}, - {<<"policy">>, fun rabbit_parameter_validation:list/2, mandatory}]. + [{<<"priority">>, fun rabbit_parameter_validation:number/2, mandatory}, + {<<"pattern">>, fun rabbit_parameter_validation:regex/2, mandatory}, + {<<"definition">>, fun validation/2, mandatory}]. + +validation(_Name, []) -> + {error, "no policy provided", []}; +validation(_Name, Terms) when is_list(Terms) -> + {Keys, Modules} = lists:unzip( + rabbit_registry:lookup_all(policy_validator)), + [] = dups(Keys), %% ASSERTION + Validators = lists:zipwith(fun (M, K) -> {M, a2b(K)} end, Modules, Keys), + {TermKeys, _} = lists:unzip(Terms), + case dups(TermKeys) of + [] -> validation0(Validators, Terms); + Dup -> {error, "~p duplicate keys not allowed", [Dup]} + end; +validation(_Name, Term) -> + {error, "parse error while reading policy: ~p", [Term]}. + +validation0(Validators, Terms) -> + case lists:foldl( + fun (Mod, {ok, TermsLeft}) -> + ModKeys = proplists:get_all_values(Mod, Validators), + case [T || {Key, _} = T <- TermsLeft, + lists:member(Key, ModKeys)] of + [] -> {ok, TermsLeft}; + Scope -> {Mod:validate_policy(Scope), TermsLeft -- Scope} + end; + (_, Acc) -> + Acc + end, {ok, Terms}, proplists:get_keys(Validators)) of + {ok, []} -> + ok; + {ok, Unvalidated} -> + {error, "~p are not recognised policy settings", [Unvalidated]}; + {Error, _} -> + Error + end. + +a2b(A) -> list_to_binary(atom_to_list(A)). + +dups(L) -> L -- lists:usort(L). diff --git a/src/rabbit_policy_validator.erl b/src/rabbit_policy_validator.erl new file mode 100644 index 0000000000..b59dec2b47 --- /dev/null +++ b/src/rabbit_policy_validator.erl @@ -0,0 +1,37 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(rabbit_policy_validator). + +-ifdef(use_specs). + +-type(validate_results() :: + 'ok' | {error, string(), [term()]} | [validate_results()]). + +-callback validate_policy([{binary(), term()}]) -> validate_results(). + +-else. + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [ + {validate_policy, 1} + ]; +behaviour_info(_Other) -> + undefined. + +-endif. diff --git a/src/rabbit_prelaunch.erl b/src/rabbit_prelaunch.erl index d56211b50e..404afe3c3b 100644 --- a/src/rabbit_prelaunch.erl +++ b/src/rabbit_prelaunch.erl @@ -57,7 +57,7 @@ duplicate_node_check(NodeStr) -> case rabbit_nodes:names(NodeHost) of {ok, NamePorts} -> case proplists:is_defined(NodeName, NamePorts) of - true -> io:format("node with name ~p " + true -> io:format("ERROR: node with name ~p " "already running on ~p~n", [NodeName, NodeHost]), io:format(rabbit_nodes:diagnostics([Node]) ++ "~n"), @@ -65,11 +65,8 @@ duplicate_node_check(NodeStr) -> false -> ok end; {error, EpmdReason} -> - rabbit_misc:quit("epmd error for host ~p: ~p (~s)~n", + io:format("ERROR: epmd error for host ~p: ~p (~s)~n", [NodeHost, EpmdReason, - case EpmdReason of - address -> "unable to establish tcp connection"; - timeout -> "timed out establishing tcp connection"; - _ -> inet:format_error(EpmdReason) - end]) + rabbit_misc:format_inet_error(EpmdReason)]), + rabbit_misc:quit(?ERROR_CODE) end. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3ef769c7e6..21f581548d 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -400,19 +400,19 @@ blank_state_dir(Dir) -> on_sync = fun (_) -> ok end, unsynced_msg_ids = gb_sets:new() }. -clean_file_name(Dir) -> filename:join(Dir, ?CLEAN_FILENAME). +clean_filename(Dir) -> filename:join(Dir, ?CLEAN_FILENAME). detect_clean_shutdown(Dir) -> - case rabbit_file:delete(clean_file_name(Dir)) of + case rabbit_file:delete(clean_filename(Dir)) of ok -> true; {error, enoent} -> false end. read_shutdown_terms(Dir) -> - rabbit_file:read_term_file(clean_file_name(Dir)). + rabbit_file:read_term_file(clean_filename(Dir)). store_clean_shutdown(Terms, Dir) -> - CleanFileName = clean_file_name(Dir), + CleanFileName = clean_filename(Dir), ok = rabbit_file:ensure_dir(CleanFileName), rabbit_file:write_term_file(CleanFileName, Terms). @@ -537,7 +537,7 @@ queue_index_walker_reader(QueueName, Gatherer) -> State = blank_state(QueueName), ok = scan_segments( fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, ok) -> - gatherer:in(Gatherer, {MsgId, 1}); + gatherer:sync_in(Gatherer, {MsgId, 1}); (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered, _IsAcked, Acc) -> Acc diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index bd5cf58845..aef48b2030 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -173,6 +173,8 @@ server_capabilities(rabbit_framing_amqp_0_9_1) -> server_capabilities(_) -> []. +%%-------------------------------------------------------------------------- + log(Level, Fmt, Args) -> rabbit_log:log(connection, Level, Fmt, Args). inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F). @@ -182,6 +184,8 @@ socket_op(Sock, Fun) -> {ok, Res} -> Res; {error, Reason} -> log(error, "error on AMQP connection ~p: ~p~n", [self(), Reason]), + %% NB: this is tcp socket, even in case of ssl + rabbit_net:fast_close(Sock), exit(normal) end. @@ -234,15 +238,14 @@ start_connection(Parent, ChannelSupSupPid, Collector, StartHeartbeatFun, Deb, end, "closing AMQP connection ~p (~s):~n~p~n", [self(), ConnStr, Ex]) after - %% The reader is the controlling process and hence its - %% termination will close the socket. Furthermore, - %% gen_tcp:close/1 waits for pending output to be sent, which - %% results in unnecessary delays. However, to keep the - %% file_handle_cache accounting as accurate as possible it - %% would be good to close the socket immediately if we - %% can. But we can only do this for non-ssl sockets. - %% - rabbit_net:maybe_fast_close(ClientSock), + %% We don't call gen_tcp:close/1 here since it waits for + %% pending output to be sent, which results in unnecessary + %% delays. We could just terminate - the reader is the + %% controlling process and hence its termination will close + %% the socket. However, to keep the file_handle_cache + %% accounting as accurate as possible we ought to close the + %% socket w/o delay before termination. + rabbit_net:fast_close(ClientSock), rabbit_event:notify(connection_closed, [{pid, self()}]) end, done. @@ -311,7 +314,7 @@ handle_other(handshake_timeout, Deb, State) mainloop(Deb, State); handle_other(handshake_timeout, _Deb, State) -> throw({handshake_timeout, State#v1.callback}); -handle_other(timeout, Deb, State = #v1{connection_state = closed}) -> +handle_other(heartbeat_timeout, Deb, State = #v1{connection_state = closed}) -> mainloop(Deb, State); handle_other(heartbeat_timeout, _Deb, #v1{connection_state = S}) -> throw({heartbeat_timeout, S}); @@ -353,9 +356,9 @@ switch_callback(State, Callback, Length) -> State#v1{callback = Callback, recv_len = Length}. terminate(Explanation, State) when ?IS_RUNNING(State) -> - {normal, send_exception(State, 0, - rabbit_misc:amqp_error( - connection_forced, Explanation, [], none))}; + {normal, handle_exception(State, 0, + rabbit_misc:amqp_error( + connection_forced, Explanation, [], none))}; terminate(_Explanation, State) -> {force, State}. @@ -383,6 +386,9 @@ update_last_blocked_by(State = #v1{conserve_resources = true}) -> update_last_blocked_by(State = #v1{conserve_resources = false}) -> State#v1{last_blocked_by = flow}. +%%-------------------------------------------------------------------------- +%% error handling / termination + close_connection(State = #v1{queue_collector = Collector, connection = #connection{ timeout_sec = TimeoutSec}}) -> @@ -406,24 +412,10 @@ handle_dependent_exit(ChPid, Reason, State) -> {_Channel, controlled} -> maybe_close(control_throttle(State)); {Channel, uncontrolled} -> - log(error, "AMQP connection ~p, channel ~p - error:~n~p~n", - [self(), Channel, Reason]), maybe_close(handle_exception(control_throttle(State), Channel, Reason)) end. -channel_cleanup(ChPid) -> - case get({ch_pid, ChPid}) of - undefined -> undefined; - {Channel, MRef} -> credit_flow:peer_down(ChPid), - erase({channel, Channel}), - erase({ch_pid, ChPid}), - erlang:demonitor(MRef, [flush]), - Channel - end. - -all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()]. - terminate_channels() -> NChannels = length([rabbit_channel:shutdown(ChPid) || ChPid <- all_channels()]), @@ -477,6 +469,80 @@ maybe_close(State) -> termination_kind(normal) -> controlled; termination_kind(_) -> uncontrolled. +handle_exception(State = #v1{connection_state = closed}, Channel, Reason) -> + log(error, "AMQP connection ~p (~p), channel ~p - error:~n~p~n", + [self(), closed, Channel, Reason]), + State; +handle_exception(State = #v1{connection = #connection{protocol = Protocol}, + connection_state = CS}, + Channel, Reason) + when ?IS_RUNNING(State) orelse CS =:= closing -> + log(error, "AMQP connection ~p (~p), channel ~p - error:~n~p~n", + [self(), CS, Channel, Reason]), + {0, CloseMethod} = + rabbit_binary_generator:map_exception(Channel, Reason, Protocol), + terminate_channels(), + State1 = close_connection(State), + ok = send_on_channel0(State1#v1.sock, CloseMethod, Protocol), + State1; +handle_exception(State, Channel, Reason) -> + %% We don't trust the client at this point - force them to wait + %% for a bit so they can't DOS us with repeated failed logins etc. + timer:sleep(?SILENT_CLOSE_DELAY * 1000), + throw({handshake_error, State#v1.connection_state, Channel, Reason}). + +frame_error(Error, Type, Channel, Payload, State) -> + {Str, Bin} = payload_snippet(Payload), + handle_exception(State, Channel, + rabbit_misc:amqp_error(frame_error, + "type ~p, ~s octets = ~p: ~p", + [Type, Str, Bin, Error], none)). + +unexpected_frame(Type, Channel, Payload, State) -> + {Str, Bin} = payload_snippet(Payload), + handle_exception(State, Channel, + rabbit_misc:amqp_error(unexpected_frame, + "type ~p, ~s octets = ~p", + [Type, Str, Bin], none)). + +payload_snippet(Payload) when size(Payload) =< 16 -> + {"all", Payload}; +payload_snippet(<<Snippet:16/binary, _/binary>>) -> + {"first 16", Snippet}. + +%%-------------------------------------------------------------------------- + +create_channel(Channel, State) -> + #v1{sock = Sock, queue_collector = Collector, + channel_sup_sup_pid = ChanSupSup, + connection = #connection{protocol = Protocol, + frame_max = FrameMax, + user = User, + vhost = VHost, + capabilities = Capabilities}} = State, + {ok, _ChSupPid, {ChPid, AState}} = + rabbit_channel_sup_sup:start_channel( + ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), name(Sock), + Protocol, User, VHost, Capabilities, Collector}), + MRef = erlang:monitor(process, ChPid), + put({ch_pid, ChPid}, {Channel, MRef}), + put({channel, Channel}, {ChPid, AState}), + {ChPid, AState}. + +channel_cleanup(ChPid) -> + case get({ch_pid, ChPid}) of + undefined -> undefined; + {Channel, MRef} -> credit_flow:peer_down(ChPid), + erase({channel, Channel}), + erase({ch_pid, ChPid}), + erlang:demonitor(MRef, [flush]), + Channel + end. + +all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()]. + +%%-------------------------------------------------------------------------- + handle_frame(Type, 0, Payload, State = #v1{connection_state = CS, connection = #connection{protocol = Protocol}}) @@ -492,34 +558,43 @@ handle_frame(_Type, _Channel, _Payload, State = #v1{connection_state = CS}) handle_frame(Type, 0, Payload, State = #v1{connection = #connection{protocol = Protocol}}) -> case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of - error -> throw({unknown_frame, 0, Type, Payload}); + error -> frame_error(unknown_frame, Type, 0, Payload, State); heartbeat -> State; {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); - Other -> throw({unexpected_frame_on_channel0, Other}) + _Other -> unexpected_frame(Type, 0, Payload, State) end; handle_frame(Type, Channel, Payload, - State = #v1{connection = #connection{protocol = Protocol}}) -> + State = #v1{connection = #connection{protocol = Protocol}}) + when ?IS_RUNNING(State) -> case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of - error -> throw({unknown_frame, Channel, Type, Payload}); - heartbeat -> throw({unexpected_heartbeat_frame, Channel}); - AnalyzedFrame -> process_frame(AnalyzedFrame, Channel, State) - end. + error -> frame_error(unknown_frame, Type, Channel, Payload, State); + heartbeat -> unexpected_frame(Type, Channel, Payload, State); + Frame -> process_frame(Frame, Channel, State) + end; +handle_frame(Type, Channel, Payload, State) -> + unexpected_frame(Type, Channel, Payload, State). process_frame(Frame, Channel, State) -> - case get({channel, Channel}) of - {ChPid, AState} -> - case process_channel_frame(Frame, ChPid, AState) of - {ok, NewAState} -> put({channel, Channel}, {ChPid, NewAState}), - post_process_frame(Frame, ChPid, State); - {error, Reason} -> handle_exception(State, Channel, Reason) - end; - undefined when ?IS_RUNNING(State) -> - ok = create_channel(Channel, State), - process_frame(Frame, Channel, State); - undefined -> - throw({channel_frame_while_starting, - Channel, State#v1.connection_state, Frame}) + {ChPid, AState} = case get({channel, Channel}) of + undefined -> create_channel(Channel, State); + Other -> Other + end, + case process_channel_frame(Frame, ChPid, AState) of + {ok, NewAState} -> put({channel, Channel}, {ChPid, NewAState}), + post_process_frame(Frame, ChPid, State); + {error, Reason} -> handle_exception(State, Channel, Reason) + end. + +process_channel_frame(Frame, ChPid, AState) -> + case rabbit_command_assembler:process(Frame, AState) of + {ok, NewAState} -> {ok, NewAState}; + {ok, Method, NewAState} -> rabbit_channel:do(ChPid, Method), + {ok, NewAState}; + {ok, Method, Content, NewAState} -> rabbit_channel:do_flow( + ChPid, Method, Content), + {ok, NewAState}; + {error, Reason} -> {error, Reason} end. post_process_frame({method, 'channel.close_ok', _}, ChPid, State) -> @@ -536,19 +611,20 @@ post_process_frame({method, MethodName, _}, _ChPid, post_process_frame(_Frame, _ChPid, State) -> control_throttle(State). +%%-------------------------------------------------------------------------- + handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, State) -> ensure_stats_timer( switch_callback(State, {frame_payload, Type, Channel, PayloadSize}, PayloadSize + 1)); -handle_input({frame_payload, Type, Channel, PayloadSize}, - PayloadAndMarker, State) -> - case PayloadAndMarker of - <<Payload:PayloadSize/binary, ?FRAME_END>> -> - switch_callback(handle_frame(Type, Channel, Payload, State), - frame_header, 7); - _ -> - throw({bad_payload, Type, Channel, PayloadSize, PayloadAndMarker}) +handle_input({frame_payload, Type, Channel, PayloadSize}, Data, State) -> + <<Payload:PayloadSize/binary, EndMarker>> = Data, + case EndMarker of + ?FRAME_END -> State1 = handle_frame(Type, Channel, Payload, State), + switch_callback(State1, frame_header, 7); + _ -> frame_error({invalid_frame_end_marker, EndMarker}, + Type, Channel, Payload, State) end; %% The two rules pertaining to version negotiation: @@ -619,24 +695,14 @@ ensure_stats_timer(State) -> handle_method0(MethodName, FieldsBin, State = #v1{connection = #connection{protocol = Protocol}}) -> - HandleException = - fun(R) -> - case ?IS_RUNNING(State) of - true -> send_exception(State, 0, R); - %% We don't trust the client at this point - force - %% them to wait for a bit so they can't DOS us with - %% repeated failed logins etc. - false -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), - throw({channel0_error, State#v1.connection_state, R}) - end - end, try handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin), State) catch exit:#amqp_error{method = none} = Reason -> - HandleException(Reason#amqp_error{method = MethodName}); + handle_exception(State, 0, Reason#amqp_error{method = MethodName}); Type:Reason -> - HandleException({Type, Reason, MethodName, erlang:get_stacktrace()}) + Stack = erlang:get_stacktrace(), + handle_exception(State, 0, {Type, Reason, MethodName, Stack}) end. handle_method0(#'connection.start_ok'{mechanism = Mechanism, @@ -740,6 +806,10 @@ server_frame_max() -> {ok, FrameMax} = application:get_env(rabbit, frame_max), FrameMax. +server_heartbeat() -> + {ok, Heartbeat} = application:get_env(rabbit, heartbeat), + Heartbeat. + send_on_channel0(Sock, Method, Protocol) -> ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol). @@ -791,7 +861,7 @@ auth_phase(Response, {ok, User} -> Tune = #'connection.tune'{channel_max = 0, frame_max = server_frame_max(), - heartbeat = 0}, + heartbeat = server_heartbeat()}, ok = send_on_channel0(Sock, Tune, Protocol), State#v1{connection_state = tuning, connection = Connection#connection{user = User}} @@ -834,8 +904,8 @@ i(SockStat, #v1{sock = Sock}) when SockStat =:= recv_oct; SockStat =:= send_oct; SockStat =:= send_cnt; SockStat =:= send_pend -> - socket_info(fun () -> rabbit_net:getstat(Sock, [SockStat]) end, - fun ([{_, I}]) -> I end); + socket_info(fun (S) -> rabbit_net:getstat(S, [SockStat]) end, + fun ([{_, I}]) -> I end, Sock); i(state, #v1{connection_state = S}) -> S; i(last_blocked_by, #v1{last_blocked_by = By}) -> @@ -871,10 +941,7 @@ i(Item, #v1{}) -> throw({bad_argument, Item}). socket_info(Get, Select, Sock) -> - socket_info(fun() -> Get(Sock) end, Select). - -socket_info(Get, Select) -> - case Get() of + case Get(Sock) of {ok, T} -> Select(T); {error, _} -> '' end. @@ -897,51 +964,6 @@ cert_info(F, Sock) -> {ok, Cert} -> list_to_binary(F(Cert)) end. -%%-------------------------------------------------------------------------- - -create_channel(Channel, State) -> - #v1{sock = Sock, queue_collector = Collector, - channel_sup_sup_pid = ChanSupSup, - connection = #connection{protocol = Protocol, - frame_max = FrameMax, - user = User, - vhost = VHost, - capabilities = Capabilities}} = State, - {ok, _ChSupPid, {ChPid, AState}} = - rabbit_channel_sup_sup:start_channel( - ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), name(Sock), - Protocol, User, VHost, Capabilities, Collector}), - MRef = erlang:monitor(process, ChPid), - put({ch_pid, ChPid}, {Channel, MRef}), - put({channel, Channel}, {ChPid, AState}), - ok. - -process_channel_frame(Frame, ChPid, AState) -> - case rabbit_command_assembler:process(Frame, AState) of - {ok, NewAState} -> {ok, NewAState}; - {ok, Method, NewAState} -> rabbit_channel:do(ChPid, Method), - {ok, NewAState}; - {ok, Method, Content, NewAState} -> rabbit_channel:do_flow( - ChPid, Method, Content), - {ok, NewAState}; - {error, Reason} -> {error, Reason} - end. - -handle_exception(State = #v1{connection_state = closed}, _Channel, _Reason) -> - State; -handle_exception(State, Channel, Reason) -> - send_exception(State, Channel, Reason). - -send_exception(State = #v1{connection = #connection{protocol = Protocol}}, - Channel, Reason) -> - {0, CloseMethod} = - rabbit_binary_generator:map_exception(Channel, Reason, Protocol), - terminate_channels(), - State1 = close_connection(State), - ok = rabbit_writer:internal_send_command( - State1#v1.sock, 0, CloseMethod, Protocol), - State1. - emit_stats(State) -> rabbit_event:notify(connection_stats, infos(?STATISTICS_KEYS, State)), rabbit_event:reset_stats_timer(State, #v1.stats_timer). diff --git a/src/rabbit_registry.erl b/src/rabbit_registry.erl index e14bbba018..32709d2484 100644 --- a/src/rabbit_registry.erl +++ b/src/rabbit_registry.erl @@ -107,7 +107,8 @@ sanity_check_module(ClassModule, Module) -> class_module(exchange) -> rabbit_exchange_type; class_module(auth_mechanism) -> rabbit_auth_mechanism; class_module(runtime_parameter) -> rabbit_runtime_parameter; -class_module(exchange_decorator) -> rabbit_exchange_decorator. +class_module(exchange_decorator) -> rabbit_exchange_decorator; +class_module(policy_validator) -> rabbit_policy_validator. %%--------------------------------------------------------------------------- diff --git a/src/rabbit_runtime_parameter.erl b/src/rabbit_runtime_parameter.erl index c7d3011674..186680496e 100644 --- a/src/rabbit_runtime_parameter.erl +++ b/src/rabbit_runtime_parameter.erl @@ -21,10 +21,12 @@ -type(validate_results() :: 'ok' | {error, string(), [term()]} | [validate_results()]). --callback validate(binary(), binary(), term()) -> validate_results(). --callback validate_clear(binary(), binary()) -> validate_results(). --callback notify(binary(), binary(), term()) -> 'ok'. --callback notify_clear(binary(), binary()) -> 'ok'. +-callback validate(rabbit_types:vhost(), binary(), binary(), + term()) -> validate_results(). +-callback validate_clear(rabbit_types:vhost(), binary(), + binary()) -> validate_results(). +-callback notify(rabbit_types:vhost(), binary(), binary(), term()) -> 'ok'. +-callback notify_clear(rabbit_types:vhost(), binary(), binary()) -> 'ok'. -else. @@ -32,10 +34,10 @@ behaviour_info(callbacks) -> [ - {validate, 3}, - {validate_clear, 2}, - {notify, 3}, - {notify_clear, 2} + {validate, 4}, + {validate_clear, 3}, + {notify, 4}, + {notify_clear, 3} ]; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_runtime_parameters.erl b/src/rabbit_runtime_parameters.erl index 3a54e8f621..49060409e1 100644 --- a/src/rabbit_runtime_parameters.erl +++ b/src/rabbit_runtime_parameters.erl @@ -18,8 +18,9 @@ -include("rabbit.hrl"). --export([parse_set/3, set/3, clear/2, list/0, list/1, list_strict/1, - list_formatted/0, lookup/2, value/2, value/3, info_keys/0]). +-export([parse_set/4, set/4, set_any/4, clear/3, clear_any/3, list/0, list/1, + list_strict/1, list/2, list_strict/2, list_formatted/1, lookup/3, + value/3, value/4, info_keys/0]). %%---------------------------------------------------------------------------- @@ -27,16 +28,29 @@ -type(ok_or_error_string() :: 'ok' | {'error_string', string()}). --spec(parse_set/3 :: (binary(), binary(), string()) -> ok_or_error_string()). --spec(set/3 :: (binary(), binary(), term()) -> ok_or_error_string()). --spec(clear/2 :: (binary(), binary()) -> ok_or_error_string()). +-spec(parse_set/4 :: (rabbit_types:vhost(), binary(), binary(), string()) + -> ok_or_error_string()). +-spec(set/4 :: (rabbit_types:vhost(), binary(), binary(), term()) + -> ok_or_error_string()). +-spec(set_any/4 :: (rabbit_types:vhost(), binary(), binary(), term()) + -> ok_or_error_string()). +-spec(clear/3 :: (rabbit_types:vhost(), binary(), binary()) + -> ok_or_error_string()). +-spec(clear_any/3 :: (rabbit_types:vhost(), binary(), binary()) + -> ok_or_error_string()). -spec(list/0 :: () -> [rabbit_types:infos()]). --spec(list/1 :: (binary()) -> [rabbit_types:infos()]). --spec(list_strict/1 :: (binary()) -> [rabbit_types:infos()] | 'not_found'). --spec(list_formatted/0 :: () -> [rabbit_types:infos()]). --spec(lookup/2 :: (binary(), binary()) -> rabbit_types:infos()). --spec(value/2 :: (binary(), binary()) -> term()). --spec(value/3 :: (binary(), binary(), term()) -> term()). +-spec(list/1 :: (rabbit_types:vhost() | '_') -> [rabbit_types:infos()]). +-spec(list_strict/1 :: (binary() | '_') + -> [rabbit_types:infos()] | 'not_found'). +-spec(list/2 :: (rabbit_types:vhost() | '_', binary() | '_') + -> [rabbit_types:infos()]). +-spec(list_strict/2 :: (rabbit_types:vhost() | '_', binary() | '_') + -> [rabbit_types:infos()] | 'not_found'). +-spec(list_formatted/1 :: (rabbit_types:vhost()) -> [rabbit_types:infos()]). +-spec(lookup/3 :: (rabbit_types:vhost(), binary(), binary()) + -> rabbit_types:infos() | 'not_found'). +-spec(value/3 :: (rabbit_types:vhost(), binary(), binary()) -> term()). +-spec(value/4 :: (rabbit_types:vhost(), binary(), binary(), term()) -> term()). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). -endif. @@ -49,36 +63,39 @@ %%--------------------------------------------------------------------------- -parse_set(Component, Key, String) -> - case parse(String) of - {ok, Term} -> set(Component, Key, Term); - {errors, L} -> format_error(L) +parse_set(_, <<"policy">>, _, _) -> + {error_string, "policies may not be set using this method"}; +parse_set(VHost, Component, Name, String) -> + case rabbit_misc:json_decode(String) of + {ok, JSON} -> set(VHost, Component, Name, + rabbit_misc:json_to_term(JSON)); + error -> {error_string, "JSON decoding error"} end. -set(Component, Key, Term) -> - case set0(Component, Key, Term) of - ok -> ok; - {errors, L} -> format_error(L) - end. +set(_, <<"policy">>, _, _) -> + {error_string, "policies may not be set using this method"}; +set(VHost, Component, Name, Term) -> + set_any(VHost, Component, Name, Term). format_error(L) -> {error_string, rabbit_misc:format_many([{"Validation failed~n", []} | L])}. -set0(Component, Key, Term) -> +set_any(VHost, Component, Name, Term) -> + case set_any0(VHost, Component, Name, Term) of + ok -> ok; + {errors, L} -> format_error(L) + end. + +set_any0(VHost, Component, Name, Term) -> case lookup_component(Component) of {ok, Mod} -> - case flatten_errors(validate(Term)) of + case flatten_errors(Mod:validate(VHost, Component, Name, Term)) of ok -> - case flatten_errors(Mod:validate(Component, Key, Term)) of - ok -> - case mnesia_update(Component, Key, Term) of - {old, Term} -> ok; - _ -> Mod:notify(Component, Key, Term) - end, - ok; - E -> - E - end; + case mnesia_update(VHost, Component, Name, Term) of + {old, Term} -> ok; + _ -> Mod:notify(VHost, Component, Name, Term) + end, + ok; E -> E end; @@ -86,102 +103,125 @@ set0(Component, Key, Term) -> E end. -mnesia_update(Component, Key, Term) -> +mnesia_update(VHost, Component, Name, Term) -> rabbit_misc:execute_mnesia_transaction( fun () -> - Res = case mnesia:read(?TABLE, {Component, Key}, read) of + Res = case mnesia:read(?TABLE, {VHost, Component, Name}, read) of [] -> new; [Params] -> {old, Params#runtime_parameters.value} end, - ok = mnesia:write(?TABLE, c(Component, Key, Term), write), + ok = mnesia:write(?TABLE, c(VHost, Component, Name, Term), write), Res end). -clear(Component, Key) -> - case clear0(Component, Key) of +clear(_, <<"policy">> , _) -> + {error_string, "policies may not be cleared using this method"}; +clear(VHost, Component, Name) -> + clear_any(VHost, Component, Name). + +clear_any(VHost, Component, Name) -> + case clear_any0(VHost, Component, Name) of ok -> ok; {errors, L} -> format_error(L) end. -clear0(Component, Key) -> +clear_any0(VHost, Component, Name) -> case lookup_component(Component) of - {ok, Mod} -> case flatten_errors(Mod:validate_clear(Component, Key)) of - ok -> mnesia_clear(Component, Key), - Mod:notify_clear(Component, Key), + {ok, Mod} -> case flatten_errors( + Mod:validate_clear(VHost, Component, Name)) of + ok -> mnesia_clear(VHost, Component, Name), + Mod:notify_clear(VHost, Component, Name), ok; E -> E end; E -> E end. -mnesia_clear(Component, Key) -> +mnesia_clear(VHost, Component, Name) -> ok = rabbit_misc:execute_mnesia_transaction( fun () -> - ok = mnesia:delete(?TABLE, {Component, Key}, write) + ok = mnesia:delete(?TABLE, {VHost, Component, Name}, write) end). list() -> - [p(P) || P <- rabbit_misc:dirty_read_all(?TABLE)]. - -list(Component) -> list(Component, []). -list_strict(Component) -> list(Component, not_found). - -list(Component, Default) -> - case lookup_component(Component) of - {ok, _} -> Match = #runtime_parameters{key = {Component, '_'}, _ = '_'}, - [p(P) || P <- mnesia:dirty_match_object(?TABLE, Match)]; - _ -> Default + [p(P) || #runtime_parameters{ key = {_VHost, Comp, _Name}} = P <- + rabbit_misc:dirty_read_all(?TABLE), Comp /= <<"policy">>]. + +list(VHost) -> list(VHost, '_', []). +list_strict(Component) -> list('_', Component, not_found). +list(VHost, Component) -> list(VHost, Component, []). +list_strict(VHost, Component) -> list(VHost, Component, not_found). + +list(VHost, Component, Default) -> + case component_good(Component) of + true -> Match = #runtime_parameters{key = {VHost, Component, '_'}, + _ = '_'}, + [p(P) || #runtime_parameters{ key = {_VHost, Comp, _Name}} = P <- + mnesia:dirty_match_object(?TABLE, Match), + Comp =/= <<"policy">> orelse + Component =:= <<"policy">>]; + _ -> Default end. -list_formatted() -> - [pset(value, format(pget(value, P)), P) || P <- list()]. +list_formatted(VHost) -> + [pset(value, format(pget(value, P)), P) || P <- list(VHost)]. -lookup(Component, Key) -> - case lookup0(Component, Key, rabbit_misc:const(not_found)) of +lookup(VHost, Component, Name) -> + case lookup0(VHost, Component, Name, rabbit_misc:const(not_found)) of not_found -> not_found; Params -> p(Params) end. -value(Component, Key) -> - case lookup0(Component, Key, rabbit_misc:const(not_found)) of +value(VHost, Component, Name) -> + case lookup0(VHost, Component, Name, rabbit_misc:const(not_found)) of not_found -> not_found; Params -> Params#runtime_parameters.value end. -value(Component, Key, Default) -> - Params = lookup0(Component, Key, - fun () -> lookup_missing(Component, Key, Default) end), +value(VHost, Component, Name, Default) -> + Params = lookup0(VHost, Component, Name, + fun () -> + lookup_missing(VHost, Component, Name, Default) + end), Params#runtime_parameters.value. -lookup0(Component, Key, DefaultFun) -> - case mnesia:dirty_read(?TABLE, {Component, Key}) of +lookup0(VHost, Component, Name, DefaultFun) -> + case mnesia:dirty_read(?TABLE, {VHost, Component, Name}) of [] -> DefaultFun(); [R] -> R end. -lookup_missing(Component, Key, Default) -> +lookup_missing(VHost, Component, Name, Default) -> rabbit_misc:execute_mnesia_transaction( fun () -> - case mnesia:read(?TABLE, {Component, Key}, read) of - [] -> Record = c(Component, Key, Default), + case mnesia:read(?TABLE, {VHost, Component, Name}, read) of + [] -> Record = c(VHost, Component, Name, Default), mnesia:write(?TABLE, Record, write), Record; [R] -> R end end). -c(Component, Key, Default) -> #runtime_parameters{key = {Component, Key}, - value = Default}. +c(VHost, Component, Name, Default) -> + #runtime_parameters{key = {VHost, Component, Name}, + value = Default}. -p(#runtime_parameters{key = {Component, Key}, value = Value}) -> - [{component, Component}, - {key, Key}, +p(#runtime_parameters{key = {VHost, Component, Name}, value = Value}) -> + [{vhost, VHost}, + {component, Component}, + {name, Name}, {value, Value}]. -info_keys() -> [component, key, value]. +info_keys() -> [component, name, value]. %%--------------------------------------------------------------------------- +component_good('_') -> true; +component_good(Component) -> case lookup_component(Component) of + {ok, _} -> true; + _ -> false + end. + lookup_component(Component) -> case rabbit_registry:lookup_module( runtime_parameter, list_to_atom(binary_to_list(Component))) of @@ -190,51 +230,9 @@ lookup_component(Component) -> {ok, Module} -> {ok, Module} end. -parse(Src0) -> - Src1 = string:strip(Src0), - Src = case lists:reverse(Src1) of - [$. |_] -> Src1; - _ -> Src1 ++ "." - end, - case erl_scan:string(Src) of - {ok, Scanned, _} -> - case erl_parse:parse_term(Scanned) of - {ok, Parsed} -> - {ok, Parsed}; - {error, E} -> - {errors, - [{"Could not parse value: ~s", [format_parse_error(E)]}]} - end; - {error, E, _} -> - {errors, [{"Could not scan value: ~s", [format_parse_error(E)]}]} - end. - -format_parse_error({_Line, Mod, Err}) -> - lists:flatten(Mod:format_error(Err)). - format(Term) -> - list_to_binary(rabbit_misc:format("~p", [Term])). - -%%--------------------------------------------------------------------------- - -%% We will want to be able to biject these to JSON. So we have some -%% generic restrictions on what we consider acceptable. -validate(Proplist = [T | _]) when is_tuple(T) -> validate_proplist(Proplist); -validate(L) when is_list(L) -> validate_list(L); -validate(T) when is_tuple(T) -> {error, "tuple: ~p", [T]}; -validate(B) when is_boolean(B) -> ok; -validate(null) -> ok; -validate(A) when is_atom(A) -> {error, "atom: ~p", [A]}; -validate(N) when is_number(N) -> ok; -validate(B) when is_binary(B) -> ok; -validate(B) when is_bitstring(B) -> {error, "bitstring: ~p", [B]}. - -validate_list(L) -> [validate(I) || I <- L]. -validate_proplist(L) -> [vp(I) || I <- L]. - -vp({K, V}) when is_binary(K) -> validate(V); -vp({K, _V}) -> {error, "bad key: ~p", [K]}; -vp(H) -> {error, "not two tuple: ~p", [H]}. + {ok, JSON} = rabbit_misc:json_encode(rabbit_misc:term_to_json(Term)), + list_to_binary(JSON). flatten_errors(L) -> case [{F, A} || I <- lists:flatten([L]), {error, F, A} <- [I]] of diff --git a/src/rabbit_runtime_parameters_test.erl b/src/rabbit_runtime_parameters_test.erl index f23b322722..d4d7271e90 100644 --- a/src/rabbit_runtime_parameters_test.erl +++ b/src/rabbit_runtime_parameters_test.erl @@ -16,9 +16,14 @@ -module(rabbit_runtime_parameters_test). -behaviour(rabbit_runtime_parameter). +-behaviour(rabbit_policy_validator). --export([validate/3, validate_clear/2, notify/3, notify_clear/2]). +-export([validate/4, validate_clear/3, notify/4, notify_clear/3]). -export([register/0, unregister/0]). +-export([validate_policy/1]). +-export([register_policy_validator/0, unregister_policy_validator/0]). + +%---------------------------------------------------------------------------- register() -> rabbit_registry:register(runtime_parameter, <<"test">>, ?MODULE). @@ -26,13 +31,38 @@ register() -> unregister() -> rabbit_registry:unregister(runtime_parameter, <<"test">>). -validate(<<"test">>, <<"good">>, _Term) -> ok; -validate(<<"test">>, <<"maybe">>, <<"good">>) -> ok; -validate(<<"test">>, _, _) -> {error, "meh", []}. +validate(_, <<"test">>, <<"good">>, _Term) -> ok; +validate(_, <<"test">>, <<"maybe">>, <<"good">>) -> ok; +validate(_, <<"test">>, _, _) -> {error, "meh", []}. + +validate_clear(_, <<"test">>, <<"good">>) -> ok; +validate_clear(_, <<"test">>, <<"maybe">>) -> ok; +validate_clear(_, <<"test">>, _) -> {error, "meh", []}. + +notify(_, _, _, _) -> ok. +notify_clear(_, _, _) -> ok. + +%---------------------------------------------------------------------------- + +register_policy_validator() -> + rabbit_registry:register(policy_validator, <<"testeven">>, ?MODULE), + rabbit_registry:register(policy_validator, <<"testpos">>, ?MODULE). + +unregister_policy_validator() -> + rabbit_registry:unregister(policy_validator, <<"testeven">>), + rabbit_registry:unregister(policy_validator, <<"testpos">>). + +validate_policy([{<<"testeven">>, Terms}]) when is_list(Terms) -> + case length(Terms) rem 2 =:= 0 of + true -> ok; + false -> {error, "meh", []} + end; -validate_clear(<<"test">>, <<"good">>) -> ok; -validate_clear(<<"test">>, <<"maybe">>) -> ok; -validate_clear(<<"test">>, _) -> {error, "meh", []}. +validate_policy([{<<"testpos">>, Terms}]) when is_list(Terms) -> + case lists:all(fun (N) -> is_integer(N) andalso N > 0 end, Terms) of + true -> ok; + false -> {error, "meh", []} + end; -notify(_, _, _) -> ok. -notify_clear(_, _) -> ok. +validate_policy(_) -> + {error, "meh", []}. diff --git a/src/rabbit_table.erl b/src/rabbit_table.erl new file mode 100644 index 0000000000..fa1c5bbd01 --- /dev/null +++ b/src/rabbit_table.erl @@ -0,0 +1,311 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(rabbit_table). + +-export([create/0, create_local_copy/1, wait_for_replicated/0, wait/1, + force_load/0, is_present/0, is_empty/0, + check_schema_integrity/0, clear_ram_only_tables/0]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(create/0 :: () -> 'ok'). +-spec(create_local_copy/1 :: ('disc' | 'ram') -> 'ok'). +-spec(wait_for_replicated/0 :: () -> 'ok'). +-spec(wait/1 :: ([atom()]) -> 'ok'). +-spec(force_load/0 :: () -> 'ok'). +-spec(is_present/0 :: () -> boolean()). +-spec(is_empty/0 :: () -> boolean()). +-spec(check_schema_integrity/0 :: () -> rabbit_types:ok_or_error(any())). +-spec(clear_ram_only_tables/0 :: () -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- +%% Main interface +%%---------------------------------------------------------------------------- + +create() -> + lists:foreach(fun ({Tab, TabDef}) -> + TabDef1 = proplists:delete(match, TabDef), + case mnesia:create_table(Tab, TabDef1) of + {atomic, ok} -> ok; + {aborted, Reason} -> + throw({error, {table_creation_failed, + Tab, TabDef1, Reason}}) + end + end, definitions()), + ok. + +%% The sequence in which we delete the schema and then the other +%% tables is important: if we delete the schema first when moving to +%% RAM mnesia will loudly complain since it doesn't make much sense to +%% do that. But when moving to disc, we need to move the schema first. +create_local_copy(disc) -> + create_local_copy(schema, disc_copies), + create_local_copies(disc); +create_local_copy(ram) -> + create_local_copies(ram), + create_local_copy(schema, ram_copies). + +wait_for_replicated() -> + wait([Tab || {Tab, TabDef} <- definitions(), + not lists:member({local_content, true}, TabDef)]). + +wait(TableNames) -> + case mnesia:wait_for_tables(TableNames, 30000) of + ok -> + ok; + {timeout, BadTabs} -> + throw({error, {timeout_waiting_for_tables, BadTabs}}); + {error, Reason} -> + throw({error, {failed_waiting_for_tables, Reason}}) + end. + +force_load() -> [mnesia:force_load_table(T) || T <- names()], ok. + +is_present() -> names() -- mnesia:system_info(tables) =:= []. + +is_empty() -> + lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, + names()). + +check_schema_integrity() -> + Tables = mnesia:system_info(tables), + case check(fun (Tab, TabDef) -> + case lists:member(Tab, Tables) of + false -> {error, {table_missing, Tab}}; + true -> check_attributes(Tab, TabDef) + end + end) of + ok -> ok = wait(names()), + check(fun check_content/2); + Other -> Other + end. + +clear_ram_only_tables() -> + Node = node(), + lists:foreach( + fun (TabName) -> + case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of + true -> {atomic, ok} = mnesia:clear_table(TabName); + false -> ok + end + end, names()), + ok. + +%%-------------------------------------------------------------------- +%% Internal helpers +%%-------------------------------------------------------------------- + +create_local_copies(Type) -> + lists:foreach( + fun ({Tab, TabDef}) -> + HasDiscCopies = has_copy_type(TabDef, disc_copies), + HasDiscOnlyCopies = has_copy_type(TabDef, disc_only_copies), + LocalTab = proplists:get_bool(local_content, TabDef), + StorageType = + if + Type =:= disc orelse LocalTab -> + if + HasDiscCopies -> disc_copies; + HasDiscOnlyCopies -> disc_only_copies; + true -> ram_copies + end; + Type =:= ram -> + ram_copies + end, + ok = create_local_copy(Tab, StorageType) + end, definitions(Type)), + ok. + +create_local_copy(Tab, Type) -> + StorageType = mnesia:table_info(Tab, storage_type), + {atomic, ok} = + if + StorageType == unknown -> + mnesia:add_table_copy(Tab, node(), Type); + StorageType /= Type -> + mnesia:change_table_copy_type(Tab, node(), Type); + true -> {atomic, ok} + end, + ok. + +has_copy_type(TabDef, DiscType) -> + lists:member(node(), proplists:get_value(DiscType, TabDef, [])). + +check_attributes(Tab, TabDef) -> + {_, ExpAttrs} = proplists:lookup(attributes, TabDef), + case mnesia:table_info(Tab, attributes) of + ExpAttrs -> ok; + Attrs -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}} + end. + +check_content(Tab, TabDef) -> + {_, Match} = proplists:lookup(match, TabDef), + case mnesia:dirty_first(Tab) of + '$end_of_table' -> + ok; + Key -> + ObjList = mnesia:dirty_read(Tab, Key), + MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), + case ets:match_spec_run(ObjList, MatchComp) of + ObjList -> ok; + _ -> {error, {table_content_invalid, Tab, Match, ObjList}} + end + end. + +check(Fun) -> + case [Error || {Tab, TabDef} <- definitions(), + case Fun(Tab, TabDef) of + ok -> Error = none, false; + {error, Error} -> true + end] of + [] -> ok; + Errors -> {error, Errors} + end. + +%%-------------------------------------------------------------------- +%% Table definitions +%%-------------------------------------------------------------------- + +names() -> [Tab || {Tab, _} <- definitions()]. + +%% The tables aren't supposed to be on disk on a ram node +definitions(disc) -> + definitions(); +definitions(ram) -> + [{Tab, [{disc_copies, []}, {ram_copies, [node()]} | + proplists:delete( + ram_copies, proplists:delete(disc_copies, TabDef))]} || + {Tab, TabDef} <- definitions()]. + +definitions() -> + [{rabbit_user, + [{record_name, internal_user}, + {attributes, record_info(fields, internal_user)}, + {disc_copies, [node()]}, + {match, #internal_user{_='_'}}]}, + {rabbit_user_permission, + [{record_name, user_permission}, + {attributes, record_info(fields, user_permission)}, + {disc_copies, [node()]}, + {match, #user_permission{user_vhost = #user_vhost{_='_'}, + permission = #permission{_='_'}, + _='_'}}]}, + {rabbit_vhost, + [{record_name, vhost}, + {attributes, record_info(fields, vhost)}, + {disc_copies, [node()]}, + {match, #vhost{_='_'}}]}, + {rabbit_listener, + [{record_name, listener}, + {attributes, record_info(fields, listener)}, + {type, bag}, + {match, #listener{_='_'}}]}, + {rabbit_durable_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {disc_copies, [node()]}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_semi_durable_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_route, + [{record_name, route}, + {attributes, record_info(fields, route)}, + {type, ordered_set}, + {match, #route{binding = binding_match(), _='_'}}]}, + {rabbit_reverse_route, + [{record_name, reverse_route}, + {attributes, record_info(fields, reverse_route)}, + {type, ordered_set}, + {match, #reverse_route{reverse_binding = reverse_binding_match(), + _='_'}}]}, + {rabbit_topic_trie_node, + [{record_name, topic_trie_node}, + {attributes, record_info(fields, topic_trie_node)}, + {type, ordered_set}, + {match, #topic_trie_node{trie_node = trie_node_match(), _='_'}}]}, + {rabbit_topic_trie_edge, + [{record_name, topic_trie_edge}, + {attributes, record_info(fields, topic_trie_edge)}, + {type, ordered_set}, + {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]}, + {rabbit_topic_trie_binding, + [{record_name, topic_trie_binding}, + {attributes, record_info(fields, topic_trie_binding)}, + {type, ordered_set}, + {match, #topic_trie_binding{trie_binding = trie_binding_match(), + _='_'}}]}, + {rabbit_durable_exchange, + [{record_name, exchange}, + {attributes, record_info(fields, exchange)}, + {disc_copies, [node()]}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, + {rabbit_exchange, + [{record_name, exchange}, + {attributes, record_info(fields, exchange)}, + {match, #exchange{name = exchange_name_match(), _='_'}}]}, + {rabbit_exchange_serial, + [{record_name, exchange_serial}, + {attributes, record_info(fields, exchange_serial)}, + {match, #exchange_serial{name = exchange_name_match(), _='_'}}]}, + {rabbit_runtime_parameters, + [{record_name, runtime_parameters}, + {attributes, record_info(fields, runtime_parameters)}, + {disc_copies, [node()]}, + {match, #runtime_parameters{_='_'}}]}, + {rabbit_durable_queue, + [{record_name, amqqueue}, + {attributes, record_info(fields, amqqueue)}, + {disc_copies, [node()]}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}, + {rabbit_queue, + [{record_name, amqqueue}, + {attributes, record_info(fields, amqqueue)}, + {match, #amqqueue{name = queue_name_match(), _='_'}}]}] + ++ gm:table_definitions() + ++ mirrored_supervisor:table_definitions(). + +binding_match() -> + #binding{source = exchange_name_match(), + destination = binding_destination_match(), + _='_'}. +reverse_binding_match() -> + #reverse_binding{destination = binding_destination_match(), + source = exchange_name_match(), + _='_'}. +binding_destination_match() -> + resource_match('_'). +trie_node_match() -> + #trie_node{ exchange_name = exchange_name_match(), _='_'}. +trie_edge_match() -> + #trie_edge{ exchange_name = exchange_name_match(), _='_'}. +trie_binding_match() -> + #trie_binding{exchange_name = exchange_name_match(), _='_'}. +exchange_name_match() -> + resource_match(exchange). +queue_name_match() -> + resource_match(queue). +resource_match(Kind) -> + #resource{kind = Kind, _='_'}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index bb60bd125e..962bb64889 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -32,6 +32,8 @@ -define(TIMEOUT, 5000). all_tests() -> + ok = setup_cluster(), + ok = supervisor2_tests:test_all(), passed = gm_tests:all_tests(), passed = mirrored_supervisor_tests:all_tests(), application:set_env(rabbit, file_handles_high_watermark, 10, infinity), @@ -52,36 +54,63 @@ all_tests() -> passed = test_log_management_during_startup(), passed = test_statistics(), passed = test_arguments_parser(), - passed = test_cluster_management(), + passed = test_dynamic_mirroring(), passed = test_user_management(), passed = test_runtime_parameters(), + passed = test_policy_validation(), passed = test_server_status(), passed = test_confirms(), - passed = maybe_run_cluster_dependent_tests(), + passed = + do_if_secondary_node( + fun run_cluster_dependent_tests/1, + fun (SecondaryNode) -> + io:format("Skipping cluster dependent tests with node ~p~n", + [SecondaryNode]), + passed + end), passed = test_configurable_server_properties(), passed. -maybe_run_cluster_dependent_tests() -> +do_if_secondary_node(Up, Down) -> SecondaryNode = rabbit_nodes:make("hare"), case net_adm:ping(SecondaryNode) of - pong -> passed = run_cluster_dependent_tests(SecondaryNode); - pang -> io:format("Skipping cluster dependent tests with node ~p~n", - [SecondaryNode]) - end, - passed. + pong -> Up(SecondaryNode); + pang -> Down(SecondaryNode) + end. -run_cluster_dependent_tests(SecondaryNode) -> - SecondaryNodeS = atom_to_list(SecondaryNode), +setup_cluster() -> + do_if_secondary_node( + fun (SecondaryNode) -> + cover:stop(SecondaryNode), + ok = control_action(stop_app, []), + %% 'cover' does not cope at all well with nodes disconnecting, + %% which happens as part of reset. So we turn it off + %% temporarily. That is ok even if we're not in general using + %% cover, it just turns the engine on / off and doesn't log + %% anything. Note that this way cover won't be on when joining + %% the cluster, but this is OK since we're testing the clustering + %% interface elsewere anyway. + cover:stop(nodes()), + ok = control_action(join_cluster, + [atom_to_list(SecondaryNode)]), + cover:start(nodes()), + ok = control_action(start_app, []), + ok = control_action(start_app, SecondaryNode, [], []) + end, + fun (_) -> ok end). - cover:stop(SecondaryNode), - ok = control_action(stop_app, []), - ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS]), - ok = control_action(start_app, []), - cover:start(SecondaryNode), - ok = control_action(start_app, SecondaryNode, [], []), +maybe_run_cluster_dependent_tests() -> + do_if_secondary_node( + fun (SecondaryNode) -> + passed = run_cluster_dependent_tests(SecondaryNode) + end, + fun (SecondaryNode) -> + io:format("Skipping cluster dependent tests with node ~p~n", + [SecondaryNode]) + end). +run_cluster_dependent_tests(SecondaryNode) -> io:format("Running cluster dependent tests with node ~p~n", [SecondaryNode]), passed = test_delegates_async(SecondaryNode), passed = test_delegates_sync(SecondaryNode), @@ -629,7 +658,6 @@ test_topic_expect_match(X, List) -> #'P_basic'{}, <<>>), Res = rabbit_exchange_type_topic:route( X, #delivery{mandatory = false, - immediate = false, sender = self(), message = Message}), ExpectedRes = lists:map( @@ -747,7 +775,9 @@ test_log_management_during_startup() -> ok = case catch control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotation_tty_no_handlers_test}); - {error, {cannot_log_to_tty, _, _}} -> ok + {badrpc, {'EXIT', {rabbit,failure_during_boot, + {error,{cannot_log_to_tty, + _, not_installed}}}}} -> ok end, %% fix sasl logging @@ -771,7 +801,9 @@ test_log_management_during_startup() -> ok = case control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotation_no_write_permission_dir_test}); - {error, {cannot_log_to_file, _, _}} -> ok + {badrpc, {'EXIT', + {rabbit, failure_during_boot, + {error, {cannot_log_to_file, _, _}}}}} -> ok end, %% start application with logging to a subdirectory which @@ -782,8 +814,11 @@ test_log_management_during_startup() -> ok = case control_action(start_app, []) of ok -> exit({got_success_but_expected_failure, log_rotatation_parent_dirs_test}); - {error, {cannot_log_to_file, _, - {error, {cannot_create_parent_dirs, _, eacces}}}} -> ok + {badrpc, + {'EXIT', {rabbit,failure_during_boot, + {error, {cannot_log_to_file, _, + {error, + {cannot_create_parent_dirs, _, eacces}}}}}}} -> ok end, ok = set_permissions(TmpDir, 8#00700), ok = set_permissions(TmpLog, 8#00600), @@ -856,199 +891,51 @@ test_arguments_parser() -> passed. -test_cluster_management() -> - %% 'cluster' and 'reset' should only work if the app is stopped - {error, _} = control_action(cluster, []), - {error, _} = control_action(reset, []), - {error, _} = control_action(force_reset, []), - - ok = control_action(stop_app, []), - - %% various ways of creating a standalone node - NodeS = atom_to_list(node()), - ClusteringSequence = [[], - [NodeS], - ["invalid@invalid", NodeS], - [NodeS, "invalid@invalid"]], - - ok = control_action(reset, []), - lists:foreach(fun (Arg) -> - ok = control_action(force_cluster, Arg), - ok - end, - ClusteringSequence), - lists:foreach(fun (Arg) -> - ok = control_action(reset, []), - ok = control_action(force_cluster, Arg), - ok - end, - ClusteringSequence), - ok = control_action(reset, []), - lists:foreach(fun (Arg) -> - ok = control_action(force_cluster, Arg), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok - end, - ClusteringSequence), - lists:foreach(fun (Arg) -> - ok = control_action(reset, []), - ok = control_action(force_cluster, Arg), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok - end, - ClusteringSequence), - - %% convert a disk node into a ram node - ok = control_action(reset, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), - - %% join a non-existing cluster as a ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), +test_dynamic_mirroring() -> + %% Just unit tests of the node selection logic, see multi node + %% tests for the rest... + Test = fun ({NewM, NewSs, ExtraSs}, Policy, Params, {OldM, OldSs}, All) -> + {NewM, NewSs0} = + rabbit_mirror_queue_misc:suggested_queue_nodes( + Policy, Params, {OldM, OldSs}, All), + NewSs1 = lists:sort(NewSs0), + case dm_list_match(NewSs, NewSs1, ExtraSs) of + ok -> ok; + error -> exit({no_match, NewSs, NewSs1, ExtraSs}) + end + end, + + Test({a,[b,c],0},<<"all">>,'_',{a,[]}, [a,b,c]), + Test({a,[b,c],0},<<"all">>,'_',{a,[b,c]},[a,b,c]), + Test({a,[b,c],0},<<"all">>,'_',{a,[d]}, [a,b,c]), + + %% Add a node + Test({a,[b,c],0},<<"nodes">>,[<<"a">>,<<"b">>,<<"c">>],{a,[b]},[a,b,c,d]), + Test({b,[a,c],0},<<"nodes">>,[<<"a">>,<<"b">>,<<"c">>],{b,[a]},[a,b,c,d]), + %% Add two nodes and drop one + Test({a,[b,c],0},<<"nodes">>,[<<"a">>,<<"b">>,<<"c">>],{a,[d]},[a,b,c,d]), + %% Promote slave to master by policy + Test({a,[b,c],0},<<"nodes">>,[<<"a">>,<<"b">>,<<"c">>],{d,[a]},[a,b,c,d]), + %% Don't try to include nodes that are not running + Test({a,[b], 0},<<"nodes">>,[<<"a">>,<<"b">>,<<"f">>],{a,[b]},[a,b,c,d]), + %% If we can't find any of the nodes listed then just keep the master + Test({a,[], 0},<<"nodes">>,[<<"f">>,<<"g">>,<<"h">>],{a,[b]},[a,b,c,d]), + + Test({a,[], 1},<<"exactly">>,2,{a,[]}, [a,b,c,d]), + Test({a,[], 2},<<"exactly">>,3,{a,[]}, [a,b,c,d]), + Test({a,[c], 0},<<"exactly">>,2,{a,[c]}, [a,b,c,d]), + Test({a,[c], 1},<<"exactly">>,3,{a,[c]}, [a,b,c,d]), + Test({a,[c], 0},<<"exactly">>,2,{a,[c,d]},[a,b,c,d]), + Test({a,[c,d],0},<<"exactly">>,3,{a,[c,d]},[a,b,c,d]), - ok = control_action(reset, []), - - SecondaryNode = rabbit_nodes:make("hare"), - case net_adm:ping(SecondaryNode) of - pong -> passed = test_cluster_management2(SecondaryNode); - pang -> io:format("Skipping clustering tests with node ~p~n", - [SecondaryNode]) - end, - - ok = control_action(start_app, []), passed. -test_cluster_management2(SecondaryNode) -> - NodeS = atom_to_list(node()), - SecondaryNodeS = atom_to_list(SecondaryNode), - - %% make a disk node - ok = control_action(cluster, [NodeS]), - ok = assert_disc_node(), - %% make a ram node - ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS]), - ok = assert_ram_node(), - - %% join cluster as a ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% ram node will not start by itself - ok = control_action(stop_app, []), - ok = control_action(stop_app, SecondaryNode, [], []), - {error, _} = control_action(start_app, []), - ok = control_action(start_app, SecondaryNode, [], []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - - %% change cluster config while remaining in same cluster - ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - - %% join non-existing cluster as a ram node - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - {error, _} = control_action(start_app, []), - ok = assert_ram_node(), - - %% join empty cluster as a ram node (converts to disc) - ok = control_action(cluster, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% make a new ram node - ok = control_action(reset, []), - ok = control_action(force_cluster, [SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% turn ram node into disk node - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% convert a disk node into a ram node - ok = assert_disc_node(), - ok = control_action(force_cluster, ["invalid1@invalid", - "invalid2@invalid"]), - ok = assert_ram_node(), - - %% make a new disk node - ok = control_action(force_reset, []), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_disc_node(), - - %% turn a disk node into a ram node - ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - ok = assert_ram_node(), - - %% NB: this will log an inconsistent_database error, which is harmless - %% Turning cover on / off is OK even if we're not in general using cover, - %% it just turns the engine on / off, doesn't actually log anything. - cover:stop([SecondaryNode]), - true = disconnect_node(SecondaryNode), - pong = net_adm:ping(SecondaryNode), - cover:start([SecondaryNode]), - - %% leaving a cluster as a ram node - ok = control_action(reset, []), - %% ...and as a disk node - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, []), - cover:stop(SecondaryNode), - ok = control_action(reset, []), - cover:start(SecondaryNode), - - %% attempt to leave cluster when no other node is alive - ok = control_action(cluster, [SecondaryNodeS, NodeS]), - ok = control_action(start_app, []), - ok = control_action(stop_app, SecondaryNode, [], []), - ok = control_action(stop_app, []), - {error, {no_running_cluster_nodes, _, _}} = - control_action(reset, []), - - %% attempt to change type when no other node is alive - {error, {no_running_cluster_nodes, _, _}} = - control_action(cluster, [SecondaryNodeS]), - - %% leave system clustered, with the secondary node as a ram node - ok = control_action(force_reset, []), - ok = control_action(start_app, []), - %% Yes, this is rather ugly. But since we're a clustered Mnesia - %% node and we're telling another clustered node to reset itself, - %% we will get disconnected half way through causing a - %% badrpc. This never happens in real life since rabbitmqctl is - %% not a clustered Mnesia node. - cover:stop(SecondaryNode), - {badrpc, nodedown} = control_action(force_reset, SecondaryNode, [], []), - pong = net_adm:ping(SecondaryNode), - cover:start(SecondaryNode), - ok = control_action(cluster, SecondaryNode, [NodeS], []), - ok = control_action(start_app, SecondaryNode, [], []), - - passed. +%% Does the first list match the second where the second is required +%% to have exactly Extra superfluous items? +dm_list_match([], [], 0) -> ok; +dm_list_match(_, [], _Extra) -> error; +dm_list_match([H|T1], [H |T2], Extra) -> dm_list_match(T1, T2, Extra); +dm_list_match(L1, [_H|T2], Extra) -> dm_list_match(L1, T2, Extra - 1). test_user_management() -> @@ -1135,22 +1022,21 @@ test_runtime_parameters() -> Bad = fun(L) -> {error_string, _} = control_action(set_parameter, L) end, %% Acceptable for bijection - Good(["test", "good", "<<\"ignore\">>"]), + Good(["test", "good", "\"ignore\""]), Good(["test", "good", "123"]), Good(["test", "good", "true"]), Good(["test", "good", "false"]), Good(["test", "good", "null"]), - Good(["test", "good", "[{<<\"key\">>, <<\"value\">>}]"]), + Good(["test", "good", "{\"key\": \"value\"}"]), - %% Various forms of fail due to non-bijectability + %% Invalid json Bad(["test", "good", "atom"]), - Bad(["test", "good", "{tuple, foo}"]), - Bad(["test", "good", "[{<<\"key\">>, <<\"value\">>, 1}]"]), - Bad(["test", "good", "[{key, <<\"value\">>}]"]), + Bad(["test", "good", "{\"foo\": \"bar\""]), + Bad(["test", "good", "{foo: \"bar\"}"]), %% Test actual validation hook - Good(["test", "maybe", "<<\"good\">>"]), - Bad(["test", "maybe", "<<\"bad\">>"]), + Good(["test", "maybe", "\"good\""]), + Bad(["test", "maybe", "\"bad\""]), ok = control_action(list_parameters, []), @@ -1161,6 +1047,26 @@ test_runtime_parameters() -> rabbit_runtime_parameters_test:unregister(), passed. +test_policy_validation() -> + rabbit_runtime_parameters_test:register_policy_validator(), + SetPol = + fun (Key, Val) -> + control_action( + set_policy, + ["name", ".*", rabbit_misc:format("{\"~s\":~p}", [Key, Val])]) + end, + + ok = SetPol("testeven", []), + ok = SetPol("testeven", [1, 2]), + ok = SetPol("testeven", [1, 2, 3, 4]), + ok = SetPol("testpos", [2, 5, 5678]), + + {error_string, _} = SetPol("testpos", [-1, 0, 1]), + {error_string, _} = SetPol("testeven", [ 1, 2, 3]), + + rabbit_runtime_parameters_test:unregister_policy_validator(), + passed. + test_server_status() -> %% create a few things so there is some useful information to list Writer = spawn(fun () -> receive shutdown -> ok end end), @@ -1216,7 +1122,15 @@ test_server_status() -> ok = control_action(list_consumers, []), %% set vm memory high watermark + HWM = vm_memory_monitor:get_vm_memory_high_watermark(), + ok = control_action(set_vm_memory_high_watermark, ["1"]), ok = control_action(set_vm_memory_high_watermark, ["1.0"]), + ok = control_action(set_vm_memory_high_watermark, [float_to_list(HWM)]), + + %% eval + {error_string, _} = control_action(eval, ["\""]), + {error_string, _} = control_action(eval, ["a("]), + ok = control_action(eval, ["a."]), %% cleanup [{ok, _} = rabbit_amqqueue:delete(QR, false, false) || QR <- [Q, Q2]], @@ -1672,15 +1586,15 @@ clean_logs(Files, Suffix) -> ok. assert_ram_node() -> - case rabbit_mnesia:is_disc_node() of - true -> exit('not_ram_node'); - false -> ok + case rabbit_mnesia:node_type() of + disc -> exit('not_ram_node'); + ram -> ok end. assert_disc_node() -> - case rabbit_mnesia:is_disc_node() of - true -> ok; - false -> exit('not_disc_node') + case rabbit_mnesia:node_type() of + disc -> ok; + ram -> exit('not_disc_node') end. delete_file(File) -> @@ -2354,8 +2268,8 @@ publish_and_confirm(Q, Payload, Count) -> Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), <<>>, #'P_basic'{delivery_mode = 2}, Payload), - Delivery = #delivery{mandatory = false, immediate = false, - sender = self(), message = Msg, msg_seq_no = Seq}, + Delivery = #delivery{mandatory = false, sender = self(), + message = Msg, msg_seq_no = Seq}, {routed, _} = rabbit_amqqueue:deliver([Q], Delivery) end || Seq <- Seqs], wait_for_confirms(gb_sets:from_list(Seqs)). @@ -2447,10 +2361,10 @@ test_dropwhile(VQ0) -> fun (N, Props) -> Props#message_properties{expiry = N} end, VQ0), %% drop the first 5 messages - {undefined, VQ2} = rabbit_variable_queue:dropwhile( - fun(#message_properties { expiry = Expiry }) -> - Expiry =< 5 - end, false, VQ1), + {_, undefined, VQ2} = rabbit_variable_queue:dropwhile( + fun(#message_properties { expiry = Expiry }) -> + Expiry =< 5 + end, false, VQ1), %% fetch five now VQ3 = lists:foldl(fun (_N, VQN) -> @@ -2467,11 +2381,11 @@ test_dropwhile(VQ0) -> test_dropwhile_varying_ram_duration(VQ0) -> VQ1 = variable_queue_publish(false, 1, VQ0), VQ2 = rabbit_variable_queue:set_ram_duration_target(0, VQ1), - {undefined, VQ3} = rabbit_variable_queue:dropwhile( - fun(_) -> false end, false, VQ2), + {_, undefined, VQ3} = rabbit_variable_queue:dropwhile( + fun(_) -> false end, false, VQ2), VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), VQ5 = variable_queue_publish(false, 1, VQ4), - {undefined, VQ6} = + {_, undefined, VQ6} = rabbit_variable_queue:dropwhile(fun(_) -> false end, false, VQ5), VQ6. diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl index 732c29b6b6..5bc3d9f530 100644 --- a/src/rabbit_types.erl +++ b/src/rabbit_types.erl @@ -64,12 +64,11 @@ #basic_message{exchange_name :: rabbit_exchange:name(), routing_keys :: [rabbit_router:routing_key()], content :: content(), - id :: msg_id(), + id :: msg_id(), is_persistent :: boolean()}). -type(message() :: basic_message()). -type(delivery() :: #delivery{mandatory :: boolean(), - immediate :: boolean(), sender :: pid(), message :: message()}). -type(message_properties() :: @@ -118,8 +117,7 @@ exclusive_owner :: rabbit_types:maybe(pid()), arguments :: rabbit_framing:amqp_table(), pid :: rabbit_types:maybe(pid()), - slave_pids :: [pid()], - mirror_nodes :: [node()] | 'undefined' | 'all'}). + slave_pids :: [pid()]}). -type(exchange() :: #exchange{name :: rabbit_exchange:name(), diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index e1a7bcaea8..455134da3f 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -66,11 +66,11 @@ %% into the boot process by prelaunch before the mnesia application is %% started. By the time Mnesia is started the upgrades have happened %% (on the primary), or Mnesia has been reset (on the secondary) and -%% rabbit_mnesia:init_db/3 can then make the node rejoin the cluster +%% rabbit_mnesia:init_db_unchecked/2 can then make the node rejoin the cluster %% in the normal way. %% %% The non-mnesia upgrades are then triggered by -%% rabbit_mnesia:init_db/3. Of course, it's possible for a given +%% rabbit_mnesia:init_db_unchecked/2. Of course, it's possible for a given %% upgrade process to only require Mnesia upgrades, or only require %% non-Mnesia upgrades. In the latter case no Mnesia resets and %% reclusterings occur. @@ -121,19 +121,16 @@ remove_backup() -> info("upgrades: Mnesia backup removed~n", []). maybe_upgrade_mnesia() -> - %% rabbit_mnesia:all_clustered_nodes/0 will return [] at this point - %% if we are a RAM node since Mnesia has not started yet. - AllNodes = lists:usort(rabbit_mnesia:all_clustered_nodes() ++ - rabbit_mnesia:read_cluster_nodes_config()), + AllNodes = rabbit_mnesia:cluster_nodes(all), case rabbit_version:upgrades_required(mnesia) of {error, starting_from_scratch} -> ok; {error, version_not_available} -> case AllNodes of - [_] -> ok; - _ -> die("Cluster upgrade needed but upgrading from " - "< 2.1.1.~nUnfortunately you will need to " - "rebuild the cluster.", []) + [] -> die("Cluster upgrade needed but upgrading from " + "< 2.1.1.~nUnfortunately you will need to " + "rebuild the cluster.", []); + _ -> ok end; {error, _} = Err -> throw(Err); @@ -150,12 +147,12 @@ maybe_upgrade_mnesia() -> upgrade_mode(AllNodes) -> case nodes_running(AllNodes) of [] -> - AfterUs = rabbit_mnesia:read_previously_running_nodes(), - case {is_disc_node_legacy(), AfterUs} of - {true, []} -> + AfterUs = rabbit_mnesia:cluster_nodes(running) -- [node()], + case {node_type_legacy(), AfterUs} of + {disc, []} -> primary; - {true, _} -> - Filename = rabbit_mnesia:running_nodes_filename(), + {disc, _} -> + Filename = rabbit_node_monitor:running_nodes_filename(), die("Cluster upgrade needed but other disc nodes shut " "down after this one.~nPlease first start the last " "disc node to shut down.~n~nNote: if several disc " @@ -163,7 +160,7 @@ upgrade_mode(AllNodes) -> "all~nshow this message. In which case, remove " "the lock file on one of them and~nstart that node. " "The lock file on this node is:~n~n ~s ", [Filename]); - {false, _} -> + {ram, _} -> die("Cluster upgrade needed but this is a ram node.~n" "Please first start the last disc node to shut down.", []) @@ -204,7 +201,7 @@ primary_upgrade(Upgrades, Nodes) -> mnesia, Upgrades, fun () -> - force_tables(), + rabbit_table:force_load(), case Others of [] -> ok; _ -> info("mnesia upgrades: Breaking cluster~n", []), @@ -214,23 +211,13 @@ primary_upgrade(Upgrades, Nodes) -> end), ok. -force_tables() -> - [mnesia:force_load_table(T) || T <- rabbit_mnesia:table_names()]. - secondary_upgrade(AllNodes) -> %% must do this before we wipe out schema - IsDiscNode = is_disc_node_legacy(), + NodeType = node_type_legacy(), rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), cannot_delete_schema), - %% Note that we cluster with all nodes, rather than all disc nodes - %% (as we can't know all disc nodes at this point). This is safe as - %% we're not writing the cluster config, just setting up Mnesia. - ClusterNodes = case IsDiscNode of - true -> AllNodes; - false -> AllNodes -- [node()] - end, rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), - ok = rabbit_mnesia:init_db(ClusterNodes, true, fun () -> ok end), + ok = rabbit_mnesia:init_db_unchecked(AllNodes, NodeType), ok = rabbit_version:record_desired_for_scope(mnesia), ok. @@ -278,13 +265,16 @@ lock_filename() -> lock_filename(dir()). lock_filename(Dir) -> filename:join(Dir, ?LOCK_FILENAME). backup_dir() -> dir() ++ "-upgrade-backup". -is_disc_node_legacy() -> +node_type_legacy() -> %% This is pretty ugly but we can't start Mnesia and ask it (will %% hang), we can't look at the config file (may not include us %% even if we're a disc node). We also can't use - %% rabbit_mnesia:is_disc_node/0 because that will give false + %% rabbit_mnesia:node_type/0 because that will give false %% postivies on Rabbit up to 2.5.1. - filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")). + case filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")) of + true -> disc; + false -> ram + end. %% NB: we cannot use rabbit_log here since it may not have been %% started yet diff --git a/src/rabbit_upgrade_functions.erl b/src/rabbit_upgrade_functions.erl index 18704807ba..21fdcd667b 100644 --- a/src/rabbit_upgrade_functions.erl +++ b/src/rabbit_upgrade_functions.erl @@ -40,6 +40,9 @@ -rabbit_upgrade({exchange_scratches, mnesia, [exchange_scratch]}). -rabbit_upgrade({policy, mnesia, [exchange_scratches, ha_mirrors]}). +-rabbit_upgrade({sync_slave_pids, mnesia, [policy]}). +-rabbit_upgrade({no_mirror_nodes, mnesia, [sync_slave_pids]}). +-rabbit_upgrade({gm_pids, mnesia, [no_mirror_nodes]}). %% ------------------------------------------------------------------- @@ -62,6 +65,9 @@ -spec(topic_trie_node/0 :: () -> 'ok'). -spec(runtime_parameters/0 :: () -> 'ok'). -spec(policy/0 :: () -> 'ok'). +-spec(sync_slave_pids/0 :: () -> 'ok'). +-spec(no_mirror_nodes/0 :: () -> 'ok'). +-spec(gm_pids/0 :: () -> 'ok'). -endif. @@ -240,15 +246,53 @@ queue_policy(Table) -> [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids, mirror_nodes, policy]). +sync_slave_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddSyncSlavesFun = + fun ({amqqueue, N, D, AD, Excl, Args, Pid, SPids, MNodes, Pol}) -> + {amqqueue, N, D, AD, Excl, Args, Pid, SPids, [], MNodes, Pol} + end, + [ok = transform(T, AddSyncSlavesFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, mirror_nodes, policy]) + || T <- Tables], + ok. + +no_mirror_nodes() -> + Tables = [rabbit_queue, rabbit_durable_queue], + RemoveMirrorNodesFun = + fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, _MNodes, Pol}) -> + {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol} + end, + [ok = transform(T, RemoveMirrorNodesFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, policy]) + || T <- Tables], + ok. + +gm_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddGMPidsFun = + fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol}) -> + {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol, []} + end, + [ok = transform(T, AddGMPidsFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, policy, gm_pids]) + || T <- Tables], + ok. + + + %%-------------------------------------------------------------------- transform(TableName, Fun, FieldList) -> - rabbit_mnesia:wait_for_tables([TableName]), + rabbit_table:wait([TableName]), {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList), ok. transform(TableName, Fun, FieldList, NewRecordName) -> - rabbit_mnesia:wait_for_tables([TableName]), + rabbit_table:wait([TableName]), {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList, NewRecordName), ok. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 49213c9552..8a3fd9d917 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -17,11 +17,11 @@ -module(rabbit_variable_queue). -export([init/3, terminate/2, delete_and_terminate/2, purge/1, - publish/4, publish_delivered/5, drain_confirmed/1, + publish/4, publish_delivered/4, discard/3, drain_confirmed/1, dropwhile/3, fetch/2, ack/2, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, needs_timeout/1, - timeout/1, handle_pre_hibernate/1, status/1, invoke/3, - is_duplicate/2, discard/3, multiple_routing_keys/0, fold/3]). + depth/1, set_ram_duration_target/2, ram_duration/1, + needs_timeout/1, timeout/1, handle_pre_hibernate/1, status/1, invoke/3, + is_duplicate/2, multiple_routing_keys/0, fold/3]). -export([start/1, stop/0]). @@ -545,17 +545,8 @@ publish(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId }, ram_msg_count = RamMsgCount + 1, unconfirmed = UC1 })). -publish_delivered(false, #basic_message { id = MsgId }, - #message_properties { needs_confirming = NeedsConfirming }, - _ChPid, State = #vqstate { async_callback = Callback, - len = 0 }) -> - case NeedsConfirming of - true -> blind_confirm(Callback, gb_sets:singleton(MsgId)); - false -> ok - end, - {undefined, a(State)}; -publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent, - id = MsgId }, +publish_delivered(Msg = #basic_message { is_persistent = IsPersistent, + id = MsgId }, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, _ChPid, State = #vqstate { len = 0, @@ -579,6 +570,8 @@ publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent, persistent_count = PCount1, unconfirmed = UC1 }))}. +discard(_MsgId, _ChPid, State) -> State. + drain_confirmed(State = #vqstate { confirmed = C }) -> case gb_sets:is_empty(C) of true -> {[], State}; %% common case @@ -589,12 +582,12 @@ drain_confirmed(State = #vqstate { confirmed = C }) -> dropwhile(Pred, AckRequired, State) -> dropwhile(Pred, AckRequired, State, []). dropwhile(Pred, AckRequired, State, Msgs) -> - End = fun(S) when AckRequired -> {lists:reverse(Msgs), S}; - (S) -> {undefined, S} + End = fun(Next, S) when AckRequired -> {Next, lists:reverse(Msgs), S}; + (Next, S) -> {Next, undefined, S} end, case queue_out(State) of {empty, State1} -> - End(a(State1)); + End(undefined, a(State1)); {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} -> case {Pred(MsgProps), AckRequired} of {true, true} -> @@ -606,7 +599,7 @@ dropwhile(Pred, AckRequired, State, Msgs) -> {_, State2} = internal_fetch(false, MsgStatus, State1), dropwhile(Pred, AckRequired, State2, undefined); {false, _} -> - End(a(in_r(MsgStatus, State1))) + End(MsgProps, a(in_r(MsgStatus, State1))) end end. @@ -681,6 +674,9 @@ len(#vqstate { len = Len }) -> Len. is_empty(State) -> 0 == len(State). +depth(State = #vqstate { pending_ack = Ack }) -> + len(State) + gb_trees:size(Ack). + set_ram_duration_target( DurationTarget, State = #vqstate { rates = #rates { avg_egress = AvgEgressRate, @@ -818,8 +814,6 @@ invoke(?MODULE, Fun, State) -> Fun(?MODULE, State). is_duplicate(_Msg, State) -> {false, State}. -discard(_Msg, _ChPid, State) -> State. - %%---------------------------------------------------------------------------- %% Minor helpers %%---------------------------------------------------------------------------- @@ -871,9 +865,10 @@ gb_sets_maybe_insert(false, _Val, Set) -> Set; gb_sets_maybe_insert(true, Val, Set) -> gb_sets:add(Val, Set). msg_status(IsPersistent, SeqId, Msg = #basic_message { id = MsgId }, - MsgProps) -> + MsgProps = #message_properties { delivered = Delivered }) -> + %% TODO would it make sense to remove #msg_status.is_delivered? #msg_status { seq_id = SeqId, msg_id = MsgId, msg = Msg, - is_persistent = IsPersistent, is_delivered = false, + is_persistent = IsPersistent, is_delivered = Delivered, msg_on_disk = false, index_on_disk = false, msg_props = MsgProps }. @@ -1321,12 +1316,9 @@ must_sync_index(#vqstate { msg_indices_on_disk = MIOD, %% subtraction. not (gb_sets:is_empty(UC) orelse gb_sets:is_subset(UC, MIOD)). -blind_confirm(Callback, MsgIdSet) -> - Callback(?MODULE, - fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end). - msgs_written_to_disk(Callback, MsgIdSet, ignored) -> - blind_confirm(Callback, MsgIdSet); + Callback(?MODULE, + fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end); msgs_written_to_disk(Callback, MsgIdSet, written) -> Callback(?MODULE, fun (?MODULE, State = #vqstate { msgs_on_disk = MOD, diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index 5548ef6d05..297fa56fe3 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -90,12 +90,15 @@ delete(VHostPath) -> R. internal_delete(VHostPath) -> - lists:foreach( - fun (Info) -> - ok = rabbit_auth_backend_internal:clear_permissions( - proplists:get_value(user, Info), VHostPath) - end, - rabbit_auth_backend_internal:list_vhost_permissions(VHostPath)), + [ok = rabbit_auth_backend_internal:clear_permissions( + proplists:get_value(user, Info), VHostPath) + || Info <- rabbit_auth_backend_internal:list_vhost_permissions(VHostPath)], + [ok = rabbit_runtime_parameters:clear(VHostPath, + proplists:get_value(component, Info), + proplists:get_value(key, Info)) + || Info <- rabbit_runtime_parameters:list(VHostPath)], + [ok = rabbit_policy:delete(VHostPath, proplists:get_value(key, Info)) + || Info <- rabbit_policy:list(VHostPath)], ok = mnesia:delete({rabbit_vhost, VHostPath}), ok. diff --git a/src/rabbit_vm.erl b/src/rabbit_vm.erl new file mode 100644 index 0000000000..53f3df18b3 --- /dev/null +++ b/src/rabbit_vm.erl @@ -0,0 +1,129 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(rabbit_vm). + +-export([memory/0]). + +-define(MAGIC_PLUGINS, ["mochiweb", "webmachine", "cowboy", "sockjs", + "rfc4627_jsonrpc"]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(memory/0 :: () -> rabbit_types:infos()). + +-endif. + +%%---------------------------------------------------------------------------- + +%% Like erlang:memory(), but with awareness of rabbit-y things +memory() -> + Conns = (sup_memory(rabbit_tcp_client_sup) + + sup_memory(ssl_connection_sup) + + sup_memory(amqp_sup)), + Qs = (sup_memory(rabbit_amqqueue_sup) + + sup_memory(rabbit_mirror_queue_slave_sup)), + Mnesia = mnesia_memory(), + MsgIndexETS = ets_memory(rabbit_msg_store_ets_index), + MsgIndexProc = (pid_memory(msg_store_transient) + + pid_memory(msg_store_persistent)), + MgmtDbETS = ets_memory(rabbit_mgmt_db), + MgmtDbProc = sup_memory(rabbit_mgmt_sup), + Plugins = plugin_memory() - MgmtDbProc, + + [{total, Total}, + {processes, Processes}, + {ets, ETS}, + {atom, Atom}, + {binary, Bin}, + {code, Code}, + {system, System}] = + erlang:memory([total, processes, ets, atom, binary, code, system]), + + OtherProc = Processes - Conns - Qs - MsgIndexProc - MgmtDbProc - Plugins, + + [{total, Total}, + {connection_procs, Conns}, + {queue_procs, Qs}, + {plugins, Plugins}, + {other_proc, lists:max([0, OtherProc])}, %% [1] + {mnesia, Mnesia}, + {mgmt_db, MgmtDbETS + MgmtDbProc}, + {msg_index, MsgIndexETS + MsgIndexProc}, + {other_ets, ETS - Mnesia - MsgIndexETS - MgmtDbETS}, + {binary, Bin}, + {code, Code}, + {atom, Atom}, + {other_system, System - ETS - Atom - Bin - Code}]. + +%% [1] - erlang:memory(processes) can be less than the sum of its +%% parts. Rather than display something nonsensical, just silence any +%% claims about negative memory. See +%% http://erlang.org/pipermail/erlang-questions/2012-September/069320.html + +%%---------------------------------------------------------------------------- + +sup_memory(Sup) -> + lists:sum([child_memory(P, T) || {_, P, T, _} <- sup_children(Sup)]) + + pid_memory(Sup). + +sup_children(Sup) -> + rabbit_misc:with_exit_handler( + rabbit_misc:const([]), fun () -> supervisor:which_children(Sup) end). + +pid_memory(Pid) when is_pid(Pid) -> case process_info(Pid, memory) of + {memory, M} -> M; + _ -> 0 + end; +pid_memory(Name) when is_atom(Name) -> case whereis(Name) of + P when is_pid(P) -> pid_memory(P); + _ -> 0 + end. + +child_memory(Pid, worker) when is_pid (Pid) -> pid_memory(Pid); +child_memory(Pid, supervisor) when is_pid (Pid) -> sup_memory(Pid); +child_memory(_, _) -> 0. + +mnesia_memory() -> + case mnesia:system_info(is_running) of + yes -> lists:sum([bytes(mnesia:table_info(Tab, memory)) || + Tab <- mnesia:system_info(tables)]); + no -> 0 + end. + +ets_memory(Name) -> + lists:sum([bytes(ets:info(T, memory)) || T <- ets:all(), + N <- [ets:info(T, name)], + N =:= Name]). + +bytes(Words) -> Words * erlang:system_info(wordsize). + +plugin_memory() -> + lists:sum([plugin_memory(App) || + {App, _, _} <- application:which_applications(), + is_plugin(atom_to_list(App))]). + +plugin_memory(App) -> + case catch application_master:get_child( + application_controller:get_master(App)) of + {Pid, _} -> sup_memory(Pid); + _ -> 0 + end. + +is_plugin("rabbitmq_" ++ _) -> true; +is_plugin(App) -> lists:member(App, ?MAGIC_PLUGINS). diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 3d3623d752..5af38573fc 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -255,10 +255,10 @@ behaviour_info(_Other) -> %%% --------------------------------------------------- start_link(Mod, Args) -> gen_server:start_link(?MODULE, {self, Mod, Args}, []). - + start_link(SupName, Mod, Args) -> gen_server:start_link(SupName, ?MODULE, {SupName, Mod, Args}, []). - + %%% --------------------------------------------------- %%% Interface functions. %%% --------------------------------------------------- @@ -298,9 +298,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> check_childspecs(X) -> {error, {badarg, X}}. %%% --------------------------------------------------- -%%% +%%% %%% Initialize the supervisor. -%%% +%%% %%% --------------------------------------------------- init({SupName, Mod, Args}) -> process_flag(trap_exit, true), @@ -319,7 +319,7 @@ init({SupName, Mod, Args}) -> Error -> {stop, {bad_return, {Mod, init, Error}}} end. - + init_children(State, StartSpec) -> SupName = State#state.name, case check_startspec(StartSpec) of @@ -349,7 +349,7 @@ init_dynamic(_State, StartSpec) -> %% Func: start_children/2 %% Args: Children = [#child] in start order %% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} -%% Purpose: Start all children. The new list contains #child's +%% Purpose: Start all children. The new list contains #child's %% with pids. %% Returns: {ok, NChildren} | {error, NChildren} %% NChildren = [#child] in termination order (reversed @@ -381,7 +381,7 @@ do_start_child(SupName, Child) -> NChild = Child#child{pid = Pid}, report_progress(NChild, SupName), {ok, Pid, Extra}; - ignore -> + ignore -> {ok, undefined}; {error, What} -> {error, What}; What -> {error, What} @@ -400,12 +400,12 @@ do_start_child_i(M, F, A) -> What -> {error, What} end. - + %%% --------------------------------------------------- -%%% +%%% %%% Callback functions. -%%% +%%% %%% --------------------------------------------------- handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> #child{mfa = {M, F, A}} = hd(State#state.children), @@ -414,11 +414,11 @@ handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> {ok, undefined} -> {reply, {ok, undefined}, State}; {ok, Pid} -> - NState = State#state{dynamics = + NState = State#state{dynamics = ?DICT:store(Pid, Args, State#state.dynamics)}, {reply, {ok, Pid}, NState}; {ok, Pid, Extra} -> - NState = State#state{dynamics = + NState = State#state{dynamics = ?DICT:store(Pid, Args, State#state.dynamics)}, {reply, {ok, Pid, Extra}, NState}; What -> @@ -497,7 +497,7 @@ handle_call(which_children, _From, State) -> %%% Hopefully cause a function-clause as there is no API function %%% that utilizes cast. handle_cast(null, State) -> - error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", + error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", []), {noreply, State}. @@ -527,7 +527,7 @@ handle_info({'EXIT', Pid, Reason}, State) -> end; handle_info(Msg, State) -> - error_logger:error_msg("Supervisor received unexpected message: ~p~n", + error_logger:error_msg("Supervisor received unexpected message: ~p~n", [Msg]), {noreply, State}. %% @@ -577,13 +577,13 @@ check_flags({Strategy, MaxIntensity, Period}) -> check_flags(What) -> {bad_flags, What}. -update_childspec(State, StartSpec) when ?is_simple(State) -> - case check_startspec(StartSpec) of - {ok, [Child]} -> - {ok, State#state{children = [Child]}}; - Error -> - {error, Error} - end; +update_childspec(State, StartSpec) when ?is_simple(State) -> + case check_startspec(StartSpec) of + {ok, [Child]} -> + {ok, State#state{children = [Child]}}; + Error -> + {error, Error} + end; update_childspec(State, StartSpec) -> case check_startspec(StartSpec) of @@ -604,7 +604,7 @@ update_childspec1([Child|OldC], Children, KeepOld) -> end; update_childspec1([], Children, KeepOld) -> % Return them in (keeped) reverse start order. - lists:reverse(Children ++ KeepOld). + lists:reverse(Children ++ KeepOld). update_chsp(OldCh, Children) -> case lists:map(fun (Ch) when OldCh#child.name =:= Ch#child.name -> @@ -618,7 +618,7 @@ update_chsp(OldCh, Children) -> NewC -> {ok, NewC} end. - + %%% --------------------------------------------------- %%% Start a new child. %%% --------------------------------------------------- @@ -630,12 +630,12 @@ handle_start_child(Child, State) -> {ok, Pid} -> Children = State#state.children, {{ok, Pid}, - State#state{children = + State#state{children = [Child#child{pid = Pid}|Children]}}; {ok, Pid, Extra} -> Children = State#state.children, {{ok, Pid, Extra}, - State#state{children = + State#state{children = [Child#child{pid = Pid}|Children]}}; {error, What} -> {{error, {What, Child}}, State} @@ -816,29 +816,32 @@ terminate_simple_children(Child, Dynamics, SupName) -> {Replies, Timedout} = lists:foldl( fun (_Pid, {Replies, Timedout}) -> - {Reply, Timedout1} = + {Pid1, Reason1, Timedout1} = receive TimeoutMsg -> Remaining = Pids -- [P || {P, _} <- Replies], [exit(P, kill) || P <- Remaining], - receive {'DOWN', _MRef, process, Pid, Reason} -> - {{error, Reason}, true} + receive + {'DOWN', _MRef, process, Pid, Reason} -> + {Pid, Reason, true} end; {'DOWN', _MRef, process, Pid, Reason} -> - {child_res(Child, Reason, Timedout), Timedout}; - {'EXIT', Pid, Reason} -> - receive {'DOWN', _MRef, process, Pid, _} -> - {{error, Reason}, Timedout} - end + {Pid, Reason, Timedout} end, - {[{Pid, Reply} | Replies], Timedout1} + {[{Pid1, child_res(Child, Reason1, Timedout1)} | Replies], + Timedout1} end, {[], false}, Pids), timeout_stop(Child, TRef, TimeoutMsg, Timedout), ReportError = shutdown_error_reporter(SupName), - [case Reply of - {_Pid, ok} -> ok; - {Pid, {error, R}} -> ReportError(R, Child#child{pid = Pid}) - end || Reply <- Replies], + Report = fun(_, ok) -> ok; + (Pid, {error, R}) -> ReportError(R, Child#child{pid = Pid}) + end, + [receive + {'EXIT', Pid, Reason} -> + Report(Pid, child_res(Child, Reason, Timedout)) + after + 0 -> Report(Pid, Reply) + end || {Pid, Reply} <- Replies], ok. child_exit_reason(#child{shutdown = brutal_kill}) -> kill; @@ -863,7 +866,7 @@ timeout_stop(#child{shutdown = Time}, TRef, Msg, false) when is_integer(Time) -> after 0 -> ok end; -timeout_stop(#child{}, ok, _Msg, _Timedout) -> +timeout_stop(#child{}, _TRef, _Msg, _Timedout) -> ok. do_terminate(Child, SupName) when Child#child.pid =/= undefined -> @@ -885,17 +888,17 @@ do_terminate(Child, _SupName) -> Child. %%----------------------------------------------------------------- -%% Shutdowns a child. We must check the EXIT value +%% Shutdowns a child. We must check the EXIT value %% of the child, because it might have died with another reason than -%% the wanted. In that case we want to report the error. We put a -%% monitor on the child an check for the 'DOWN' message instead of -%% checking for the 'EXIT' message, because if we check the 'EXIT' -%% message a "naughty" child, who does unlink(Sup), could hang the -%% supervisor. +%% the wanted. In that case we want to report the error. We put a +%% monitor on the child an check for the 'DOWN' message instead of +%% checking for the 'EXIT' message, because if we check the 'EXIT' +%% message a "naughty" child, who does unlink(Sup), could hang the +%% supervisor. %% Returns: ok | {error, OtherReason} (this should be reported) %%----------------------------------------------------------------- shutdown(Pid, brutal_kill) -> - + case monitor_child(Pid) of ok -> exit(Pid, kill), @@ -905,16 +908,16 @@ shutdown(Pid, brutal_kill) -> {'DOWN', _MRef, process, Pid, OtherReason} -> {error, OtherReason} end; - {error, Reason} -> + {error, Reason} -> {error, Reason} end; shutdown(Pid, Time) -> - + case monitor_child(Pid) of ok -> exit(Pid, shutdown), %% Try to shutdown gracefully - receive + receive {'DOWN', _MRef, process, Pid, shutdown} -> ok; {'DOWN', _MRef, process, Pid, OtherReason} -> @@ -926,14 +929,14 @@ shutdown(Pid, Time) -> {error, OtherReason} end end; - {error, Reason} -> + {error, Reason} -> {error, Reason} end. %% Help function to shutdown/2 switches from link to monitor approach monitor_child(Pid) -> - - %% Do the monitor operation first so that if the child dies + + %% Do the monitor operation first so that if the child dies %% before the monitoring is done causing a 'DOWN'-message with %% reason noproc, we will get the real reason in the 'EXIT'-message %% unless a naughty child has already done unlink... @@ -943,22 +946,22 @@ monitor_child(Pid) -> receive %% If the child dies before the unlik we must empty %% the mail-box of the 'EXIT'-message and the 'DOWN'-message. - {'EXIT', Pid, Reason} -> - receive + {'EXIT', Pid, Reason} -> + receive {'DOWN', _, process, Pid, _} -> {error, Reason} end - after 0 -> + after 0 -> %% If a naughty child did unlink and the child dies before - %% monitor the result will be that shutdown/2 receives a + %% monitor the result will be that shutdown/2 receives a %% 'DOWN'-message with reason noproc. %% If the child should die after the unlink there %% will be a 'DOWN'-message with a correct reason - %% that will be handled in shutdown/2. - ok + %% that will be handled in shutdown/2. + ok end. - - + + %%----------------------------------------------------------------- %% Child/State manipulating functions. %%----------------------------------------------------------------- @@ -1012,7 +1015,7 @@ remove_child(Child, State) -> %% Args: SupName = {local, atom()} | {global, atom()} | self %% Type = {Strategy, MaxIntensity, Period} %% Strategy = one_for_one | one_for_all | simple_one_for_one | -%% rest_for_one +%% rest_for_one %% MaxIntensity = integer() %% Period = integer() %% Mod :== atom() @@ -1107,10 +1110,10 @@ validChildType(supervisor) -> true; validChildType(worker) -> true; validChildType(What) -> throw({invalid_child_type, What}). -validName(_Name) -> true. +validName(_Name) -> true. -validFunc({M, F, A}) when is_atom(M), - is_atom(F), +validFunc({M, F, A}) when is_atom(M), + is_atom(F), is_list(A) -> true; validFunc(Func) -> throw({invalid_mfa, Func}). @@ -1128,7 +1131,7 @@ validDelay(Delay) when is_number(Delay), Delay >= 0 -> true; validDelay(What) -> throw({invalid_delay, What}). -validShutdown(Shutdown, _) +validShutdown(Shutdown, _) when is_integer(Shutdown), Shutdown > 0 -> true; validShutdown(infinity, supervisor) -> true; validShutdown(brutal_kill, _) -> true; @@ -1154,7 +1157,7 @@ validMods(Mods) -> throw({invalid_modules, Mods}). %%% Returns: {ok, State'} | {terminate, State'} %%% ------------------------------------------------------ -add_restart(State) -> +add_restart(State) -> I = State#state.intensity, P = State#state.period, R = State#state.restarts, diff --git a/src/supervisor2_tests.erl b/src/supervisor2_tests.erl new file mode 100644 index 0000000000..e42ded7b53 --- /dev/null +++ b/src/supervisor2_tests.erl @@ -0,0 +1,70 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2011-2012 VMware, Inc. All rights reserved. +%% + +-module(supervisor2_tests). +-behaviour(supervisor2). + +-export([test_all/0, start_link/0]). +-export([init/1]). + +test_all() -> + ok = check_shutdown(stop, 200, 200, 2000), + ok = check_shutdown(ignored, 1, 2, 2000). + +check_shutdown(SigStop, Iterations, ChildCount, SupTimeout) -> + {ok, Sup} = supervisor2:start_link(?MODULE, [SupTimeout]), + Res = lists:foldl( + fun (I, ok) -> + TestSupPid = erlang:whereis(?MODULE), + ChildPids = + [begin + {ok, ChildPid} = + supervisor2:start_child(TestSupPid, []), + ChildPid + end || _ <- lists:seq(1, ChildCount)], + MRef = erlang:monitor(process, TestSupPid), + [P ! SigStop || P <- ChildPids], + ok = supervisor2:terminate_child(Sup, test_sup), + {ok, _} = supervisor2:restart_child(Sup, test_sup), + receive + {'DOWN', MRef, process, TestSupPid, shutdown} -> + ok; + {'DOWN', MRef, process, TestSupPid, Reason} -> + {error, {I, Reason}} + end; + (_, R) -> + R + end, ok, lists:seq(1, Iterations)), + unlink(Sup), + exit(Sup, shutdown), + Res. + +start_link() -> + Pid = spawn_link(fun () -> + process_flag(trap_exit, true), + receive stop -> ok end + end), + {ok, Pid}. + +init([Timeout]) -> + {ok, {{one_for_one, 0, 1}, + [{test_sup, {supervisor2, start_link, + [{local, ?MODULE}, ?MODULE, []]}, + transient, Timeout, supervisor, [?MODULE]}]}}; +init([]) -> + {ok, {{simple_one_for_one_terminate, 0, 1}, + [{test_worker, {?MODULE, start_link, []}, + temporary, 1000, worker, [?MODULE]}]}}. diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index fb184d1ab2..5ce894a944 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -27,7 +27,7 @@ -behaviour(gen_server). --export([start_link/1]). +-export([start_link/1, start_link/3]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -49,9 +49,11 @@ -record(state, {total_memory, memory_limit, + memory_fraction, timeout, timer, - alarmed + alarmed, + alarm_funs }). %%---------------------------------------------------------------------------- @@ -59,6 +61,8 @@ -ifdef(use_specs). -spec(start_link/1 :: (float()) -> rabbit_types:ok_pid_or_error()). +-spec(start_link/3 :: (float(), fun ((any()) -> 'ok'), + fun ((any()) -> 'ok')) -> rabbit_types:ok_pid_or_error()). -spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')). -spec(get_vm_limit/0 :: () -> non_neg_integer()). -spec(get_check_interval/0 :: () -> non_neg_integer()). @@ -73,11 +77,9 @@ %% Public API %%---------------------------------------------------------------------------- -get_total_memory() -> - get_total_memory(os:type()). +get_total_memory() -> get_total_memory(os:type()). -get_vm_limit() -> - get_vm_limit(os:type()). +get_vm_limit() -> get_vm_limit(os:type()). get_check_interval() -> gen_server:call(?MODULE, get_check_interval, infinity). @@ -99,24 +101,27 @@ get_memory_limit() -> %% gen_server callbacks %%---------------------------------------------------------------------------- -start_link(Args) -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [Args], []). +start_link(MemFraction) -> + start_link(MemFraction, + fun alarm_handler:set_alarm/1, fun alarm_handler:clear_alarm/1). -init([MemFraction]) -> +start_link(MemFraction, AlarmSet, AlarmClear) -> + gen_server:start_link({local, ?SERVER}, ?MODULE, + [MemFraction, {AlarmSet, AlarmClear}], []). + +init([MemFraction, AlarmFuns]) -> TRef = start_timer(?DEFAULT_MEMORY_CHECK_INTERVAL), - State = #state { timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, - timer = TRef, - alarmed = false}, + State = #state { timeout = ?DEFAULT_MEMORY_CHECK_INTERVAL, + timer = TRef, + alarmed = false, + alarm_funs = AlarmFuns }, {ok, set_mem_limits(State, MemFraction)}. handle_call(get_vm_memory_high_watermark, _From, State) -> - {reply, State#state.memory_limit / State#state.total_memory, State}; + {reply, State#state.memory_fraction, State}; handle_call({set_vm_memory_high_watermark, MemFraction}, _From, State) -> - State1 = set_mem_limits(State, MemFraction), - error_logger:info_msg("Memory alarm changed to ~p, ~p bytes.~n", - [MemFraction, State1#state.memory_limit]), - {reply, ok, State1}; + {reply, ok, set_mem_limits(State, MemFraction)}; handle_call(get_check_interval, _From, State) -> {reply, State#state.timeout, State}; @@ -168,32 +173,41 @@ set_mem_limits(State, MemFraction) -> ?MEMORY_SIZE_FOR_UNKNOWN_OS; M -> M end, - MemLim = get_mem_limit(MemFraction, TotalMemory), + UsableMemory = case get_vm_limit() of + Limit when Limit < TotalMemory -> + error_logger:warning_msg( + "Only ~pMB of ~pMB memory usable due to " + "limited address space.~n", + [trunc(V/?ONE_MB) || V <- [Limit, TotalMemory]]), + Limit; + _ -> + TotalMemory + end, + MemLim = trunc(MemFraction * UsableMemory), error_logger:info_msg("Memory limit set to ~pMB of ~pMB total.~n", [trunc(MemLim/?ONE_MB), trunc(TotalMemory/?ONE_MB)]), - internal_update(State #state { total_memory = TotalMemory, - memory_limit = MemLim }). + internal_update(State #state { total_memory = TotalMemory, + memory_limit = MemLim, + memory_fraction = MemFraction}). internal_update(State = #state { memory_limit = MemLimit, - alarmed = Alarmed}) -> + alarmed = Alarmed, + alarm_funs = {AlarmSet, AlarmClear} }) -> MemUsed = erlang:memory(total), NewAlarmed = MemUsed > MemLimit, case {Alarmed, NewAlarmed} of - {false, true} -> - emit_update_info(set, MemUsed, MemLimit), - alarm_handler:set_alarm({{resource_limit, memory, node()}, []}); - {true, false} -> - emit_update_info(clear, MemUsed, MemLimit), - alarm_handler:clear_alarm({resource_limit, memory, node()}); - _ -> - ok + {false, true} -> emit_update_info(set, MemUsed, MemLimit), + AlarmSet({{resource_limit, memory, node()}, []}); + {true, false} -> emit_update_info(clear, MemUsed, MemLimit), + AlarmClear({resource_limit, memory, node()}); + _ -> ok end, State #state {alarmed = NewAlarmed}. -emit_update_info(State, MemUsed, MemLimit) -> +emit_update_info(AlarmState, MemUsed, MemLimit) -> error_logger:info_msg( "vm_memory_high_watermark ~p. Memory used:~p allowed:~p~n", - [State, MemUsed, MemLimit]). + [AlarmState, MemUsed, MemLimit]). start_timer(Timeout) -> {ok, TRef} = timer:send_interval(Timeout, update), @@ -207,7 +221,7 @@ get_vm_limit({win32,_OSname}) -> 8 -> 8*1024*1024*1024*1024 %% 8 TB for 64 bits 2^42 end; -%% On a 32-bit machine, if you're using more than 2 gigs of RAM you're +%% On a 32-bit machine, if you're using more than 4 gigs of RAM you're %% in big trouble anyway. get_vm_limit(_OsType) -> case erlang:system_info(wordsize) of @@ -216,10 +230,6 @@ get_vm_limit(_OsType) -> %%http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details end. -get_mem_limit(MemFraction, TotalMemory) -> - AvMem = lists:min([TotalMemory, get_vm_limit()]), - trunc(AvMem * MemFraction). - %%---------------------------------------------------------------------------- %% Internal Helpers %%---------------------------------------------------------------------------- |
