diff options
author | dcorbacho <dparracorbacho@piotal.io> | 2020-11-18 14:27:41 +0000 |
---|---|---|
committer | dcorbacho <dparracorbacho@piotal.io> | 2020-11-18 14:27:41 +0000 |
commit | f23a51261d9502ec39df0f8db47ba6b22aa7659f (patch) | |
tree | 53dcdf46e7dc2c14e81ee960bce8793879b488d3 /deps/rabbit/src | |
parent | afa2c2bf6c7e0e9b63f4fb53dc931c70388e1c82 (diff) | |
parent | 9f6d64ec4a4b1eeac24d7846c5c64fd96798d892 (diff) | |
download | rabbitmq-server-git-stream-timestamp-offset.tar.gz |
Merge remote-tracking branch 'origin/master' into stream-timestamp-offsetstream-timestamp-offset
Diffstat (limited to 'deps/rabbit/src')
171 files changed, 62633 insertions, 0 deletions
diff --git a/deps/rabbit/src/amqqueue.erl b/deps/rabbit/src/amqqueue.erl new file mode 100644 index 0000000000..3415ebd073 --- /dev/null +++ b/deps/rabbit/src/amqqueue.erl @@ -0,0 +1,762 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(amqqueue). %% Could become amqqueue_v2 in the future. + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include("amqqueue.hrl"). + +-export([new/8, + new/9, + new_with_version/9, + new_with_version/10, + fields/0, + fields/1, + field_vhost/0, + record_version_to_use/0, + upgrade/1, + upgrade_to/2, + % arguments + get_arguments/1, + set_arguments/2, + % decorators + get_decorators/1, + set_decorators/2, + % exclusive_owner + get_exclusive_owner/1, + % gm_pids + get_gm_pids/1, + set_gm_pids/2, + get_leader/1, + % name (#resource) + get_name/1, + set_name/2, + % operator_policy + get_operator_policy/1, + set_operator_policy/2, + get_options/1, + % pid + get_pid/1, + set_pid/2, + % policy + get_policy/1, + set_policy/2, + % policy_version + get_policy_version/1, + set_policy_version/2, + % type_state + get_type_state/1, + set_type_state/2, + % recoverable_slaves + get_recoverable_slaves/1, + set_recoverable_slaves/2, + % slave_pids + get_slave_pids/1, + set_slave_pids/2, + % slave_pids_pending_shutdown + get_slave_pids_pending_shutdown/1, + set_slave_pids_pending_shutdown/2, + % state + get_state/1, + set_state/2, + % sync_slave_pids + get_sync_slave_pids/1, + set_sync_slave_pids/2, + get_type/1, + get_vhost/1, + is_amqqueue/1, + is_auto_delete/1, + is_durable/1, + is_classic/1, + is_quorum/1, + pattern_match_all/0, + pattern_match_on_name/1, + pattern_match_on_type/1, + reset_mirroring_and_decorators/1, + set_immutable/1, + qnode/1, + macros/0]). 
+ +-define(record_version, amqqueue_v2). +-define(is_backwards_compat_classic(T), + (T =:= classic orelse T =:= ?amqqueue_v1_type)). + +-record(amqqueue, { + name :: rabbit_amqqueue:name() | '_', %% immutable + durable :: boolean() | '_', %% immutable + auto_delete :: boolean() | '_', %% immutable + exclusive_owner = none :: pid() | none | '_', %% immutable + arguments = [] :: rabbit_framing:amqp_table() | '_', %% immutable + pid :: pid() | ra_server_id() | none | '_', %% durable (just so we + %% know home node) + slave_pids = [] :: [pid()] | none | '_', %% transient + sync_slave_pids = [] :: [pid()] | none| '_',%% transient + recoverable_slaves = [] :: [atom()] | none | '_', %% durable + policy :: binary() | none | undefined | '_', %% durable, implicit + %% update as above + operator_policy :: binary() | none | undefined | '_', %% durable, + %% implicit + %% update + %% as above + gm_pids = [] :: [{pid(), pid()}] | none | '_', %% transient + decorators :: [atom()] | none | undefined | '_', %% transient, + %% recalculated + %% as above + state = live :: atom() | none | '_', %% durable (have we crashed?) + policy_version = 0 :: non_neg_integer() | '_', + slave_pids_pending_shutdown = [] :: [pid()] | '_', + vhost :: rabbit_types:vhost() | undefined | '_', %% secondary index + options = #{} :: map() | '_', + type = ?amqqueue_v1_type :: module() | '_', + type_state = #{} :: map() | '_' + }). + +-type amqqueue() :: amqqueue_v1:amqqueue_v1() | amqqueue_v2(). 
+-type amqqueue_v2() :: #amqqueue{ + name :: rabbit_amqqueue:name(), + durable :: boolean(), + auto_delete :: boolean(), + exclusive_owner :: pid() | none, + arguments :: rabbit_framing:amqp_table(), + pid :: pid() | ra_server_id() | none, + slave_pids :: [pid()] | none, + sync_slave_pids :: [pid()] | none, + recoverable_slaves :: [atom()] | none, + policy :: binary() | none | undefined, + operator_policy :: binary() | none | undefined, + gm_pids :: [{pid(), pid()}] | none, + decorators :: [atom()] | none | undefined, + state :: atom() | none, + policy_version :: non_neg_integer(), + slave_pids_pending_shutdown :: [pid()], + vhost :: rabbit_types:vhost() | undefined, + options :: map(), + type :: atom(), + type_state :: #{} + }. + +-type ra_server_id() :: {Name :: atom(), Node :: node()}. + +-type amqqueue_pattern() :: amqqueue_v1:amqqueue_v1_pattern() | + amqqueue_v2_pattern(). +-type amqqueue_v2_pattern() :: #amqqueue{ + name :: rabbit_amqqueue:name() | '_', + durable :: '_', + auto_delete :: '_', + exclusive_owner :: '_', + arguments :: '_', + pid :: '_', + slave_pids :: '_', + sync_slave_pids :: '_', + recoverable_slaves :: '_', + policy :: '_', + operator_policy :: '_', + gm_pids :: '_', + decorators :: '_', + state :: '_', + policy_version :: '_', + slave_pids_pending_shutdown :: '_', + vhost :: '_', + options :: '_', + type :: atom() | '_', + type_state :: '_' + }. + +-export_type([amqqueue/0, + amqqueue_v2/0, + amqqueue_pattern/0, + amqqueue_v2_pattern/0, + ra_server_id/0]). + +-spec new(rabbit_amqqueue:name(), + pid() | ra_server_id() | none, + boolean(), + boolean(), + pid() | none, + rabbit_framing:amqp_table(), + rabbit_types:vhost() | undefined, + map()) -> amqqueue(). 
+ +new(#resource{kind = queue} = Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options) + when (is_pid(Pid) orelse is_tuple(Pid) orelse Pid =:= none) andalso + is_boolean(Durable) andalso + is_boolean(AutoDelete) andalso + (is_pid(Owner) orelse Owner =:= none) andalso + is_list(Args) andalso + (is_binary(VHost) orelse VHost =:= undefined) andalso + is_map(Options) -> + new(Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + ?amqqueue_v1_type). + +-spec new(rabbit_amqqueue:name(), + pid() | ra_server_id() | none, + boolean(), + boolean(), + pid() | none, + rabbit_framing:amqp_table(), + rabbit_types:vhost() | undefined, + map(), + atom()) -> amqqueue(). + +new(#resource{kind = queue} = Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + Type) + when (is_pid(Pid) orelse is_tuple(Pid) orelse Pid =:= none) andalso + is_boolean(Durable) andalso + is_boolean(AutoDelete) andalso + (is_pid(Owner) orelse Owner =:= none) andalso + is_list(Args) andalso + (is_binary(VHost) orelse VHost =:= undefined) andalso + is_map(Options) andalso + is_atom(Type) -> + case record_version_to_use() of + ?record_version -> + new_with_version( + ?record_version, + Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + Type); + _ -> + amqqueue_v1:new( + Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + Type) + end. + +-spec new_with_version +(amqqueue_v1 | amqqueue_v2, + rabbit_amqqueue:name(), + pid() | ra_server_id() | none, + boolean(), + boolean(), + pid() | none, + rabbit_framing:amqp_table(), + rabbit_types:vhost() | undefined, + map()) -> amqqueue(). 
+ +new_with_version(RecordVersion, + #resource{kind = queue} = Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options) + when (is_pid(Pid) orelse is_tuple(Pid) orelse Pid =:= none) andalso + is_boolean(Durable) andalso + is_boolean(AutoDelete) andalso + (is_pid(Owner) orelse Owner =:= none) andalso + is_list(Args) andalso + (is_binary(VHost) orelse VHost =:= undefined) andalso + is_map(Options) -> + new_with_version(RecordVersion, + Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + ?amqqueue_v1_type). + +-spec new_with_version +(amqqueue_v1 | amqqueue_v2, + rabbit_amqqueue:name(), + pid() | ra_server_id() | none, + boolean(), + boolean(), + pid() | none, + rabbit_framing:amqp_table(), + rabbit_types:vhost() | undefined, + map(), + atom()) -> amqqueue(). + +new_with_version(?record_version, + #resource{kind = queue} = Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + Type) + when (is_pid(Pid) orelse is_tuple(Pid) orelse Pid =:= none) andalso + is_boolean(Durable) andalso + is_boolean(AutoDelete) andalso + (is_pid(Owner) orelse Owner =:= none) andalso + is_list(Args) andalso + (is_binary(VHost) orelse VHost =:= undefined) andalso + is_map(Options) andalso + is_atom(Type) -> + #amqqueue{name = Name, + durable = Durable, + auto_delete = AutoDelete, + arguments = Args, + exclusive_owner = Owner, + pid = Pid, + vhost = VHost, + options = Options, + type = ensure_type_compat(Type)}; +new_with_version(Version, + Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + Type) + when ?is_backwards_compat_classic(Type) -> + amqqueue_v1:new_with_version( + Version, + Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options). + +-spec is_amqqueue(any()) -> boolean(). + +is_amqqueue(#amqqueue{}) -> true; +is_amqqueue(Queue) -> amqqueue_v1:is_amqqueue(Queue). + +-spec record_version_to_use() -> amqqueue_v1 | amqqueue_v2. 
+ +record_version_to_use() -> + case rabbit_feature_flags:is_enabled(quorum_queue) of + true -> ?record_version; + false -> amqqueue_v1:record_version_to_use() + end. + +-spec upgrade(amqqueue()) -> amqqueue(). + +upgrade(#amqqueue{} = Queue) -> Queue; +upgrade(OldQueue) -> upgrade_to(record_version_to_use(), OldQueue). + +-spec upgrade_to +(amqqueue_v2, amqqueue()) -> amqqueue_v2(); +(amqqueue_v1, amqqueue_v1:amqqueue_v1()) -> amqqueue_v1:amqqueue_v1(). + +upgrade_to(?record_version, #amqqueue{} = Queue) -> + Queue; +upgrade_to(?record_version, OldQueue) -> + Fields = erlang:tuple_to_list(OldQueue) ++ [?amqqueue_v1_type, + undefined], + #amqqueue{} = erlang:list_to_tuple(Fields); +upgrade_to(Version, OldQueue) -> + amqqueue_v1:upgrade_to(Version, OldQueue). + +% arguments + +-spec get_arguments(amqqueue()) -> rabbit_framing:amqp_table(). + +get_arguments(#amqqueue{arguments = Args}) -> + Args; +get_arguments(Queue) -> + amqqueue_v1:get_arguments(Queue). + +-spec set_arguments(amqqueue(), rabbit_framing:amqp_table()) -> amqqueue(). + +set_arguments(#amqqueue{} = Queue, Args) -> + Queue#amqqueue{arguments = Args}; +set_arguments(Queue, Args) -> + amqqueue_v1:set_arguments(Queue, Args). + +% decorators + +-spec get_decorators(amqqueue()) -> [atom()] | none | undefined. + +get_decorators(#amqqueue{decorators = Decorators}) -> + Decorators; +get_decorators(Queue) -> + amqqueue_v1:get_decorators(Queue). + +-spec set_decorators(amqqueue(), [atom()] | none | undefined) -> amqqueue(). + +set_decorators(#amqqueue{} = Queue, Decorators) -> + Queue#amqqueue{decorators = Decorators}; +set_decorators(Queue, Decorators) -> + amqqueue_v1:set_decorators(Queue, Decorators). + +-spec get_exclusive_owner(amqqueue()) -> pid() | none. + +get_exclusive_owner(#amqqueue{exclusive_owner = Owner}) -> + Owner; +get_exclusive_owner(Queue) -> + amqqueue_v1:get_exclusive_owner(Queue). + +% gm_pids + +-spec get_gm_pids(amqqueue()) -> [{pid(), pid()}] | none. 
+ +get_gm_pids(#amqqueue{gm_pids = GMPids}) -> + GMPids; +get_gm_pids(Queue) -> + amqqueue_v1:get_gm_pids(Queue). + +-spec set_gm_pids(amqqueue(), [{pid(), pid()}] | none) -> amqqueue(). + +set_gm_pids(#amqqueue{} = Queue, GMPids) -> + Queue#amqqueue{gm_pids = GMPids}; +set_gm_pids(Queue, GMPids) -> + amqqueue_v1:set_gm_pids(Queue, GMPids). + +-spec get_leader(amqqueue_v2()) -> node(). + +get_leader(#amqqueue{type = rabbit_quorum_queue, pid = {_, Leader}}) -> Leader. + +% operator_policy + +-spec get_operator_policy(amqqueue()) -> binary() | none | undefined. + +get_operator_policy(#amqqueue{operator_policy = OpPolicy}) -> OpPolicy; +get_operator_policy(Queue) -> amqqueue_v1:get_operator_policy(Queue). + +-spec set_operator_policy(amqqueue(), binary() | none | undefined) -> + amqqueue(). + +set_operator_policy(#amqqueue{} = Queue, Policy) -> + Queue#amqqueue{operator_policy = Policy}; +set_operator_policy(Queue, Policy) -> + amqqueue_v1:set_operator_policy(Queue, Policy). + +% name + +-spec get_name(amqqueue()) -> rabbit_amqqueue:name(). + +get_name(#amqqueue{name = Name}) -> Name; +get_name(Queue) -> amqqueue_v1:get_name(Queue). + +-spec set_name(amqqueue(), rabbit_amqqueue:name()) -> amqqueue(). + +set_name(#amqqueue{} = Queue, Name) -> + Queue#amqqueue{name = Name}; +set_name(Queue, Name) -> + amqqueue_v1:set_name(Queue, Name). + +-spec get_options(amqqueue()) -> map(). + +get_options(#amqqueue{options = Options}) -> Options; +get_options(Queue) -> amqqueue_v1:get_options(Queue). + +% pid + +-spec get_pid +(amqqueue_v2()) -> pid() | ra_server_id() | none; +(amqqueue_v1:amqqueue_v1()) -> pid() | none. + +get_pid(#amqqueue{pid = Pid}) -> Pid; +get_pid(Queue) -> amqqueue_v1:get_pid(Queue). + +-spec set_pid +(amqqueue_v2(), pid() | ra_server_id() | none) -> amqqueue_v2(); +(amqqueue_v1:amqqueue_v1(), pid() | none) -> amqqueue_v1:amqqueue_v1(). 
+ +set_pid(#amqqueue{} = Queue, Pid) -> + Queue#amqqueue{pid = Pid}; +set_pid(Queue, Pid) -> + amqqueue_v1:set_pid(Queue, Pid). + +% policy + +-spec get_policy(amqqueue()) -> proplists:proplist() | none | undefined. + +get_policy(#amqqueue{policy = Policy}) -> Policy; +get_policy(Queue) -> amqqueue_v1:get_policy(Queue). + +-spec set_policy(amqqueue(), binary() | none | undefined) -> amqqueue(). + +set_policy(#amqqueue{} = Queue, Policy) -> + Queue#amqqueue{policy = Policy}; +set_policy(Queue, Policy) -> + amqqueue_v1:set_policy(Queue, Policy). + +% policy_version + +-spec get_policy_version(amqqueue()) -> non_neg_integer(). + +get_policy_version(#amqqueue{policy_version = PV}) -> + PV; +get_policy_version(Queue) -> + amqqueue_v1:get_policy_version(Queue). + +-spec set_policy_version(amqqueue(), non_neg_integer()) -> amqqueue(). + +set_policy_version(#amqqueue{} = Queue, PV) -> + Queue#amqqueue{policy_version = PV}; +set_policy_version(Queue, PV) -> + amqqueue_v1:set_policy_version(Queue, PV). + +% recoverable_slaves + +-spec get_recoverable_slaves(amqqueue()) -> [atom()] | none. + +get_recoverable_slaves(#amqqueue{recoverable_slaves = Slaves}) -> + Slaves; +get_recoverable_slaves(Queue) -> + amqqueue_v1:get_recoverable_slaves(Queue). + +-spec set_recoverable_slaves(amqqueue(), [atom()] | none) -> amqqueue(). + +set_recoverable_slaves(#amqqueue{} = Queue, Slaves) -> + Queue#amqqueue{recoverable_slaves = Slaves}; +set_recoverable_slaves(Queue, Slaves) -> + amqqueue_v1:set_recoverable_slaves(Queue, Slaves). + +% type_state (new in v2) + +-spec get_type_state(amqqueue()) -> map(). +get_type_state(#amqqueue{type_state = TState}) -> + TState; +get_type_state(_) -> + #{}. + +-spec set_type_state(amqqueue(), map()) -> amqqueue(). +set_type_state(#amqqueue{} = Queue, TState) -> + Queue#amqqueue{type_state = TState}; +set_type_state(Queue, _TState) -> + Queue. + +% slave_pids + +-spec get_slave_pids(amqqueue()) -> [pid()] | none. 
+ +get_slave_pids(#amqqueue{slave_pids = Slaves}) -> + Slaves; +get_slave_pids(Queue) -> + amqqueue_v1:get_slave_pids(Queue). + +-spec set_slave_pids(amqqueue(), [pid()] | none) -> amqqueue(). + +set_slave_pids(#amqqueue{} = Queue, SlavePids) -> + Queue#amqqueue{slave_pids = SlavePids}; +set_slave_pids(Queue, SlavePids) -> + amqqueue_v1:set_slave_pids(Queue, SlavePids). + +% slave_pids_pending_shutdown + +-spec get_slave_pids_pending_shutdown(amqqueue()) -> [pid()]. + +get_slave_pids_pending_shutdown( + #amqqueue{slave_pids_pending_shutdown = Slaves}) -> + Slaves; +get_slave_pids_pending_shutdown(Queue) -> + amqqueue_v1:get_slave_pids_pending_shutdown(Queue). + +-spec set_slave_pids_pending_shutdown(amqqueue(), [pid()]) -> amqqueue(). + +set_slave_pids_pending_shutdown(#amqqueue{} = Queue, SlavePids) -> + Queue#amqqueue{slave_pids_pending_shutdown = SlavePids}; +set_slave_pids_pending_shutdown(Queue, SlavePids) -> + amqqueue_v1:set_slave_pids_pending_shutdown(Queue, SlavePids). + +% state + +-spec get_state(amqqueue()) -> atom() | none. + +get_state(#amqqueue{state = State}) -> State; +get_state(Queue) -> amqqueue_v1:get_state(Queue). + +-spec set_state(amqqueue(), atom() | none) -> amqqueue(). + +set_state(#amqqueue{} = Queue, State) -> + Queue#amqqueue{state = State}; +set_state(Queue, State) -> + amqqueue_v1:set_state(Queue, State). + +% sync_slave_pids + +-spec get_sync_slave_pids(amqqueue()) -> [pid()] | none. + +get_sync_slave_pids(#amqqueue{sync_slave_pids = Pids}) -> + Pids; +get_sync_slave_pids(Queue) -> + amqqueue_v1:get_sync_slave_pids(Queue). + +-spec set_sync_slave_pids(amqqueue(), [pid()] | none) -> amqqueue(). + +set_sync_slave_pids(#amqqueue{} = Queue, Pids) -> + Queue#amqqueue{sync_slave_pids = Pids}; +set_sync_slave_pids(Queue, Pids) -> + amqqueue_v1:set_sync_slave_pids(Queue, Pids). + +%% New in v2. + +-spec get_type(amqqueue()) -> atom(). + +get_type(#amqqueue{type = Type}) -> Type; +get_type(Queue) when ?is_amqqueue(Queue) -> ?amqqueue_v1_type. 
+ +-spec get_vhost(amqqueue()) -> rabbit_types:vhost() | undefined. + +get_vhost(#amqqueue{vhost = VHost}) -> VHost; +get_vhost(Queue) -> amqqueue_v1:get_vhost(Queue). + +-spec is_auto_delete(amqqueue()) -> boolean(). + +is_auto_delete(#amqqueue{auto_delete = AutoDelete}) -> + AutoDelete; +is_auto_delete(Queue) -> + amqqueue_v1:is_auto_delete(Queue). + +-spec is_durable(amqqueue()) -> boolean(). + +is_durable(#amqqueue{durable = Durable}) -> Durable; +is_durable(Queue) -> amqqueue_v1:is_durable(Queue). + +-spec is_classic(amqqueue()) -> boolean(). + +is_classic(Queue) -> + get_type(Queue) =:= ?amqqueue_v1_type. + +-spec is_quorum(amqqueue()) -> boolean(). + +is_quorum(Queue) -> + get_type(Queue) =:= rabbit_quorum_queue. + +fields() -> + case record_version_to_use() of + ?record_version -> fields(?record_version); + _ -> amqqueue_v1:fields() + end. + +fields(?record_version) -> record_info(fields, amqqueue); +fields(Version) -> amqqueue_v1:fields(Version). + +field_vhost() -> + case record_version_to_use() of + ?record_version -> #amqqueue.vhost; + _ -> amqqueue_v1:field_vhost() + end. + +-spec pattern_match_all() -> amqqueue_pattern(). + +pattern_match_all() -> + case record_version_to_use() of + ?record_version -> #amqqueue{_ = '_'}; + _ -> amqqueue_v1:pattern_match_all() + end. + +-spec pattern_match_on_name(rabbit_amqqueue:name()) -> amqqueue_pattern(). + +pattern_match_on_name(Name) -> + case record_version_to_use() of + ?record_version -> #amqqueue{name = Name, _ = '_'}; + _ -> amqqueue_v1:pattern_match_on_name(Name) + end. + +-spec pattern_match_on_type(atom()) -> amqqueue_pattern(). + +pattern_match_on_type(Type) -> + case record_version_to_use() of + ?record_version -> + #amqqueue{type = Type, _ = '_'}; + _ when ?is_backwards_compat_classic(Type) -> + amqqueue_v1:pattern_match_all(); + %% FIXME: We try a pattern which should never match when the + %% `quorum_queue` feature flag is not enabled yet. Is there + %% a better solution? 
+ _ -> + amqqueue_v1:pattern_match_on_name( + rabbit_misc:r(<<0>>, queue, <<0>>)) + end. + +-spec reset_mirroring_and_decorators(amqqueue()) -> amqqueue(). + +reset_mirroring_and_decorators(#amqqueue{} = Queue) -> + Queue#amqqueue{slave_pids = [], + sync_slave_pids = [], + gm_pids = [], + decorators = undefined}; +reset_mirroring_and_decorators(Queue) -> + amqqueue_v1:reset_mirroring_and_decorators(Queue). + +-spec set_immutable(amqqueue()) -> amqqueue(). + +set_immutable(#amqqueue{} = Queue) -> + Queue#amqqueue{pid = none, + slave_pids = [], + sync_slave_pids = none, + recoverable_slaves = none, + gm_pids = none, + policy = none, + decorators = none, + state = none}; +set_immutable(Queue) -> + amqqueue_v1:set_immutable(Queue). + +-spec qnode(amqqueue() | pid() | ra_server_id()) -> node(). + +qnode(Queue) when ?is_amqqueue(Queue) -> + QPid = get_pid(Queue), + qnode(QPid); +qnode(QPid) when is_pid(QPid) -> + node(QPid); +qnode({_, Node}) -> + Node. + +% private + +macros() -> + io:format( + "-define(is_~s(Q), is_record(Q, amqqueue, ~b)).~n~n", + [?record_version, record_info(size, amqqueue)]), + %% The field number starts at 2 because the first element is the + %% record name. + macros(record_info(fields, amqqueue), 2). + +macros([Field | Rest], I) -> + io:format( + "-define(~s_field_~s(Q), element(~b, Q)).~n", + [?record_version, Field, I]), + macros(Rest, I + 1); +macros([], _) -> + ok. + +ensure_type_compat(classic) -> + ?amqqueue_v1_type; +ensure_type_compat(Type) -> + Type. diff --git a/deps/rabbit/src/amqqueue_v1.erl b/deps/rabbit/src/amqqueue_v1.erl new file mode 100644 index 0000000000..dd1de74a4e --- /dev/null +++ b/deps/rabbit/src/amqqueue_v1.erl @@ -0,0 +1,584 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. 
+%% + +-module(amqqueue_v1). + +-include_lib("rabbit_common/include/resource.hrl"). +-include("amqqueue.hrl"). + +-export([new/8, + new/9, + new_with_version/9, + new_with_version/10, + fields/0, + fields/1, + field_vhost/0, + record_version_to_use/0, + upgrade/1, + upgrade_to/2, + % arguments + get_arguments/1, + set_arguments/2, + % decorators + get_decorators/1, + set_decorators/2, + % exclusive_owner + get_exclusive_owner/1, + % gm_pids + get_gm_pids/1, + set_gm_pids/2, + get_leader/1, + % name (#resource) + get_name/1, + set_name/2, + % operator_policy + get_operator_policy/1, + set_operator_policy/2, + get_options/1, + % pid + get_pid/1, + set_pid/2, + % policy + get_policy/1, + set_policy/2, + % policy_version + get_policy_version/1, + set_policy_version/2, + % type_state + get_type_state/1, + set_type_state/2, + % recoverable_slaves + get_recoverable_slaves/1, + set_recoverable_slaves/2, + % slave_pids + get_slave_pids/1, + set_slave_pids/2, + % slave_pids_pending_shutdown + get_slave_pids_pending_shutdown/1, + set_slave_pids_pending_shutdown/2, + % state + get_state/1, + set_state/2, + % sync_slave_pids + get_sync_slave_pids/1, + set_sync_slave_pids/2, + get_type/1, + get_vhost/1, + is_amqqueue/1, + is_auto_delete/1, + is_durable/1, + is_classic/1, + is_quorum/1, + pattern_match_all/0, + pattern_match_on_name/1, + pattern_match_on_type/1, + reset_mirroring_and_decorators/1, + set_immutable/1, + qnode/1, + macros/0]). + +-define(record_version, ?MODULE). +-define(is_backwards_compat_classic(T), + (T =:= classic orelse T =:= ?amqqueue_v1_type)). 
+ +-record(amqqueue, { + name :: rabbit_amqqueue:name() | '_', %% immutable + durable :: boolean() | '_', %% immutable + auto_delete :: boolean() | '_', %% immutable + exclusive_owner = none :: pid() | none | '_', %% immutable + arguments = [] :: rabbit_framing:amqp_table() | '_', %% immutable + pid :: pid() | none | '_', %% durable (just so we + %% know home node) + slave_pids = [] :: [pid()] | none | '_', %% transient + sync_slave_pids = [] :: [pid()] | none| '_',%% transient + recoverable_slaves = [] :: [atom()] | none | '_', %% durable + policy :: binary() | none | undefined | '_', %% durable, implicit + %% update as above + operator_policy :: binary() | none | undefined | '_', %% durable, + %% implicit + %% update + %% as above + gm_pids = [] :: [{pid(), pid()}] | none | '_', %% transient + decorators :: [atom()] | none | undefined | '_', %% transient, + %% recalculated + %% as above + state = live :: atom() | none | '_', %% durable (have we crashed?) + policy_version = 0 :: non_neg_integer() | '_', + slave_pids_pending_shutdown = [] :: [pid()] | '_', + vhost :: rabbit_types:vhost() | undefined | '_', %% secondary index + options = #{} :: map() | '_' + }). + +-type amqqueue() :: amqqueue_v1(). +-type amqqueue_v1() :: #amqqueue{ + name :: rabbit_amqqueue:name(), + durable :: boolean(), + auto_delete :: boolean(), + exclusive_owner :: pid() | none, + arguments :: rabbit_framing:amqp_table(), + pid :: pid() | none, + slave_pids :: [pid()] | none, + sync_slave_pids :: [pid()] | none, + recoverable_slaves :: [atom()] | none, + policy :: binary() | none | undefined, + operator_policy :: binary() | none | undefined, + gm_pids :: [{pid(), pid()}] | none, + decorators :: [atom()] | none | undefined, + state :: atom() | none, + policy_version :: non_neg_integer(), + slave_pids_pending_shutdown :: [pid()], + vhost :: rabbit_types:vhost() | undefined, + options :: map() + }. + +-type amqqueue_pattern() :: amqqueue_v1_pattern(). 
+-type amqqueue_v1_pattern() :: #amqqueue{ + name :: rabbit_amqqueue:name() | '_', + durable :: '_', + auto_delete :: '_', + exclusive_owner :: '_', + arguments :: '_', + pid :: '_', + slave_pids :: '_', + sync_slave_pids :: '_', + recoverable_slaves :: '_', + policy :: '_', + operator_policy :: '_', + gm_pids :: '_', + decorators :: '_', + state :: '_', + policy_version :: '_', + slave_pids_pending_shutdown :: '_', + vhost :: '_', + options :: '_' + }. + +-export_type([amqqueue/0, + amqqueue_v1/0, + amqqueue_pattern/0, + amqqueue_v1_pattern/0]). + +-spec new(rabbit_amqqueue:name(), + pid() | none, + boolean(), + boolean(), + pid() | none, + rabbit_framing:amqp_table(), + rabbit_types:vhost() | undefined, + map()) -> amqqueue(). + +new(#resource{kind = queue} = Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options) + when (is_pid(Pid) orelse Pid =:= none) andalso + is_boolean(Durable) andalso + is_boolean(AutoDelete) andalso + (is_pid(Owner) orelse Owner =:= none) andalso + is_list(Args) andalso + (is_binary(VHost) orelse VHost =:= undefined) andalso + is_map(Options) -> + new_with_version( + ?record_version, + Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options). + +-spec new(rabbit_amqqueue:name(), + pid() | none, + boolean(), + boolean(), + pid() | none, + rabbit_framing:amqp_table(), + rabbit_types:vhost() | undefined, + map(), + ?amqqueue_v1_type | classic) -> amqqueue(). + +new(#resource{kind = queue} = Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + Type) + when (is_pid(Pid) orelse Pid =:= none) andalso + is_boolean(Durable) andalso + is_boolean(AutoDelete) andalso + (is_pid(Owner) orelse Owner =:= none) andalso + is_list(Args) andalso + (is_binary(VHost) orelse VHost =:= undefined) andalso + is_map(Options) andalso + ?is_backwards_compat_classic(Type) -> + new( + Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options). 
+ +-spec new_with_version(amqqueue_v1, + rabbit_amqqueue:name(), + pid() | none, + boolean(), + boolean(), + pid() | none, + rabbit_framing:amqp_table(), + rabbit_types:vhost() | undefined, + map()) -> amqqueue(). + +new_with_version(?record_version, + #resource{kind = queue} = Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options) + when (is_pid(Pid) orelse Pid =:= none) andalso + is_boolean(Durable) andalso + is_boolean(AutoDelete) andalso + (is_pid(Owner) orelse Owner =:= none) andalso + is_list(Args) andalso + (is_binary(VHost) orelse VHost =:= undefined) andalso + is_map(Options) -> + #amqqueue{name = Name, + durable = Durable, + auto_delete = AutoDelete, + arguments = Args, + exclusive_owner = Owner, + pid = Pid, + vhost = VHost, + options = Options}. + +-spec new_with_version(amqqueue_v1, + rabbit_amqqueue:name(), + pid() | none, + boolean(), + boolean(), + pid() | none, + rabbit_framing:amqp_table(), + rabbit_types:vhost() | undefined, + map(), + ?amqqueue_v1_type | classic) -> amqqueue(). + +new_with_version(?record_version, + #resource{kind = queue} = Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options, + Type) + when (is_pid(Pid) orelse Pid =:= none) andalso + is_boolean(Durable) andalso + is_boolean(AutoDelete) andalso + (is_pid(Owner) orelse Owner =:= none) andalso + is_list(Args) andalso + (is_binary(VHost) orelse VHost =:= undefined) andalso + is_map(Options) andalso + ?is_backwards_compat_classic(Type) -> + new_with_version( + ?record_version, + Name, + Pid, + Durable, + AutoDelete, + Owner, + Args, + VHost, + Options). + +-spec is_amqqueue(any()) -> boolean(). + +is_amqqueue(#amqqueue{}) -> true; +is_amqqueue(_) -> false. + +-spec record_version_to_use() -> amqqueue_v1. + +record_version_to_use() -> + ?record_version. + +-spec upgrade(amqqueue()) -> amqqueue(). + +upgrade(#amqqueue{} = Queue) -> Queue. + +-spec upgrade_to(amqqueue_v1, amqqueue()) -> amqqueue(). 
+ +upgrade_to(?record_version, #amqqueue{} = Queue) -> + Queue. + +% arguments + +-spec get_arguments(amqqueue()) -> rabbit_framing:amqp_table(). + +get_arguments(#amqqueue{arguments = Args}) -> Args. + +-spec set_arguments(amqqueue(), rabbit_framing:amqp_table()) -> amqqueue(). + +set_arguments(#amqqueue{} = Queue, Args) -> + Queue#amqqueue{arguments = Args}. + +% decorators + +-spec get_decorators(amqqueue()) -> [atom()] | none | undefined. + +get_decorators(#amqqueue{decorators = Decorators}) -> Decorators. + +-spec set_decorators(amqqueue(), [atom()] | none | undefined) -> amqqueue(). + +set_decorators(#amqqueue{} = Queue, Decorators) -> + Queue#amqqueue{decorators = Decorators}. + +-spec get_exclusive_owner(amqqueue()) -> pid() | none. + +get_exclusive_owner(#amqqueue{exclusive_owner = Owner}) -> Owner. + +% gm_pids + +-spec get_gm_pids(amqqueue()) -> [{pid(), pid()}] | none. + +get_gm_pids(#amqqueue{gm_pids = GMPids}) -> GMPids. + +-spec set_gm_pids(amqqueue(), [{pid(), pid()}] | none) -> amqqueue(). + +set_gm_pids(#amqqueue{} = Queue, GMPids) -> + Queue#amqqueue{gm_pids = GMPids}. + +-spec get_leader(amqqueue_v1()) -> no_return(). + +get_leader(_) -> throw({unsupported, ?record_version, get_leader}). + +% operator_policy + +-spec get_operator_policy(amqqueue()) -> binary() | none | undefined. + +get_operator_policy(#amqqueue{operator_policy = OpPolicy}) -> OpPolicy. + +-spec set_operator_policy(amqqueue(), binary() | none | undefined) -> + amqqueue(). + +set_operator_policy(#amqqueue{} = Queue, OpPolicy) -> + Queue#amqqueue{operator_policy = OpPolicy}. + +% name + +-spec get_name(amqqueue()) -> rabbit_amqqueue:name(). + +get_name(#amqqueue{name = Name}) -> Name. + +-spec set_name(amqqueue(), rabbit_amqqueue:name()) -> amqqueue(). + +set_name(#amqqueue{} = Queue, Name) -> + Queue#amqqueue{name = Name}. + +-spec get_options(amqqueue()) -> map(). + +get_options(#amqqueue{options = Options}) -> Options. 
+ +% pid + +-spec get_pid +(amqqueue_v1:amqqueue_v1()) -> pid() | none. + +get_pid(#amqqueue{pid = Pid}) -> Pid. + +-spec set_pid +(amqqueue_v1:amqqueue_v1(), pid() | none) -> amqqueue_v1:amqqueue_v1(). + +set_pid(#amqqueue{} = Queue, Pid) -> + Queue#amqqueue{pid = Pid}. + +% policy + +-spec get_policy(amqqueue()) -> proplists:proplist() | none | undefined. + +get_policy(#amqqueue{policy = Policy}) -> Policy. + +-spec set_policy(amqqueue(), binary() | none | undefined) -> amqqueue(). + +set_policy(#amqqueue{} = Queue, Policy) -> + Queue#amqqueue{policy = Policy}. + +% policy_version + +-spec get_policy_version(amqqueue()) -> non_neg_integer(). + +get_policy_version(#amqqueue{policy_version = PV}) -> + PV. + +-spec set_policy_version(amqqueue(), non_neg_integer()) -> amqqueue(). + +set_policy_version(#amqqueue{} = Queue, PV) -> + Queue#amqqueue{policy_version = PV}. + +% recoverable_slaves + +-spec get_recoverable_slaves(amqqueue()) -> [atom()] | none. + +get_recoverable_slaves(#amqqueue{recoverable_slaves = Slaves}) -> + Slaves. + +-spec set_recoverable_slaves(amqqueue(), [atom()] | none) -> amqqueue(). + +set_recoverable_slaves(#amqqueue{} = Queue, Slaves) -> + Queue#amqqueue{recoverable_slaves = Slaves}. + +% type_state (new in v2) + +-spec get_type_state(amqqueue()) -> no_return(). + +get_type_state(_) -> throw({unsupported, ?record_version, get_type_state}). + +-spec set_type_state(amqqueue(), [node()]) -> no_return(). + +set_type_state(_, _) -> + throw({unsupported, ?record_version, set_type_state}). + +% slave_pids + +get_slave_pids(#amqqueue{slave_pids = Slaves}) -> + Slaves. + +set_slave_pids(#amqqueue{} = Queue, SlavePids) -> + Queue#amqqueue{slave_pids = SlavePids}. + +% slave_pids_pending_shutdown + +get_slave_pids_pending_shutdown( + #amqqueue{slave_pids_pending_shutdown = Slaves}) -> + Slaves. + +set_slave_pids_pending_shutdown(#amqqueue{} = Queue, SlavePids) -> + Queue#amqqueue{slave_pids_pending_shutdown = SlavePids}. 

% state

-spec get_state(amqqueue()) -> atom() | none.

get_state(#amqqueue{state = State}) -> State.

-spec set_state(amqqueue(), atom() | none) -> amqqueue().

set_state(#amqqueue{} = Queue, State) ->
    Queue#amqqueue{state = State}.

% sync_slave_pids

-spec get_sync_slave_pids(amqqueue()) -> [pid()] | none.

get_sync_slave_pids(#amqqueue{sync_slave_pids = Pids}) ->
    Pids.

-spec set_sync_slave_pids(amqqueue(), [pid()] | none) -> amqqueue().

set_sync_slave_pids(#amqqueue{} = Queue, Pids) ->
    Queue#amqqueue{sync_slave_pids = Pids}.

%% New in v2.

%% A v1 record has no type field, so every v1 queue reports the classic type.
-spec get_type(amqqueue()) -> atom().

get_type(Queue) when ?is_amqqueue(Queue) -> ?amqqueue_v1_type.

-spec get_vhost(amqqueue()) -> rabbit_types:vhost() | undefined.

get_vhost(#amqqueue{vhost = VHost}) -> VHost.

-spec is_auto_delete(amqqueue()) -> boolean().

is_auto_delete(#amqqueue{auto_delete = AutoDelete}) -> AutoDelete.

-spec is_durable(amqqueue()) -> boolean().

is_durable(#amqqueue{durable = Durable}) -> Durable.

-spec is_classic(amqqueue()) -> boolean().

%% Always true for v1 records, since get_type/1 always yields the classic type.
is_classic(Queue) ->
    get_type(Queue) =:= ?amqqueue_v1_type.

-spec is_quorum(amqqueue()) -> boolean().

%% Quorum queues require a newer record version; v1 records are never quorum.
is_quorum(Queue) when ?is_amqqueue(Queue) ->
    false.

%% Field names of this record version, used e.g. for mnesia table definitions.
fields() -> fields(?record_version).

fields(?record_version) -> record_info(fields, amqqueue).

%% Positional index of the vhost field within the record tuple.
field_vhost() -> #amqqueue.vhost.

% Match patterns for mnesia/ets queries over amqqueue records.

-spec pattern_match_all() -> amqqueue_pattern().

pattern_match_all() -> #amqqueue{_ = '_'}.

-spec pattern_match_on_name(rabbit_amqqueue:name()) ->
    amqqueue_pattern().

pattern_match_on_name(Name) -> #amqqueue{name = Name, _ = '_'}.

-spec pattern_match_on_type(atom()) -> no_return().

%% No type field in v1, so matching on type is unsupported and throws.
pattern_match_on_type(_) ->
    throw({unsupported, ?record_version, pattern_match_on_type}).

%% Clear all mirroring bookkeeping (slave/gm pids) and drop decorators;
%% decorators are reset to 'undefined' rather than 'none'.
reset_mirroring_and_decorators(#amqqueue{} = Queue) ->
    Queue#amqqueue{slave_pids      = [],
                   sync_slave_pids = [],
                   gm_pids         = [],
                   decorators      = undefined}.
+ +set_immutable(#amqqueue{} = Queue) -> + Queue#amqqueue{pid = none, + slave_pids = none, + sync_slave_pids = none, + recoverable_slaves = none, + gm_pids = none, + policy = none, + decorators = none, + state = none}. + +-spec qnode(amqqueue() | pid()) -> node(). + +qnode(Queue) when ?is_amqqueue(Queue) -> + QPid = get_pid(Queue), + qnode(QPid); +qnode(QPid) when is_pid(QPid) -> + node(QPid). + +macros() -> + io:format( + "-define(is_~s(Q), is_record(Q, amqqueue, ~b)).~n~n", + [?record_version, record_info(size, amqqueue)]), + %% The field number starts at 2 because the first element is the + %% record name. + macros(record_info(fields, amqqueue), 2). + +macros([Field | Rest], I) -> + io:format( + "-define(~s_field_~s(Q), element(~b, Q)).~n", + [?record_version, Field, I]), + macros(Rest, I + 1); +macros([], _) -> + ok. diff --git a/deps/rabbit/src/background_gc.erl b/deps/rabbit/src/background_gc.erl new file mode 100644 index 0000000000..be5bf0c995 --- /dev/null +++ b/deps/rabbit/src/background_gc.erl @@ -0,0 +1,78 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(background_gc). + +-behaviour(gen_server2). + +-export([start_link/0, run/0]). +-export([gc/0]). %% For run_interval only + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(MAX_RATIO, 0.01). +-define(MAX_INTERVAL, 240000). + +-record(state, {last_interval}). + +%%---------------------------------------------------------------------------- + +-spec start_link() -> {'ok', pid()} | {'error', any()}. + +start_link() -> gen_server2:start_link({local, ?MODULE}, ?MODULE, [], + [{timeout, infinity}]). + +-spec run() -> 'ok'. + +run() -> gen_server2:cast(?MODULE, run). 
+ +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, IdealInterval} = application:get_env(rabbit, background_gc_target_interval), + {ok, interval_gc(#state{last_interval = IdealInterval})}. + +handle_call(Msg, _From, State) -> + {stop, {unexpected_call, Msg}, {unexpected_call, Msg}, State}. + +handle_cast(run, State) -> gc(), {noreply, State}; + +handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}. + +handle_info(run, State) -> {noreply, interval_gc(State)}; + +handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. + +code_change(_OldVsn, State, _Extra) -> {ok, State}. + +terminate(_Reason, State) -> State. + +%%---------------------------------------------------------------------------- + +interval_gc(State = #state{last_interval = LastInterval}) -> + {ok, IdealInterval} = application:get_env(rabbit, background_gc_target_interval), + {ok, Interval} = rabbit_misc:interval_operation( + {?MODULE, gc, []}, + ?MAX_RATIO, ?MAX_INTERVAL, IdealInterval, LastInterval), + erlang:send_after(Interval, self(), run), + State#state{last_interval = Interval}. + +-spec gc() -> 'ok'. + +gc() -> + Enabled = rabbit_misc:get_env(rabbit, background_gc_enabled, false), + case Enabled of + true -> + [garbage_collect(P) || P <- processes(), + {status, waiting} == process_info(P, status)], + %% since we will never be waiting... + garbage_collect(); + false -> + ok + end, + ok. diff --git a/deps/rabbit/src/code_server_cache.erl b/deps/rabbit/src/code_server_cache.erl new file mode 100644 index 0000000000..b53f5dcee9 --- /dev/null +++ b/deps/rabbit/src/code_server_cache.erl @@ -0,0 +1,81 @@ +%% -*- erlang-indent-level: 4;indent-tabs-mode: nil -*- +%% ex: ts=4 sw=4 et +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. 
or its affiliates. All rights reserved. +%% + +-module(code_server_cache). + +-behaviour(gen_server). + +%% API +-export([start_link/0, + maybe_call_mfa/4]). + +%% gen_server callbacks +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +-record(state, { + modules = #{} :: #{atom() => boolean()} +}). + +%% API +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +maybe_call_mfa(Module, Function, Args, Default) -> + gen_server:call(?MODULE, {maybe_call_mfa, {Module, Function, Args, Default}}). + +%% gen_server callbacks + +init([]) -> + {ok, #state{}}. + +handle_call({maybe_call_mfa, {Mod, _F, _A, _D} = MFA}, _From, #state{modules = ModuleMap} = State0) -> + Value = maps:get(Mod, ModuleMap, true), + {ok, Reply, State1} = handle_maybe_call_mfa(Value, MFA, State0), + {reply, Reply, State1}; +handle_call(_Request, _From, State) -> + {reply, ignored, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% Internal functions + +handle_maybe_call_mfa(false, {_M, _F, _A, Default}, State) -> + {ok, Default, State}; +handle_maybe_call_mfa(true, {Module, Function, Args, Default}, State) -> + try + Reply = erlang:apply(Module, Function, Args), + {ok, Reply, State} + catch + error:undef -> + handle_maybe_call_mfa_error(Module, Default, State); + Err:Reason -> + rabbit_log:error("Calling ~p:~p failed: ~p:~p~n", + [Module, Function, Err, Reason]), + handle_maybe_call_mfa_error(Module, Default, State) + end. + +handle_maybe_call_mfa_error(Module, Default, #state{modules = ModuleMap0} = State0) -> + ModuleMap1 = maps:put(Module, false, ModuleMap0), + State1 = State0#state{modules = ModuleMap1}, + {ok, Default, State1}. 
diff --git a/deps/rabbit/src/gatherer.erl b/deps/rabbit/src/gatherer.erl new file mode 100644 index 0000000000..2b46ec02b1 --- /dev/null +++ b/deps/rabbit/src/gatherer.erl @@ -0,0 +1,151 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(gatherer). + +%% Gatherer is a queue which has producer and consumer processes. Before producers +%% push items to the queue using gatherer:in/2 they need to declare their intent +%% to do so with gatherer:fork/1. When a publisher's work is done, it states so +%% using gatherer:finish/1. +%% +%% Consumers pop messages off queues with gatherer:out/1. If a queue is empty +%% and there are producers that haven't finished working, the caller is blocked +%% until an item is available. If there are no active producers, gatherer:out/1 +%% immediately returns 'empty'. +%% +%% This module is primarily used to collect results from asynchronous tasks +%% running in a worker pool, e.g. when recovering bindings or rebuilding +%% message store indices. + +-behaviour(gen_server2). + +-export([start_link/0, stop/1, fork/1, finish/1, in/2, sync_in/2, out/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + +%%---------------------------------------------------------------------------- + +-record(gstate, { forks, values, blocked }). + +%%---------------------------------------------------------------------------- + +-spec start_link() -> rabbit_types:ok_pid_or_error(). + +start_link() -> + gen_server2:start_link(?MODULE, [], [{timeout, infinity}]). + +-spec stop(pid()) -> 'ok'. 

%% Stop the gatherer. We unlink first so the caller is not taken down by the
%% exit signal when the server terminates with reason 'normal'.
stop(Pid) ->
    unlink(Pid),
    gen_server2:call(Pid, stop, infinity).

-spec fork(pid()) -> 'ok'.

%% Declare a producer's intent to push items; increments the fork count that
%% keeps out/1 blocking while producers are still active.
fork(Pid) ->
    gen_server2:call(Pid, fork, infinity).

-spec finish(pid()) -> 'ok'.

%% Declare that a producer is done (asynchronous; decrements the fork count).
finish(Pid) ->
    gen_server2:cast(Pid, finish).

-spec in(pid(), any()) -> 'ok'.

%% Asynchronously push a value into the queue.
in(Pid, Value) ->
    gen_server2:cast(Pid, {in, Value}).

-spec sync_in(pid(), any()) -> 'ok'.

%% Synchronously push a value: the caller blocks until the value has either
%% been handed to a waiting consumer or stored (reply is deferred, see below).
sync_in(Pid, Value) ->
    gen_server2:call(Pid, {in, Value}, infinity).

-spec out(pid()) -> {'value', any()} | 'empty'.

%% Pop a value. Blocks while the queue is empty and producers are active;
%% returns 'empty' immediately once no producers remain.
out(Pid) ->
    gen_server2:call(Pid, out, infinity).

%%----------------------------------------------------------------------------

init([]) ->
    {ok, #gstate { forks = 0, values = queue:new(), blocked = queue:new() },
     hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.

handle_call(stop, _From, State) ->
    {stop, normal, ok, State};

handle_call(fork, _From, State = #gstate { forks = Forks }) ->
    {reply, ok, State #gstate { forks = Forks + 1 }, hibernate};

%% sync_in: do not reply yet — in/3 either hands the value to a blocked
%% consumer (replying 'ok' to us immediately) or queues {From, Value} so the
%% producer stays blocked until the value is consumed.
handle_call({in, Value}, From, State) ->
    {noreply, in(Value, From, State), hibernate};

handle_call(out, From, State = #gstate { forks = Forks,
                                         values = Values,
                                         blocked = Blocked }) ->
    case queue:out(Values) of
        {empty, _} when Forks == 0 ->
            %% No values and no active producers: report empty at once.
            {reply, empty, State, hibernate};
        {empty, _} ->
            %% Producers still active: park the consumer until a value
            %% arrives or the last producer finishes.
            {noreply, State #gstate { blocked = queue:in(From, Blocked) },
             hibernate};
        {{value, {PendingIn, Value}}, NewValues} ->
            %% Value available. If it came from sync_in, PendingIn is the
            %% producer's From tag and we release it now; reply/2 ignores
            %% 'undefined' (async in/2 values).
            reply(PendingIn, ok),
            {reply, {value, Value}, State #gstate { values = NewValues },
             hibernate}
    end;

handle_call(Msg, _From, State) ->
    {stop, {unexpected_call, Msg}, State}.
+ +handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> + NewForks = Forks - 1, + NewBlocked = case NewForks of + 0 -> _ = [gen_server2:reply(From, empty) || + From <- queue:to_list(Blocked)], + queue:new(); + _ -> Blocked + end, + {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, + hibernate}; + +handle_cast({in, Value}, State) -> + {noreply, in(Value, undefined, State), hibernate}; + +handle_cast(Msg, State) -> + {stop, {unexpected_cast, Msg}, State}. + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +terminate(_Reason, State) -> + State. + +%%---------------------------------------------------------------------------- + +in(Value, From, State = #gstate { values = Values, blocked = Blocked }) -> + case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in({From, Value}, Values) }; + {{value, PendingOut}, NewBlocked} -> + reply(From, ok), + gen_server2:reply(PendingOut, {value, Value}), + State #gstate { blocked = NewBlocked } + end. + +reply(undefined, _Reply) -> ok; +reply(From, Reply) -> gen_server2:reply(From, Reply). diff --git a/deps/rabbit/src/gm.erl b/deps/rabbit/src/gm.erl new file mode 100644 index 0000000000..af24a2958a --- /dev/null +++ b/deps/rabbit/src/gm.erl @@ -0,0 +1,1650 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(gm). 
+ +%% Guaranteed Multicast +%% ==================== +%% +%% This module provides the ability to create named groups of +%% processes to which members can be dynamically added and removed, +%% and for messages to be broadcast within the group that are +%% guaranteed to reach all members of the group during the lifetime of +%% the message. The lifetime of a message is defined as being, at a +%% minimum, the time from which the message is first sent to any +%% member of the group, up until the time at which it is known by the +%% member who published the message that the message has reached all +%% group members. +%% +%% The guarantee given is that provided a message, once sent, makes it +%% to members who do not all leave the group, the message will +%% continue to propagate to all group members. +%% +%% Another way of stating the guarantee is that if member P publishes +%% messages m and m', then for all members P', if P' is a member of +%% the group prior to the publication of m, and P' receives m', then +%% P' will receive m. +%% +%% Note that only local-ordering is enforced: i.e. if member P sends +%% message m and then message m', then for-all members P', if P' +%% receives m and m', then they will receive m' after m. Causality +%% ordering is _not_ enforced. I.e. if member P receives message m +%% and as a result publishes message m', there is no guarantee that +%% other members P' will receive m before m'. +%% +%% +%% API Use +%% ------- +%% +%% Mnesia must be started. Use the idempotent create_tables/0 function +%% to create the tables required. +%% +%% start_link/3 +%% Provide the group name, the callback module name, and any arguments +%% you wish to be passed into the callback module's functions. The +%% joined/2 function will be called when we have joined the group, +%% with the arguments passed to start_link and a list of the current +%% members of the group. 
See the callbacks specs and the comments +%% below for further details of the callback functions. +%% +%% leave/1 +%% Provide the Pid. Removes the Pid from the group. The callback +%% handle_terminate/2 function will be called. +%% +%% broadcast/2 +%% Provide the Pid and a Message. The message will be sent to all +%% members of the group as per the guarantees given above. This is a +%% cast and the function call will return immediately. There is no +%% guarantee that the message will reach any member of the group. +%% +%% confirmed_broadcast/2 +%% Provide the Pid and a Message. As per broadcast/2 except that this +%% is a call, not a cast, and only returns 'ok' once the Message has +%% reached every member of the group. Do not call +%% confirmed_broadcast/2 directly from the callback module otherwise +%% you will deadlock the entire group. +%% +%% info/1 +%% Provide the Pid. Returns a proplist with various facts, including +%% the group name and the current group members. +%% +%% validate_members/2 +%% Check whether a given member list agrees with the chosen member's +%% view. Any differences will be communicated via the members_changed +%% callback. If there are no differences then there will be no reply. +%% Note that members will not necessarily share the same view. +%% +%% forget_group/1 +%% Provide the group name. Removes its mnesia record. Makes no attempt +%% to ensure the group is empty. +%% +%% Implementation Overview +%% ----------------------- +%% +%% One possible means of implementation would be a fan-out from the +%% sender to every member of the group. This would require that the +%% group is fully connected, and, in the event that the original +%% sender of the message disappears from the group before the message +%% has made it to every member of the group, raises questions as to +%% who is responsible for sending on the message to new group members. +%% In particular, the issue is with [ Pid ! 
Msg || Pid <- Members ] - +%% if the sender dies part way through, who is responsible for +%% ensuring that the remaining Members receive the Msg? In the event +%% that within the group, messages sent are broadcast from a subset of +%% the members, the fan-out arrangement has the potential to +%% substantially impact the CPU and network workload of such members, +%% as such members would have to accommodate the cost of sending each +%% message to every group member. +%% +%% Instead, if the members of the group are arranged in a chain, then +%% it becomes easier to reason about who within the group has received +%% each message and who has not. It eases issues of responsibility: in +%% the event of a group member disappearing, the nearest upstream +%% member of the chain is responsible for ensuring that messages +%% continue to propagate down the chain. It also results in equal +%% distribution of sending and receiving workload, even if all +%% messages are being sent from just a single group member. This +%% configuration has the further advantage that it is not necessary +%% for every group member to know of every other group member, and +%% even that a group member does not have to be accessible from all +%% other group members. +%% +%% Performance is kept high by permitting pipelining and all +%% communication between joined group members is asynchronous. In the +%% chain A -> B -> C -> D, if A sends a message to the group, it will +%% not directly contact C or D. However, it must know that D receives +%% the message (in addition to B and C) before it can consider the +%% message fully sent. A simplistic implementation would require that +%% D replies to C, C replies to B and B then replies to A. This would +%% result in a propagation delay of twice the length of the chain. It +%% would also require, in the event of the failure of C, that D knows +%% to directly contact B and issue the necessary replies. 
Instead, the +%% chain forms a ring: D sends the message on to A: D does not +%% distinguish A as the sender, merely as the next member (downstream) +%% within the chain (which has now become a ring). When A receives +%% from D messages that A sent, it knows that all members have +%% received the message. However, the message is not dead yet: if C +%% died as B was sending to C, then B would need to detect the death +%% of C and forward the message on to D instead: thus every node has +%% to remember every message published until it is told that it can +%% forget about the message. This is essential not just for dealing +%% with failure of members, but also for the addition of new members. +%% +%% Thus once A receives the message back again, it then sends to B an +%% acknowledgement for the message, indicating that B can now forget +%% about the message. B does so, and forwards the ack to C. C forgets +%% the message, and forwards the ack to D, which forgets the message +%% and finally forwards the ack back to A. At this point, A takes no +%% further action: the message and its acknowledgement have made it to +%% every member of the group. The message is now dead, and any new +%% member joining the group at this point will not receive the +%% message. +%% +%% We therefore have two roles: +%% +%% 1. The sender, who upon receiving their own messages back, must +%% then send out acknowledgements, and upon receiving their own +%% acknowledgements back perform no further action. +%% +%% 2. The other group members who upon receiving messages and +%% acknowledgements must update their own internal state accordingly +%% (the sending member must also do this in order to be able to +%% accommodate failures), and forwards messages on to their downstream +%% neighbours. +%% +%% +%% Implementation: It gets trickier +%% -------------------------------- +%% +%% Chain A -> B -> C -> D +%% +%% A publishes a message which B receives. A now dies. 
B and D will +%% detect the death of A, and will link up, thus the chain is now B -> +%% C -> D. B forwards A's message on to C, who forwards it to D, who +%% forwards it to B. Thus B is now responsible for A's messages - both +%% publications and acknowledgements that were in flight at the point +%% at which A died. Even worse is that this is transitive: after B +%% forwards A's message to C, B dies as well. Now C is not only +%% responsible for B's in-flight messages, but is also responsible for +%% A's in-flight messages. +%% +%% Lemma 1: A member can only determine which dead members they have +%% inherited responsibility for if there is a total ordering on the +%% conflicting additions and subtractions of members from the group. +%% +%% Consider the simultaneous death of B and addition of B' that +%% transitions a chain from A -> B -> C to A -> B' -> C. Either B' or +%% C is responsible for in-flight messages from B. It is easy to +%% ensure that at least one of them thinks they have inherited B, but +%% if we do not ensure that exactly one of them inherits B, then we +%% could have B' converting publishes to acks, which then will crash C +%% as C does not believe it has issued acks for those messages. +%% +%% More complex scenarios are easy to concoct: A -> B -> C -> D -> E +%% becoming A -> C' -> E. Who has inherited which of B, C and D? +%% +%% However, for non-conflicting membership changes, only a partial +%% ordering is required. For example, A -> B -> C becoming A -> A' -> +%% B. The addition of A', between A and B can have no conflicts with +%% the death of C: it is clear that A has inherited C's messages. +%% +%% For ease of implementation, we adopt the simple solution, of +%% imposing a total order on all membership changes. 
+%% +%% On the death of a member, it is ensured the dead member's +%% neighbours become aware of the death, and the upstream neighbour +%% now sends to its new downstream neighbour its state, including the +%% messages pending acknowledgement. The downstream neighbour can then +%% use this to calculate which publishes and acknowledgements it has +%% missed out on, due to the death of its old upstream. Thus the +%% downstream can catch up, and continues the propagation of messages +%% through the group. +%% +%% Lemma 2: When a member is joining, it must synchronously +%% communicate with its upstream member in order to receive its +%% starting state atomically with its addition to the group. +%% +%% New members must start with the same state as their nearest +%% upstream neighbour. This ensures that it is not surprised by +%% acknowledgements they are sent, and that should their downstream +%% neighbour die, they are able to send the correct state to their new +%% downstream neighbour to ensure it can catch up. Thus in the +%% transition A -> B -> C becomes A -> A' -> B -> C becomes A -> A' -> +%% C, A' must start with the state of A, so that it can send C the +%% correct state when B dies, allowing C to detect any missed +%% messages. +%% +%% If A' starts by adding itself to the group membership, A could then +%% die, without A' having received the necessary state from A. This +%% would leave A' responsible for in-flight messages from A, but +%% having the least knowledge of all, of those messages. Thus A' must +%% start by synchronously calling A, which then immediately sends A' +%% back its state. A then adds A' to the group. If A dies at this +%% point then A' will be able to see this (as A' will fail to appear +%% in the group membership), and thus A' will ignore the state it +%% receives from A, and will simply repeat the process, trying to now +%% join downstream from some other member. 
This ensures that should +%% the upstream die as soon as the new member has been joined, the new +%% member is guaranteed to receive the correct state, allowing it to +%% correctly process messages inherited due to the death of its +%% upstream neighbour. +%% +%% The canonical definition of the group membership is held by a +%% distributed database. Whilst this allows the total ordering of +%% changes to be achieved, it is nevertheless undesirable to have to +%% query this database for the current view, upon receiving each +%% message. Instead, we wish for members to be able to cache a view of +%% the group membership, which then requires a cache invalidation +%% mechanism. Each member maintains its own view of the group +%% membership. Thus when the group's membership changes, members may +%% need to become aware of such changes in order to be able to +%% accurately process messages they receive. Because of the +%% requirement of a total ordering of conflicting membership changes, +%% it is not possible to use the guaranteed broadcast mechanism to +%% communicate these changes: to achieve the necessary ordering, it +%% would be necessary for such messages to be published by exactly one +%% member, which can not be guaranteed given that such a member could +%% die. +%% +%% The total ordering we enforce on membership changes gives rise to a +%% view version number: every change to the membership creates a +%% different view, and the total ordering permits a simple +%% monotonically increasing view version number. +%% +%% Lemma 3: If a message is sent from a member that holds view version +%% N, it can be correctly processed by any member receiving the +%% message with a view version >= N. +%% +%% Initially, let us suppose that each view contains the ordering of +%% every member that was ever part of the group. Dead members are +%% marked as such. 
Thus we have a ring of members, some of which are +%% dead, and are thus inherited by the nearest alive downstream +%% member. +%% +%% In the chain A -> B -> C, all three members initially have view +%% version 1, which reflects reality. B publishes a message, which is +%% forward by C to A. B now dies, which A notices very quickly. Thus A +%% updates the view, creating version 2. It now forwards B's +%% publication, sending that message to its new downstream neighbour, +%% C. This happens before C is aware of the death of B. C must become +%% aware of the view change before it interprets the message its +%% received, otherwise it will fail to learn of the death of B, and +%% thus will not realise it has inherited B's messages (and will +%% likely crash). +%% +%% Thus very simply, we have that each subsequent view contains more +%% information than the preceding view. +%% +%% However, to avoid the views growing indefinitely, we need to be +%% able to delete members which have died _and_ for which no messages +%% are in-flight. This requires that upon inheriting a dead member, we +%% know the last publication sent by the dead member (this is easy: we +%% inherit a member because we are the nearest downstream member which +%% implies that we know at least as much than everyone else about the +%% publications of the dead member), and we know the earliest message +%% for which the acknowledgement is still in flight. +%% +%% In the chain A -> B -> C, when B dies, A will send to C its state +%% (as C is the new downstream from A), allowing C to calculate which +%% messages it has missed out on (described above). At this point, C +%% also inherits B's messages. 
If that state from A also includes the +%% last message published by B for which an acknowledgement has been +%% seen, then C knows exactly which further acknowledgements it must +%% receive (also including issuing acknowledgements for publications +%% still in-flight that it receives), after which it is known there +%% are no more messages in flight for B, thus all evidence that B was +%% ever part of the group can be safely removed from the canonical +%% group membership. +%% +%% Thus, for every message that a member sends, it includes with that +%% message its view version. When a member receives a message it will +%% update its view from the canonical copy, should its view be older +%% than the view version included in the message it has received. +%% +%% The state held by each member therefore includes the messages from +%% each publisher pending acknowledgement, the last publication seen +%% from that publisher, and the last acknowledgement from that +%% publisher. In the case of the member's own publications or +%% inherited members, this last acknowledgement seen state indicates +%% the last acknowledgement retired, rather than sent. +%% +%% +%% Proof sketch +%% ------------ +%% +%% We need to prove that with the provided operational semantics, we +%% can never reach a state that is not well formed from a well-formed +%% starting state. +%% +%% Operational semantics (small step): straight-forward message +%% sending, process monitoring, state updates. +%% +%% Well formed state: dead members inherited by exactly one non-dead +%% member; for every entry in anyone's pending-acks, either (the +%% publication of the message is in-flight downstream from the member +%% and upstream from the publisher) or (the acknowledgement of the +%% message is in-flight downstream from the publisher and upstream +%% from the member). +%% +%% Proof by induction on the applicable operational semantics. 
+%% +%% +%% Related work +%% ------------ +%% +%% The ring configuration and double traversal of messages around the +%% ring is similar (though developed independently) to the LCR +%% protocol by [Levy 2008]. However, LCR differs in several +%% ways. Firstly, by using vector clocks, it enforces a total order of +%% message delivery, which is unnecessary for our purposes. More +%% significantly, it is built on top of a "group communication system" +%% which performs the group management functions, taking +%% responsibility away from the protocol as to how to cope with safely +%% adding and removing members. When membership changes do occur, the +%% protocol stipulates that every member must perform communication +%% with every other member of the group, to ensure all outstanding +%% deliveries complete, before the entire group transitions to the new +%% view. This, in total, requires two sets of all-to-all synchronous +%% communications. +%% +%% This is not only rather inefficient, but also does not explain what +%% happens upon the failure of a member during this process. It does +%% though entirely avoid the need for inheritance of responsibility of +%% dead members that our protocol incorporates. +%% +%% In [Marandi et al 2010], a Paxos-based protocol is described. This +%% work explicitly focuses on the efficiency of communication. LCR +%% (and our protocol too) are more efficient, but at the cost of +%% higher latency. The Ring-Paxos protocol is itself built on top of +%% IP-multicast, which rules it out for many applications where +%% point-to-point communication is all that can be required. They also +%% have an excellent related work section which I really ought to +%% read... +%% +%% +%% [Levy 2008] The Complexity of Reliable Distributed Storage, 2008. +%% [Marandi et al 2010] Ring Paxos: A High-Throughput Atomic Broadcast +%% Protocol + + +-behaviour(gen_server2). 
+ +-export([create_tables/0, start_link/4, leave/1, broadcast/2, broadcast/3, + confirmed_broadcast/2, info/1, validate_members/2, forget_group/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3, prioritise_info/3]). + +%% For INSTR_MOD callbacks +-export([call/3, cast/2, monitor/1, demonitor/1]). + +-export([table_definitions/0]). + +-define(GROUP_TABLE, gm_group). +-define(MAX_BUFFER_SIZE, 100000000). %% 100MB +-define(BROADCAST_TIMER, 25). +-define(FORCE_GC_TIMER, 250). +-define(VERSION_START, 0). +-define(SETS, ordsets). + +-record(state, + { self, + left, + right, + group_name, + module, + view, + pub_count, + members_state, + callback_args, + confirms, + broadcast_buffer, + broadcast_buffer_sz, + broadcast_timer, + force_gc_timer, + txn_executor, + shutting_down + }). + +-record(gm_group, { name, version, members }). + +-record(view_member, { id, aliases, left, right }). + +-record(member, { pending_ack, last_pub, last_ack }). + +-define(TABLE, {?GROUP_TABLE, [{record_name, gm_group}, + {attributes, record_info(fields, gm_group)}]}). +-define(TABLE_MATCH, {match, #gm_group { _ = '_' }}). + +-define(TAG, '$gm'). + +-export_type([group_name/0]). + +-type group_name() :: any(). +-type txn_fun() :: fun((fun(() -> any())) -> any()). + +%% The joined, members_changed and handle_msg callbacks can all return +%% any of the following terms: +%% +%% 'ok' - the callback function returns normally +%% +%% {'stop', Reason} - the callback indicates the member should stop +%% with reason Reason and should leave the group. +%% +%% {'become', Module, Args} - the callback indicates that the callback +%% module should be changed to Module and that the callback functions +%% should now be passed the arguments Args. This allows the callback +%% module to be dynamically changed. + +%% Called when we've successfully joined the group. Supplied with Args +%% provided in start_link, plus current group members. 
-callback joined(Args :: term(), Members :: [pid()]) ->
    ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}.

%% Supplied with Args provided in start_link, the list of new members
%% and the list of members previously known to us that have since
%% died. Note that if a member joins and dies very quickly, it's
%% possible that we will never see that member appear in either births
%% or deaths. However we are guaranteed that (1) we will see a member
%% joining either in the births here, or in the members passed to
%% joined/2 before receiving any messages from it; and (2) we will not
%% see members die that we have not seen born (or supplied in the
%% members to joined/2).
-callback members_changed(Args :: term(),
                          Births :: [pid()], Deaths :: [pid()]) ->
    ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}.

%% Supplied with Args provided in start_link, the sender, and the
%% message. This does get called for messages injected by this member,
%% however, in such cases, there is no special significance of this
%% invocation: it does not indicate that the message has made it to
%% any other members, let alone all other members.
-callback handle_msg(Args :: term(), From :: pid(), Message :: term()) ->
    ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}.

%% Called on gm member termination as per rules in gen_server, with
%% the Args provided in start_link plus the termination Reason.
-callback handle_terminate(Args :: term(), Reason :: term()) ->
    ok | term().

-spec create_tables() -> 'ok' | {'aborted', any()}.

%% Create the mnesia table(s) this module needs; idempotent — an
%% already-existing table is treated as success.
create_tables() ->
    create_tables([?TABLE]).

create_tables([]) ->
    ok;
create_tables([{Table, Attributes} | Tables]) ->
    case mnesia:create_table(Table, Attributes) of
        {atomic, ok}                       -> create_tables(Tables);
        {aborted, {already_exists, Table}} -> create_tables(Tables);
        Err                                -> Err
    end.
table_definitions() ->
    {Name, Attributes} = ?TABLE,
    [{Name, [?TABLE_MATCH | Attributes]}].

-spec start_link(group_name(), atom(), any(), txn_fun()) ->
          rabbit_types:ok_pid_or_error().

start_link(GroupName, Module, Args, TxnFun) ->
    %% fullsweep_after 0 keeps the heap small: this process churns
    %% through a lot of short-lived garbage while relaying messages.
    gen_server2:start_link(?MODULE, [GroupName, Module, Args, TxnFun],
                           [{spawn_opt, [{fullsweep_after, 0}]}]).

-spec leave(pid()) -> 'ok'.

%% Ask the member to leave the group; it stops with reason 'normal'.
leave(Server) ->
    gen_server2:cast(Server, leave).

-spec broadcast(pid(), any()) -> 'ok'.

%% Asynchronous broadcast with no size hint (see broadcast/3).
broadcast(Server, Msg) -> broadcast(Server, Msg, 0).

%% SizeHint is the approximate serialised size of Msg; it only feeds
%% the broadcast-buffer accounting, never the message itself.
broadcast(Server, Msg, SizeHint) ->
    gen_server2:cast(Server, {broadcast, Msg, SizeHint}).

-spec confirmed_broadcast(pid(), any()) -> 'ok'.

%% Synchronous broadcast: returns once the message has gone all the
%% way around the ring and back to us.
confirmed_broadcast(Server, Msg) ->
    gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity).

-spec info(pid()) -> rabbit_types:infos().

info(Server) ->
    gen_server2:call(Server, info, infinity).

-spec validate_members(pid(), [pid()]) -> 'ok'.

%% Ask the member to diff its view against OldMembers and report any
%% births/deaths through the members_changed callback.
validate_members(Server, Members) ->
    gen_server2:cast(Server, {validate_members, Members}).

-spec forget_group(group_name()) -> 'ok'.

%% Remove the group's durable record from mnesia entirely.
forget_group(GroupName) ->
    {atomic, ok} = mnesia:sync_transaction(
                     fun () ->
                             mnesia:delete({?GROUP_TABLE, GroupName})
                     end),
    ok.

%% Joining happens asynchronously (via the 'join' cast we send to
%% ourselves) so that init/1 returns promptly; until the join
%% completes, members_state is undefined and view is undefined.
init([GroupName, Module, Args, TxnFun]) ->
    put(process_name, {?MODULE, GroupName}),
    Self = make_member(GroupName),
    gen_server2:cast(self(), join),
    {ok, #state { self                = Self,
                  left                = {Self, undefined},
                  right               = {Self, undefined},
                  group_name          = GroupName,
                  module              = Module,
                  view                = undefined,
                  pub_count           = -1,
                  members_state       = undefined,
                  callback_args       = Args,
                  confirms            = queue:new(),
                  broadcast_buffer    = [],
                  broadcast_buffer_sz = 0,
                  broadcast_timer     = undefined,
                  force_gc_timer      = undefined,
                  txn_executor        = TxnFun,
                  shutting_down       = false }}.
%% Refuse confirmed broadcasts while shutting down or before we have
%% joined (members_state is only set once catchup completes).
handle_call({confirmed_broadcast, _Msg}, _From,
            State = #state { shutting_down = {true, _} }) ->
    reply(shutting_down, State);

handle_call({confirmed_broadcast, _Msg}, _From,
            State = #state { members_state = undefined }) ->
    reply(not_joined, State);

%% Ring of one (our right neighbour is ourselves): deliver straight to
%% our own callback and reply ok immediately — nothing to circulate.
handle_call({confirmed_broadcast, Msg}, _From,
            State = #state { self          = Self,
                             right         = {Self, undefined},
                             module        = Module,
                             callback_args = Args }) ->
    handle_callback_result({Module:handle_msg(Args, get_pid(Self), Msg),
                            ok, State});

%% General case: publish, remember {PubCount, From} so we can reply
%% when the ack comes back around, and flush so the message leaves now.
handle_call({confirmed_broadcast, Msg}, From, State) ->
    {Result, State1 = #state { pub_count = PubCount, confirms = Confirms }} =
        internal_broadcast(Msg, 0, State),
    Confirms1 = queue:in({PubCount, From}, Confirms),
    handle_callback_result({Result, flush_broadcast_buffer(
                                      State1 #state { confirms = Confirms1 })});

handle_call(info, _From,
            State = #state { members_state = undefined }) ->
    reply(not_joined, State);

handle_call(info, _From, State = #state { group_name = GroupName,
                                          module     = Module,
                                          view       = View }) ->
    reply([{group_name,    GroupName},
           {module,        Module},
           {group_members, get_pids(alive_view_members(View))}], State);

%% A joiner asked us to add it on our right before we have finished
%% our own catchup — tell it to retry.
handle_call({add_on_right, _NewMember}, _From,
            State = #state { members_state = undefined }) ->
    reply(not_ready, State);

%% Record the new member in mnesia, send it a catchup of our current
%% members_state, and adopt the new view ourselves. check_membership
%% throws lost_membership if we ourselves have been removed, in which
%% case we stop.
handle_call({add_on_right, NewMember}, _From,
            State = #state { self          = Self,
                             group_name    = GroupName,
                             members_state = MembersState,
                             txn_executor  = TxnFun }) ->
    try
        Group = record_new_member_in_group(
                  NewMember, Self, GroupName, TxnFun),
        View1 = group_to_view(check_membership(Self, Group)),
        MembersState1 = remove_erased_members(MembersState, View1),
        ok = send_right(NewMember, View1,
                        {catchup, Self, prepare_members_state(MembersState1)}),
        {Result, State1} = change_view(View1, State #state {
                                                members_state = MembersState1 }),
        handle_callback_result({Result, {ok, Group}, State1})
    catch
        lost_membership ->
            {stop, shutdown, State}
    end.
%% add_on_right causes a catchup to be sent immediately from the left,
%% so we can never see this from the left neighbour. However, it's
%% possible for the right neighbour to send us a check_neighbours
%% immediately before that. We can't possibly handle it, but if we're
%% in this state we know a catchup is coming imminently anyway. So
%% just ignore it.
handle_cast({?TAG, _ReqVer, check_neighbours},
            State = #state { members_state = undefined }) ->
    noreply(State);

%% A ring message from our left. If it carries a newer view version
%% than ours, refresh the view from mnesia first, then process Msg.
handle_cast({?TAG, ReqVer, Msg},
            State = #state { view          = View,
                             self          = Self,
                             members_state = MembersState,
                             group_name    = GroupName }) ->
    try
        {Result, State1} =
            case needs_view_update(ReqVer, View) of
                true ->
                    View1 = group_to_view(
                              check_membership(Self,
                                               dirty_read_group(GroupName))),
                    MemberState1 = remove_erased_members(MembersState, View1),
                    change_view(View1, State #state {
                                         members_state = MemberState1 });
                false -> {ok, State}
            end,
        handle_callback_result(
          if_callback_success(
            Result, fun handle_msg_true/3, fun handle_msg_false/3, Msg, State1))
    catch
        lost_membership ->
            {stop, shutdown, State}
    end;

%% Drop broadcasts while shutting down or before joining completes.
handle_cast({broadcast, _Msg, _SizeHint},
            State = #state { shutting_down = {true, _} }) ->
    noreply(State);

handle_cast({broadcast, _Msg, _SizeHint},
            State = #state { members_state = undefined }) ->
    noreply(State);

%% Ring of one: deliver locally, no buffering needed.
handle_cast({broadcast, Msg, _SizeHint},
            State = #state { self          = Self,
                             right         = {Self, undefined},
                             module        = Module,
                             callback_args = Args }) ->
    handle_callback_result({Module:handle_msg(Args, get_pid(Self), Msg),
                            State});

handle_cast({broadcast, Msg, SizeHint}, State) ->
    {Result, State1} = internal_broadcast(Msg, SizeHint, State),
    handle_callback_result({Result, maybe_flush_broadcast_buffer(State1)});

%% Complete the deferred join started in init/1: record ourselves in
%% the group, link up with neighbours, and invoke the joined callback.
%% If we are alone in the ring, our members_state is trivially blank;
%% otherwise it stays undefined until a catchup arrives from our left.
handle_cast(join, State = #state { self          = Self,
                                   group_name    = GroupName,
                                   members_state = undefined,
                                   module        = Module,
                                   callback_args = Args,
                                   txn_executor  = TxnFun }) ->
    try
        View = join_group(Self, GroupName, TxnFun),
        MembersState =
            case alive_view_members(View) of
                [Self] -> blank_member_state();
                _      -> undefined
            end,
        State1 = check_neighbours(State #state { view          = View,
                                                 members_state = MembersState }),
        handle_callback_result(
          {Module:joined(Args, get_pids(all_known_members(View))), State1})
    catch
        lost_membership ->
            {stop, shutdown, State}
    end;

%% Diff the caller's idea of the membership against ours and report
%% discrepancies through the members_changed callback.
handle_cast({validate_members, OldMembers},
            State = #state { view          = View,
                             module        = Module,
                             callback_args = Args }) ->
    NewMembers = get_pids(all_known_members(View)),
    Births = NewMembers -- OldMembers,
    Deaths = OldMembers -- NewMembers,
    case {Births, Deaths} of
        {[], []} -> noreply(State);
        _        -> Result = Module:members_changed(Args, Births, Deaths),
                    handle_callback_result({Result, State})
    end;

handle_cast(leave, State) ->
    {stop, normal, State}.


handle_info(force_gc, State) ->
    garbage_collect(),
    noreply(State #state { force_gc_timer = undefined });

handle_info(flush, State) ->
    noreply(
      flush_broadcast_buffer(State #state { broadcast_timer = undefined }));

%% gen_server timeout (set by flush_timeout/1 when the buffer is
%% non-empty): flush opportunistically.
handle_info(timeout, State) ->
    noreply(flush_broadcast_buffer(State));

%% The whole ring is shutting down: ignore neighbour deaths.
handle_info({'DOWN', _MRef, process, _Pid, _Reason},
            State = #state { shutting_down =
                                 {true, {shutdown, ring_shutdown}} }) ->
    noreply(State);
handle_info({'DOWN', MRef, process, _Pid, Reason},
            State = #state { self         = Self,
                             left         = Left,
                             right        = Right,
                             group_name   = GroupName,
                             confirms     = Confirms,
                             txn_executor = TxnFun }) ->
    try
        check_membership(GroupName),
        %% Only react to DOWNs from our current neighbours; any other
        %% monitor ref is stale.
        Member = case {Left, Right} of
                     {{Member1, MRef}, _} -> Member1;
                     {_, {Member1, MRef}} -> Member1;
                     _                    -> undefined
                 end,
        case {Member, Reason} of
            {undefined, _} ->
                noreply(State);
            {_, {shutdown, ring_shutdown}} ->
                noreply(State);
            _ ->
                %% In the event of a partial partition we could see another member
                %% go down and then remove them from Mnesia. While they can
                %% recover from this they'd have to restart the queue - not
                %% ideal. So let's sleep here briefly just in case this was caused
                %% by a partial partition; in which case by the time we record the
                %% member death in Mnesia we will probably be in a full
                %% partition and will not be assassinating another member.
                timer:sleep(100),
                View1 = group_to_view(record_dead_member_in_group(Self,
                                        Member, GroupName, TxnFun, true)),
                handle_callback_result(
                  case alive_view_members(View1) of
                      %% We are the last one standing: reset our state
                      %% and drop all outstanding confirms.
                      [Self] -> maybe_erase_aliases(
                                  State #state {
                                    members_state = blank_member_state(),
                                    confirms      = purge_confirms(Confirms) },
                                  View1);
                      _      -> change_view(View1, State)
                  end)
        end
    catch
        lost_membership ->
            {stop, shutdown, State}
    end;
handle_info(_, State) ->
    %% Discard any unexpected messages, such as late replies from neighbour_call/2
    %% TODO: For #gm_group{} related info messages, it could be worthwhile to
    %% change_view/2, as this might reflect an alteration in the gm group, meaning
    %% we now need to update our state. see rabbitmq-server#914.
    noreply(State).

terminate(Reason, #state { module = Module, callback_args = Args }) ->
    Module:handle_terminate(Args, Reason).

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

prioritise_info(flush, _Len, _State) ->
    1;
%% DOWN messages should not overtake initial catchups; if they do we
%% will receive a DOWN we do not know what to do with.
prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _Len,
                #state { members_state = undefined }) ->
    0;
%% We should not prioritise DOWN messages from our left since
%% otherwise the DOWN can overtake any last activity from the left,
%% causing that activity to be lost.
prioritise_info({'DOWN', _MRef, process, LeftPid, _Reason}, _Len,
                #state { left = {{_LeftVer, LeftPid}, _MRef2} }) ->
    0;
%% But prioritise all other DOWNs - we want to make sure we are not
%% sending activity into the void for too long because our right is
%% down but we don't know it.
prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _Len, _State) ->
    1;
prioritise_info(_, _Len, _State) ->
    0.


handle_msg(check_neighbours, State) ->
    %% no-op - it's already been done by the calling handle_cast
    {ok, State};

%% First catchup from our left: adopt its members_state wholesale and
%% forward the same catchup on to our right.
handle_msg({catchup, Left, MembersStateLeft},
           State = #state { self = Self,
                            left = {Left, _MRefL},
                            right = {Right, _MRefR},
                            view = View,
                            members_state = undefined }) ->
    ok = send_right(Right, View, {catchup, Self, MembersStateLeft}),
    MembersStateLeft1 = build_members_state(MembersStateLeft),
    {ok, State #state { members_state = MembersStateLeft1 }};

%% Subsequent catchup: diff the left's state against ours and convert
%% the differences into synthetic activity, which is then processed by
%% the ordinary activity path below.
handle_msg({catchup, Left, MembersStateLeft},
           State = #state { self = Self,
                            left = {Left, _MRefL},
                            view = View,
                            members_state = MembersState })
  when MembersState =/= undefined ->
    MembersStateLeft1 = build_members_state(MembersStateLeft),
    AllMembers = lists:usort(maps:keys(MembersState) ++
                                 maps:keys(MembersStateLeft1)),
    {MembersState1, Activity} =
        lists:foldl(
          fun (Id, MembersStateActivity) ->
                  #member { pending_ack = PALeft, last_ack = LA } =
                      find_member_or_blank(Id, MembersStateLeft1),
                  with_member_acc(
                    fun (#member { pending_ack = PA } = Member, Activity1) ->
                            case is_member_alias(Id, Self, View) of
                                true ->
                                    %% Publications we inherited: anything
                                    %% the left still has pending that we
                                    %% do not must be (re)published.
                                    {_AcksInFlight, Pubs, _PA1} =
                                        find_prefix_common_suffix(PALeft, PA),
                                    {Member #member { last_ack = LA },
                                     activity_cons(Id, pubs_from_queue(Pubs),
                                                   [], Activity1)};
                                false ->
                                    %% Third-party member: re-issue what the
                                    %% left is missing, ack what it has
                                    %% already moved past.
                                    {Acks, _Common, Pubs} =
                                        find_prefix_common_suffix(PA, PALeft),
                                    {Member,
                                     activity_cons(Id, pubs_from_queue(Pubs),
                                                   acks_from_queue(Acks),
                                                   Activity1)}
                            end
                    end, Id, MembersStateActivity)
          end, {MembersState, activity_nil()}, AllMembers),
    handle_msg({activity, Left, activity_finalise(Activity)},
               State #state { members_state = MembersState1 });

handle_msg({catchup, _NotLeft, _MembersState}, State) ->
    {ok, State};

handle_msg({activity, Left, Activity},
           State = #state { self          = Self,
                            group_name    = GroupName,
                            left          = {Left, _MRefL},
                            view          = View,
                            members_state = MembersState,
                            confirms      = Confirms })
  when MembersState =/= undefined ->
    try
        %% If we have to stop, do it asap so we avoid any ack confirmation
        %% Membership must be checked again by erase_members_in_group, as the
        %% node can be marked as dead in the meantime
        check_membership(GroupName),
        {MembersState1, {Confirms1, Activity1}} =
            calculate_activity(MembersState, Confirms, Activity, Self, View),
        State1 = State #state { members_state = MembersState1,
                                confirms      = Confirms1 },
        Activity3 = activity_finalise(Activity1),
        ok = maybe_send_activity(Activity3, State1),
        {Result, State2} = maybe_erase_aliases(State1, View),
        if_callback_success(
          Result, fun activity_true/3, fun activity_false/3, Activity3, State2)
    catch
        lost_membership ->
            {{stop, shutdown}, State}
    end;

handle_msg({activity, _NotLeft, _Activity}, State) ->
    {ok, State}.


noreply(State) ->
    {noreply, ensure_timers(State), flush_timeout(State)}.

reply(Reply, State) ->
    {reply, Reply, ensure_timers(State), flush_timeout(State)}.

ensure_timers(State) ->
    ensure_force_gc_timer(ensure_broadcast_timer(State)).

%% A non-empty buffer makes gen_server2 time out immediately (0) so we
%% flush on the next quiet moment; otherwise wait indefinitely.
flush_timeout(#state{broadcast_buffer = []}) -> infinity;
flush_timeout(_)                             -> 0.

ensure_force_gc_timer(State = #state { force_gc_timer = TRef })
  when is_reference(TRef) ->
    State;
ensure_force_gc_timer(State = #state { force_gc_timer = undefined }) ->
    TRef = erlang:send_after(?FORCE_GC_TIMER, self(), force_gc),
    State #state { force_gc_timer = TRef }.
%% Keep the flush timer in step with the buffer: cancel it when the
%% buffer is empty, start it when the buffer becomes non-empty.
ensure_broadcast_timer(State = #state { broadcast_buffer = [],
                                        broadcast_timer  = undefined }) ->
    State;
ensure_broadcast_timer(State = #state { broadcast_buffer = [],
                                        broadcast_timer  = TRef }) ->
    _ = erlang:cancel_timer(TRef),
    State #state { broadcast_timer = undefined };
ensure_broadcast_timer(State = #state { broadcast_timer = undefined }) ->
    TRef = erlang:send_after(?BROADCAST_TIMER, self(), flush),
    State #state { broadcast_timer = TRef };
ensure_broadcast_timer(State) ->
    State.

%% Assign the next publication number, deliver the message to our own
%% callback (self-delivery carries no group-wide significance — see the
%% handle_msg callback contract), and append it to the broadcast
%% buffer. Returns {CallbackResult, NewState}.
internal_broadcast(Msg, SizeHint,
                   State = #state { self                = Self,
                                    pub_count           = PubCount,
                                    module              = Module,
                                    callback_args       = Args,
                                    broadcast_buffer    = Buffer,
                                    broadcast_buffer_sz = BufferSize }) ->
    PubCount1 = PubCount + 1,
    {Module:handle_msg(Args, get_pid(Self), Msg),
     State #state { pub_count           = PubCount1,
                    broadcast_buffer    = [{PubCount1, Msg} | Buffer],
                    broadcast_buffer_sz = BufferSize + SizeHint}}.

%% The Erlang distribution mechanism has an interesting quirk - it
%% will kill the VM cold with "Absurdly large distribution output data
%% buffer" if you attempt to send a message which serialises out to
%% more than 2^31 bytes in size. It's therefore a very good idea to
%% make sure that we don't exceed that size!
%%
%% Now, we could figure out the size of messages as they come in using
%% size(term_to_binary(Msg)) or similar. The trouble is, that requires
%% us to serialise the message only to throw the serialised form
%% away. Hard to believe that's a sensible thing to do. So instead we
%% accept a size hint from the application, via broadcast/3. This size
%% hint can be the size of anything in the message which we expect
%% could be large, and we just ignore the size of any small bits of
%% the message term. Therefore MAX_BUFFER_SIZE is set somewhat
%% conservatively at 100MB - but the buffer is only to allow us to
%% buffer tiny messages anyway, so 100MB is plenty.
maybe_flush_broadcast_buffer(State = #state{broadcast_buffer_sz = Size}) ->
    case Size > ?MAX_BUFFER_SIZE of
        true  -> flush_broadcast_buffer(State);
        false -> State
    end.

%% Turn the buffered publications into one activity message for our
%% right neighbour and record them as pending our own ack.
flush_broadcast_buffer(State = #state { broadcast_buffer = [] }) ->
    State;
flush_broadcast_buffer(State = #state { self             = Self,
                                        members_state    = MembersState,
                                        broadcast_buffer = Buffer,
                                        pub_count        = PubCount }) ->
    [{PubCount, _Msg}|_] = Buffer, %% ASSERTION match on PubCount
    Pubs = lists:reverse(Buffer),  %% buffer is newest-first; send oldest-first
    Activity = activity_cons(Self, Pubs, [], activity_nil()),
    ok = maybe_send_activity(activity_finalise(Activity), State),
    MembersState1 = with_member(
                      fun (Member = #member { pending_ack = PA }) ->
                              PA1 = queue:join(PA, queue:from_list(Pubs)),
                              Member #member { pending_ack = PA1,
                                               last_pub = PubCount }
                      end, Self, MembersState),
    State #state { members_state       = MembersState1,
                   broadcast_buffer    = [],
                   broadcast_buffer_sz = 0 }.

%% ---------------------------------------------------------------------------
%% View construction and inspection
%% ---------------------------------------------------------------------------
%% A view is {Version, Map} where the map takes a member id to its
%% #view_member{} (neighbours plus inherited dead aliases).

needs_view_update(ReqVer, {Ver, _View}) -> Ver < ReqVer.

view_version({Ver, _View}) -> Ver.

%% Dead ring entries are tagged {dead, Member} in #gm_group.members.
is_member_alive({dead, _Member}) -> false;
is_member_alive(_)               -> true.

%% Is Member either Self or a dead member whose responsibilities Self
%% has inherited (i.e. one of Self's aliases)?
is_member_alias(Self, Self, _View) ->
    true;
is_member_alias(Member, Self, View) ->
    ?SETS:is_element(Member,
                     ((fetch_view_member(Self, View)) #view_member.aliases)).

dead_member_id({dead, Member}) -> Member.

store_view_member(VMember = #view_member { id = Id }, {Ver, View}) ->
    {Ver, maps:put(Id, VMember, View)}.

with_view_member(Fun, View, Id) ->
    store_view_member(Fun(fetch_view_member(Id, View)), View).

fetch_view_member(Id, {_Ver, View}) -> maps:get(Id, View).

find_view_member(Id, {_Ver, View}) -> maps:find(Id, View).

blank_view(Ver) -> {Ver, maps:new()}.

alive_view_members({_Ver, View}) -> maps:keys(View).
%% All members in the view, alive plus their inherited dead aliases.
all_known_members({_Ver, View}) ->
    maps:fold(
      fun (Member, #view_member { aliases = Aliases }, Acc) ->
              ?SETS:to_list(Aliases) ++ [Member | Acc]
      end, [], View).

%% Build an in-memory view from the durable group record: the alive
%% members form the ring; dead members become aliases of the alive
%% member to their right.
group_to_view(#gm_group { members = Members, version = Ver }) ->
    Alive = lists:filter(fun is_member_alive/1, Members),
    [_|_] = Alive, %% ASSERTION - can't have all dead members
    %% Tripling the list lets link_view see a [Left, Middle, Right]
    %% window for every member, including across the ring wrap-around.
    add_aliases(link_view(Alive ++ Alive ++ Alive, blank_view(Ver)), Members).

%% Slide a three-member window along the (tripled) alive list, storing
%% each member's neighbours; stop once we revisit a stored member.
link_view([Left, Middle, Right | Rest], View) ->
    case find_view_member(Middle, View) of
        error ->
            link_view(
              [Middle, Right | Rest],
              store_view_member(#view_member { id      = Middle,
                                               aliases = ?SETS:new(),
                                               left    = Left,
                                               right   = Right }, View));
        {ok, _} ->
            View
    end;
link_view(_, View) ->
    View.

%% Attach each run of dead members to the next alive member after them
%% in ring order (that member inherits their identities as aliases).
add_aliases(View, Members) ->
    Members1 = ensure_alive_suffix(Members),
    {EmptyDeadSet, View1} =
        lists:foldl(
          fun (Member, {DeadAcc, ViewAcc}) ->
                  case is_member_alive(Member) of
                      true ->
                          {?SETS:new(),
                           with_view_member(
                             fun (VMember =
                                      #view_member { aliases = Aliases }) ->
                                     VMember #view_member {
                                       aliases = ?SETS:union(Aliases, DeadAcc) }
                             end, ViewAcc, Member)};
                      false ->
                          {?SETS:add_element(dead_member_id(Member), DeadAcc),
                           ViewAcc}
                  end
          end, {?SETS:new(), View}, Members1),
    0 = ?SETS:size(EmptyDeadSet), %% ASSERTION
    View1.

%% Rotate dead members from the tail to the head so the list ends with
%% an alive member — guaranteeing every dead run is followed by an
%% alive member in the fold above.
ensure_alive_suffix(Members) ->
    queue:to_list(ensure_alive_suffix1(queue:from_list(Members))).

ensure_alive_suffix1(MembersQ) ->
    {{value, Member}, MembersQ1} = queue:out_r(MembersQ),
    case is_member_alive(Member) of
        true  -> MembersQ;
        false -> ensure_alive_suffix1(queue:in_r(Member, MembersQ1))
    end.


%% ---------------------------------------------------------------------------
%% View modification
%% ---------------------------------------------------------------------------

join_group(Self, GroupName, TxnFun) ->
    join_group(Self, GroupName, dirty_read_group(GroupName), TxnFun).
%% Join (or create) the group, retrying through the various races:
%% group missing, all members dead, chosen left neighbour dying while
%% we talk to it, or the neighbour not being ready yet.
join_group(Self, GroupName, {error, not_found}, TxnFun) ->
    join_group(Self, GroupName,
               prune_or_create_group(Self, GroupName, TxnFun), TxnFun);
join_group(Self, _GroupName, #gm_group { members = [Self] } = Group, _TxnFun) ->
    group_to_view(Group);
join_group(Self, GroupName, #gm_group { members = Members } = Group, TxnFun) ->
    case lists:member(Self, Members) of
        true ->
            group_to_view(Group);
        false ->
            case lists:filter(fun is_member_alive/1, Members) of
                [] ->
                    join_group(Self, GroupName,
                               prune_or_create_group(Self, GroupName, TxnFun),
                               TxnFun);
                Alive ->
                    %% Pick a random alive member to be our left
                    %% neighbour and ask it to splice us in.
                    Left = lists:nth(rand:uniform(length(Alive)), Alive),
                    Handler =
                        fun () ->
                                %% Our chosen left died mid-handshake:
                                %% record its death and retry.
                                join_group(
                                  Self, GroupName,
                                  record_dead_member_in_group(Self,
                                    Left, GroupName, TxnFun, false),
                                  TxnFun)
                        end,
                    try
                        case neighbour_call(Left, {add_on_right, Self}) of
                            {ok, Group1} -> group_to_view(Group1);
                            not_ready    -> join_group(Self, GroupName, TxnFun)
                        end
                    catch
                        exit:{R, _}
                          when R =:= noproc; R =:= normal; R =:= shutdown ->
                            Handler();
                        exit:{{R, _}, _}
                          when R =:= nodedown; R =:= shutdown ->
                            Handler()
                    end
            end
    end.

dirty_read_group(GroupName) ->
    case mnesia:dirty_read(?GROUP_TABLE, GroupName) of
        []      -> {error, not_found};
        [Group] -> Group
    end.

%% Transactional read; must run inside an mnesia transaction.
read_group(GroupName) ->
    case mnesia:read({?GROUP_TABLE, GroupName}) of
        []      -> {error, not_found};
        [Group] -> Group
    end.

write_group(Group) -> mnesia:write(?GROUP_TABLE, Group, write), Group.

%% Create the group, or recreate it from scratch if every recorded
%% member is dead; an existing group with live members wins.
prune_or_create_group(Self, GroupName, TxnFun) ->
    TxnFun(
      fun () ->
              GroupNew = #gm_group { name    = GroupName,
                                     members = [Self],
                                     version = get_version(Self) },
              case read_group(GroupName) of
                  {error, not_found} ->
                      write_group(GroupNew);
                  Group = #gm_group { members = Members } ->
                      case lists:any(fun is_member_alive/1, Members) of
                          true  -> Group;
                          false -> write_group(GroupNew)
                      end
              end
      end).
%% Mark Member as {dead, Member} in the durable group record, bumping
%% the version. When Verify is true we also check that Self is still a
%% live member (throwing lost_membership outside the txn if not);
%% false is used while joining, before Self is in the group.
record_dead_member_in_group(Self, Member, GroupName, TxnFun, Verify) ->
    Fun =
        fun () ->
                try
                    Group = #gm_group { members = Members, version = Ver } =
                        case Verify of
                            true ->
                                check_membership(Self, read_group(GroupName));
                            false ->
                                check_group(read_group(GroupName))
                        end,
                    case lists:splitwith(
                           fun (Member1) -> Member1 =/= Member end, Members) of
                        {_Members1, []} -> %% not found - already recorded dead
                            Group;
                        {Members1, [Member | Members2]} ->
                            Members3 = Members1 ++ [{dead, Member} | Members2],
                            write_group(Group #gm_group { members = Members3,
                                                          version = Ver + 1 })
                    end
                catch
                    lost_membership ->
                        %% The transaction must not be abruptly crashed, but
                        %% leave the gen_server to stop normally
                        {error, lost_membership}
                end
        end,
    handle_lost_membership_in_txn(TxnFun, Fun).

%% Re-raise lost_membership outside the mnesia transaction, so the
%% transaction itself commits/aborts cleanly and the gen_server can
%% stop normally from the catch sites in the handlers above.
handle_lost_membership_in_txn(TxnFun, Fun) ->
    case TxnFun(Fun) of
        {error, lost_membership = T} ->
            throw(T);
        Any ->
            Any
    end.

%% Splice NewMember into the ring immediately to the right of Left.
record_new_member_in_group(NewMember, Left, GroupName, TxnFun) ->
    Fun =
        fun () ->
                try
                    Group = #gm_group { members = Members, version = Ver } =
                        check_membership(Left, read_group(GroupName)),
                    case lists:member(NewMember, Members) of
                        true ->
                            %% This avoids duplicates during partial partitions,
                            %% as inconsistent views might happen during them
                            rabbit_log:warning("(~p) GM avoiding duplicate of ~p",
                                               [self(), NewMember]),
                            Group;
                        false ->
                            {Prefix, [Left | Suffix]} =
                                lists:splitwith(fun (M) -> M =/= Left end, Members),
                            write_group(Group #gm_group {
                                          members = Prefix ++ [Left, NewMember | Suffix],
                                          version = Ver + 1 })
                    end
                catch
                    lost_membership ->
                        %% The transaction must not be abruptly crashed, but
                        %% leave the gen_server to stop normally
                        {error, lost_membership}
                end
        end,
    handle_lost_membership_in_txn(TxnFun, Fun).
%% Remove the given (dead) members from the durable group record
%% entirely, bumping the version if anything actually changed.
erase_members_in_group(Self, Members, GroupName, TxnFun) ->
    DeadMembers = [{dead, Id} || Id <- Members],
    Fun =
        fun () ->
                try
                    Group = #gm_group { members = [_|_] = Members1, version = Ver } =
                        check_membership(Self, read_group(GroupName)),
                    case Members1 -- DeadMembers of
                        Members1 -> Group;
                        Members2 -> write_group(
                                      Group #gm_group { members = Members2,
                                                        version = Ver + 1 })
                    end
                catch
                    lost_membership ->
                        %% The transaction must not be abruptly crashed, but
                        %% leave the gen_server to stop normally
                        {error, lost_membership}
                end
        end,
    handle_lost_membership_in_txn(TxnFun, Fun).

%% Erase any of our dead aliases whose publications have all been
%% acked (last_pub == last_ack) - nothing of theirs is outstanding, so
%% they can be forgotten both locally and in mnesia.
maybe_erase_aliases(State = #state { self          = Self,
                                     group_name    = GroupName,
                                     members_state = MembersState,
                                     txn_executor  = TxnFun }, View) ->
    #view_member { aliases = Aliases } = fetch_view_member(Self, View),
    {Erasable, MembersState1}
        = ?SETS:fold(
            fun (Id, {ErasableAcc, MembersStateAcc} = Acc) ->
                    #member { last_pub = LP, last_ack = LA } =
                        find_member_or_blank(Id, MembersState),
                    case can_erase_view_member(Self, Id, LA, LP) of
                        true  -> {[Id | ErasableAcc],
                                  erase_member(Id, MembersStateAcc)};
                        false -> Acc
                    end
            end, {[], MembersState}, Aliases),
    View1 = case Erasable of
                [] -> View;
                _  -> group_to_view(
                        erase_members_in_group(Self, Erasable, GroupName, TxnFun))
            end,
    change_view(View1, State #state { members_state = MembersState1 }).

%% Never erase ourselves; erase an alias only once fully acked.
can_erase_view_member(Self, Self, _LA, _LP) -> false;
can_erase_view_member(_Self, _Id,  N,   N) -> true;
can_erase_view_member(_Self, _Id, _LA, _LP) -> false.

%% INSTR_MOD is presumably a macro selecting the instrumentation or
%% transport module (defined elsewhere in this file) - these wrap
%% plain cast/call to a neighbour's pid.
neighbour_cast(N, Msg) -> ?INSTR_MOD:cast(get_pid(N), Msg).
neighbour_call(N, Msg) -> ?INSTR_MOD:call(get_pid(N), Msg, infinity).
%% ---------------------------------------------------------------------------
%% View monitoring and maintenance
%% ---------------------------------------------------------------------------

%% Reconcile a stored neighbour {Member, MonitorRef} against the view:
%% monitor new neighbours, demonitor replaced ones, and nudge affected
%% members with check_neighbours so they re-examine their own links.
ensure_neighbour(_Ver, Self, {Self, undefined}, Self) ->
    {Self, undefined};
ensure_neighbour(Ver, Self, {Self, undefined}, RealNeighbour) ->
    ok = neighbour_cast(RealNeighbour, {?TAG, Ver, check_neighbours}),
    {RealNeighbour, maybe_monitor(RealNeighbour, Self)};
ensure_neighbour(_Ver, _Self, {RealNeighbour, MRef}, RealNeighbour) ->
    {RealNeighbour, MRef};
ensure_neighbour(Ver, Self, {RealNeighbour, MRef}, Neighbour) ->
    true = ?INSTR_MOD:demonitor(MRef),
    Msg = {?TAG, Ver, check_neighbours},
    ok = neighbour_cast(RealNeighbour, Msg),
    ok = case Neighbour of
             Self -> ok;
             _    -> neighbour_cast(Neighbour, Msg)
         end,
    {Neighbour, maybe_monitor(Neighbour, Self)}.

%% Never monitor ourselves.
maybe_monitor( Self,  Self) -> undefined;
maybe_monitor(Other, _Self) -> ?INSTR_MOD:monitor(get_pid(Other)).

%% Bring our left/right links in line with the current view; drop the
%% broadcast buffer if we have become a ring of one (nobody to send
%% to), and send a catchup if we acquired a new right neighbour.
check_neighbours(State = #state { self             = Self,
                                  left             = Left,
                                  right            = Right,
                                  view             = View,
                                  broadcast_buffer = Buffer }) ->
    #view_member { left = VLeft, right = VRight }
        = fetch_view_member(Self, View),
    Ver = view_version(View),
    Left1 = ensure_neighbour(Ver, Self, Left, VLeft),
    Right1 = ensure_neighbour(Ver, Self, Right, VRight),
    Buffer1 = case Right1 of
                  {Self, undefined} -> [];
                  _                 -> Buffer
              end,
    State1 = State #state { left = Left1, right = Right1,
                            broadcast_buffer = Buffer1 },
    ok = maybe_send_catchup(Right, State1),
    State1.

%% Send a catchup to a *changed* right neighbour only (first clause
%% matches when the right neighbour is unchanged).
maybe_send_catchup(Right, #state { right = Right }) ->
    ok;
maybe_send_catchup(_Right, #state { self  = Self,
                                    right = {Self, undefined} }) ->
    ok;
maybe_send_catchup(_Right, #state { members_state = undefined }) ->
    ok;
maybe_send_catchup(_Right, #state { self          = Self,
                                    right         = {Right, _MRef},
                                    view          = View,
                                    members_state = MembersState }) ->
    send_right(Right, View,
               {catchup, Self, prepare_members_state(MembersState)}).
%% ---------------------------------------------------------------------------
%% Catch_up delta detection
%% ---------------------------------------------------------------------------
%% A and B are queues of {PubNum, Msg} in ascending PubNum order.

find_prefix_common_suffix(A, B) ->
    {Prefix, A1} = find_prefix(A, B, queue:new()),
    {Common, Suffix} = find_common(A1, B, queue:new()),
    {Prefix, Common, Suffix}.

%% Returns the elements of A that occur before the first element of B,
%% plus the remainder of A.
find_prefix(A, B, Prefix) ->
    case {queue:out(A), queue:out(B)} of
        {{{value, Val}, _A1}, {{value, Val}, _B1}} ->
            {Prefix, A};
        {{empty, A1}, {{value, _A}, _B1}} ->
            {Prefix, A1};
        {{{value, {NumA, _MsgA} = Val}, A1},
         {{value, {NumB, _MsgB}}, _B1}} when NumA < NumB ->
            find_prefix(A1, B, queue:in(Val, Prefix));
        {_, {empty, _B1}} ->
            %% B is empty, so all of A is "before B": A becomes the
            %% prefix and the (empty) accumulator the remainder.
            {A, Prefix} %% Prefix will be empty here
    end.

%% A should be a prefix of B. Returns the commonality plus the
%% remainder of B.
find_common(A, B, Common) ->
    case {queue:out(A), queue:out(B)} of
        {{{value, Val}, A1}, {{value, Val}, B1}} ->
            find_common(A1, B1, queue:in(Val, Common));
        {{empty, _A}, _} ->
            {Common, B};
        %% Drop value from B.
        %% Match value to avoid infinite loop, since {empty, B} = queue:out(B).
        {_, {{value, _}, B1}} ->
            find_common(A, B1, Common);
        %% Drop value from A. Empty A should be matched by second close.
        {{{value, _}, A1}, _} ->
            find_common(A1, B, Common)
    end.


%% ---------------------------------------------------------------------------
%% Members helpers
%% ---------------------------------------------------------------------------

%% Apply Fun to the #member{} stored under Id (blank if absent) and
%% store the result back.
with_member(Fun, Id, MembersState) ->
    store_member(
      Id, Fun(find_member_or_blank(Id, MembersState)), MembersState).

%% As with_member/3, but Fun also threads an accumulator through.
with_member_acc(Fun, Id, {MembersState, Acc}) ->
    {MemberState, Acc1} = Fun(find_member_or_blank(Id, MembersState), Acc),
    {store_member(Id, MemberState, MembersState), Acc1}.
%% Look up a member's state, falling back to a pristine #member{} for
%% an id we have not seen yet.
find_member_or_blank(Id, MembersState) ->
    maps:get(Id, MembersState, blank_member()).

%% Forget everything we know about Id.
erase_member(Id, MembersState) ->
    maps:remove(Id, MembersState).

%% A member with no pending publications; the -1 sentinels mirror the
%% initial pub_count of a freshly started member.
blank_member() ->
    #member { pending_ack = queue:new(), last_pub = -1, last_ack = -1 }.

%% An empty members_state map.
blank_member_state() ->
    #{}.

store_member(Id, MemberState, MembersState) ->
    MembersState#{Id => MemberState}.

%% Serialisation to/from the list form carried in catchup messages.
prepare_members_state(MembersState) ->
    maps:to_list(MembersState).

build_members_state(MembersStateList) ->
    maps:from_list(MembersStateList).

%% Our own member id: the group's version at (re)start, paired with
%% our pid.
make_member(GroupName) ->
    Version = case dirty_read_group(GroupName) of
                  #gm_group { version = V } -> V;
                  {error, not_found}        -> ?VERSION_START
              end,
    {Version, self()}.

%% Rebuild members_state keeping only members still known to the view,
%% preserving any state we already had for them (blank for new ones).
remove_erased_members(MembersState, View) ->
    maps:from_list([{Id, find_member_or_blank(Id, MembersState)}
                    || Id <- all_known_members(View)]).

get_version({Version, _Pid}) -> Version.

get_pid({_Version, Pid}) -> Pid.

get_pids(Ids) -> [P || {_V, P} <- Ids].

%% ---------------------------------------------------------------------------
%% Activity assembly
%% ---------------------------------------------------------------------------
%% An activity is a queue of {Sender, Pubs, Acks} triples.

activity_nil() -> queue:new().

%% Consing is a no-op when there is nothing to send for this sender.
activity_cons(_Id, [], [], Tail) ->
    Tail;
activity_cons(Sender, Pubs, Acks, Tail) ->
    queue:in({Sender, Pubs, Acks}, Tail).

activity_finalise(Activity) -> queue:to_list(Activity).

%% Forward non-empty activity to our right neighbour — unless we are a
%% ring of one, in which case there is nobody to forward to.
maybe_send_activity([], _State) ->
    ok;
maybe_send_activity(Activity, #state { self  = Self,
                                       right = {Right, _MRefR},
                                       view  = View }) ->
    send_right(Right, View, {activity, Self, Activity}).

send_right(Right, View, Msg) ->
    Packet = {?TAG, view_version(View), Msg},
    ok = neighbour_cast(Right, Packet).
+ +calculate_activity(MembersState, Confirms, Activity, Self, View) -> + lists:foldl( + fun ({Id, Pubs, Acks}, MembersStateConfirmsActivity) -> + with_member_acc( + fun (Member = #member { pending_ack = PA, + last_pub = LP, + last_ack = LA }, + {Confirms2, Activity2}) -> + case is_member_alias(Id, Self, View) of + true -> + {ToAck, PA1} = + find_common(queue_from_pubs(Pubs), PA, + queue:new()), + LA1 = last_ack(Acks, LA), + AckNums = acks_from_queue(ToAck), + Confirms3 = maybe_confirm( + Self, Id, Confirms2, AckNums), + {Member #member { pending_ack = PA1, + last_ack = LA1 }, + {Confirms3, + activity_cons( + Id, [], AckNums, Activity2)}}; + false -> + PA1 = apply_acks(Acks, join_pubs(PA, Pubs)), + LA1 = last_ack(Acks, LA), + LP1 = last_pub(Pubs, LP), + {Member #member { pending_ack = PA1, + last_pub = LP1, + last_ack = LA1 }, + {Confirms2, + activity_cons(Id, Pubs, Acks, Activity2)}} + end + end, Id, MembersStateConfirmsActivity) + end, {MembersState, {Confirms, activity_nil()}}, Activity). + +callback(Args, Module, Activity) -> + Result = + lists:foldl( + fun ({Id, Pubs, _Acks}, {Args1, Module1, ok}) -> + lists:foldl(fun ({_PubNum, Pub}, Acc = {Args2, Module2, ok}) -> + case Module2:handle_msg( + Args2, get_pid(Id), Pub) of + ok -> + Acc; + {become, Module3, Args3} -> + {Args3, Module3, ok}; + {stop, _Reason} = Error -> + Error + end; + (_, Error = {stop, _Reason}) -> + Error + end, {Args1, Module1, ok}, Pubs); + (_, Error = {stop, _Reason}) -> + Error + end, {Args, Module, ok}, Activity), + case Result of + {Args, Module, ok} -> ok; + {Args1, Module1, ok} -> {become, Module1, Args1}; + {stop, _Reason} = Error -> Error + end. 
+ +change_view(View, State = #state { view = View0, + module = Module, + callback_args = Args }) -> + OldMembers = all_known_members(View0), + NewMembers = all_known_members(View), + Births = NewMembers -- OldMembers, + Deaths = OldMembers -- NewMembers, + Result = case {Births, Deaths} of + {[], []} -> ok; + _ -> Module:members_changed( + Args, get_pids(Births), get_pids(Deaths)) + end, + {Result, check_neighbours(State #state { view = View })}. + +handle_callback_result({Result, State}) -> + if_callback_success( + Result, fun no_reply_true/3, fun no_reply_false/3, undefined, State); +handle_callback_result({Result, Reply, State}) -> + if_callback_success( + Result, fun reply_true/3, fun reply_false/3, Reply, State). + +no_reply_true (_Result, _Undefined, State) -> noreply(State). +no_reply_false({stop, Reason}, _Undefined, State) -> {stop, Reason, State}. + +reply_true (_Result, Reply, State) -> reply(Reply, State). +reply_false({stop, Reason}, Reply, State) -> {stop, Reason, Reply, State}. + +handle_msg_true (_Result, Msg, State) -> handle_msg(Msg, State). +handle_msg_false(Result, _Msg, State) -> {Result, State}. + +activity_true(_Result, Activity, State = #state { module = Module, + callback_args = Args }) -> + {callback(Args, Module, Activity), State}. +activity_false(Result, _Activity, State) -> + {Result, State}. + +if_callback_success(Result, True, False, Arg, State) -> + {NewResult, NewState} = maybe_stop(Result, State), + if_callback_success1(NewResult, True, False, Arg, NewState). + +if_callback_success1(ok, True, _False, Arg, State) -> + True(ok, Arg, State); +if_callback_success1( + {become, Module, Args} = Result, True, _False, Arg, State) -> + True(Result, Arg, State #state { module = Module, + callback_args = Args }); +if_callback_success1({stop, _Reason} = Result, _True, False, Arg, State) -> + False(Result, Arg, State). 
+
+%% Defers a requested stop until all buffered or unacked messages have
+%% drained; the stop reason is parked in shutting_down in the meantime.
+maybe_stop({stop, Reason}, #state{ shutting_down = false } = State) ->
+    ShuttingDown = {true, Reason},
+    case has_pending_messages(State) of
+        true  -> {ok, State #state{ shutting_down = ShuttingDown }};
+        false -> {{stop, Reason}, State #state{ shutting_down = ShuttingDown }}
+    end;
+maybe_stop(Result, #state{ shutting_down = false } = State) ->
+    {Result, State};
+%% Already shutting down: stop as soon as the pending messages are gone.
+maybe_stop(Result, #state{ shutting_down = {true, Reason} } = State) ->
+    case has_pending_messages(State) of
+        true  -> {Result, State};
+        false -> {{stop, Reason}, State}
+    end.
+
+%% True when the broadcast buffer is non-empty or any member still has
+%% publishes that have not been acked (last_pub =/= last_ack).
+has_pending_messages(#state{ broadcast_buffer = Buffer })
+  when Buffer =/= [] ->
+    true;
+has_pending_messages(#state{ members_state = MembersState }) ->
+    MembersWithPubAckMismatches = maps:filter(fun(_Id, #member{last_pub = LP, last_ack = LA}) ->
+                                                      LP =/= LA
+                                              end, MembersState),
+    0 =/= maps:size(MembersWithPubAckMismatches).
+
+%% Replies 'ok' to callers waiting on publish confirms. Confirms is a
+%% queue of {PubNum, From} in ascending PubNum order; only acks for our
+%% own publishes (Id =:= Self) confirm anything.
+maybe_confirm(_Self, _Id, Confirms, []) ->
+    Confirms;
+maybe_confirm(Self, Self, Confirms, [PubNum | PubNums]) ->
+    case queue:out(Confirms) of
+        {empty, _Confirms} ->
+            Confirms;
+        {{value, {PubNum, From}}, Confirms1} ->
+            gen_server2:reply(From, ok),
+            maybe_confirm(Self, Self, Confirms1, PubNums);
+        %% Ack predates the oldest waiting confirm: skip this ack.
+        {{value, {PubNum1, _From}}, _Confirms} when PubNum1 > PubNum ->
+            maybe_confirm(Self, Self, Confirms, PubNums)
+    end;
+maybe_confirm(_Self, _Id, Confirms, _PubNums) ->
+    Confirms.
+
+%% Replies 'ok' to every outstanding confirm and resets the queue.
+purge_confirms(Confirms) ->
+    _ = [gen_server2:reply(From, ok) || {_PubNum, From} <- queue:to_list(Confirms)],
+    queue:new().
+
+
+%% ---------------------------------------------------------------------------
+%% Msg transformation
+%% ---------------------------------------------------------------------------
+
+%% Extracts the publish numbers from a queue of {PubNum, Msg} pairs.
+acks_from_queue(Q) -> [PubNum || {PubNum, _Msg} <- queue:to_list(Q)].
+
+pubs_from_queue(Q) -> queue:to_list(Q).
+
+queue_from_pubs(Pubs) -> queue:from_list(Pubs).
+
+%% Drops one pending publish per ack; acks always cover the head of Pubs.
+apply_acks(  [], Pubs) -> Pubs;
+apply_acks(List, Pubs) -> {_, Pubs1} = queue:split(length(List), Pubs),
+                          Pubs1.
+ +join_pubs(Q, []) -> Q; +join_pubs(Q, Pubs) -> queue:join(Q, queue_from_pubs(Pubs)). + +last_ack( [], LA) -> LA; +last_ack(List, LA) -> LA1 = lists:last(List), + true = LA1 > LA, %% ASSERTION + LA1. + +last_pub( [], LP) -> LP; +last_pub(List, LP) -> {PubNum, _Msg} = lists:last(List), + true = PubNum > LP, %% ASSERTION + PubNum. + +%% --------------------------------------------------------------------------- + +%% Uninstrumented versions + +call(Pid, Msg, Timeout) -> gen_server2:call(Pid, Msg, Timeout). +cast(Pid, Msg) -> gen_server2:cast(Pid, Msg). +monitor(Pid) -> erlang:monitor(process, Pid). +demonitor(MRef) -> erlang:demonitor(MRef). + +check_membership(Self, #gm_group{members = M} = Group) -> + case lists:member(Self, M) of + true -> + Group; + false -> + throw(lost_membership) + end; +check_membership(_Self, {error, not_found}) -> + throw(lost_membership). + +check_membership(GroupName) -> + case dirty_read_group(GroupName) of + #gm_group{members = M} -> + case lists:keymember(self(), 2, M) of + true -> + ok; + false -> + throw(lost_membership) + end; + {error, not_found} -> + throw(lost_membership) + end. + +check_group({error, not_found}) -> + throw(lost_membership); +check_group(Any) -> + Any. diff --git a/deps/rabbit/src/internal_user.erl b/deps/rabbit/src/internal_user.erl new file mode 100644 index 0000000000..b2bdcb6785 --- /dev/null +++ b/deps/rabbit/src/internal_user.erl @@ -0,0 +1,216 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(internal_user). + +-include_lib("rabbit_common/include/rabbit.hrl"). 
+ +-export([ + new/0, + new/1, + record_version_to_use/0, + fields/0, + fields/1, + upgrade/1, + upgrade_to/2, + pattern_match_all/0, + get_username/1, + get_password_hash/1, + get_tags/1, + get_hashing_algorithm/1, + get_limits/1, + create_user/3, + set_password_hash/3, + set_tags/2, + update_limits/3, + clear_limits/1 +]). + +-define(record_version, internal_user_v2). + +-type(username() :: binary()). + +-type(password_hash() :: binary()). + +-type internal_user() :: internal_user_v1:internal_user_v1() | internal_user_v2(). + +-record(internal_user, { + username :: username() | '_', + password_hash :: password_hash() | '_', + tags :: [atom()] | '_', + %% password hashing implementation module, + %% typically rabbit_password_hashing_* but can + %% come from a plugin + hashing_algorithm :: atom() | '_', + limits = #{} :: map() | '_'}). + +-type(internal_user_v2() :: + #internal_user{username :: username() | '_', + password_hash :: password_hash() | '_', + tags :: [atom()] | '_', + hashing_algorithm :: atom() | '_', + limits :: map()}). + +-type internal_user_pattern() :: internal_user_v1:internal_user_v1_pattern() | + internal_user_v2_pattern(). + +-type internal_user_v2_pattern() :: #internal_user{ + username :: username() | '_', + password_hash :: '_', + tags :: '_', + hashing_algorithm :: '_', + limits :: '_' + }. + +-export_type([username/0, + password_hash/0, + internal_user/0, + internal_user_v2/0, + internal_user_pattern/0, + internal_user_v2_pattern/0]). + +-spec new() -> internal_user(). +new() -> + case record_version_to_use() of + ?record_version -> + #internal_user{ + username = <<"">>, + password_hash = <<"">>, + tags = [] + }; + _ -> + internal_user_v1:new() + end. + +-spec new(tuple()) -> internal_user(). 
+new({hashing_algorithm, HashingAlgorithm}) -> + case record_version_to_use() of + ?record_version -> + #internal_user{ + username = <<"">>, + password_hash = <<"">>, + tags = [], + hashing_algorithm = HashingAlgorithm + }; + _ -> + internal_user_v1:new({hashing_algorithm, HashingAlgorithm}) + end; +new({tags, Tags}) -> + case record_version_to_use() of + ?record_version -> + #internal_user{ + username = <<"">>, + password_hash = <<"">>, + tags = Tags + }; + _ -> + internal_user_v1:new({tags, Tags}) + end. + +-spec record_version_to_use() -> internal_user_v1 | internal_user_v2. +record_version_to_use() -> + case rabbit_feature_flags:is_enabled(user_limits) of + true -> ?record_version; + false -> internal_user_v1:record_version_to_use() + end. + +-spec fields() -> list(). +fields() -> + case record_version_to_use() of + ?record_version -> fields(?record_version); + _ -> internal_user_v1:fields() + end. + +-spec fields(atom()) -> list(). +fields(?record_version) -> record_info(fields, internal_user); +fields(Version) -> internal_user_v1:fields(Version). + +-spec upgrade(internal_user()) -> internal_user(). +upgrade(#internal_user{} = User) -> User; +upgrade(OldUser) -> upgrade_to(record_version_to_use(), OldUser). + +-spec upgrade_to +(internal_user_v2, internal_user()) -> internal_user_v2(); +(internal_user_v1, internal_user_v1:internal_user_v1()) -> internal_user_v1:internal_user_v1(). + +upgrade_to(?record_version, #internal_user{} = User) -> + User; +upgrade_to(?record_version, OldUser) -> + Fields = erlang:tuple_to_list(OldUser) ++ [#{}], + #internal_user{} = erlang:list_to_tuple(Fields); +upgrade_to(Version, OldUser) -> + internal_user_v1:upgrade_to(Version, OldUser). + +-spec pattern_match_all() -> internal_user_pattern(). +pattern_match_all() -> + case record_version_to_use() of + ?record_version -> #internal_user{_ = '_'}; + _ -> internal_user_v1:pattern_match_all() + end. + +-spec get_username(internal_user()) -> username(). 
+get_username(#internal_user{username = Value}) -> Value; +get_username(User) -> internal_user_v1:get_username(User). + +-spec get_password_hash(internal_user()) -> password_hash(). +get_password_hash(#internal_user{password_hash = Value}) -> Value; +get_password_hash(User) -> internal_user_v1:get_password_hash(User). + +-spec get_tags(internal_user()) -> [atom()]. +get_tags(#internal_user{tags = Value}) -> Value; +get_tags(User) -> internal_user_v1:get_tags(User). + +-spec get_hashing_algorithm(internal_user()) -> atom(). +get_hashing_algorithm(#internal_user{hashing_algorithm = Value}) -> Value; +get_hashing_algorithm(User) -> internal_user_v1:get_hashing_algorithm(User). + +-spec get_limits(internal_user()) -> map(). +get_limits(#internal_user{limits = Value}) -> Value; +get_limits(User) -> internal_user_v1:get_limits(User). + +-spec create_user(username(), password_hash(), atom()) -> internal_user(). +create_user(Username, PasswordHash, HashingMod) -> + case record_version_to_use() of + ?record_version -> + #internal_user{username = Username, + password_hash = PasswordHash, + tags = [], + hashing_algorithm = HashingMod, + limits = #{} + }; + _ -> + internal_user_v1:create_user(Username, PasswordHash, HashingMod) + end. + +-spec set_password_hash(internal_user(), password_hash(), atom()) -> internal_user(). +set_password_hash(#internal_user{} = User, PasswordHash, HashingAlgorithm) -> + User#internal_user{password_hash = PasswordHash, + hashing_algorithm = HashingAlgorithm}; +set_password_hash(User, PasswordHash, HashingAlgorithm) -> + internal_user_v1:set_password_hash(User, PasswordHash, HashingAlgorithm). + +-spec set_tags(internal_user(), [atom()]) -> internal_user(). +set_tags(#internal_user{} = User, Tags) -> + User#internal_user{tags = Tags}; +set_tags(User, Tags) -> + internal_user_v1:set_tags(User, Tags). + +-spec update_limits +(add, internal_user(), map()) -> internal_user(); +(remove, internal_user(), term()) -> internal_user(). 
+update_limits(add, #internal_user{limits = Limits} = User, Term) -> + User#internal_user{limits = maps:merge(Limits, Term)}; +update_limits(remove, #internal_user{limits = Limits} = User, LimitType) -> + User#internal_user{limits = maps:remove(LimitType, Limits)}; +update_limits(Action, User, Term) -> + internal_user_v1:update_limits(Action, User, Term). + +-spec clear_limits(internal_user()) -> internal_user(). +clear_limits(#internal_user{} = User) -> + User#internal_user{limits = #{}}; +clear_limits(User) -> + internal_user_v1:clear_limits(User). diff --git a/deps/rabbit/src/internal_user_v1.erl b/deps/rabbit/src/internal_user_v1.erl new file mode 100644 index 0000000000..edb956436f --- /dev/null +++ b/deps/rabbit/src/internal_user_v1.erl @@ -0,0 +1,151 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(internal_user_v1). + +-include_lib("rabbit_common/include/rabbit.hrl"). + +-export([ + new/0, + new/1, + record_version_to_use/0, + fields/0, + fields/1, + upgrade/1, + upgrade_to/2, + pattern_match_all/0, + get_username/1, + get_password_hash/1, + get_tags/1, + get_hashing_algorithm/1, + get_limits/1, + create_user/3, + set_password_hash/3, + set_tags/2, + update_limits/3, + clear_limits/1 +]). + +-define(record_version, ?MODULE). + +-record(internal_user, { + username :: internal_user:username() | '_', + password_hash :: internal_user:password_hash() | '_', + tags :: [atom()] | '_', + %% password hashing implementation module, + %% typically rabbit_password_hashing_* but can + %% come from a plugin + hashing_algorithm :: atom() | '_'}). + +-type internal_user() :: internal_user_v1(). 
+ +-type(internal_user_v1() :: + #internal_user{username :: internal_user:username(), + password_hash :: internal_user:password_hash(), + tags :: [atom()], + hashing_algorithm :: atom()}). + +-type internal_user_pattern() :: internal_user_v1_pattern(). + +-type internal_user_v1_pattern() :: #internal_user{ + username :: internal_user:username() | '_', + password_hash :: '_', + tags :: '_', + hashing_algorithm :: '_' + }. + +-export_type([internal_user/0, + internal_user_v1/0, + internal_user_pattern/0, + internal_user_v1_pattern/0]). + +-spec record_version_to_use() -> internal_user_v1. +record_version_to_use() -> + ?record_version. + +-spec new() -> internal_user(). +new() -> + #internal_user{ + username = <<"">>, + password_hash = <<"">>, + tags = [] + }. + +-spec new(tuple()) -> internal_user(). +new({hashing_algorithm, HashingAlgorithm}) -> + #internal_user{ + username = <<"">>, + password_hash = <<"">>, + hashing_algorithm = HashingAlgorithm, + tags = [] + }; +new({tags, Tags}) -> + #internal_user{ + username = <<"">>, + password_hash = <<"">>, + tags = Tags + }. + +-spec fields() -> list(). +fields() -> fields(?record_version). + +-spec fields(atom()) -> list(). +fields(?record_version) -> record_info(fields, internal_user). + +-spec upgrade(internal_user()) -> internal_user(). +upgrade(#internal_user{} = User) -> User. + +-spec upgrade_to(internal_user_v1, internal_user()) -> internal_user(). +upgrade_to(?record_version, #internal_user{} = User) -> + User. + +-spec pattern_match_all() -> internal_user_pattern(). +pattern_match_all() -> #internal_user{_ = '_'}. + +-spec get_username(internal_user()) -> internal_user:username(). +get_username(#internal_user{username = Value}) -> Value. + +-spec get_password_hash(internal_user()) -> internal_user:password_hash(). +get_password_hash(#internal_user{password_hash = Value}) -> Value. + +-spec get_tags(internal_user()) -> [atom()]. +get_tags(#internal_user{tags = Value}) -> Value. 
+ +-spec get_hashing_algorithm(internal_user()) -> atom(). +get_hashing_algorithm(#internal_user{hashing_algorithm = Value}) -> Value. + +-spec get_limits(internal_user()) -> map(). +get_limits(_User) -> #{}. + +-spec create_user(internal_user:username(), internal_user:password_hash(), + atom()) -> internal_user(). +create_user(Username, PasswordHash, HashingMod) -> + #internal_user{username = Username, + password_hash = PasswordHash, + tags = [], + hashing_algorithm = HashingMod + }. + +-spec set_password_hash(internal_user:internal_user(), + internal_user:password_hash(), atom()) -> internal_user(). +set_password_hash(#internal_user{} = User, PasswordHash, HashingAlgorithm) -> + User#internal_user{password_hash = PasswordHash, + hashing_algorithm = HashingAlgorithm}. + +-spec set_tags(internal_user(), [atom()]) -> internal_user(). +set_tags(#internal_user{} = User, Tags) -> + User#internal_user{tags = Tags}. + +-spec update_limits +(add, internal_user(), map()) -> internal_user(); +(remove, internal_user(), term()) -> internal_user(). +update_limits(_, User, _) -> + User. + +-spec clear_limits(internal_user()) -> internal_user(). +clear_limits(User) -> + User. diff --git a/deps/rabbit/src/lager_exchange_backend.erl b/deps/rabbit/src/lager_exchange_backend.erl new file mode 100644 index 0000000000..cd96f2230e --- /dev/null +++ b/deps/rabbit/src/lager_exchange_backend.erl @@ -0,0 +1,233 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +%% @doc RabbitMQ backend for lager. +%% Configuration is a proplist with the following keys: +%% <ul> +%% <li>`level' - log level to use</li> +%% <li>`formatter' - the module to use when formatting log messages. 
Defaults to +%% `lager_default_formatter'</li> +%% <li>`formatter_config' - the format configuration string. Defaults to +%% `time [ severity ] message'</li> +%% </ul> + +-module(lager_exchange_backend). + +-behaviour(gen_event). + +-export([init/1, terminate/2, code_change/3, + handle_call/2, handle_event/2, handle_info/2]). + +-export([maybe_init_exchange/0]). + +-include("rabbit.hrl"). +-include("rabbit_framing.hrl"). + +-include_lib("lager/include/lager.hrl"). + +-record(state, {level :: {'mask', integer()}, + formatter :: atom(), + format_config :: any(), + init_exchange_ts = undefined :: integer() | undefined, + exchange = undefined :: #resource{} | undefined}). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile([{parse_transform, lager_transform}]). +-endif. + +-define(INIT_EXCHANGE_INTERVAL_SECS, 5). +-define(TERSE_FORMAT, [time, " [", severity, "] ", message]). +-define(DEFAULT_FORMAT_CONFIG, ?TERSE_FORMAT). +-define(FORMAT_CONFIG_OFF, []). + +-ifdef(TEST). +-define(DEPRECATED(_Msg), ok). +-else. +-define(DEPRECATED(Msg), + io:format(user, "WARNING: This is a deprecated lager_exchange_backend configuration. Please use \"~w\" instead.~n", [Msg])). +-endif. + +-define(LOG_EXCH_NAME, <<"amq.rabbitmq.log">>). 
+
+%% gen_event callback. The atom / two-element-list forms below exist for
+%% backwards compatibility with older lager configuration styles; they
+%% all normalise to the proplist form handled by the is_list/1 clause.
+init([Level]) when is_atom(Level) ->
+    ?DEPRECATED([{level, Level}]),
+    init([{level, Level}]);
+init([Level, true]) when is_atom(Level) -> % for backwards compatibility
+    ?DEPRECATED([{level, Level}, {formatter_config, [{eol, "\\r\\n\\"}]}]),
+    init([{level, Level}, {formatter_config, ?FORMAT_CONFIG_OFF}]);
+init([Level, false]) when is_atom(Level) -> % for backwards compatibility
+    ?DEPRECATED([{level, Level}]),
+    init([{level, Level}]);
+
+init(Options) when is_list(Options) ->
+    true = validate_options(Options),
+    Level = get_option(level, Options, undefined),
+    %% config_to_mask/1 throws on an unknown level; translate that into a
+    %% fatal bad_log_level error instead of crashing the event manager.
+    try lager_util:config_to_mask(Level) of
+        L ->
+            DefaultOptions = [{formatter, lager_default_formatter},
+                              {formatter_config, ?DEFAULT_FORMAT_CONFIG}],
+            [Formatter, Config] = [get_option(K, Options, Default) || {K, Default} <- DefaultOptions],
+            State0 = #state{level=L,
+                            formatter=Formatter,
+                            format_config=Config},
+            % NB: this will probably always fail since the / vhost isn't available
+            State1 = maybe_init_exchange(State0),
+            {ok, State1}
+    catch
+        _:_ ->
+            {error, {fatal, bad_log_level}}
+    end;
+init(Level) when is_atom(Level) ->
+    ?DEPRECATED([{level, Level}]),
+    init([{level, Level}]);
+init(Other) ->
+    {error, {fatal, {bad_lager_exchange_backend_config, Other}}}.
+
+% rabbitmq/rabbitmq-server#1973
+% This is called immediately after the / vhost is created
+% or recovered
+maybe_init_exchange() ->
+    case lists:member(?MODULE, gen_event:which_handlers(lager_event)) of
+        true ->
+            _ = init_exchange(true),
+            ok;
+        _ ->
+            ok
+    end.
+ +validate_options([]) -> true; +validate_options([{level, L}|T]) when is_atom(L) -> + case lists:member(L, ?LEVELS) of + false -> + throw({error, {fatal, {bad_level, L}}}); + true -> + validate_options(T) + end; +validate_options([{formatter, M}|T]) when is_atom(M) -> + validate_options(T); +validate_options([{formatter_config, C}|T]) when is_list(C) -> + validate_options(T); +validate_options([H|_]) -> + throw({error, {fatal, {bad_lager_exchange_backend_config, H}}}). + +get_option(K, Options, Default) -> + case lists:keyfind(K, 1, Options) of + {K, V} -> V; + false -> Default + end. + +handle_call(get_loglevel, #state{level=Level} = State) -> + {ok, Level, State}; +handle_call({set_loglevel, Level}, State) -> + try lager_util:config_to_mask(Level) of + Levels -> + {ok, ok, State#state{level=Levels}} + catch + _:_ -> + {ok, {error, bad_log_level}, State} + end; +handle_call(_Request, State) -> + {ok, ok, State}. + +handle_event({log, _Message} = Event, State0) -> + State1 = maybe_init_exchange(State0), + handle_log_event(Event, State1); +handle_event(_Event, State) -> + {ok, State}. + +handle_info(_Info, State) -> + {ok, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% @private +handle_log_event({log, _Message}, #state{exchange=undefined} = State) -> + % NB: tried to define the exchange but still undefined, + % so not logging this message. Note: we can't log this dropped + % message because it will start an infinite loop + {ok, State}; +handle_log_event({log, Message}, + #state{level=L, exchange=LogExch, + formatter=Formatter, format_config=FormatConfig} = State) -> + case lager_util:is_loggable(Message, L, ?MODULE) of + true -> + %% 0-9-1 says the timestamp is a "64 bit POSIX timestamp". That's + %% second resolution, not millisecond. 
+ RoutingKey = rabbit_data_coercion:to_binary(lager_msg:severity(Message)), + Timestamp = os:system_time(seconds), + Node = rabbit_data_coercion:to_binary(node()), + Headers = [{<<"node">>, longstr, Node}], + AmqpMsg = #'P_basic'{content_type = <<"text/plain">>, + timestamp = Timestamp, + headers = Headers}, + Body = rabbit_data_coercion:to_binary(Formatter:format(Message, FormatConfig)), + case rabbit_basic:publish(LogExch, RoutingKey, AmqpMsg, Body) of + ok -> ok; + {error, not_found} -> ok + end, + {ok, State}; + false -> + {ok, State} + end. + +%% @private +maybe_init_exchange(#state{exchange=undefined, init_exchange_ts=undefined} = State) -> + Now = erlang:monotonic_time(second), + handle_init_exchange(init_exchange(true), Now, State); +maybe_init_exchange(#state{exchange=undefined, init_exchange_ts=Timestamp} = State) -> + Now = erlang:monotonic_time(second), + % NB: since we may try to declare the exchange on every log message, this ensures + % that we only try once every 5 seconds + HasEnoughTimeElapsed = Now - Timestamp > ?INIT_EXCHANGE_INTERVAL_SECS, + Result = init_exchange(HasEnoughTimeElapsed), + handle_init_exchange(Result, Now, State); +maybe_init_exchange(State) -> + State. + +%% @private +init_exchange(true) -> + {ok, DefaultVHost} = application:get_env(rabbit, default_vhost), + Exchange = rabbit_misc:r(DefaultVHost, exchange, ?LOG_EXCH_NAME), + try + %% durable + #exchange{} = rabbit_exchange:declare(Exchange, topic, true, false, true, [], ?INTERNAL_USER), + rabbit_log:info("Declared exchange '~s' in vhost '~s'", [?LOG_EXCH_NAME, DefaultVHost]), + {ok, Exchange} + catch + ErrType:Err -> + rabbit_log:error("Could not declare exchange '~s' in vhost '~s', reason: ~p:~p", + [?LOG_EXCH_NAME, DefaultVHost, ErrType, Err]), + {ok, undefined} + end; +init_exchange(_) -> + {ok, undefined}. 
+ +%% @private +handle_init_exchange({ok, undefined}, Now, State) -> + State#state{init_exchange_ts=Now}; +handle_init_exchange({ok, Exchange}, Now, State) -> + State#state{exchange=Exchange, init_exchange_ts=Now}. + +-ifdef(TEST). +console_config_validation_test_() -> + Good = [{level, info}], + Bad1 = [{level, foo}], + Bad2 = [{larval, info}], + AllGood = [{level, info}, {formatter, my_formatter}, + {formatter_config, ["blort", "garbage"]}], + [ + ?_assertEqual(true, validate_options(Good)), + ?_assertThrow({error, {fatal, {bad_level, foo}}}, validate_options(Bad1)), + ?_assertThrow({error, {fatal, {bad_lager_exchange_backend_config, {larval, info}}}}, validate_options(Bad2)), + ?_assertEqual(true, validate_options(AllGood)) + ]. +-endif. diff --git a/deps/rabbit/src/lqueue.erl b/deps/rabbit/src/lqueue.erl new file mode 100644 index 0000000000..1e267210d9 --- /dev/null +++ b/deps/rabbit/src/lqueue.erl @@ -0,0 +1,102 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2011-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(lqueue). + +%% lqueue implements a subset of Erlang's queue module. lqueues +%% maintain their own length, so lqueue:len/1 +%% is an O(1) operation, in contrast with queue:len/1 which is O(n). + +-export([new/0, is_empty/1, len/1, in/2, in_r/2, out/1, out_r/1, join/2, + foldl/3, foldr/3, from_list/1, drop/1, to_list/1, peek/1, peek_r/1]). + +-define(QUEUE, queue). + +-export_type([ + ?MODULE/0, + ?MODULE/1 + ]). + +-opaque ?MODULE() :: ?MODULE(_). +-opaque ?MODULE(T) :: {non_neg_integer(), queue:queue(T)}. +-type value() :: any(). +-type result(T) :: 'empty' | {'value', T}. + +-spec new() -> ?MODULE(_). + +new() -> {0, ?QUEUE:new()}. + +-spec drop(?MODULE(T)) -> ?MODULE(T). + +drop({L, Q}) -> {L - 1, ?QUEUE:drop(Q)}. 
+
+-spec is_empty(?MODULE(_)) -> boolean().
+
+%% O(1) thanks to the cached length.
+is_empty({0, _Q}) -> true;
+is_empty(_)       -> false.
+
+-spec in(T, ?MODULE(T)) -> ?MODULE(T).
+
+%% Enqueue at the rear.
+in(V, {L, Q}) -> {L+1, ?QUEUE:in(V, Q)}.
+
+-spec in_r(value(), ?MODULE(T)) -> ?MODULE(T).
+
+%% Enqueue at the front.
+in_r(V, {L, Q}) -> {L+1, ?QUEUE:in_r(V, Q)}.
+
+-spec out(?MODULE(T)) -> {result(T), ?MODULE(T)}.
+
+%% Dequeue from the front; 'empty' when the queue has no elements.
+out({0, _Q} = Q) -> {empty, Q};
+out({L, Q})      -> {Result, Q1} = ?QUEUE:out(Q),
+                    {Result, {L-1, Q1}}.
+
+-spec out_r(?MODULE(T)) -> {result(T), ?MODULE(T)}.
+
+%% Dequeue from the rear.
+out_r({0, _Q} = Q) -> {empty, Q};
+out_r({L, Q})      -> {Result, Q1} = ?QUEUE:out_r(Q),
+                      {Result, {L-1, Q1}}.
+
+-spec join(?MODULE(A), ?MODULE(B)) -> ?MODULE(A | B).
+
+%% Appends the second queue to the first; lengths are summed, so this
+%% stays O(1) on the length bookkeeping.
+join({L1, Q1}, {L2, Q2}) -> {L1 + L2, ?QUEUE:join(Q1, Q2)}.
+
+-spec to_list(?MODULE(T)) -> [T].
+
+to_list({_L, Q}) -> ?QUEUE:to_list(Q).
+
+-spec from_list([T]) -> ?MODULE(T).
+
+from_list(L) -> {length(L), ?QUEUE:from_list(L)}.
+
+-spec foldl(fun ((T, B) -> B), B, ?MODULE(T)) -> B.
+
+%% Left fold, front to rear.
+foldl(Fun, Init, Q) ->
+    case out(Q) of
+        {empty, _Q}      -> Init;
+        {{value, V}, Q1} -> foldl(Fun, Fun(V, Init), Q1)
+    end.
+
+-spec foldr(fun ((T, B) -> B), B, ?MODULE(T)) -> B.
+
+%% Right fold, rear to front.
+foldr(Fun, Init, Q) ->
+    case out_r(Q) of
+        {empty, _Q}      -> Init;
+        {{value, V}, Q1} -> foldr(Fun, Fun(V, Init), Q1)
+    end.
+
+-spec len(?MODULE(_)) -> non_neg_integer().
+
+%% O(1), unlike queue:len/1 which is O(n).
+len({L, _}) -> L.
+
+-spec peek(?MODULE(T)) -> result(T).
+
+peek({ 0, _Q}) -> empty;
+peek({_L, Q}) -> ?QUEUE:peek(Q).
+
+-spec peek_r(?MODULE(T)) -> result(T).
+
+peek_r({ 0, _Q}) -> empty;
+peek_r({_L, Q}) -> ?QUEUE:peek_r(Q).
diff --git a/deps/rabbit/src/mirrored_supervisor_sups.erl b/deps/rabbit/src/mirrored_supervisor_sups.erl
new file mode 100644
index 0000000000..b29d4d48e6
--- /dev/null
+++ b/deps/rabbit/src/mirrored_supervisor_sups.erl
@@ -0,0 +1,34 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%% +%% Copyright (c) 2011-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(mirrored_supervisor_sups). + +-define(SUPERVISOR, supervisor2). +-define(GS_MODULE, mirrored_supervisor). + +-behaviour(?SUPERVISOR). + +-export([init/1]). + +%%---------------------------------------------------------------------------- + +init({overall, _Group, _TxFun, ignore}) -> ignore; +init({overall, Group, TxFun, {ok, {Restart, ChildSpecs}}}) -> + %% Important: Delegate MUST start before Mirroring so that when we + %% shut down from above it shuts down last, so Mirroring does not + %% see it die. + %% + %% See comment in handle_info('DOWN', ...) in mirrored_supervisor + {ok, {{one_for_all, 0, 1}, + [{delegate, {?SUPERVISOR, start_link, [?MODULE, {delegate, Restart}]}, + temporary, 16#ffffffff, supervisor, [?SUPERVISOR]}, + {mirroring, {?GS_MODULE, start_internal, [Group, TxFun, ChildSpecs]}, + permanent, 16#ffffffff, worker, [?MODULE]}]}}; + + +init({delegate, Restart}) -> + {ok, {Restart, []}}. diff --git a/deps/rabbit/src/pg_local.erl b/deps/rabbit/src/pg_local.erl new file mode 100644 index 0000000000..263e743d1f --- /dev/null +++ b/deps/rabbit/src/pg_local.erl @@ -0,0 +1,249 @@ +%% This file is a copy of pg2.erl from the R13B-3 Erlang/OTP +%% distribution, with the following modifications: +%% +%% 1) Process groups are node-local only. +%% +%% 2) Groups are created/deleted implicitly. +%% +%% 3) 'join' and 'leave' are asynchronous. +%% +%% 4) the type specs of the exported non-callback functions have been +%% extracted into a separate, guarded section, and rewritten in +%% old-style spec syntax, for better compatibility with older +%% versions of Erlang/OTP. The remaining type specs have been +%% removed. + +%% All modifications are (C) 2010-2020 VMware, Inc. or its affiliates. + +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. 
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at https://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
-module(pg_local).

-export([join/2, leave/2, get_members/1, in_group/2]).
%% intended for testing only; not part of official API
-export([sync/0, clear/0]).
-export([start/0, start_link/0, init/1, handle_call/3, handle_cast/2,
         handle_info/2, terminate/2]).

%%----------------------------------------------------------------------------

%% A group name can be any term.
-type name() :: term().

%%----------------------------------------------------------------------------

-define(TABLE, pg_local_table).

%%%
%%% Exported functions
%%%

-spec start_link() -> {'ok', pid()} | {'error', any()}.

start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

-spec start() -> {'ok', pid()} | {'error', any()}.

start() ->
    ensure_started().

-spec join(name(), pid()) -> 'ok'.

%% Asynchronously add Pid to group Name (see modification note 3 at the
%% top of this file: 'join' is a cast, so it can race with readers).
join(Name, Pid) when is_pid(Pid) ->
    _ = ensure_started(),
    gen_server:cast(?MODULE, {join, Name, Pid}).

-spec leave(name(), pid()) -> 'ok'.

%% Asynchronously remove one membership of Pid from group Name.
leave(Name, Pid) when is_pid(Pid) ->
    _ = ensure_started(),
    gen_server:cast(?MODULE, {leave, Name, Pid}).

-spec get_members(name()) -> [pid()].

%% Read the group directly from the (protected) ETS table; no server
%% round-trip, hence no synchronisation with in-flight joins.
get_members(Name) ->
    _ = ensure_started(),
    group_members(Name).

-spec in_group(name(), pid()) -> boolean().

in_group(Name, Pid) ->
    _ = ensure_started(),
    %% The join message is a cast and thus can race, but we want to
    %% keep it that way to be fast in the common case.
    case member_present(Name, Pid) of
        true  -> true;
        false -> sync(),
                 %% After sync() all previously-cast joins have been
                 %% processed, so this second check is authoritative.
                 member_present(Name, Pid)
    end.

-spec sync() -> 'ok'.

%% Barrier: a call that is only answered once every cast sent before it
%% has been handled.
sync() ->
    _ = ensure_started(),
    gen_server:call(?MODULE, sync, infinity).

%% Testing helper: wipe all bookkeeping (no -spec, matching the original).
clear() ->
    _ = ensure_started(),
    gen_server:call(?MODULE, clear, infinity).

%%%
%%% Callback functions from gen_server
%%%

-record(state, {}).

init([]) ->
    %% Named, protected table: readers access it directly, only this
    %% server writes to it.
    ?TABLE = ets:new(?TABLE, [ordered_set, protected, named_table]),
    {ok, #state{}}.

handle_call(sync, _From, S) ->
    {reply, ok, S};

handle_call(clear, _From, S) ->
    ets:delete_all_objects(?TABLE),
    {reply, ok, S};

handle_call(Request, From, S) ->
    %% NOTE(review): an unexpected call is logged but never replied to,
    %% so the caller's gen_server:call will block until its timeout.
    %% This appears deliberate (inherited from pg2) — confirm before
    %% changing.
    error_logger:warning_msg("The pg_local server received an unexpected message:\n"
                             "handle_call(~p, ~p, _)\n",
                             [Request, From]),
    {noreply, S}.

handle_cast({join, Name, Pid}, S) ->
    _ = join_group(Name, Pid),
    {noreply, S};
handle_cast({leave, Name, Pid}, S) ->
    leave_group(Name, Pid),
    {noreply, S};
handle_cast(_, S) ->
    {noreply, S}.

handle_info({'DOWN', MonitorRef, process, Pid, _Info}, S) ->
    %% A member process died: drop all of its memberships.
    member_died(MonitorRef, Pid),
    {noreply, S};
handle_info(_, S) ->
    {noreply, S}.

terminate(_Reason, _S) ->
    true = ets:delete(?TABLE),
    ok.

%%%
%%% Local functions
%%%

%%% One ETS table, pg_local_table, is used for bookkeeping. The type of the
%%% table is ordered_set, and the fast matching of partially
%%% instantiated keys is used extensively.
%%%
%%% {{ref, Pid}, MonitorRef, Counter}
%%% {{ref, MonitorRef}, Pid}
%%%    Each process has one monitor. Counter is incremented when the
%%%    Pid joins some group.
%%% {{member, Name, Pid}, _}
%%%    Pid is a member of group Name, GroupCounter is incremented when the
%%%    Pid joins the group Name.
%%% {{pid, Pid, Name}}
%%%    Pid is a member of group Name.
%% A monitored member process died: remove every membership it held.
%%
%% NOTE: the original looked up the {ref, Ref} bookkeeping entry first,
%% but both branches of the case — entry present and entry already
%% removed — performed the exact same cleanup, because the 'DOWN'
%% message itself carries the member Pid. The redundant lookup (pure
%% read, no side effects) has been removed; behaviour is unchanged.
member_died(_Ref, Pid) ->
    leave_all_groups(Pid),
    ok.

%% Remove Pid from all groups it belongs to, once per join.
leave_all_groups(Pid) ->
    Names = member_groups(Pid),
    _ = [leave_group(Name, P) ||
            Name <- Names,
            P <- member_in_group(Pid, Name)].

%% Record one membership of Pid in group Name. The per-process monitor
%% is created only on the first join of that Pid to any group; the
%% try/catch pattern relies on ets:update_counter/3 throwing badarg when
%% the key does not exist yet, which is the "insert fresh row" path.
join_group(Name, Pid) ->
    Ref_Pid = {ref, Pid},
    try _ = ets:update_counter(?TABLE, Ref_Pid, {3, +1})
    catch _:_ ->
            Ref = erlang:monitor(process, Pid),
            true = ets:insert(?TABLE, {Ref_Pid, Ref, 1}),
            true = ets:insert(?TABLE, {{ref, Ref}, Pid})
    end,
    Member_Name_Pid = {member, Name, Pid},
    try _ = ets:update_counter(?TABLE, Member_Name_Pid, {2, +1})
    catch _:_ ->
            true = ets:insert(?TABLE, {Member_Name_Pid, 1}),
            true = ets:insert(?TABLE, {{pid, Pid, Name}})
    end.

%% Remove one membership of Pid in group Name. Rows (and the monitor)
%% are torn down when their counters reach zero. If the {member, ...}
%% row does not exist, update_counter throws and we treat the call as a
%% no-op — leaving a group one is not in is not an error (pg2 semantics).
leave_group(Name, Pid) ->
    Member_Name_Pid = {member, Name, Pid},
    try ets:update_counter(?TABLE, Member_Name_Pid, {2, -1}) of
        N ->
            if
                N =:= 0 ->
                    true = ets:delete(?TABLE, {pid, Pid, Name}),
                    true = ets:delete(?TABLE, Member_Name_Pid);
                true ->
                    ok
            end,
            Ref_Pid = {ref, Pid},
            case ets:update_counter(?TABLE, Ref_Pid, {3, -1}) of
                0 ->
                    %% Last membership of this process anywhere: drop
                    %% the monitor and its two bookkeeping rows.
                    [{Ref_Pid, Ref, 0}] = ets:lookup(?TABLE, Ref_Pid),
                    true = ets:delete(?TABLE, {ref, Ref}),
                    true = ets:delete(?TABLE, Ref_Pid),
                    true = erlang:demonitor(Ref, [flush]),
                    ok;
                _ ->
                    ok
            end
    catch _:_ ->
            ok
    end.

%% All members of Name; a pid that joined N times appears N times.
group_members(Name) ->
    [P ||
        [P, N] <- ets:match(?TABLE, {{member, Name, '$1'}, '$2'}),
        _ <- lists:seq(1, N)].

%% Pid duplicated once per membership count in Name. Crashes (on
%% purpose) if the pair is not in the table; callers only pass names
%% obtained from member_groups/1.
member_in_group(Pid, Name) ->
    [{{member, Name, Pid}, N}] = ets:lookup(?TABLE, {member, Name, Pid}),
    lists:duplicate(N, Pid).

%% Fast membership test: key lookup only, no counting.
member_present(Name, Pid) ->
    case ets:lookup(?TABLE, {member, Name, Pid}) of
        [_] -> true;
        []  -> false
    end.

%% All group names Pid currently belongs to.
member_groups(Pid) ->
    [Name || [Name] <- ets:match(?TABLE, {{pid, Pid, '$1'}})].
%% Start the pg_local server under kernel_safe_sup on first use, or
%% return the already-registered pid. Registration is by module name,
%% so whereis/1 doubles as the "is it running" check.
ensure_started() ->
    case whereis(?MODULE) of
        undefined ->
            C = {pg_local, {?MODULE, start_link, []}, permanent,
                 16#ffffffff, worker, [?MODULE]},
            supervisor:start_child(kernel_safe_sup, C);
        PgLocalPid ->
            {ok, PgLocalPid}
    end.

%% ==== deps/rabbit/src/rabbit.erl (new file, mode 100644,
%% ==== index 0000000000..9248c945dc, @@ -0,0 +1,1511 @@) ====

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit).

%% Transitional step until we can require Erlang/OTP 21 and
%% use the now recommended try/catch syntax for obtaining the stack trace.
-compile(nowarn_deprecated_function).

-behaviour(application).

-export([start/0, boot/0, stop/0,
         stop_and_halt/0, await_startup/0, await_startup/1, await_startup/3,
         status/0, is_running/0, alarms/0,
         is_running/1, environment/0, rotate_logs/0, force_event_refresh/1,
         start_fhc/0]).

-export([start/2, stop/1, prep_stop/1]).
-export([start_apps/1, start_apps/2, stop_apps/1]).
-export([product_info/0,
         product_name/0,
         product_version/0,
         base_product_name/0,
         base_product_version/0,
         motd_file/0,
         motd/0]).
-export([log_locations/0, config_files/0]). %% for testing and mgmt-agent
-export([is_booted/1, is_booted/0, is_booting/1, is_booting/0]).

%%---------------------------------------------------------------------------
%% Boot steps.
-export([maybe_insert_default_data/0, boot_delegate/0, recover/0]).

%% for tests
-export([validate_msg_store_io_batch_size_and_credit_disc_bound/2]).

-rabbit_boot_step({pre_boot, [{description, "rabbit boot start"}]}).
+ +-rabbit_boot_step({codec_correctness_check, + [{description, "codec correctness check"}, + {mfa, {rabbit_binary_generator, + check_empty_frame_size, + []}}, + {requires, pre_boot}, + {enables, external_infrastructure}]}). + +%% rabbit_alarm currently starts memory and disk space monitors +-rabbit_boot_step({rabbit_alarm, + [{description, "alarm handler"}, + {mfa, {rabbit_alarm, start, []}}, + {requires, pre_boot}, + {enables, external_infrastructure}]}). + +-rabbit_boot_step({feature_flags, + [{description, "feature flags registry and initial state"}, + {mfa, {rabbit_feature_flags, init, []}}, + {requires, pre_boot}, + {enables, external_infrastructure}]}). + +-rabbit_boot_step({database, + [{mfa, {rabbit_mnesia, init, []}}, + {requires, file_handle_cache}, + {enables, external_infrastructure}]}). + +-rabbit_boot_step({database_sync, + [{description, "database sync"}, + {mfa, {rabbit_sup, start_child, [mnesia_sync]}}, + {requires, database}, + {enables, external_infrastructure}]}). + +-rabbit_boot_step({code_server_cache, + [{description, "code_server cache server"}, + {mfa, {rabbit_sup, start_child, [code_server_cache]}}, + {requires, rabbit_alarm}, + {enables, file_handle_cache}]}). + +-rabbit_boot_step({file_handle_cache, + [{description, "file handle cache server"}, + {mfa, {rabbit, start_fhc, []}}, + %% FHC needs memory monitor to be running + {requires, code_server_cache}, + {enables, worker_pool}]}). + +-rabbit_boot_step({worker_pool, + [{description, "default worker pool"}, + {mfa, {rabbit_sup, start_supervisor_child, + [worker_pool_sup]}}, + {requires, pre_boot}, + {enables, external_infrastructure}]}). + +-rabbit_boot_step({definition_import_worker_pool, + [{description, "dedicated worker pool for definition import"}, + {mfa, {rabbit_definitions, boot, []}}, + {requires, external_infrastructure}]}). + +-rabbit_boot_step({external_infrastructure, + [{description, "external infrastructure ready"}]}). 
+ +-rabbit_boot_step({rabbit_registry, + [{description, "plugin registry"}, + {mfa, {rabbit_sup, start_child, + [rabbit_registry]}}, + {requires, external_infrastructure}, + {enables, kernel_ready}]}). + +-rabbit_boot_step({rabbit_core_metrics, + [{description, "core metrics storage"}, + {mfa, {rabbit_sup, start_child, + [rabbit_metrics]}}, + {requires, pre_boot}, + {enables, external_infrastructure}]}). + +-rabbit_boot_step({rabbit_osiris_metrics, + [{description, "osiris metrics scraper"}, + {mfa, {rabbit_sup, start_child, + [rabbit_osiris_metrics]}}, + {requires, pre_boot}, + {enables, external_infrastructure}]}). + +%% -rabbit_boot_step({rabbit_stream_coordinator, +%% [{description, "stream queues coordinator"}, +%% {mfa, {rabbit_stream_coordinator, start, +%% []}}, +%% {requires, pre_boot}, +%% {enables, external_infrastructure}]}). + +-rabbit_boot_step({rabbit_event, + [{description, "statistics event manager"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_event]}}, + {requires, external_infrastructure}, + {enables, kernel_ready}]}). + +-rabbit_boot_step({kernel_ready, + [{description, "kernel ready"}, + {requires, external_infrastructure}]}). + +-rabbit_boot_step({rabbit_memory_monitor, + [{description, "memory monitor"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_memory_monitor]}}, + {requires, rabbit_alarm}, + {enables, core_initialized}]}). + +-rabbit_boot_step({guid_generator, + [{description, "guid generator"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_guid]}}, + {requires, kernel_ready}, + {enables, core_initialized}]}). + +-rabbit_boot_step({delegate_sup, + [{description, "cluster delegate"}, + {mfa, {rabbit, boot_delegate, []}}, + {requires, kernel_ready}, + {enables, core_initialized}]}). 
+ +-rabbit_boot_step({rabbit_node_monitor, + [{description, "node monitor"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_node_monitor]}}, + {requires, [rabbit_alarm, guid_generator]}, + {enables, core_initialized}]}). + +-rabbit_boot_step({rabbit_epmd_monitor, + [{description, "epmd monitor"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_epmd_monitor]}}, + {requires, kernel_ready}, + {enables, core_initialized}]}). + +-rabbit_boot_step({rabbit_sysmon_minder, + [{description, "sysmon_handler supervisor"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_sysmon_minder]}}, + {requires, kernel_ready}, + {enables, core_initialized}]}). + +-rabbit_boot_step({core_initialized, + [{description, "core initialized"}, + {requires, kernel_ready}]}). + +-rabbit_boot_step({upgrade_queues, + [{description, "per-vhost message store migration"}, + {mfa, {rabbit_upgrade, + maybe_migrate_queues_to_per_vhost_storage, + []}}, + {requires, [core_initialized]}, + {enables, recovery}]}). + +-rabbit_boot_step({recovery, + [{description, "exchange, queue and binding recovery"}, + {mfa, {rabbit, recover, []}}, + {requires, [core_initialized]}, + {enables, routing_ready}]}). + +-rabbit_boot_step({empty_db_check, + [{description, "empty DB check"}, + {mfa, {?MODULE, maybe_insert_default_data, []}}, + {requires, recovery}, + {enables, routing_ready}]}). + +-rabbit_boot_step({routing_ready, + [{description, "message delivery logic ready"}, + {requires, [core_initialized, recovery]}]}). + +-rabbit_boot_step({connection_tracking, + [{description, "connection tracking infrastructure"}, + {mfa, {rabbit_connection_tracking, boot, []}}, + {enables, routing_ready}]}). + +-rabbit_boot_step({channel_tracking, + [{description, "channel tracking infrastructure"}, + {mfa, {rabbit_channel_tracking, boot, []}}, + {enables, routing_ready}]}). 
+ +-rabbit_boot_step({background_gc, + [{description, "background garbage collection"}, + {mfa, {rabbit_sup, start_restartable_child, + [background_gc]}}, + {requires, [core_initialized, recovery]}, + {enables, routing_ready}]}). + +-rabbit_boot_step({rabbit_core_metrics_gc, + [{description, "background core metrics garbage collection"}, + {mfa, {rabbit_sup, start_restartable_child, + [rabbit_core_metrics_gc]}}, + {requires, [core_initialized, recovery]}, + {enables, routing_ready}]}). + +-rabbit_boot_step({rabbit_looking_glass, + [{description, "Looking Glass tracer and profiler"}, + {mfa, {rabbit_looking_glass, boot, []}}, + {requires, [core_initialized, recovery]}, + {enables, routing_ready}]}). + +-rabbit_boot_step({pre_flight, + [{description, "ready to communicate with peers and clients"}, + {requires, [core_initialized, recovery, routing_ready]}]}). + +-rabbit_boot_step({cluster_name, + [{description, "sets cluster name if configured"}, + {mfa, {rabbit_nodes, boot, []}}, + {requires, pre_flight} + ]}). + +-rabbit_boot_step({direct_client, + [{description, "direct client"}, + {mfa, {rabbit_direct, boot, []}}, + {requires, pre_flight} + ]}). + +-rabbit_boot_step({notify_cluster, + [{description, "notifies cluster peers of our presence"}, + {mfa, {rabbit_node_monitor, notify_node_up, []}}, + {requires, pre_flight}]}). + +-rabbit_boot_step({networking, + [{description, "TCP and TLS listeners (backwards compatibility)"}, + {mfa, {rabbit_log, debug, ["'networking' boot step skipped and moved to end of startup", []]}}, + {requires, notify_cluster}]}). + +%%--------------------------------------------------------------------------- + +-include("rabbit_framing.hrl"). +-include("rabbit.hrl"). + +-define(APPS, [os_mon, mnesia, rabbit_common, rabbitmq_prelaunch, ra, sysmon_handler, rabbit, osiris]). + +-define(ASYNC_THREADS_WARNING_THRESHOLD, 8). + +%% 1 minute +-define(BOOT_START_TIMEOUT, 1 * 60 * 1000). +%% 12 hours +-define(BOOT_FINISH_TIMEOUT, 12 * 60 * 60 * 1000). 
%% 100 ms
-define(BOOT_STATUS_CHECK_INTERVAL, 100).

%%----------------------------------------------------------------------------

-type restart_type() :: 'permanent' | 'transient' | 'temporary'.

-type param() :: atom().
-type app_name() :: atom().

%%----------------------------------------------------------------------------

-spec start() -> 'ok'.

start() ->
    %% start() vs. boot(): we want to throw an error in start().
    start_it(temporary).

-spec boot() -> 'ok'.

boot() ->
    %% start() vs. boot(): we want the node to exit in boot(). Because
    %% applications are started with `transient`, any error during their
    %% startup will abort the node.
    start_it(transient).

%% Runs inside the `rabbit` application start callback; the first
%% prelaunch phase was already executed by the rabbitmq_prelaunch
%% application.
run_prelaunch_second_phase() ->
    %% Finish the prelaunch phase started by the `rabbitmq_prelaunch`
    %% application.
    %%
    %% The first phase was handled by the `rabbitmq_prelaunch`
    %% application. It was started in one of the following way:
    %% - from an Erlang release boot script;
    %% - from the rabbit:boot/0 or rabbit:start/0 functions.
    %%
    %% The `rabbitmq_prelaunch` application creates the context map from
    %% the environment and the configuration files early during Erlang
    %% VM startup. Once it is done, all application environments are
    %% configured (in particular `mnesia` and `ra`).
    %%
    %% This second phase depends on other modules & facilities of
    %% RabbitMQ core. That's why we need to run it now, from the
    %% `rabbit` application start function.

    %% We assert Mnesia is stopped before we run the prelaunch
    %% phases. See `rabbit_prelaunch` for an explanation.
    %%
    %% This is the second assertion, just in case Mnesia is started
    %% between the two prelaunch phases.
    rabbit_prelaunch:assert_mnesia_is_stopped(),

    %% Get the context created by `rabbitmq_prelaunch` then proceed
    %% with all steps in this phase.
    #{initial_pass := IsInitialPass} =
        Context = rabbit_prelaunch:get_context(),

    case IsInitialPass of
        true ->
            rabbit_log_prelaunch:debug(""),
            rabbit_log_prelaunch:debug(
              "== Prelaunch phase [2/2] (initial pass) ==");
        false ->
            rabbit_log_prelaunch:debug(""),
            rabbit_log_prelaunch:debug("== Prelaunch phase [2/2] =="),
            ok
    end,

    %% 1. Enabled plugins file.
    ok = rabbit_prelaunch_enabled_plugins_file:setup(Context),

    %% 2. Feature flags registry.
    ok = rabbit_prelaunch_feature_flags:setup(Context),

    %% 3. Logging.
    ok = rabbit_prelaunch_logging:setup(Context),

    %% 4. Clustering.
    ok = rabbit_prelaunch_cluster:setup(Context),

    %% Start Mnesia now that everything is ready.
    rabbit_log_prelaunch:debug("Starting Mnesia"),
    ok = mnesia:start(),

    rabbit_log_prelaunch:debug(""),
    rabbit_log_prelaunch:debug("== Prelaunch DONE =="),

    case IsInitialPass of
        true  -> rabbit_prelaunch:initial_pass_finished();
        false -> ok
    end,
    ok.

%% Common driver behind start/0 and boot/0. StartType (temporary vs
%% transient) decides both how the applications are supervised and how a
%% startup failure is surfaced (throw vs exit, see the catch clause).
start_it(StartType) ->
    case spawn_boot_marker() of
        {ok, Marker} ->
            T0 = erlang:timestamp(),
            rabbit_log:info("RabbitMQ is asked to start...", []),
            try
                {ok, _} = application:ensure_all_started(rabbitmq_prelaunch,
                                                         StartType),
                {ok, _} = application:ensure_all_started(rabbit,
                                                         StartType),
                ok = wait_for_ready_or_stopped(),

                T1 = erlang:timestamp(),
                rabbit_log_prelaunch:debug(
                  "Time to start RabbitMQ: ~p µs",
                  [timer:now_diff(T1, T0)]),
                stop_boot_marker(Marker),
                ok
            catch
                %% Only badmatch from the assertions above is caught
                %% here; anything else propagates.
                error:{badmatch, Error}:_ ->
                    stop_boot_marker(Marker),
                    case StartType of
                        temporary -> throw(Error);
                        _         -> exit(Error)
                    end
            end;
        {already_booting, Marker} ->
            stop_boot_marker(Marker),
            ok
    end.

%% Block until the node reaches the `ready` boot state, or — if it never
%% does — until it reaches `stopped`, in which case the recorded stop
%% reason is returned.
wait_for_ready_or_stopped() ->
    ok = rabbit_boot_state:wait_for(ready, ?BOOT_FINISH_TIMEOUT),
    case rabbit_boot_state:get() of
        ready ->
            ok;
        _ ->
            ok = rabbit_boot_state:wait_for(stopped, ?BOOT_FINISH_TIMEOUT),
            rabbit_prelaunch:get_stop_reason()
    end.
spawn_boot_marker() ->
    %% Compatibility with older RabbitMQ versions:
    %% We register a process doing nothing to indicate that RabbitMQ is
    %% booting. This is checked by `is_booting(Node)` on a remote node.
    Marker = spawn_link(fun() -> receive stop -> ok end end),
    case catch register(rabbit_boot, Marker) of
        true -> {ok, Marker};
        %% register/2 raised: another marker already exists, so a boot
        %% is in progress elsewhere on this node.
        _    -> {already_booting, Marker}
    end.

%% Unlink first so the marker's exit cannot take us down with it.
stop_boot_marker(Marker) ->
    unlink(Marker),
    Marker ! stop,
    ok.

-spec stop() -> 'ok'.

%% Gracefully stop the broker applications, but only once any boot that
%% is in flight has settled (ready or stopped).
stop() ->
    case wait_for_ready_or_stopped() of
        ok ->
            case rabbit_boot_state:get() of
                ready ->
                    Product = product_name(),
                    rabbit_log:info("~s is asked to stop...", [Product]),
                    do_stop(),
                    rabbit_log:info(
                      "Successfully stopped ~s and its dependencies",
                      [Product]),
                    ok;
                stopped ->
                    ok
            end;
        _ ->
            ok
    end.

do_stop() ->
    Apps0 = ?APPS ++ rabbit_plugins:active(),
    %% We ensure that Mnesia is stopped last (or more exactly, after rabbit).
    Apps1 = app_utils:app_dependency_order(Apps0, true) -- [mnesia],
    Apps = [mnesia | Apps1],
    %% this will also perform unregistration with the peer discovery backend
    %% as needed
    stop_apps(Apps).

-spec stop_and_halt() -> no_return().

%% Stop the broker, then halt the VM regardless of how the stop went.
stop_and_halt() ->
    try
        stop()
    catch Type:Reason ->
        rabbit_log:error(
          "Error trying to stop ~s: ~p:~p",
          [product_name(), Type, Reason]),
        error({Type, Reason})
    after
        %% Enclose all the logging in the try block.
        %% init:stop() will be called regardless of any errors.
        try
            AppsLeft = [ A || {A, _, _} <- application:which_applications() ],
            rabbit_log:info(
              lists:flatten(["Halting Erlang VM with the following applications:~n",
                             [" ~p~n" || _ <- AppsLeft]]),
              AppsLeft),
            %% Also duplicate this information to stderr, so console where
            %% foreground broker was running (or systemd journal) will
            %% contain information about graceful termination.
            io:format(standard_error, "Gracefully halting Erlang VM~n", [])
        after
            init:stop()
        end
    end,
    ok.
-spec start_apps([app_name()]) -> 'ok'.

start_apps(Apps) ->
    start_apps(Apps, #{}).

-spec start_apps([app_name()],
                 #{app_name() => restart_type()}) -> 'ok'.

%% TODO: start_apps/2 is now specific to plugins. This function
%% should be moved over `rabbit_plugins`, along with stop_apps/1, once
%% the latter stops using app_utils as well.

%% Start the given (plugin) applications, each with its configured
%% restart type (default: temporary), running their boot steps first.
start_apps(Apps, RestartTypes) ->
    false = lists:member(rabbit, Apps), %% Assertion.
    %% We need to load all applications involved in order to be able to
    %% find new feature flags.
    app_utils:load_applications(Apps),
    ok = rabbit_feature_flags:refresh_feature_flags_after_app_load(Apps),
    rabbit_prelaunch_conf:decrypt_config(Apps),
    lists:foreach(
      fun(App) ->
              RestartType = maps:get(App, RestartTypes, temporary),
              ok = rabbit_boot_steps:run_boot_steps([App]),
              case application:ensure_all_started(App, RestartType) of
                  {ok, _}         -> ok;
                  {error, Reason} -> throw({could_not_start, App, Reason})
              end
      end, Apps).

-spec stop_apps([app_name()]) -> 'ok'.

%% Stop the given applications (already in dependency order from the
%% caller); cleanup steps run only when this is a plugin deactivation,
%% not a full broker shutdown.
stop_apps([]) ->
    ok;
stop_apps(Apps) ->
    rabbit_log:info(
      lists:flatten(["Stopping ~s applications and their dependencies in the following order:~n",
                     [" ~p~n" || _ <- Apps]]),
      [product_name() | lists:reverse(Apps)]),
    ok = app_utils:stop_applications(
           Apps, handle_app_error(error_during_shutdown)),
    case lists:member(rabbit, Apps) of
        %% plugin deactivation
        false -> rabbit_boot_steps:run_cleanup_steps(Apps);
        true  -> ok %% it's all going anyway
    end,
    ok.

-spec handle_app_error(_) -> fun((_, _) -> no_return()).
%% Build the error callback passed to app_utils:stop_applications/2;
%% it converts a per-application failure into a tagged throw.
handle_app_error(Term) ->
    fun(App, {bad_return, {_MFA, {'EXIT', ExitReason}}}) ->
            throw({Term, App, ExitReason});
       (App, Reason) ->
            throw({Term, App, Reason})
    end.

is_booting() -> is_booting(node()).
%% Local node: read the boot state directly. Remote node: delegate over
%% rpc; a {badrpc, _} tuple is passed through untouched so callers can
%% distinguish "unreachable" from false.
is_booting(Node) when Node =:= node() ->
    case rabbit_boot_state:get() of
        booting -> true;
        _       -> false
    end;
is_booting(Node) ->
    case rpc:call(Node, rabbit, is_booting, []) of
        {badrpc, _} = Err -> Err;
        Ret               -> Ret
    end.


-spec await_startup() -> 'ok' | {'error', 'timeout'}.

await_startup() ->
    await_startup(node(), false).

-spec await_startup(node() | non_neg_integer()) -> 'ok' | {'error', 'timeout'}.

%% Overloaded single-argument form: an atom is a node name, an integer
%% is a timeout (in ms) for the local node.
await_startup(Node) when is_atom(Node) ->
    await_startup(Node, false);
await_startup(Timeout) when is_integer(Timeout) ->
    await_startup(node(), false, Timeout).

-spec await_startup(node(), boolean()) -> 'ok' | {'error', 'timeout'}.

%% Wait (with the default timeouts) until Node has finished booting,
%% optionally printing progress reports along the way.
await_startup(Node, PrintProgressReports) ->
    case is_booting(Node) of
        true  -> wait_for_boot_to_finish(Node, PrintProgressReports);
        false ->
            case is_running(Node) of
                true  -> ok;
                false -> wait_for_boot_to_start(Node),
                         wait_for_boot_to_finish(Node, PrintProgressReports)
            end
    end.

-spec await_startup(node(), boolean(), non_neg_integer()) -> 'ok' | {'error', 'timeout'}.

%% Same as await_startup/2 but with an explicit timeout, applied to both
%% the wait-for-boot-to-start and wait-for-boot-to-finish phases.
await_startup(Node, PrintProgressReports, Timeout) ->
    case is_booting(Node) of
        true  -> wait_for_boot_to_finish(Node, PrintProgressReports, Timeout);
        false ->
            case is_running(Node) of
                true  -> ok;
                false -> wait_for_boot_to_start(Node, Timeout),
                         wait_for_boot_to_finish(Node, PrintProgressReports, Timeout)
            end
    end.

wait_for_boot_to_start(Node) ->
    wait_for_boot_to_start(Node, ?BOOT_START_TIMEOUT).

wait_for_boot_to_start(Node, infinity) ->
    %% This assumes that 100K iterations is close enough to "infinity".
    %% Now that's deep.
    do_wait_for_boot_to_start(Node, 100000);
wait_for_boot_to_start(Node, Timeout) ->
    %% Convert the ms timeout into a number of polls.
    Iterations = Timeout div ?BOOT_STATUS_CHECK_INTERVAL,
    do_wait_for_boot_to_start(Node, Iterations).
%% Poll is_booting(Node) until the boot begins, an RPC error occurs, or
%% the iteration budget is exhausted.
do_wait_for_boot_to_start(_Node, Remaining) when Remaining =< 0 ->
    {error, timeout};
do_wait_for_boot_to_start(Node, Remaining) ->
    case is_booting(Node) of
        true ->
            ok;
        {badrpc, _} = Err ->
            Err;
        false ->
            timer:sleep(?BOOT_STATUS_CHECK_INTERVAL),
            do_wait_for_boot_to_start(Node, Remaining - 1)
    end.

wait_for_boot_to_finish(Node, PrintProgressReports) ->
    wait_for_boot_to_finish(Node, PrintProgressReports, ?BOOT_FINISH_TIMEOUT).

%% `infinity` is approximated by a very large polling budget; a finite
%% ms timeout is converted into a number of polls.
wait_for_boot_to_finish(Node, PrintProgressReports, infinity) ->
    do_wait_for_boot_to_finish(Node, PrintProgressReports, 100000);
wait_for_boot_to_finish(Node, PrintProgressReports, Timeout) ->
    do_wait_for_boot_to_finish(Node, PrintProgressReports,
                               Timeout div ?BOOT_STATUS_CHECK_INTERVAL).

%% Poll until the boot ends, then report whether it ended in a running
%% broker, a dead broker, or an unreachable node.
do_wait_for_boot_to_finish(_Node, _PrintProgressReports, Remaining)
  when Remaining =< 0 ->
    {error, timeout};
do_wait_for_boot_to_finish(Node, PrintProgressReports, Remaining) ->
    case is_booting(Node) of
        true ->
            %% Still booting: maybe report progress, pause, try again.
            maybe_print_boot_progress(PrintProgressReports, Remaining),
            timer:sleep(?BOOT_STATUS_CHECK_INTERVAL),
            do_wait_for_boot_to_finish(Node, PrintProgressReports,
                                       Remaining - 1);
        {badrpc, _} = Err ->
            Err;
        false ->
            %% Boot has ended; ask via rpc directly so that a badrpc
            %% error is not mistaken for `false` (which is why we do
            %% not call rabbit:is_running(Node) here).
            case rpc:call(Node, rabbit, is_running, []) of
                true              -> ok;
                false             -> {error, rabbit_is_not_running};
                {badrpc, _} = Err -> Err
            end
    end.

maybe_print_boot_progress(false = _PrintProgressReports, _Remaining) ->
    ok;
maybe_print_boot_progress(true, Remaining) ->
    %% Printed on the CLI command end to illustrate progress: once per
    %% 100 polls, i.e. every 10 seconds at 100 ms per poll.
    case Remaining rem 100 of
        0 -> io:format("Still booting, will check again in 10 seconds...~n");
        _ -> ok
    end.
-spec status
        () -> [{pid, integer()} |
               {running_applications, [{atom(), string(), string()}]} |
               {os, {atom(), atom()}} |
               {erlang_version, string()} |
               {memory, any()}].

%% Assemble the node status report (as shown by `rabbitmq-diagnostics
%% status`): static info, resource limits, file descriptors, process /
%% uptime stats, plugins, paths, totals and product identity.
status() ->
    Version = base_product_version(),
    S1 = [{pid, list_to_integer(os:getpid())},
          %% The timeout value used is twice that of gen_server:call/2.
          {running_applications, rabbit_misc:which_applications()},
          {os, os:type()},
          {rabbitmq_version, Version},
          {erlang_version, erlang:system_info(system_version)},
          {memory, rabbit_vm:memory()},
          {alarms, alarms()},
          {is_under_maintenance, rabbit_maintenance:is_being_drained_local_read(node())},
          {listeners, listeners()},
          {vm_memory_calculation_strategy, vm_memory_monitor:get_memory_calculation_strategy()}],
    %% Each of these may exit (e.g. monitor not running); filter_exit_map
    %% drops the entries that do.
    S2 = rabbit_misc:filter_exit_map(
           fun ({Key, {M, F, A}}) -> {Key, erlang:apply(M, F, A)} end,
           [{vm_memory_high_watermark, {vm_memory_monitor,
                                        get_vm_memory_high_watermark, []}},
            {vm_memory_limit, {vm_memory_monitor,
                               get_memory_limit, []}},
            {disk_free_limit, {rabbit_disk_monitor,
                               get_disk_free_limit, []}},
            {disk_free, {rabbit_disk_monitor,
                         get_disk_free, []}}]),
    S3 = rabbit_misc:with_exit_handler(
           fun () -> [] end,
           fun () -> [{file_descriptors, file_handle_cache:info()}] end),
    S4 = [{processes, [{limit, erlang:system_info(process_limit)},
                       {used, erlang:system_info(process_count)}]},
          {run_queue, erlang:statistics(run_queue)},
          {uptime, begin
                       {T,_} = erlang:statistics(wall_clock),
                       T div 1000
                   end},
          {kernel, {net_ticktime, net_kernel:get_net_ticktime()}}],
    S5 = [{active_plugins, rabbit_plugins:active()},
          {enabled_plugin_file, rabbit_plugins:enabled_plugins_file()}],
    S6 = [{config_files, config_files()},
          {log_files, log_locations()},
          {data_directory, rabbit_mnesia:dir()},
          {raft_data_directory, ra_env:data_dir()}],
    %% Totals require a fully-running broker; skip them otherwise.
    Totals = case is_running() of
                 true ->
                     [{virtual_host_count, rabbit_vhost:count()},
                      {connection_count,
                       length(rabbit_networking:connections_local())},
                      {queue_count, total_queue_count()}];
                 false ->
                     []
             end,
    S7 = [{totals, Totals}],
    S8 = lists:filter(
           fun
               ({product_base_name, _})    -> true;
               ({product_base_version, _}) -> true;
               ({product_name, _})         -> true;
               ({product_version, _})      -> true;
               (_)                         -> false
           end,
           maps:to_list(product_info())),
    S1 ++ S2 ++ S3 ++ S4 ++ S5 ++ S6 ++ S7 ++ S8.

%% Resource alarms raised for THIS node only, as {resource_limit, Limit,
%% Node} tuples.
alarms() ->
    Alarms = rabbit_misc:with_exit_handler(rabbit_misc:const([]),
                                           fun rabbit_alarm:get_alarms/0),
    N = node(),
    %% [{{resource_limit,memory,rabbit@mercurio},[]}]
    [{resource_limit, Limit, Node} || {{resource_limit, Limit, Node}, _} <- Alarms, Node =:= N].

%% Active listeners on this node; an aborted Mnesia transaction (e.g.
%% during shutdown) yields the empty list instead of crashing.
listeners() ->
    Listeners = try
                    rabbit_networking:active_listeners()
                catch
                    exit:{aborted, _} -> []
                end,
    [L || L = #listener{node = Node} <- Listeners, Node =:= node()].

total_queue_count() ->
    lists:foldl(fun (VirtualHost, Acc) ->
                        Acc + rabbit_amqqueue:count(VirtualHost)
                end,
                0, rabbit_vhost:list_names()).

-spec is_running() -> boolean().

is_running() -> is_running(node()).

-spec is_running(node()) -> boolean().

%% Unlike is_booting/1, a badrpc here is folded into `false`.
is_running(Node) when Node =:= node() ->
    case rabbit_boot_state:get() of
        ready -> true;
        _     -> false
    end;
is_running(Node) ->
    case rpc:call(Node, rabbit, is_running, []) of
        true -> true;
        _    -> false
    end.

is_booted() -> is_booted(node()).

%% Booted means: not (still) booting, and running.
is_booted(Node) ->
    case is_booting(Node) of
        false ->
            is_running(Node);
        _ -> false
    end.

-spec environment() -> [{param(), term()}].

%% Per-application environment of every running application, sorted,
%% with sensitive/noise keys removed (see environment/1).
environment() ->
    %% The timeout value is twice that of gen_server:call/2.
    [{A, environment(A)} ||
        {A, _, _} <- lists:keysort(1, application:which_applications(10000))].

environment(App) ->
    %% default_pass is excluded so credentials never appear in reports.
    Ignore = [default_pass, included_applications],
    lists:keysort(1, [P || P = {K, _} <- application:get_all_env(App),
                           not lists:member(K, Ignore)]).

-spec rotate_logs() -> rabbit_types:ok_or_error(any()).
%% Force a rotation of every lager file backend in every sink.
rotate_logs() ->
    rabbit_lager:fold_sinks(
      fun
          (_, [], Acc) ->
              Acc;
          (SinkName, FileNames, Acc) ->
              lager:log(SinkName, info, self(),
                        "Log file rotation forced", []),
              %% FIXME: We use an internal message, understood by
              %% lager_file_backend. We should use a proper API, when
              %% it's added to Lager.
              %%
              %% FIXME: This call is effectively asynchronous: at the
              %% end of this function, we can't guaranty the rotation
              %% is completed.
              [ok = gen_event:call(SinkName,
                                   {lager_file_backend, FileName},
                                   rotate,
                                   infinity) || FileName <- FileNames],
              lager:log(SinkName, info, self(),
                        "Log file re-opened after forced rotation", []),
              Acc
      end, ok).

%%--------------------------------------------------------------------

-spec start('normal',[]) ->
          {'error',
           {'erlang_version_too_old',
            {'found',string(),string()},
            {'required',string(),string()}}} |
          {'ok',pid()}.

%% application behaviour start/2 callback for the `rabbit` application.
start(normal, []) ->
    %% Reset boot state and clear the stop reason again (it was already
    %% made in rabbitmq_prelaunch).
    %%
    %% This is important if the previous startup attempt failed after
    %% rabbitmq_prelaunch was started and the application is still
    %% running.
    rabbit_boot_state:set(booting),
    rabbit_prelaunch:clear_stop_reason(),

    try
        run_prelaunch_second_phase(),

        ProductInfo = product_info(),
        case ProductInfo of
            #{product_overridden := true,
              product_base_name := BaseName,
              product_base_version := BaseVersion} ->
                rabbit_log:info("~n Starting ~s ~s on Erlang ~s~n Based on ~s ~s~n ~s~n ~s~n",
                                [product_name(), product_version(), rabbit_misc:otp_release(),
                                 BaseName, BaseVersion,
                                 ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]);
            _ ->
                rabbit_log:info("~n Starting ~s ~s on Erlang ~s~n ~s~n ~s~n",
                                [product_name(), product_version(), rabbit_misc:otp_release(),
                                 ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE])
        end,
        log_motd(),
        {ok, SupPid} = rabbit_sup:start_link(),

        %% Compatibility with older RabbitMQ versions + required by
        %% rabbit_node_monitor:notify_node_up/0:
        %%
        %% We register the app process under the name `rabbit`. This is
        %% checked by `is_running(Node)` on a remote node. The process
        %% is also monitored by rabbit_node_monitor.
        %%
        %% The process name must be registered *before* running the boot
        %% steps: that's when rabbit_node_monitor will set the process
        %% monitor up.
        %%
        %% Note that plugins were not taken care of at this point
        %% either.
        rabbit_log_prelaunch:debug(
          "Register `rabbit` process (~p) for rabbit_node_monitor",
          [self()]),
        true = register(rabbit, self()),

        print_banner(),
        log_banner(),
        warn_if_kernel_config_dubious(),
        warn_if_disc_io_options_dubious(),
        %% We run `rabbit` boot steps only for now. Plugins boot steps
        %% will be executed as part of the postlaunch phase after they
        %% are started.
        rabbit_boot_steps:run_boot_steps([rabbit]),
        run_postlaunch_phase(),
        {ok, SupPid}
    catch
        %% Any failure above stops Mnesia, records the reason and marks
        %% the boot state as stopped; start/2 then returns the error.
        throw:{error, _} = Error ->
            mnesia:stop(),
            rabbit_prelaunch_errors:log_error(Error),
            rabbit_prelaunch:set_stop_reason(Error),
            rabbit_boot_state:set(stopped),
            Error;
        Class:Exception:Stacktrace ->
            mnesia:stop(),
            rabbit_prelaunch_errors:log_exception(
              Class, Exception, Stacktrace),
            Error = {error, Exception},
            rabbit_prelaunch:set_stop_reason(Error),
            rabbit_boot_state:set(stopped),
            Error
    end.

%% The postlaunch phase runs in its own process so start/2 can return
%% and the application controller is not blocked by plugin startup.
run_postlaunch_phase() ->
    spawn(fun() -> do_run_postlaunch_phase() end).

do_run_postlaunch_phase() ->
    %% Once RabbitMQ itself is started, we need to run a few more steps,
    %% in particular start plugins.
    rabbit_log_prelaunch:debug(""),
    rabbit_log_prelaunch:debug("== Postlaunch phase =="),

    try
        rabbit_log_prelaunch:debug(""),
        rabbit_log_prelaunch:debug("== Plugins =="),

        rabbit_log_prelaunch:debug("Setting plugins up"),
        %% `Plugins` contains all the enabled plugins, plus their
        %% dependencies. The order is important: dependencies appear
        %% before plugin which depend on them.
        Plugins = rabbit_plugins:setup(),
        rabbit_log_prelaunch:debug(
          "Starting the following plugins: ~p", [Plugins]),
        %% We can load all plugins and refresh their feature flags at
        %% once, because it does not involve running code from the
        %% plugins.
        app_utils:load_applications(Plugins),
        ok = rabbit_feature_flags:refresh_feature_flags_after_app_load(
               Plugins),
        %% However, we want to run their boot steps and actually start
        %% them one by one, to ensure a dependency is fully started
        %% before a plugin which depends on it gets a chance to start.
        lists:foreach(
          fun(Plugin) ->
                  ok = rabbit_boot_steps:run_boot_steps([Plugin]),
                  case application:ensure_all_started(Plugin) of
                      {ok, _} -> ok;
                      Error   -> throw(Error)
                  end
          end, Plugins),

        %% Successful boot resets node maintenance state.
        rabbit_log_prelaunch:info("Resetting node maintenance status"),
        _ = rabbit_maintenance:unmark_as_being_drained(),

        %% Export definitions after all plugins have been enabled,
        %% see rabbitmq/rabbitmq-server#2384
        case rabbit_definitions:maybe_load_definitions() of
            ok -> ok;
            DefLoadError -> throw(DefLoadError)
        end,

        %% Start listeners after all plugins have been enabled,
        %% see rabbitmq/rabbitmq-server#2405.
        rabbit_log_prelaunch:info(
          "Ready to start client connection listeners"),
        ok = rabbit_networking:boot(),

        %% The node is ready: mark it as such and log it.
        %% NOTE: PLEASE DO NOT ADD CRITICAL NODE STARTUP CODE AFTER THIS.
        ok = rabbit_lager:broker_is_started(),
        ok = log_broker_started(
               rabbit_plugins:strictly_plugins(rabbit_plugins:active())),

        rabbit_log_prelaunch:debug("Marking ~s as running", [product_name()]),
        rabbit_boot_state:set(ready)
    catch
        %% A postlaunch failure tears the whole broker down (do_stop/0),
        %% after recording why.
        throw:{error, _} = Error ->
            rabbit_prelaunch_errors:log_error(Error),
            rabbit_prelaunch:set_stop_reason(Error),
            do_stop();
        Class:Exception:Stacktrace ->
            rabbit_prelaunch_errors:log_exception(
              Class, Exception, Stacktrace),
            Error = {error, Exception},
            rabbit_prelaunch:set_stop_reason(Error),
            do_stop()
    end.

%% application behaviour prep_stop/1 callback: flag the node as stopping
%% and deregister from the peer discovery backend before stop/1 runs.
prep_stop(State) ->
    rabbit_boot_state:set(stopping),
    rabbit_peer_discovery:maybe_unregister(),
    State.

-spec stop(_) -> 'ok'.

%% application behaviour stop/1 callback.
stop(State) ->
    ok = rabbit_alarm:stop(),
    ok = case rabbit_mnesia:is_clustered() of
             true  -> ok;
             false -> rabbit_table:clear_ram_only_tables()
         end,
    case State of
        [] -> rabbit_prelaunch:set_stop_reason(normal);
        _  -> rabbit_prelaunch:set_stop_reason(State)
    end,
    rabbit_boot_state:set(stopped),
    ok.

%%---------------------------------------------------------------------------
%% boot step functions

-spec boot_delegate() -> 'ok'.

%% Boot step: start the delegate supervisor with the configured number
%% of delegate processes.
boot_delegate() ->
    {ok, Count} = application:get_env(rabbit, delegate_count),
    rabbit_sup:start_supervisor_child(delegate_sup, [Count]).
+ +-spec recover() -> 'ok'. + +recover() -> + ok = rabbit_policy:recover(), + ok = rabbit_vhost:recover(), + ok = lager_exchange_backend:maybe_init_exchange(). + +-spec maybe_insert_default_data() -> 'ok'. + +maybe_insert_default_data() -> + NoDefsToImport = not rabbit_definitions:has_configured_definitions_to_load(), + case rabbit_table:needs_default_data() andalso NoDefsToImport of + true -> + rabbit_log:info("Will seed default virtual host and user..."), + insert_default_data(); + false -> + rabbit_log:info("Will not seed default virtual host and user: have definitions to load..."), + ok + end. + +insert_default_data() -> + {ok, DefaultUser} = application:get_env(default_user), + {ok, DefaultPass} = application:get_env(default_pass), + {ok, DefaultTags} = application:get_env(default_user_tags), + {ok, DefaultVHost} = application:get_env(default_vhost), + {ok, [DefaultConfigurePerm, DefaultWritePerm, DefaultReadPerm]} = + application:get_env(default_permissions), + + DefaultUserBin = rabbit_data_coercion:to_binary(DefaultUser), + DefaultPassBin = rabbit_data_coercion:to_binary(DefaultPass), + DefaultVHostBin = rabbit_data_coercion:to_binary(DefaultVHost), + DefaultConfigurePermBin = rabbit_data_coercion:to_binary(DefaultConfigurePerm), + DefaultWritePermBin = rabbit_data_coercion:to_binary(DefaultWritePerm), + DefaultReadPermBin = rabbit_data_coercion:to_binary(DefaultReadPerm), + + ok = rabbit_vhost:add(DefaultVHostBin, <<"Default virtual host">>, [], ?INTERNAL_USER), + ok = lager_exchange_backend:maybe_init_exchange(), + ok = rabbit_auth_backend_internal:add_user( + DefaultUserBin, + DefaultPassBin, + ?INTERNAL_USER + ), + ok = rabbit_auth_backend_internal:set_tags(DefaultUserBin, DefaultTags, + ?INTERNAL_USER), + ok = rabbit_auth_backend_internal:set_permissions(DefaultUserBin, + DefaultVHostBin, + DefaultConfigurePermBin, + DefaultWritePermBin, + DefaultReadPermBin, + ?INTERNAL_USER), + ok. 
+ +%%--------------------------------------------------------------------------- +%% logging + +-spec log_locations() -> [rabbit_lager:log_location()]. +log_locations() -> + rabbit_lager:log_locations(). + +-spec config_locations() -> [rabbit_config:config_location()]. +config_locations() -> + rabbit_config:config_files(). + +-spec force_event_refresh(reference()) -> 'ok'. + +% Note: https://www.pivotaltracker.com/story/show/166962656 +% This event is necessary for the stats timer to be initialized with +% the correct values once the management agent has started +force_event_refresh(Ref) -> + % direct connections, e.g. MQTT, STOMP + ok = rabbit_direct:force_event_refresh(Ref), + % AMQP connections + ok = rabbit_networking:force_connection_event_refresh(Ref), + % "external" connections, which are not handled by the "AMQP core", + % e.g. connections to the stream plugin + ok = rabbit_networking:force_non_amqp_connection_event_refresh(Ref), + ok = rabbit_channel:force_event_refresh(Ref), + ok = rabbit_amqqueue:force_event_refresh(Ref). + +%%--------------------------------------------------------------------------- +%% misc + +log_broker_started(Plugins) -> + PluginList = iolist_to_binary([rabbit_misc:format(" * ~s~n", [P]) + || P <- Plugins]), + Message = string:strip(rabbit_misc:format( + "Server startup complete; ~b plugins started.~n~s", + [length(Plugins), PluginList]), right, $\n), + rabbit_log:info(Message), + io:format(" completed with ~p plugins.~n", [length(Plugins)]). + +-define(RABBIT_TEXT_LOGO, + "~n ## ## ~s ~s" + "~n ## ##" + "~n ########## ~s" + "~n ###### ##" + "~n ########## ~s"). +-define(FG8_START, "\033[38;5;202m"). +-define(BG8_START, "\033[48;5;202m"). +-define(FG32_START, "\033[38;2;255;102;0m"). +-define(BG32_START, "\033[48;2;255;102;0m"). +-define(C_END, "\033[0m"). 
+-define(RABBIT_8BITCOLOR_LOGO, + "~n " ?BG8_START " " ?C_END " " ?BG8_START " " ?C_END " \033[1m" ?FG8_START "~s" ?C_END " ~s" + "~n " ?BG8_START " " ?C_END " " ?BG8_START " " ?C_END + "~n " ?BG8_START " " ?C_END " ~s" + "~n " ?BG8_START " " ?C_END " " ?BG8_START " " ?C_END + "~n " ?BG8_START " " ?C_END " ~s"). +-define(RABBIT_32BITCOLOR_LOGO, + "~n " ?BG32_START " " ?C_END " " ?BG32_START " " ?C_END " \033[1m" ?FG32_START "~s" ?C_END " ~s" + "~n " ?BG32_START " " ?C_END " " ?BG32_START " " ?C_END + "~n " ?BG32_START " " ?C_END " ~s" + "~n " ?BG32_START " " ?C_END " " ?BG32_START " " ?C_END + "~n " ?BG32_START " " ?C_END " ~s"). + +print_banner() -> + Product = product_name(), + Version = product_version(), + LineListFormatter = fun (Placeholder, [_ | Tail] = LL) -> + LF = lists:flatten([Placeholder || _ <- lists:seq(1, length(Tail))]), + {LF, LL}; + (_, []) -> + {"", ["(none)"]} + end, + Logo = case rabbit_prelaunch:get_context() of + %% We use the colored logo only when running the + %% interactive shell and when colors are supported. + %% + %% Basically it means it will be used on Unix when + %% running "make run-broker" and that's about it. 
+ #{os_type := {unix, darwin}, + interactive_shell := true, + output_supports_colors := true} -> ?RABBIT_8BITCOLOR_LOGO; + #{interactive_shell := true, + output_supports_colors := true} -> ?RABBIT_32BITCOLOR_LOGO; + _ -> ?RABBIT_TEXT_LOGO + end, + %% padded list lines + {LogFmt, LogLocations} = LineListFormatter("~n ~ts", log_locations()), + {CfgFmt, CfgLocations} = LineListFormatter("~n ~ts", config_locations()), + {MOTDFormat, MOTDArgs} = case motd() of + undefined -> + {"", []}; + MOTD -> + Lines = string:split(MOTD, "\n", all), + Padded = [case Line of + <<>> -> "\n"; + _ -> [" ", Line, "\n"] + end + || Line <- Lines], + {"~n~ts", [Padded]} + end, + io:format(Logo ++ + "~n" ++ + MOTDFormat ++ + "~n Doc guides: https://rabbitmq.com/documentation.html" + "~n Support: https://rabbitmq.com/contact.html" + "~n Tutorials: https://rabbitmq.com/getstarted.html" + "~n Monitoring: https://rabbitmq.com/monitoring.html" + "~n" + "~n Logs: ~ts" ++ LogFmt ++ "~n" + "~n Config file(s): ~ts" ++ CfgFmt ++ "~n" + "~n Starting broker...", + [Product, Version, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE] ++ + MOTDArgs ++ + LogLocations ++ + CfgLocations). + +log_motd() -> + case motd() of + undefined -> + ok; + MOTD -> + Lines = string:split(MOTD, "\n", all), + Padded = [case Line of + <<>> -> "\n"; + _ -> [" ", Line, "\n"] + end + || Line <- Lines], + rabbit_log:info("~n~ts", [string:trim(Padded, trailing, [$\r, $\n])]) + end. 
+ +log_banner() -> + {FirstLog, OtherLogs} = case log_locations() of + [Head | Tail] -> + {Head, [{"", F} || F <- Tail]}; + [] -> + {"(none)", []} + end, + Settings = [{"node", node()}, + {"home dir", home_dir()}, + {"config file(s)", config_files()}, + {"cookie hash", rabbit_nodes:cookie_hash()}, + {"log(s)", FirstLog}] ++ + OtherLogs ++ + [{"database dir", rabbit_mnesia:dir()}], + DescrLen = 1 + lists:max([length(K) || {K, _V} <- Settings]), + Format = fun (K, V) -> + rabbit_misc:format( + " ~-" ++ integer_to_list(DescrLen) ++ "s: ~ts~n", [K, V]) + end, + Banner = string:strip(lists:flatten( + [case S of + {"config file(s)" = K, []} -> + Format(K, "(none)"); + {"config file(s)" = K, [V0 | Vs]} -> + [Format(K, V0) | [Format("", V) || V <- Vs]]; + {K, V} -> + Format(K, V) + end || S <- Settings]), right, $\n), + rabbit_log:info("~n~ts", [Banner]). + +warn_if_kernel_config_dubious() -> + case os:type() of + {win32, _} -> + ok; + _ -> + case erlang:system_info(kernel_poll) of + true -> ok; + false -> rabbit_log:warning( + "Kernel poll (epoll, kqueue, etc) is disabled. Throughput " + "and CPU utilization may worsen.~n") + end + end, + AsyncThreads = erlang:system_info(thread_pool_size), + case AsyncThreads < ?ASYNC_THREADS_WARNING_THRESHOLD of + true -> rabbit_log:warning( + "Erlang VM is running with ~b I/O threads, " + "file I/O performance may worsen~n", [AsyncThreads]); + false -> ok + end, + IDCOpts = case application:get_env(kernel, inet_default_connect_options) of + undefined -> []; + {ok, Val} -> Val + end, + case proplists:get_value(nodelay, IDCOpts, false) of + false -> rabbit_log:warning("Nagle's algorithm is enabled for sockets, " + "network I/O latency will be higher~n"); + true -> ok + end. + +warn_if_disc_io_options_dubious() -> + %% if these values are not set, it doesn't matter since + %% rabbit_variable_queue will pick up the values defined in the + %% IO_BATCH_SIZE and CREDIT_DISC_BOUND constants. 
+ CreditDiscBound = rabbit_misc:get_env(rabbit, msg_store_credit_disc_bound, + undefined), + IoBatchSize = rabbit_misc:get_env(rabbit, msg_store_io_batch_size, + undefined), + case catch validate_msg_store_io_batch_size_and_credit_disc_bound( + CreditDiscBound, IoBatchSize) of + ok -> ok; + {error, {Reason, Vars}} -> + rabbit_log:warning(Reason, Vars) + end. + +validate_msg_store_io_batch_size_and_credit_disc_bound(CreditDiscBound, + IoBatchSize) -> + case IoBatchSize of + undefined -> + ok; + IoBatchSize when is_integer(IoBatchSize) -> + if IoBatchSize < ?IO_BATCH_SIZE -> + throw({error, + {"io_batch_size of ~b lower than recommended value ~b, " + "paging performance may worsen~n", + [IoBatchSize, ?IO_BATCH_SIZE]}}); + true -> + ok + end; + IoBatchSize -> + throw({error, + {"io_batch_size should be an integer, but ~b given", + [IoBatchSize]}}) + end, + + %% CreditDiscBound = {InitialCredit, MoreCreditAfter} + {RIC, RMCA} = ?CREDIT_DISC_BOUND, + case CreditDiscBound of + undefined -> + ok; + {IC, MCA} when is_integer(IC), is_integer(MCA) -> + if IC < RIC; MCA < RMCA -> + throw({error, + {"msg_store_credit_disc_bound {~b, ~b} lower than" + "recommended value {~b, ~b}," + " paging performance may worsen~n", + [IC, MCA, RIC, RMCA]}}); + true -> + ok + end; + {IC, MCA} -> + throw({error, + {"both msg_store_credit_disc_bound values should be integers, but ~p given", + [{IC, MCA}]}}); + CreditDiscBound -> + throw({error, + {"invalid msg_store_credit_disc_bound value given: ~p", + [CreditDiscBound]}}) + end, + + case {CreditDiscBound, IoBatchSize} of + {undefined, undefined} -> + ok; + {_CDB, undefined} -> + ok; + {undefined, _IBS} -> + ok; + {{InitialCredit, _MCA}, IoBatchSize} -> + if IoBatchSize < InitialCredit -> + throw( + {error, + {"msg_store_io_batch_size ~b should be bigger than the initial " + "credit value from msg_store_credit_disc_bound ~b," + " paging performance may worsen~n", + [IoBatchSize, InitialCredit]}}); + true -> + ok + end + end. 
+ +-spec product_name() -> string(). + +product_name() -> + case product_info() of + #{product_name := ProductName} -> ProductName; + #{product_base_name := BaseName} -> BaseName + end. + +-spec product_version() -> string(). + +product_version() -> + case product_info() of + #{product_version := ProductVersion} -> ProductVersion; + #{product_base_version := BaseVersion} -> BaseVersion + end. + +-spec product_info() -> #{product_base_name := string(), + product_base_version := string(), + product_overridden := boolean(), + product_name => string(), + product_version => string(), + otp_release := string()}. + +product_info() -> + PTKey = {?MODULE, product}, + try + %% The value is cached the first time to avoid calling the + %% application master many times just for that. + persistent_term:get(PTKey) + catch + error:badarg -> + BaseName = base_product_name(), + BaseVersion = base_product_version(), + Info0 = #{product_base_name => BaseName, + product_base_version => BaseVersion, + otp_release => rabbit_misc:otp_release()}, + + {NameFromEnv, VersionFromEnv} = + case rabbit_prelaunch:get_context() of + #{product_name := NFE, + product_version := VFE} -> {NFE, VFE}; + _ -> {undefined, undefined} + end, + + Info1 = case NameFromEnv of + undefined -> + NameFromApp = string_from_app_env( + product_name, + undefined), + case NameFromApp of + undefined -> + Info0; + _ -> + Info0#{product_name => NameFromApp, + product_overridden => true} + end; + _ -> + Info0#{product_name => NameFromEnv, + product_overridden => true} + end, + + Info2 = case VersionFromEnv of + undefined -> + VersionFromApp = string_from_app_env( + product_version, + undefined), + case VersionFromApp of + undefined -> + Info1; + _ -> + Info1#{product_version => VersionFromApp, + product_overridden => true} + end; + _ -> + Info1#{product_version => VersionFromEnv, + product_overridden => true} + end, + persistent_term:put(PTKey, Info2), + Info2 + end. 
+
+string_from_app_env(Key, Default) ->
+    case application:get_env(rabbit, Key) of
+        {ok, Val} ->
+            case io_lib:deep_char_list(Val) of
+                true ->
+                    case lists:flatten(Val) of
+                        "" -> Default;
+                        String -> String
+                    end;
+                false ->
+                    Default
+            end;
+        undefined ->
+            Default
+    end.
+
+base_product_name() ->
+    %% This function assumes the `rabbit` application was loaded in
+    %% product_info().
+    {ok, Product} = application:get_key(rabbit, description),
+    Product.
+
+base_product_version() ->
+    %% This function assumes the `rabbit` application was loaded in
+    %% product_info().
+    rabbit_misc:version().
+
+motd_file() ->
+    %% Precedence is:
+    %% 1. The environment variable;
+    %% 2. The `motd_file` configuration parameter;
+    %% 3. The default value.
+    Context = rabbit_prelaunch:get_context(),
+    case Context of
+        #{motd_file := File,
+          var_origins := #{motd_file := environment}}
+          when File =/= undefined ->
+            File;
+        _ ->
+            Default = case Context of
+                          #{motd_file := File} -> File;
+                          _ -> undefined
+                      end,
+            string_from_app_env(motd_file, Default)
+    end.
+
+motd() ->
+    case motd_file() of
+        undefined ->
+            undefined;
+        File ->
+            case file:read_file(File) of
+                {ok, MOTD} -> string:trim(MOTD, trailing, [$\r,$\n]);
+                {error, _} -> undefined
+            end
+    end.
+
+home_dir() ->
+    case init:get_argument(home) of
+        {ok, [[Home]]} -> Home;
+        Other -> Other
+    end.
+
+config_files() ->
+    rabbit_config:config_files().
+
+%% We don't want this in fhc since it references rabbit stuff. And we can't put
+%% this in the bootstep directly.
+start_fhc() ->
+    ok = rabbit_sup:start_restartable_child(
+      file_handle_cache,
+      [fun rabbit_alarm:set_alarm/1, fun rabbit_alarm:clear_alarm/1]),
+    ensure_working_fhc().
+
+ensure_working_fhc() ->
+    %% To test the file handle cache, we simply read a file we know it
+    %% exists (Erlang kernel's .app file).
+    %%
+    %% To avoid any pollution of the application process' dictionary by
+    %% file_handle_cache, we spawn a separate process.
+ Parent = self(), + TestFun = fun() -> + ReadBuf = case application:get_env(rabbit, fhc_read_buffering) of + {ok, true} -> "ON"; + {ok, false} -> "OFF" + end, + WriteBuf = case application:get_env(rabbit, fhc_write_buffering) of + {ok, true} -> "ON"; + {ok, false} -> "OFF" + end, + rabbit_log:info("FHC read buffering: ~s~n", [ReadBuf]), + rabbit_log:info("FHC write buffering: ~s~n", [WriteBuf]), + Filename = filename:join(code:lib_dir(kernel, ebin), "kernel.app"), + {ok, Fd} = file_handle_cache:open(Filename, [raw, binary, read], []), + {ok, _} = file_handle_cache:read(Fd, 1), + ok = file_handle_cache:close(Fd), + Parent ! fhc_ok + end, + TestPid = spawn_link(TestFun), + %% Because we are waiting for the test fun, abuse the + %% 'mnesia_table_loading_retry_timeout' parameter to find a sane timeout + %% value. + Timeout = rabbit_table:retry_timeout(), + receive + fhc_ok -> ok; + {'EXIT', TestPid, Exception} -> throw({ensure_working_fhc, Exception}) + after Timeout -> + throw({ensure_working_fhc, {timeout, TestPid}}) + end. diff --git a/deps/rabbit/src/rabbit_access_control.erl b/deps/rabbit/src/rabbit_access_control.erl new file mode 100644 index 0000000000..72260d5723 --- /dev/null +++ b/deps/rabbit/src/rabbit_access_control.erl @@ -0,0 +1,257 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_access_control). + +-include("rabbit.hrl"). + +-export([check_user_pass_login/2, check_user_login/2, check_user_loopback/2, + check_vhost_access/4, check_resource_access/4, check_topic_access/4]). + +-export([permission_cache_can_expire/1, update_state/2]). + +%%---------------------------------------------------------------------------- + +-export_type([permission_atom/0]). 
+ +-type permission_atom() :: 'configure' | 'read' | 'write'. + +%%---------------------------------------------------------------------------- + +-spec check_user_pass_login + (rabbit_types:username(), rabbit_types:password()) -> + {'ok', rabbit_types:user()} | + {'refused', rabbit_types:username(), string(), [any()]}. + +check_user_pass_login(Username, Password) -> + check_user_login(Username, [{password, Password}]). + +-spec check_user_login + (rabbit_types:username(), [{atom(), any()}]) -> + {'ok', rabbit_types:user()} | + {'refused', rabbit_types:username(), string(), [any()]}. + +check_user_login(Username, AuthProps) -> + %% extra auth properties like MQTT client id are in AuthProps + {ok, Modules} = application:get_env(rabbit, auth_backends), + R = lists:foldl( + fun (rabbit_auth_backend_cache=ModN, {refused, _, _, _}) -> + %% It is possible to specify authn/authz within the cache module settings, + %% so we have to do both auth steps here + %% See this rabbitmq-users discussion: + %% https://groups.google.com/d/topic/rabbitmq-users/ObqM7MQdA3I/discussion + try_authenticate_and_try_authorize(ModN, ModN, Username, AuthProps); + ({ModN, ModZs}, {refused, _, _, _}) -> + %% Different modules for authN vs authZ. So authenticate + %% with authN module, then if that succeeds do + %% passwordless (i.e pre-authenticated) login with authZ. + try_authenticate_and_try_authorize(ModN, ModZs, Username, AuthProps); + (Mod, {refused, _, _, _}) -> + %% Same module for authN and authZ. Just take the result + %% it gives us + case try_authenticate(Mod, Username, AuthProps) of + {ok, ModNUser = #auth_user{username = Username2, impl = Impl}} -> + rabbit_log:debug("User '~s' authenticated successfully by backend ~s", [Username2, Mod]), + user(ModNUser, {ok, [{Mod, Impl}], []}); + Else -> + rabbit_log:debug("User '~s' failed authenticatation by backend ~s", [Username, Mod]), + Else + end; + (_, {ok, User}) -> + %% We've successfully authenticated. Skip to the end... 
+ {ok, User} + end, + {refused, Username, "No modules checked '~s'", [Username]}, Modules), + R. + +try_authenticate_and_try_authorize(ModN, ModZs0, Username, AuthProps) -> + ModZs = case ModZs0 of + A when is_atom(A) -> [A]; + L when is_list(L) -> L + end, + case try_authenticate(ModN, Username, AuthProps) of + {ok, ModNUser = #auth_user{username = Username2}} -> + rabbit_log:debug("User '~s' authenticated successfully by backend ~s", [Username2, ModN]), + user(ModNUser, try_authorize(ModZs, Username2, AuthProps)); + Else -> + Else + end. + +try_authenticate(Module, Username, AuthProps) -> + case Module:user_login_authentication(Username, AuthProps) of + {ok, AuthUser} -> {ok, AuthUser}; + {error, E} -> {refused, Username, + "~s failed authenticating ~s: ~p~n", + [Module, Username, E]}; + {refused, F, A} -> {refused, Username, F, A} + end. + +try_authorize(Modules, Username, AuthProps) -> + lists:foldr( + fun (Module, {ok, ModsImpls, ModsTags}) -> + case Module:user_login_authorization(Username, AuthProps) of + {ok, Impl, Tags}-> {ok, [{Module, Impl} | ModsImpls], ModsTags ++ Tags}; + {ok, Impl} -> {ok, [{Module, Impl} | ModsImpls], ModsTags}; + {error, E} -> {refused, Username, + "~s failed authorizing ~s: ~p~n", + [Module, Username, E]}; + {refused, F, A} -> {refused, Username, F, A} + end; + (_, {refused, F, A}) -> + {refused, Username, F, A} + end, {ok, [], []}, Modules). + +user(#auth_user{username = Username, tags = Tags}, {ok, ModZImpls, ModZTags}) -> + {ok, #user{username = Username, + tags = Tags ++ ModZTags, + authz_backends = ModZImpls}}; +user(_AuthUser, Error) -> + Error. + +auth_user(#user{username = Username, tags = Tags}, Impl) -> + #auth_user{username = Username, + tags = Tags, + impl = Impl}. + +-spec check_user_loopback + (rabbit_types:username(), rabbit_net:socket() | inet:ip_address()) -> + 'ok' | 'not_allowed'. 
+ +check_user_loopback(Username, SockOrAddr) -> + {ok, Users} = application:get_env(rabbit, loopback_users), + case rabbit_net:is_loopback(SockOrAddr) + orelse not lists:member(Username, Users) of + true -> ok; + false -> not_allowed + end. + +get_authz_data_from({ip, Address}) -> + #{peeraddr => Address}; +get_authz_data_from({socket, Sock}) -> + {ok, {Address, _Port}} = rabbit_net:peername(Sock), + #{peeraddr => Address}; +get_authz_data_from(undefined) -> + undefined. + +% Note: ip can be either a tuple or, a binary if reverse_dns_lookups +% is enabled and it's a direct connection. +-spec check_vhost_access(User :: rabbit_types:user(), + VHostPath :: rabbit_types:vhost(), + AuthzRawData :: {socket, rabbit_net:socket()} | {ip, inet:ip_address() | binary()} | undefined, + AuthzContext :: map()) -> + 'ok' | rabbit_types:channel_exit(). +check_vhost_access(User = #user{username = Username, + authz_backends = Modules}, VHostPath, AuthzRawData, AuthzContext) -> + AuthzData = get_authz_data_from(AuthzRawData), + FullAuthzContext = create_vhost_access_authz_data(AuthzData, AuthzContext), + lists:foldl( + fun({Mod, Impl}, ok) -> + check_access( + fun() -> + rabbit_vhost:exists(VHostPath) andalso + Mod:check_vhost_access( + auth_user(User, Impl), VHostPath, FullAuthzContext) + end, + Mod, "access to vhost '~s' refused for user '~s'", + [VHostPath, Username], not_allowed); + (_, Else) -> + Else + end, ok, Modules). + +create_vhost_access_authz_data(undefined, Context) when map_size(Context) == 0 -> + undefined; +create_vhost_access_authz_data(undefined, Context) -> + Context; +create_vhost_access_authz_data(PeerAddr, Context) when map_size(Context) == 0 -> + PeerAddr; +create_vhost_access_authz_data(PeerAddr, Context) -> + maps:merge(PeerAddr, Context). + +-spec check_resource_access + (rabbit_types:user(), rabbit_types:r(atom()), permission_atom(), rabbit_types:authz_context()) -> + 'ok' | rabbit_types:channel_exit(). 
+ +check_resource_access(User, R = #resource{kind = exchange, name = <<"">>}, + Permission, Context) -> + check_resource_access(User, R#resource{name = <<"amq.default">>}, + Permission, Context); +check_resource_access(User = #user{username = Username, + authz_backends = Modules}, + Resource, Permission, Context) -> + lists:foldl( + fun({Module, Impl}, ok) -> + check_access( + fun() -> Module:check_resource_access( + auth_user(User, Impl), Resource, Permission, Context) end, + Module, "access to ~s refused for user '~s'", + [rabbit_misc:rs(Resource), Username]); + (_, Else) -> Else + end, ok, Modules). + +check_topic_access(User = #user{username = Username, + authz_backends = Modules}, + Resource, Permission, Context) -> + lists:foldl( + fun({Module, Impl}, ok) -> + check_access( + fun() -> Module:check_topic_access( + auth_user(User, Impl), Resource, Permission, Context) end, + Module, "access to topic '~s' in exchange ~s refused for user '~s'", + [maps:get(routing_key, Context), rabbit_misc:rs(Resource), Username]); + (_, Else) -> Else + end, ok, Modules). + +check_access(Fun, Module, ErrStr, ErrArgs) -> + check_access(Fun, Module, ErrStr, ErrArgs, access_refused). + +check_access(Fun, Module, ErrStr, ErrArgs, ErrName) -> + case Fun() of + true -> + ok; + false -> + rabbit_misc:protocol_error(ErrName, ErrStr, ErrArgs); + {error, E} -> + FullErrStr = ErrStr ++ ", backend ~s returned an error: ~p~n", + FullErrArgs = ErrArgs ++ [Module, E], + rabbit_log:error(FullErrStr, FullErrArgs), + rabbit_misc:protocol_error(ErrName, FullErrStr, FullErrArgs) + end. + +-spec update_state(User :: rabbit_types:user(), NewState :: term()) -> + {'ok', rabbit_types:auth_user()} | + {'refused', string()} | + {'error', any()}. + +update_state(User = #user{authz_backends = Backends0}, NewState) -> + %% N.B.: we use foldl/3 and prepending, so the final list of + %% backends is in reverse order from the original list. 
+ Backends = lists:foldl( + fun({Module, Impl}, {ok, Acc}) -> + case Module:state_can_expire() of + true -> + case Module:update_state(auth_user(User, Impl), NewState) of + {ok, #auth_user{impl = Impl1}} -> + {ok, [{Module, Impl1} | Acc]}; + Else -> Else + end; + false -> + {ok, [{Module, Impl} | Acc]} + end; + (_, {error, _} = Err) -> Err; + (_, {refused, _, _} = Err) -> Err + end, {ok, []}, Backends0), + case Backends of + {ok, Pairs} -> {ok, User#user{authz_backends = lists:reverse(Pairs)}}; + Else -> Else + end. + +-spec permission_cache_can_expire(User :: rabbit_types:user()) -> boolean(). + +%% Returns true if any of the backends support credential expiration, +%% otherwise returns false. +permission_cache_can_expire(#user{authz_backends = Backends}) -> + lists:any(fun ({Module, _State}) -> Module:state_can_expire() end, Backends). diff --git a/deps/rabbit/src/rabbit_alarm.erl b/deps/rabbit/src/rabbit_alarm.erl new file mode 100644 index 0000000000..3f1ab7ae62 --- /dev/null +++ b/deps/rabbit/src/rabbit_alarm.erl @@ -0,0 +1,365 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% +%% There are two types of alarms handled by this module: +%% +%% * per-node resource (disk, memory) alarms for the whole cluster. If any node +%% has an alarm, then all publishing should be disabled across the +%% cluster until all alarms clear. When a node sets such an alarm, +%% this information is automatically propagated throughout the cluster. +%% `#alarms.alarmed_nodes' is being used to track this type of alarms. +%% * limits local to this node (file_descriptor_limit). Used for information +%% purposes only: logging and getting node status. This information is not propagated +%% throughout the cluster. 
`#alarms.alarms' is being used to track this type of alarms. +%% @end + +-module(rabbit_alarm). + +-behaviour(gen_event). + +-export([start_link/0, start/0, stop/0, register/2, set_alarm/1, + clear_alarm/1, get_alarms/0, get_alarms/1, get_local_alarms/0, get_local_alarms/1, on_node_up/1, on_node_down/1, + format_as_map/1, format_as_maps/1, is_local/1]). + +-export([init/1, handle_call/2, handle_event/2, handle_info/2, + terminate/2, code_change/3]). + +-export([remote_conserve_resources/3]). %% Internal use only + +-define(SERVER, ?MODULE). + +-define(FILE_DESCRIPTOR_RESOURCE, <<"file descriptors">>). +-define(MEMORY_RESOURCE, <<"memory">>). +-define(DISK_SPACE_RESOURCE, <<"disk space">>). + +%%---------------------------------------------------------------------------- + +-record(alarms, {alertees :: dict:dict(pid(), rabbit_types:mfargs()), + alarmed_nodes :: dict:dict(node(), [resource_alarm_source()]), + alarms :: [alarm()]}). + +-type local_alarm() :: 'file_descriptor_limit'. +-type resource_alarm_source() :: 'disk' | 'memory'. +-type resource_alarm() :: {resource_limit, resource_alarm_source(), node()}. +-type alarm() :: local_alarm() | resource_alarm(). + +%%---------------------------------------------------------------------------- + +-spec start_link() -> rabbit_types:ok_pid_or_error(). + +start_link() -> + gen_event:start_link({local, ?SERVER}). + +-spec start() -> 'ok'. + +start() -> + ok = rabbit_sup:start_restartable_child(?MODULE), + ok = gen_event:add_handler(?SERVER, ?MODULE, []), + {ok, MemoryWatermark} = application:get_env(vm_memory_high_watermark), + + rabbit_sup:start_restartable_child( + vm_memory_monitor, [MemoryWatermark, + fun (Alarm) -> + background_gc:run(), + set_alarm(Alarm) + end, + fun clear_alarm/1]), + {ok, DiskLimit} = application:get_env(disk_free_limit), + rabbit_sup:start_delayed_restartable_child( + rabbit_disk_monitor, [DiskLimit]), + ok. + +-spec stop() -> 'ok'. + +stop() -> ok. 
+ +%% Registers a handler that should be called on every resource alarm change. +%% Given a call rabbit_alarm:register(Pid, {M, F, A}), the handler would be +%% called like this: `apply(M, F, A ++ [Pid, Source, Alert])', where `Source' +%% has the type of resource_alarm_source() and `Alert' has the type of resource_alert(). + +-spec register(pid(), rabbit_types:mfargs()) -> [atom()]. + +register(Pid, AlertMFA) -> + gen_event:call(?SERVER, ?MODULE, {register, Pid, AlertMFA}, infinity). + +-spec set_alarm({alarm(), []}) -> 'ok'. + +set_alarm(Alarm) -> gen_event:notify(?SERVER, {set_alarm, Alarm}). + +-spec clear_alarm(alarm()) -> 'ok'. + +clear_alarm(Alarm) -> gen_event:notify(?SERVER, {clear_alarm, Alarm}). + +-spec get_alarms() -> [{alarm(), []}]. +get_alarms() -> gen_event:call(?SERVER, ?MODULE, get_alarms, infinity). + +-spec get_alarms(timeout()) -> [{alarm(), []}]. +get_alarms(Timeout) -> gen_event:call(?SERVER, ?MODULE, get_alarms, Timeout). + +-spec get_local_alarms() -> [alarm()]. +get_local_alarms() -> gen_event:call(?SERVER, ?MODULE, get_local_alarms, infinity). + +-spec get_local_alarms(timeout()) -> [alarm()]. +get_local_alarms(Timeout) -> gen_event:call(?SERVER, ?MODULE, get_local_alarms, Timeout). + +-spec filter_local_alarms([alarm()]) -> [alarm()]. +filter_local_alarms(Alarms) -> + lists:filter(fun is_local/1, Alarms). + +-spec is_local({alarm(), any()}) -> boolean(). +is_local({file_descriptor_limit, _}) -> true; +is_local({{resource_limit, _Resource, Node}, _}) when Node =:= node() -> true; +is_local({{resource_limit, _Resource, Node}, _}) when Node =/= node() -> false. + +-spec format_as_map(alarm()) -> #{binary() => term()}. 
+format_as_map(file_descriptor_limit) -> + #{ + <<"resource">> => ?FILE_DESCRIPTOR_RESOURCE, + <<"node">> => node() + }; +format_as_map({resource_limit, disk, Node}) -> + #{ + <<"resource">> => ?DISK_SPACE_RESOURCE, + <<"node">> => Node + }; +format_as_map({resource_limit, memory, Node}) -> + #{ + <<"resource">> => ?MEMORY_RESOURCE, + <<"node">> => Node + }; +format_as_map({resource_limit, Limit, Node}) -> + #{ + <<"resource">> => rabbit_data_coercion:to_binary(Limit), + <<"node">> => Node + }. + +-spec format_as_maps([{alarm(), []}]) -> [#{any() => term()}]. +format_as_maps(Alarms) when is_list(Alarms) -> + %% get_alarms/0 returns + %% + %% [ + %% {file_descriptor_limit, []}, + %% {{resource_limit, disk, rabbit@warp10}, []}, + %% {{resource_limit, memory, rabbit@warp10}, []} + %% ] + lists:map(fun({Resource, _}) -> format_as_map(Resource); + (Resource) -> format_as_map(Resource) + end, Alarms). + + +-spec on_node_up(node()) -> 'ok'. +on_node_up(Node) -> gen_event:notify(?SERVER, {node_up, Node}). + +-spec on_node_down(node()) -> 'ok'. +on_node_down(Node) -> gen_event:notify(?SERVER, {node_down, Node}). + +remote_conserve_resources(Pid, Source, {true, _, _}) -> + gen_event:notify({?SERVER, node(Pid)}, + {set_alarm, {{resource_limit, Source, node()}, []}}); +remote_conserve_resources(Pid, Source, {false, _, _}) -> + gen_event:notify({?SERVER, node(Pid)}, + {clear_alarm, {resource_limit, Source, node()}}). + + +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, #alarms{alertees = dict:new(), + alarmed_nodes = dict:new(), + alarms = []}}. 
+ +handle_call({register, Pid, AlertMFA}, State = #alarms{alarmed_nodes = AN}) -> + {ok, lists:usort(lists:append([V || {_, V} <- dict:to_list(AN)])), + internal_register(Pid, AlertMFA, State)}; + +handle_call(get_alarms, State) -> + {ok, compute_alarms(State), State}; + +handle_call(get_local_alarms, State) -> + {ok, filter_local_alarms(compute_alarms(State)), State}; + +handle_call(_Request, State) -> + {ok, not_understood, State}. + +handle_event({set_alarm, {{resource_limit, Source, Node}, []}}, State) -> + case is_node_alarmed(Source, Node, State) of + true -> + {ok, State}; + false -> + rabbit_event:notify(alarm_set, [{source, Source}, + {node, Node}]), + handle_set_resource_alarm(Source, Node, State) + end; +handle_event({set_alarm, Alarm}, State = #alarms{alarms = Alarms}) -> + case lists:member(Alarm, Alarms) of + true -> {ok, State}; + false -> UpdatedAlarms = lists:usort([Alarm|Alarms]), + handle_set_alarm(Alarm, State#alarms{alarms = UpdatedAlarms}) + end; + +handle_event({clear_alarm, {resource_limit, Source, Node}}, State) -> + case is_node_alarmed(Source, Node, State) of + true -> + rabbit_event:notify(alarm_cleared, [{source, Source}, + {node, Node}]), + handle_clear_resource_alarm(Source, Node, State); + false -> + {ok, State} + end; +handle_event({clear_alarm, Alarm}, State = #alarms{alarms = Alarms}) -> + case lists:keymember(Alarm, 1, Alarms) of + true -> handle_clear_alarm( + Alarm, State#alarms{alarms = lists:keydelete( + Alarm, 1, Alarms)}); + false -> {ok, State} + + end; + +handle_event({node_up, Node}, State) -> + %% Must do this via notify and not call to avoid possible deadlock. 
+ ok = gen_event:notify( + {?SERVER, Node}, + {register, self(), {?MODULE, remote_conserve_resources, []}}), + {ok, State}; + +handle_event({node_down, Node}, #alarms{alarmed_nodes = AN} = State) -> + AlarmsForDeadNode = case dict:find(Node, AN) of + {ok, V} -> V; + error -> [] + end, + {ok, lists:foldr(fun(Source, AccState) -> + rabbit_log:warning("~s resource limit alarm cleared for dead node ~p~n", + [Source, Node]), + maybe_alert(fun dict_unappend/3, Node, Source, false, AccState) + end, State, AlarmsForDeadNode)}; + +handle_event({register, Pid, AlertMFA}, State) -> + {ok, internal_register(Pid, AlertMFA, State)}; + +handle_event(_Event, State) -> + {ok, State}. + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #alarms{alertees = Alertees}) -> + {ok, State#alarms{alertees = dict:erase(Pid, Alertees)}}; + +handle_info(_Info, State) -> + {ok, State}. + +terminate(_Arg, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------------- + +dict_append(Key, Val, Dict) -> + L = case dict:find(Key, Dict) of + {ok, V} -> V; + error -> [] + end, + dict:store(Key, lists:usort([Val|L]), Dict). + +dict_unappend(Key, Val, Dict) -> + L = case dict:find(Key, Dict) of + {ok, V} -> V; + error -> [] + end, + + case lists:delete(Val, L) of + [] -> dict:erase(Key, Dict); + X -> dict:store(Key, X, Dict) + end. + +maybe_alert(UpdateFun, Node, Source, WasAlertAdded, + State = #alarms{alarmed_nodes = AN, + alertees = Alertees}) -> + AN1 = UpdateFun(Node, Source, AN), + %% Is alarm for Source still set on any node? 
+ StillHasAlerts = lists:any(fun ({_Node, NodeAlerts}) -> lists:member(Source, NodeAlerts) end, dict:to_list(AN1)), + case StillHasAlerts of + true -> ok; + false -> rabbit_log:warning("~s resource limit alarm cleared across the cluster~n", [Source]) + end, + Alert = {WasAlertAdded, StillHasAlerts, Node}, + case node() of + Node -> ok = alert_remote(Alert, Alertees, Source); + _ -> ok + end, + ok = alert_local(Alert, Alertees, Source), + State#alarms{alarmed_nodes = AN1}. + +alert_local(Alert, Alertees, Source) -> + alert(Alertees, Source, Alert, fun erlang:'=:='/2). + +alert_remote(Alert, Alertees, Source) -> + alert(Alertees, Source, Alert, fun erlang:'=/='/2). + +alert(Alertees, Source, Alert, NodeComparator) -> + Node = node(), + dict:fold(fun (Pid, {M, F, A}, ok) -> + case NodeComparator(Node, node(Pid)) of + true -> apply(M, F, A ++ [Pid, Source, Alert]); + false -> ok + end + end, ok, Alertees). + +internal_register(Pid, {M, F, A} = AlertMFA, + State = #alarms{alertees = Alertees}) -> + _MRef = erlang:monitor(process, Pid), + case dict:find(node(), State#alarms.alarmed_nodes) of + {ok, Sources} -> [apply(M, F, A ++ [Pid, R, {true, true, node()}]) || R <- Sources]; + error -> ok + end, + NewAlertees = dict:store(Pid, AlertMFA, Alertees), + State#alarms{alertees = NewAlertees}. + +handle_set_resource_alarm(Source, Node, State) -> + rabbit_log:warning( + "~s resource limit alarm set on node ~p.~n~n" + "**********************************************************~n" + "*** Publishers will be blocked until this alarm clears ***~n" + "**********************************************************~n", + [Source, Node]), + {ok, maybe_alert(fun dict_append/3, Node, Source, true, State)}. 
+ +handle_set_alarm({file_descriptor_limit, []}, State) -> + rabbit_log:warning( + "file descriptor limit alarm set.~n~n" + "********************************************************************~n" + "*** New connections will not be accepted until this alarm clears ***~n" + "********************************************************************~n"), + {ok, State}; +handle_set_alarm(Alarm, State) -> + rabbit_log:warning("alarm '~p' set~n", [Alarm]), + {ok, State}. + +handle_clear_resource_alarm(Source, Node, State) -> + rabbit_log:warning("~s resource limit alarm cleared on node ~p~n", + [Source, Node]), + {ok, maybe_alert(fun dict_unappend/3, Node, Source, false, State)}. + +handle_clear_alarm(file_descriptor_limit, State) -> + rabbit_log:warning("file descriptor limit alarm cleared~n"), + {ok, State}; +handle_clear_alarm(Alarm, State) -> + rabbit_log:warning("alarm '~p' cleared~n", [Alarm]), + {ok, State}. + +is_node_alarmed(Source, Node, #alarms{alarmed_nodes = AN}) -> + case dict:find(Node, AN) of + {ok, Sources} -> + lists:member(Source, Sources); + error -> + false + end. + +compute_alarms(#alarms{alarms = Alarms, + alarmed_nodes = AN}) -> + Alarms ++ [ {{resource_limit, Source, Node}, []} + || {Node, Sources} <- dict:to_list(AN), Source <- Sources ]. diff --git a/deps/rabbit/src/rabbit_amqqueue.erl b/deps/rabbit/src/rabbit_amqqueue.erl new file mode 100644 index 0000000000..cd5f894680 --- /dev/null +++ b/deps/rabbit/src/rabbit_amqqueue.erl @@ -0,0 +1,1889 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_amqqueue). + +-export([warn_file_limit/0]). +-export([recover/1, stop/1, start/1, declare/6, declare/7, + delete_immediately/1, delete_exclusive/2, delete/4, purge/1, + forget_all_durable/1]). 
+-export([pseudo_queue/2, pseudo_queue/3, immutable/1]). +-export([lookup/1, lookup_many/1, not_found_or_absent/1, not_found_or_absent_dirty/1, + with/2, with/3, with_or_die/2, + assert_equivalence/5, + check_exclusive_access/2, with_exclusive_access_or_die/3, + stat/1, deliver/2, + requeue/3, ack/3, reject/4]). +-export([not_found/1, absent/2]). +-export([list/0, list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2, + emit_info_all/5, list_local/1, info_local/1, + emit_info_local/4, emit_info_down/4]). +-export([count/0]). +-export([list_down/1, count/1, list_names/0, list_names/1, list_local_names/0, + list_local_names_down/0, list_with_possible_retry/1]). +-export([list_by_type/1, sample_local_queues/0, sample_n_by_name/2, sample_n/2]). +-export([force_event_refresh/1, notify_policy_changed/1]). +-export([consumers/1, consumers_all/1, emit_consumers_all/4, consumer_info_keys/0]). +-export([basic_get/5, basic_consume/12, basic_cancel/5, notify_decorators/1]). +-export([notify_sent/2, notify_sent_queue_down/1, resume/2]). +-export([notify_down_all/2, notify_down_all/3, activate_limit_all/2, credit/5]). +-export([on_node_up/1, on_node_down/1]). +-export([update/2, store_queue/1, update_decorators/1, policy_changed/2]). +-export([update_mirroring/1, sync_mirrors/1, cancel_sync_mirrors/1]). +-export([emit_unresponsive/6, emit_unresponsive_local/5, is_unresponsive/2]). +-export([has_synchronised_mirrors_online/1]). +-export([is_replicated/1, is_exclusive/1, is_not_exclusive/1, is_dead_exclusive/1]). +-export([list_local_quorum_queues/0, list_local_quorum_queue_names/0, + list_local_mirrored_classic_queues/0, list_local_mirrored_classic_names/0, + list_local_leaders/0, list_local_followers/0, get_quorum_nodes/1, + list_local_mirrored_classic_without_synchronised_mirrors/0, + list_local_mirrored_classic_without_synchronised_mirrors_for_cli/0]). +-export([ensure_rabbit_queue_record_is_initialized/1]). +-export([format/1]). 
+-export([delete_immediately_by_resource/1]). +-export([delete_crashed/1, + delete_crashed/2, + delete_crashed_internal/2]). + +-export([pid_of/1, pid_of/2]). +-export([mark_local_durable_queues_stopped/1]). + +-export([rebalance/3]). +-export([collect_info_all/2]). + +-export([is_policy_applicable/2]). +-export([is_server_named_allowed/1]). + +-export([check_max_age/1]). +-export([get_queue_type/1]). + +%% internal +-export([internal_declare/2, internal_delete/2, run_backing_queue/3, + set_ram_duration_target/2, set_maximum_since_use/2, + emit_consumers_local/3, internal_delete/3]). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include_lib("stdlib/include/qlc.hrl"). +-include("amqqueue.hrl"). + +-define(INTEGER_ARG_TYPES, [byte, short, signedint, long, + unsignedbyte, unsignedshort, unsignedint]). + +-define(MORE_CONSUMER_CREDIT_AFTER, 50). + +-define(IS_CLASSIC(QPid), is_pid(QPid)). +-define(IS_QUORUM(QPid), is_tuple(QPid)). +%%---------------------------------------------------------------------------- + +-export_type([name/0, qmsg/0, absent_reason/0]). + +-type name() :: rabbit_types:r('queue'). + +-type qpids() :: [pid()]. +-type qlen() :: rabbit_types:ok(non_neg_integer()). +-type qfun(A) :: fun ((amqqueue:amqqueue()) -> A | no_return()). +-type qmsg() :: {name(), pid() | {atom(), pid()}, msg_id(), + boolean(), rabbit_types:message()}. +-type msg_id() :: non_neg_integer(). +-type ok_or_errors() :: + 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}. +-type absent_reason() :: 'nodedown' | 'crashed' | stopped | timeout. +-type queue_not_found() :: not_found. +-type queue_absent() :: {'absent', amqqueue:amqqueue(), absent_reason()}. +-type not_found_or_absent() :: queue_not_found() | queue_absent(). + +%%---------------------------------------------------------------------------- + +-define(CONSUMER_INFO_KEYS, + [queue_name, channel_pid, consumer_tag, ack_required, prefetch_count, + active, activity_status, arguments]). 
+
+%% Logs a warning if the number of durable queues that would be recovered
+%% exceeds the available file handle limit, since recovery may hang in
+%% that situation.
+warn_file_limit() ->
+    DurableQueues = find_recoverable_queues(),
+    L = length(DurableQueues),
+
+    %% if there are not enough file handles, the server might hang
+    %% when trying to recover queues, warn the user:
+    case file_handle_cache:get_limit() < L of
+        true ->
+            rabbit_log:warning(
+              "Recovering ~p queues, available file handles: ~p. Please increase max open file handles limit to at least ~p!~n",
+              [L, file_handle_cache:get_limit(), L]);
+        false ->
+            ok
+    end.
+
+%% Recovers all recoverable durable queues local to this node in the
+%% given vhost, delegating per-type recovery to rabbit_queue_type.
+-spec recover(rabbit_types:vhost()) ->
+    {Recovered :: [amqqueue:amqqueue()],
+     Failed :: [amqqueue:amqqueue()]}.
+recover(VHost) ->
+    AllDurable = find_local_durable_queues(VHost),
+    rabbit_queue_type:recover(VHost, AllDurable).
+
+%% Splits a list of queue pids into {ClassicPids, QuorumIds}; classic
+%% queues are plain pids, quorum queues are {Name, Node} tuples.
+filter_pid_per_type(QPids) ->
+    lists:partition(fun(QPid) -> ?IS_CLASSIC(QPid) end, QPids).
+
+%% Same partitioning as filter_pid_per_type/1 but starting from queue
+%% resource names, pairing each with its current pid.
+filter_resource_per_type(Resources) ->
+    Queues = [begin
+                  {ok, Q} = lookup(Resource),
+                  QPid = amqqueue:get_pid(Q),
+                  {Resource, QPid}
+              end || Resource <- Resources],
+    lists:partition(fun({_Resource, QPid}) -> ?IS_CLASSIC(QPid) end, Queues).
+
+%% Stops all queue processes (classic and quorum) in the vhost along with
+%% the vhost's backing queue storage.
+-spec stop(rabbit_types:vhost()) -> 'ok'.
+stop(VHost) ->
+    %% Classic queues
+    ok = rabbit_amqqueue_sup_sup:stop_for_vhost(VHost),
+    {ok, BQ} = application:get_env(rabbit, backing_queue_module),
+    ok = BQ:stop(VHost),
+    rabbit_quorum_queue:stop(VHost).
+
+-spec start([amqqueue:amqqueue()]) -> 'ok'.
+
+start(Qs) ->
+    %% At this point all recovered queues and their bindings are
+    %% visible to routing, so now it is safe for them to complete
+    %% their initialisation (which may involve interacting with other
+    %% queues).
+    _ = [amqqueue:get_pid(Q) ! {self(), go}
+         || Q <- Qs,
+            %% All queues are supposed to be classic here.
+            amqqueue:is_classic(Q)],
+    ok.
+
+%% Marks all local durable classic queues in the vhost as 'stopped' in
+%% mnesia; retries with upgraded records if the amqqueue record version
+%% changed mid-transaction (see the macro definition).
+mark_local_durable_queues_stopped(VHost) ->
+    ?try_mnesia_tx_or_upgrade_amqqueue_and_retry(
+       do_mark_local_durable_queues_stopped(VHost),
+       do_mark_local_durable_queues_stopped(VHost)).
+ +do_mark_local_durable_queues_stopped(VHost) -> + Qs = find_local_durable_queues(VHost), + rabbit_misc:execute_mnesia_transaction( + fun() -> + [ store_queue(amqqueue:set_state(Q, stopped)) + || Q <- Qs, amqqueue:get_type(Q) =:= rabbit_classic_queue, + amqqueue:get_state(Q) =/= stopped ] + end). + +find_local_durable_queues(VHost) -> + mnesia:async_dirty( + fun () -> + qlc:e( + qlc:q( + [Q || Q <- mnesia:table(rabbit_durable_queue), + amqqueue:get_vhost(Q) =:= VHost andalso + rabbit_queue_type:is_recoverable(Q) + ])) + end). + +find_recoverable_queues() -> + mnesia:async_dirty( + fun () -> + qlc:e(qlc:q([Q || Q <- mnesia:table(rabbit_durable_queue), + rabbit_queue_type:is_recoverable(Q)])) + end). + +-spec declare(name(), + boolean(), + boolean(), + rabbit_framing:amqp_table(), + rabbit_types:maybe(pid()), + rabbit_types:username()) -> + {'new' | 'existing' | 'owner_died', amqqueue:amqqueue()} | + {'new', amqqueue:amqqueue(), rabbit_fifo_client:state()} | + {'absent', amqqueue:amqqueue(), absent_reason()} | + {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}. +declare(QueueName, Durable, AutoDelete, Args, Owner, ActingUser) -> + declare(QueueName, Durable, AutoDelete, Args, Owner, ActingUser, node()). + + +%% The Node argument suggests where the queue (master if mirrored) +%% should be. Note that in some cases (e.g. with "nodes" policy in +%% effect) this might not be possible to satisfy. + +-spec declare(name(), + boolean(), + boolean(), + rabbit_framing:amqp_table(), + rabbit_types:maybe(pid()), + rabbit_types:username(), + node()) -> + {'new' | 'existing' | 'owner_died', amqqueue:amqqueue()} | + {'absent', amqqueue:amqqueue(), absent_reason()} | + {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}. 
+declare(QueueName = #resource{virtual_host = VHost}, Durable, AutoDelete, Args, + Owner, ActingUser, Node) -> + ok = check_declare_arguments(QueueName, Args), + Type = get_queue_type(Args), + case rabbit_queue_type:is_enabled(Type) of + true -> + Q0 = amqqueue:new(QueueName, + none, + Durable, + AutoDelete, + Owner, + Args, + VHost, + #{user => ActingUser}, + Type), + Q = rabbit_queue_decorator:set( + rabbit_policy:set(Q0)), + rabbit_queue_type:declare(Q, Node); + false -> + {protocol_error, internal_error, + "Cannot declare a queue '~s' of type '~s' on node '~s': " + "the corresponding feature flag is disabled", + [rabbit_misc:rs(QueueName), Type, Node]} + end. + +get_queue_type(Args) -> + case rabbit_misc:table_lookup(Args, <<"x-queue-type">>) of + undefined -> + rabbit_queue_type:default(); + {_, V} -> + rabbit_queue_type:discover(V) + end. + +-spec internal_declare(amqqueue:amqqueue(), boolean()) -> + {created | existing, amqqueue:amqqueue()} | queue_absent(). + +internal_declare(Q, Recover) -> + ?try_mnesia_tx_or_upgrade_amqqueue_and_retry( + do_internal_declare(Q, Recover), + begin + Q1 = amqqueue:upgrade(Q), + do_internal_declare(Q1, Recover) + end). + +do_internal_declare(Q, true) -> + rabbit_misc:execute_mnesia_tx_with_tail( + fun () -> + ok = store_queue(amqqueue:set_state(Q, live)), + rabbit_misc:const({created, Q}) + end); +do_internal_declare(Q, false) -> + QueueName = amqqueue:get_name(Q), + rabbit_misc:execute_mnesia_tx_with_tail( + fun () -> + case mnesia:wread({rabbit_queue, QueueName}) of + [] -> + case not_found_or_absent(QueueName) of + not_found -> Q1 = rabbit_policy:set(Q), + Q2 = amqqueue:set_state(Q1, live), + ok = store_queue(Q2), + fun () -> {created, Q2} end; + {absent, _Q, _} = R -> rabbit_misc:const(R) + end; + [ExistingQ] -> + rabbit_misc:const({existing, ExistingQ}) + end + end). + +-spec update + (name(), fun((amqqueue:amqqueue()) -> amqqueue:amqqueue())) -> + 'not_found' | amqqueue:amqqueue(). 
+ +update(Name, Fun) -> + case mnesia:wread({rabbit_queue, Name}) of + [Q] -> + Durable = amqqueue:is_durable(Q), + Q1 = Fun(Q), + ok = mnesia:write(rabbit_queue, Q1, write), + case Durable of + true -> ok = mnesia:write(rabbit_durable_queue, Q1, write); + _ -> ok + end, + Q1; + [] -> + not_found + end. + +%% only really used for quorum queues to ensure the rabbit_queue record +%% is initialised +ensure_rabbit_queue_record_is_initialized(Q) -> + ?try_mnesia_tx_or_upgrade_amqqueue_and_retry( + do_ensure_rabbit_queue_record_is_initialized(Q), + begin + Q1 = amqqueue:upgrade(Q), + do_ensure_rabbit_queue_record_is_initialized(Q1) + end). + +do_ensure_rabbit_queue_record_is_initialized(Q) -> + rabbit_misc:execute_mnesia_tx_with_tail( + fun () -> + ok = store_queue(Q), + rabbit_misc:const({ok, Q}) + end). + +-spec store_queue(amqqueue:amqqueue()) -> 'ok'. + +store_queue(Q) when ?amqqueue_is_durable(Q) -> + Q1 = amqqueue:reset_mirroring_and_decorators(Q), + ok = mnesia:write(rabbit_durable_queue, Q1, write), + store_queue_ram(Q); +store_queue(Q) when not ?amqqueue_is_durable(Q) -> + store_queue_ram(Q). + +store_queue_ram(Q) -> + ok = mnesia:write(rabbit_queue, rabbit_queue_decorator:set(Q), write). + +-spec update_decorators(name()) -> 'ok'. + +update_decorators(Name) -> + rabbit_misc:execute_mnesia_transaction( + fun() -> + case mnesia:wread({rabbit_queue, Name}) of + [Q] -> store_queue_ram(Q), + ok; + [] -> ok + end + end). + +-spec policy_changed(amqqueue:amqqueue(), amqqueue:amqqueue()) -> + 'ok'. + +policy_changed(Q1, Q2) -> + Decorators1 = amqqueue:get_decorators(Q1), + Decorators2 = amqqueue:get_decorators(Q2), + rabbit_mirror_queue_misc:update_mirrors(Q1, Q2), + D1 = rabbit_queue_decorator:select(Decorators1), + D2 = rabbit_queue_decorator:select(Decorators2), + [ok = M:policy_changed(Q1, Q2) || M <- lists:usort(D1 ++ D2)], + %% Make sure we emit a stats event even if nothing + %% mirroring-related has changed - the policy may have changed anyway. 
+ notify_policy_changed(Q2). + +is_policy_applicable(QName, Policy) -> + case lookup(QName) of + {ok, Q} -> + rabbit_queue_type:is_policy_applicable(Q, Policy); + _ -> + %% Defaults to previous behaviour. Apply always + true + end. + +is_server_named_allowed(Args) -> + Type = get_queue_type(Args), + rabbit_queue_type:is_server_named_allowed(Type). + +-spec lookup + (name()) -> + rabbit_types:ok(amqqueue:amqqueue()) | + rabbit_types:error('not_found'); + ([name()]) -> + [amqqueue:amqqueue()]. + +lookup([]) -> []; %% optimisation +lookup([Name]) -> ets:lookup(rabbit_queue, Name); %% optimisation +lookup(Names) when is_list(Names) -> + %% Normally we'd call mnesia:dirty_read/1 here, but that is quite + %% expensive for reasons explained in rabbit_misc:dirty_read/1. + lists:append([ets:lookup(rabbit_queue, Name) || Name <- Names]); +lookup(Name) -> + rabbit_misc:dirty_read({rabbit_queue, Name}). + +-spec lookup_many ([name()]) -> [amqqueue:amqqueue()]. + +lookup_many(Names) when is_list(Names) -> + lookup(Names). + +-spec not_found_or_absent(name()) -> not_found_or_absent(). + +not_found_or_absent(Name) -> + %% NB: we assume that the caller has already performed a lookup on + %% rabbit_queue and not found anything + case mnesia:read({rabbit_durable_queue, Name}) of + [] -> not_found; + [Q] -> {absent, Q, nodedown} %% Q exists on stopped node + end. + +-spec not_found_or_absent_dirty(name()) -> not_found_or_absent(). + +not_found_or_absent_dirty(Name) -> + %% We should read from both tables inside a tx, to get a + %% consistent view. But the chances of an inconsistency are small, + %% and only affect the error kind. + case rabbit_misc:dirty_read({rabbit_durable_queue, Name}) of + {error, not_found} -> not_found; + {ok, Q} -> {absent, Q, nodedown} + end. + +-spec get_rebalance_lock(pid()) -> + {true, {rebalance_queues, pid()}} | false. 
+%% Attempts to take the cluster-wide rebalance lock without retrying;
+%% returns {true, LockId} on success or false if a rebalance is already
+%% in progress somewhere in the cluster.
+get_rebalance_lock(Pid) when is_pid(Pid) ->
+    Id = {rebalance_queues, Pid},
+    Nodes = [node()|nodes()],
+    %% Note that we're not re-trying. We want to immediately know
+    %% if a re-balance is taking place and stop accordingly.
+    case global:set_lock(Id, Nodes, 0) of
+        true ->
+            {true, Id};
+        false ->
+            false
+    end.
+
+%% Rebalances queue masters/leaders of the given type across running
+%% nodes; VhostSpec and QueueSpec are regular expressions used to select
+%% the queues involved.
+-spec rebalance('all' | 'quorum' | 'classic', binary(), binary()) ->
+                       {ok, [{node(), pos_integer()}]} | {error, term()}.
+rebalance(Type, VhostSpec, QueueSpec) ->
+    %% We have not yet acquired the rebalance_queues global lock.
+    maybe_rebalance(get_rebalance_lock(self()), Type, VhostSpec, QueueSpec).
+
+%% First clause runs only when the global lock was acquired; the target
+%% per-node queue count is ceil(NumToRebalance / NumRunning), approximated
+%% below by adding 1 whenever the division has a remainder.
+maybe_rebalance({true, Id}, Type, VhostSpec, QueueSpec) ->
+    rabbit_log:info("Starting queue rebalance operation: '~s' for vhosts matching '~s' and queues matching '~s'",
+                    [Type, VhostSpec, QueueSpec]),
+    Running = rabbit_nodes:all_running(),
+    NumRunning = length(Running),
+    ToRebalance = [Q || Q <- rabbit_amqqueue:list(),
+                        filter_per_type(Type, Q),
+                        is_replicated(Q),
+                        is_match(amqqueue:get_vhost(Q), VhostSpec) andalso
+                            is_match(get_resource_name(amqqueue:get_name(Q)), QueueSpec)],
+    NumToRebalance = length(ToRebalance),
+    ByNode = group_by_node(ToRebalance),
+    Rem = case (NumToRebalance rem NumRunning) of
+              0 -> 0;
+              _ -> 1
+          end,
+    MaxQueuesDesired = (NumToRebalance div NumRunning) + Rem,
+    Result = iterative_rebalance(ByNode, MaxQueuesDesired),
+    global:del_lock(Id),
+    rabbit_log:info("Finished queue rebalance operation"),
+    Result;
+maybe_rebalance(false, _Type, _VhostSpec, _QueueSpec) ->
+    rabbit_log:warning("Queue rebalance operation is in progress, please wait."),
+    {error, rebalance_in_progress}.
+
+%% Queue-type filter corresponding to the rebalance Type argument.
+filter_per_type(all, _) ->
+    true;
+filter_per_type(quorum, Q) ->
+    ?amqqueue_is_quorum(Q);
+filter_per_type(classic, Q) ->
+    ?amqqueue_is_classic(Q).
+
+%% Module implementing queue_length/1, get_replicas/1 and
+%% transfer_leadership/2 for the given queue's type.
+rebalance_module(Q) when ?amqqueue_is_quorum(Q) ->
+    rabbit_quorum_queue;
+rebalance_module(Q) when ?amqqueue_is_classic(Q) ->
+    rabbit_mirror_queue_misc.
+ +get_resource_name(#resource{name = Name}) -> + Name. + +is_match(Subj, E) -> + nomatch /= re:run(Subj, E). + +iterative_rebalance(ByNode, MaxQueuesDesired) -> + case maybe_migrate(ByNode, MaxQueuesDesired) of + {ok, Summary} -> + rabbit_log:info("All queue masters are balanced"), + {ok, Summary}; + {migrated, Other} -> + iterative_rebalance(Other, MaxQueuesDesired); + {not_migrated, Other} -> + iterative_rebalance(Other, MaxQueuesDesired) + end. + +maybe_migrate(ByNode, MaxQueuesDesired) -> + maybe_migrate(ByNode, MaxQueuesDesired, maps:keys(ByNode)). + +maybe_migrate(ByNode, _, []) -> + {ok, maps:fold(fun(K, V, Acc) -> + {CQs, QQs} = lists:partition(fun({_, Q, _}) -> + ?amqqueue_is_classic(Q) + end, V), + [[{<<"Node name">>, K}, {<<"Number of quorum queues">>, length(QQs)}, + {<<"Number of classic queues">>, length(CQs)}] | Acc] + end, [], ByNode)}; +maybe_migrate(ByNode, MaxQueuesDesired, [N | Nodes]) -> + case maps:get(N, ByNode, []) of + [{_, Q, false} = Queue | Queues] = All when length(All) > MaxQueuesDesired -> + Name = amqqueue:get_name(Q), + Module = rebalance_module(Q), + OtherNodes = Module:get_replicas(Q) -- [N], + case OtherNodes of + [] -> + {not_migrated, update_not_migrated_queue(N, Queue, Queues, ByNode)}; + _ -> + [{Length, Destination} | _] = sort_by_number_of_queues(OtherNodes, ByNode), + rabbit_log:warning("Migrating queue ~p from node ~p with ~p queues to node ~p with ~p queues", + [Name, N, length(All), Destination, Length]), + case Module:transfer_leadership(Q, Destination) of + {migrated, NewNode} -> + rabbit_log:warning("Queue ~p migrated to ~p", [Name, NewNode]), + {migrated, update_migrated_queue(Destination, N, Queue, Queues, ByNode)}; + {not_migrated, Reason} -> + rabbit_log:warning("Error migrating queue ~p: ~p", [Name, Reason]), + {not_migrated, update_not_migrated_queue(N, Queue, Queues, ByNode)} + end + end; + [{_, _, true} | _] = All when length(All) > MaxQueuesDesired -> + rabbit_log:warning("Node ~p contains ~p queues, but 
all have already migrated. " + "Do nothing", [N, length(All)]), + maybe_migrate(ByNode, MaxQueuesDesired, Nodes); + All -> + rabbit_log:warning("Node ~p only contains ~p queues, do nothing", + [N, length(All)]), + maybe_migrate(ByNode, MaxQueuesDesired, Nodes) + end. + +update_not_migrated_queue(N, {Entries, Q, _}, Queues, ByNode) -> + maps:update(N, Queues ++ [{Entries, Q, true}], ByNode). + +update_migrated_queue(NewNode, OldNode, {Entries, Q, _}, Queues, ByNode) -> + maps:update_with(NewNode, + fun(L) -> L ++ [{Entries, Q, true}] end, + [{Entries, Q, true}], maps:update(OldNode, Queues, ByNode)). + +sort_by_number_of_queues(Nodes, ByNode) -> + lists:keysort(1, + lists:map(fun(Node) -> + {num_queues(Node, ByNode), Node} + end, Nodes)). + +num_queues(Node, ByNode) -> + length(maps:get(Node, ByNode, [])). + +group_by_node(Queues) -> + ByNode = lists:foldl(fun(Q, Acc) -> + Module = rebalance_module(Q), + Length = Module:queue_length(Q), + maps:update_with(amqqueue:qnode(Q), + fun(L) -> [{Length, Q, false} | L] end, + [{Length, Q, false}], Acc) + end, #{}, Queues), + maps:map(fun(_K, V) -> lists:keysort(1, V) end, ByNode). + +-spec with(name(), + qfun(A), + fun((not_found_or_absent()) -> rabbit_types:channel_exit())) -> + A | rabbit_types:channel_exit(). + +with(Name, F, E) -> + with(Name, F, E, 2000). + +with(#resource{} = Name, F, E, RetriesLeft) -> + case lookup(Name) of + {ok, Q} when ?amqqueue_state_is(Q, live) andalso RetriesLeft =:= 0 -> + %% Something bad happened to that queue, we are bailing out + %% on processing current request. + E({absent, Q, timeout}); + {ok, Q} when ?amqqueue_state_is(Q, stopped) andalso RetriesLeft =:= 0 -> + %% The queue was stopped and not migrated + E({absent, Q, stopped}); + %% The queue process has crashed with unknown error + {ok, Q} when ?amqqueue_state_is(Q, crashed) -> + E({absent, Q, crashed}); + %% The queue process has been stopped by a supervisor. 
+ %% In that case a synchronised mirror can take over + %% so we should retry. + {ok, Q} when ?amqqueue_state_is(Q, stopped) -> + %% The queue process was stopped by the supervisor + rabbit_misc:with_exit_handler( + fun () -> retry_wait(Q, F, E, RetriesLeft) end, + fun () -> F(Q) end); + %% The queue is supposed to be active. + %% The master node can go away or queue can be killed + %% so we retry, waiting for a mirror to take over. + {ok, Q} when ?amqqueue_state_is(Q, live) -> + %% We check is_process_alive(QPid) in case we receive a + %% nodedown (for example) in F() that has nothing to do + %% with the QPid. F() should be written s.t. that this + %% cannot happen, so we bail if it does since that + %% indicates a code bug and we don't want to get stuck in + %% the retry loop. + rabbit_misc:with_exit_handler( + fun () -> retry_wait(Q, F, E, RetriesLeft) end, + fun () -> F(Q) end); + {error, not_found} -> + E(not_found_or_absent_dirty(Name)) + end. + +-spec retry_wait(amqqueue:amqqueue(), + qfun(A), + fun((not_found_or_absent()) -> rabbit_types:channel_exit()), + non_neg_integer()) -> + A | rabbit_types:channel_exit(). + +retry_wait(Q, F, E, RetriesLeft) -> + Name = amqqueue:get_name(Q), + QPid = amqqueue:get_pid(Q), + QState = amqqueue:get_state(Q), + case {QState, is_replicated(Q)} of + %% We don't want to repeat an operation if + %% there are no mirrors to migrate to + {stopped, false} -> + E({absent, Q, stopped}); + _ -> + case rabbit_mnesia:is_process_alive(QPid) of + true -> + % rabbitmq-server#1682 + % The old check would have crashed here, + % instead, log it and run the exit fun. absent & alive is weird, + % but better than crashing with badmatch,true + rabbit_log:debug("Unexpected alive queue process ~p~n", [QPid]), + E({absent, Q, alive}); + false -> + ok % Expected result + end, + timer:sleep(30), + with(Name, F, E, RetriesLeft - 1) + end. + +-spec with(name(), qfun(A)) -> + A | rabbit_types:error(not_found_or_absent()). 
+ +with(Name, F) -> with(Name, F, fun (E) -> {error, E} end). + +-spec with_or_die(name(), qfun(A)) -> A | rabbit_types:channel_exit(). + +with_or_die(Name, F) -> + with(Name, F, die_fun(Name)). + +-spec die_fun(name()) -> + fun((not_found_or_absent()) -> rabbit_types:channel_exit()). + +die_fun(Name) -> + fun (not_found) -> not_found(Name); + ({absent, Q, Reason}) -> absent(Q, Reason) + end. + +-spec not_found(name()) -> rabbit_types:channel_exit(). + +not_found(R) -> rabbit_misc:protocol_error(not_found, "no ~s", [rabbit_misc:rs(R)]). + +-spec absent(amqqueue:amqqueue(), absent_reason()) -> + rabbit_types:channel_exit(). + +absent(Q, AbsentReason) -> + QueueName = amqqueue:get_name(Q), + QPid = amqqueue:get_pid(Q), + IsDurable = amqqueue:is_durable(Q), + priv_absent(QueueName, QPid, IsDurable, AbsentReason). + +-spec priv_absent(name(), pid(), boolean(), absent_reason()) -> + rabbit_types:channel_exit(). + +priv_absent(QueueName, QPid, true, nodedown) -> + %% The assertion of durability is mainly there because we mention + %% durability in the error message. That way we will hopefully + %% notice if at some future point our logic changes s.t. we get + %% here with non-durable queues. 
+ rabbit_misc:protocol_error( + not_found, + "home node '~s' of durable ~s is down or inaccessible", + [node(QPid), rabbit_misc:rs(QueueName)]); + +priv_absent(QueueName, _QPid, _IsDurable, stopped) -> + rabbit_misc:protocol_error( + not_found, + "~s process is stopped by supervisor", [rabbit_misc:rs(QueueName)]); + +priv_absent(QueueName, _QPid, _IsDurable, crashed) -> + rabbit_misc:protocol_error( + not_found, + "~s has crashed and failed to restart", [rabbit_misc:rs(QueueName)]); + +priv_absent(QueueName, _QPid, _IsDurable, timeout) -> + rabbit_misc:protocol_error( + not_found, + "failed to perform operation on ~s due to timeout", [rabbit_misc:rs(QueueName)]); + +priv_absent(QueueName, QPid, _IsDurable, alive) -> + rabbit_misc:protocol_error( + not_found, + "failed to perform operation on ~s: its master replica ~w may be stopping or being demoted", + [rabbit_misc:rs(QueueName), QPid]). + +-spec assert_equivalence + (amqqueue:amqqueue(), boolean(), boolean(), + rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) -> + 'ok' | rabbit_types:channel_exit() | rabbit_types:connection_exit(). + +assert_equivalence(Q, DurableDeclare, AutoDeleteDeclare, Args1, Owner) -> + QName = amqqueue:get_name(Q), + DurableQ = amqqueue:is_durable(Q), + AutoDeleteQ = amqqueue:is_auto_delete(Q), + ok = check_exclusive_access(Q, Owner, strict), + ok = rabbit_misc:assert_field_equivalence(DurableQ, DurableDeclare, QName, durable), + ok = rabbit_misc:assert_field_equivalence(AutoDeleteQ, AutoDeleteDeclare, QName, auto_delete), + ok = assert_args_equivalence(Q, Args1). + +-spec check_exclusive_access(amqqueue:amqqueue(), pid()) -> + 'ok' | rabbit_types:channel_exit(). + +check_exclusive_access(Q, Owner) -> check_exclusive_access(Q, Owner, lax). 
+
+%% Third argument is the match mode: in 'lax' mode a queue with no
+%% exclusive owner is also acceptable; in 'strict' mode (used by
+%% assert_equivalence/5) the owner must match exactly.
+check_exclusive_access(Q, Owner, _MatchType)
+  when ?amqqueue_exclusive_owner_is(Q, Owner) ->
+    ok;
+check_exclusive_access(Q, _ReaderPid, lax)
+  when ?amqqueue_exclusive_owner_is(Q, none) ->
+    ok;
+check_exclusive_access(Q, _ReaderPid, _MatchType) ->
+    QueueName = amqqueue:get_name(Q),
+    rabbit_misc:protocol_error(
+      resource_locked,
+      "cannot obtain exclusive access to locked ~s. It could be originally "
+      "declared on another connection or the exclusive property value does not "
+      "match that of the original declaration.",
+      [rabbit_misc:rs(QueueName)]).
+
+-spec with_exclusive_access_or_die(name(), pid(), qfun(A)) ->
+          A | rabbit_types:channel_exit().
+
+with_exclusive_access_or_die(Name, ReaderPid, F) ->
+    with_or_die(Name,
+                fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end).
+
+%% Asserts that the declared arguments match the queue's existing
+%% arguments for every key that has a declare-time validator.
+assert_args_equivalence(Q, RequiredArgs) ->
+    QueueName = amqqueue:get_name(Q),
+    Args = amqqueue:get_arguments(Q),
+    rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName,
+                                        [Key || {Key, _Fun} <- declare_args()]).
+
+check_declare_arguments(QueueName, Args) ->
+    check_arguments(QueueName, Args, declare_args()).
+
+check_consume_arguments(QueueName, Args) ->
+    check_arguments(QueueName, Args, consume_args()).
+
+%% Runs each validator whose key is present in Args; raises a
+%% precondition_failed protocol error for the first invalid argument.
+check_arguments(QueueName, Args, Validators) ->
+    [case rabbit_misc:table_lookup(Args, Key) of
+         undefined -> ok;
+         TypeVal -> case Fun(TypeVal, Args) of
+                        ok -> ok;
+                        {error, Error} -> rabbit_misc:protocol_error(
+                                            precondition_failed,
+                                            "invalid arg '~s' for ~s: ~255p",
+                                            [Key, rabbit_misc:rs(QueueName),
+                                             Error])
+                    end
+     end || {Key, Fun} <- Validators],
+    ok.
+
+%% Validators for queue.declare x-arguments. Each fun is called as
+%% Fun({Type, Value}, AllArgs) and returns ok | {error, Reason}.
+declare_args() ->
+ [{<<"x-expires">>, fun check_expires_arg/2},
+ {<<"x-message-ttl">>, fun check_message_ttl_arg/2},
+ {<<"x-dead-letter-exchange">>, fun check_dlxname_arg/2},
+ {<<"x-dead-letter-routing-key">>, fun check_dlxrk_arg/2},
+ {<<"x-max-length">>, fun check_non_neg_int_arg/2},
+ {<<"x-max-length-bytes">>, fun check_non_neg_int_arg/2},
+ {<<"x-max-in-memory-length">>, fun check_non_neg_int_arg/2},
+ {<<"x-max-in-memory-bytes">>, fun check_non_neg_int_arg/2},
+ {<<"x-max-priority">>, fun check_max_priority_arg/2},
+ {<<"x-overflow">>, fun check_overflow/2},
+ {<<"x-queue-mode">>, fun check_queue_mode/2},
+ {<<"x-single-active-consumer">>, fun check_single_active_consumer_arg/2},
+ {<<"x-queue-type">>, fun check_queue_type/2},
+ {<<"x-quorum-initial-group-size">>, fun check_initial_cluster_size_arg/2},
+ {<<"x-max-age">>, fun check_max_age_arg/2},
+ {<<"x-max-segment-size">>, fun check_non_neg_int_arg/2},
+ {<<"x-initial-cluster-size">>, fun check_initial_cluster_size_arg/2},
+ {<<"x-queue-leader-locator">>, fun check_queue_leader_locator_arg/2}].
+
+%% Validators for basic.consume x-arguments.
+consume_args() -> [{<<"x-priority">>, fun check_int_arg/2},
+ {<<"x-cancel-on-ha-failover">>, fun check_bool_arg/2}].
+
+%% Accept any of the AMQP integer field types (?INTEGER_ARG_TYPES).
+check_int_arg({Type, _}, _) ->
+ case lists:member(Type, ?INTEGER_ARG_TYPES) of
+ true -> ok;
+ false -> {error, {unacceptable_type, Type}}
+ end.
+
+check_bool_arg({bool, _}, _) -> ok;
+check_bool_arg({Type, _}, _) -> {error, {unacceptable_type, Type}}.
+
+check_non_neg_int_arg({Type, Val}, Args) ->
+ case check_int_arg({Type, Val}, Args) of
+ ok when Val >= 0 -> ok;
+ ok -> {error, {value_negative, Val}};
+ Error -> Error
+ end.
+
+%% x-expires must be a strictly positive integer within the expiry range
+%% accepted by rabbit_misc:check_expiry/1.
+check_expires_arg({Type, Val}, Args) ->
+ case check_int_arg({Type, Val}, Args) of
+ ok when Val == 0 -> {error, {value_zero, Val}};
+ ok -> rabbit_misc:check_expiry(Val);
+ Error -> Error
+ end.
+
+%% x-message-ttl may be zero (expire immediately), unlike x-expires.
+check_message_ttl_arg({Type, Val}, Args) ->
+ case check_int_arg({Type, Val}, Args) of
+ ok -> rabbit_misc:check_expiry(Val);
+ Error -> Error
+ end.
+
+%% x-max-priority: non-negative and bounded by ?MAX_SUPPORTED_PRIORITY.
+check_max_priority_arg({Type, Val}, Args) ->
+ case check_non_neg_int_arg({Type, Val}, Args) of
+ ok when Val =< ?MAX_SUPPORTED_PRIORITY -> ok;
+ ok -> {error, {max_value_exceeded, Val}};
+ Error -> Error
+ end.
+
+%% x-single-active-consumer is simply a boolean; delegate directly instead
+%% of re-wrapping the ok/error result in a redundant case expression.
+check_single_active_consumer_arg(TypeVal, Args) ->
+ check_bool_arg(TypeVal, Args).
+
+%% Initial cluster size must be a strictly positive integer.
+check_initial_cluster_size_arg({Type, Val}, Args) ->
+ case check_non_neg_int_arg({Type, Val}, Args) of
+ ok when Val == 0 -> {error, {value_zero, Val}};
+ ok -> ok;
+ Error -> Error
+ end.
+
+%% x-max-age must be a string such as "7D" or "12h"; only the error result
+%% of check_max_age/1 matters here, the computed millisecond value is
+%% discarded.
+check_max_age_arg({longstr, Val}, _Args) ->
+ case check_max_age(Val) of
+ {error, _} = E ->
+ E;
+ _ ->
+ ok
+ end;
+check_max_age_arg({Type, _}, _Args) ->
+ {error, {unacceptable_type, Type}}.
+
+%% Parse "<positive integer><unit>" (unit one of Y/M/D/h/m/s) and return
+%% the age in milliseconds, or {error, invalid_max_age}.
+check_max_age(MaxAge) ->
+ case re:run(MaxAge, "(^[0-9]*)(.*)", [{capture, all_but_first, list}]) of
+ {match, [Value, Unit]} ->
+ case list_to_integer(Value) of
+ I when I > 0 ->
+ case lists:member(Unit, ["Y", "M", "D", "h", "m", "s"]) of
+ true ->
+ %% reuse the already-converted integer rather
+ %% than calling list_to_integer/1 a second time
+ I * unit_value_in_ms(Unit);
+ false ->
+ {error, invalid_max_age}
+ end;
+ _ ->
+ {error, invalid_max_age}
+ end;
+ _ ->
+ {error, invalid_max_age}
+ end.
+
+%% Milliseconds per unit; months/years use the 30/365-day approximations.
+unit_value_in_ms("Y") ->
+ 365 * unit_value_in_ms("D");
+unit_value_in_ms("M") ->
+ 30 * unit_value_in_ms("D");
+unit_value_in_ms("D") ->
+ 24 * unit_value_in_ms("h");
+unit_value_in_ms("h") ->
+ 3600 * unit_value_in_ms("s");
+unit_value_in_ms("m") ->
+ 60 * unit_value_in_ms("s");
+unit_value_in_ms("s") ->
+ 1000.
+
+%% Note that the validity of x-dead-letter-exchange is already verified
+%% by rabbit_channel's queue.declare handler.
+check_dlxname_arg({longstr, _}, _) -> ok;
+check_dlxname_arg({Type, _}, _) -> {error, {unacceptable_type, Type}}.
+
+%% x-dead-letter-routing-key only makes sense alongside a DLX.
+check_dlxrk_arg({longstr, _}, Args) ->
+ case rabbit_misc:table_lookup(Args, <<"x-dead-letter-exchange">>) of
+ undefined -> {error, routing_key_but_no_dlx_defined};
+ _ -> ok
+ end;
+check_dlxrk_arg({Type, _}, _Args) ->
+ {error, {unacceptable_type, Type}}.
+
+check_overflow({longstr, Val}, _Args) ->
+ case lists:member(Val, [<<"drop-head">>,
+ <<"reject-publish">>,
+ <<"reject-publish-dlx">>]) of
+ true -> ok;
+ false -> {error, invalid_overflow}
+ end;
+check_overflow({Type, _}, _Args) ->
+ {error, {unacceptable_type, Type}}.
+
+check_queue_leader_locator_arg({longstr, Val}, _Args) ->
+ case lists:member(Val, [<<"client-local">>,
+ <<"random">>,
+ <<"least-leaders">>]) of
+ true -> ok;
+ false -> {error, invalid_queue_locator_arg}
+ end;
+check_queue_leader_locator_arg({Type, _}, _Args) ->
+ {error, {unacceptable_type, Type}}.
+
+check_queue_mode({longstr, Val}, _Args) ->
+ case lists:member(Val, [<<"default">>, <<"lazy">>]) of
+ true -> ok;
+ false -> {error, invalid_queue_mode}
+ end;
+check_queue_mode({Type, _}, _Args) ->
+ {error, {unacceptable_type, Type}}.
+
+check_queue_type({longstr, Val}, _Args) ->
+ case lists:member(Val, [<<"classic">>, <<"quorum">>, <<"stream">>]) of
+ true -> ok;
+ false -> {error, invalid_queue_type}
+ end;
+check_queue_type({Type, _}, _Args) ->
+ {error, {unacceptable_type, Type}}.
+
+-spec list() -> [amqqueue:amqqueue()].
+
+%% All queues in all vhosts, with a retry in case the amqqueue record
+%% version is migrated concurrently (see list_with_possible_retry/1).
+list() ->
+ list_with_possible_retry(fun do_list/0).
+
+do_list() ->
+ mnesia:dirty_match_object(rabbit_queue, amqqueue:pattern_match_all()).
+
+-spec count() -> non_neg_integer().
+
+count() ->
+ mnesia:table_info(rabbit_queue, size).
+
+-spec list_names() -> [rabbit_amqqueue:name()].
+
+list_names() -> mnesia:dirty_all_keys(rabbit_queue).
+
+list_names(VHost) -> [amqqueue:get_name(Q) || Q <- list(VHost)].
+
+%% Names of non-crashed queues whose (leader) process runs on this node.
+list_local_names() ->
+ [ amqqueue:get_name(Q) || Q <- list(),
+ amqqueue:get_state(Q) =/= crashed, is_local_to_node(amqqueue:get_pid(Q), node())].
+
+list_local_names_down() ->
+ [ amqqueue:get_name(Q) || Q <- list(),
+ is_down(Q),
+ is_local_to_node(amqqueue:get_pid(Q), node())].
+
+%% A queue whose info/2 call fails for any reason is considered down.
+is_down(Q) ->
+ try
+ info(Q, [state]) == [{state, down}]
+ catch
+ _:_ ->
+ true
+ end.
+
+
+-spec sample_local_queues() -> [amqqueue:amqqueue()].
+sample_local_queues() -> sample_n_by_name(list_local_names(), 300).
+
+-spec sample_n_by_name([rabbit_amqqueue:name()], pos_integer()) -> [amqqueue:amqqueue()].
+%% Random sample (with duplicates removed by usort) of up to N queues.
+%% NOTE(review): the fold stops accumulating once Acc holds 100 entries,
+%% even when N > 100 — confirm whether that cap was meant to be N.
+sample_n_by_name([], _N) ->
+ [];
+sample_n_by_name(Names, N) when is_list(Names) andalso is_integer(N) andalso N > 0 ->
+ %% lists:nth/2 throws when position is > list length
+ M = erlang:min(N, length(Names)),
+ Ids = lists:foldl(fun( _, Acc) when length(Acc) >= 100 ->
+ Acc;
+ (_, Acc) ->
+ Pick = lists:nth(rand:uniform(M), Names),
+ [Pick | Acc]
+ end,
+ [], lists:seq(1, M)),
+ lists:map(fun (Id) ->
+ {ok, Q} = rabbit_amqqueue:lookup(Id),
+ Q
+ end,
+ lists:usort(Ids)).
+
+-spec sample_n([amqqueue:amqqueue()], pos_integer()) -> [amqqueue:amqqueue()].
+sample_n([], _N) ->
+ [];
+sample_n(Queues, N) when is_list(Queues) andalso is_integer(N) andalso N > 0 ->
+ Names = [amqqueue:get_name(Q) || Q <- Queues],
+ sample_n_by_name(Names, N).
+
+
+-spec list_by_type(atom()) -> [amqqueue:amqqueue()].
+
+%% Accepts the short atoms 'classic'/'quorum' as aliases for the queue
+%% type modules; reads the durable table inside a sync transaction.
+list_by_type(classic) -> list_by_type(rabbit_classic_queue);
+list_by_type(quorum) -> list_by_type(rabbit_quorum_queue);
+list_by_type(Type) ->
+ {atomic, Qs} =
+ mnesia:sync_transaction(
+ fun () ->
+ mnesia:match_object(rabbit_durable_queue,
+ amqqueue:pattern_match_on_type(Type),
+ read)
+ end),
+ Qs.
+
+-spec list_local_quorum_queue_names() -> [rabbit_amqqueue:name()].
+
+list_local_quorum_queue_names() ->
+ [ amqqueue:get_name(Q) || Q <- list_by_type(quorum),
+ amqqueue:get_state(Q) =/= crashed,
+ lists:member(node(), get_quorum_nodes(Q))].
+
+-spec list_local_quorum_queues() -> [amqqueue:amqqueue()].
+%% Quorum queues that have a member on this node.
+list_local_quorum_queues() ->
+ [ Q || Q <- list_by_type(quorum),
+ amqqueue:get_state(Q) =/= crashed,
+ lists:member(node(), get_quorum_nodes(Q))].
+
+-spec list_local_leaders() -> [amqqueue:amqqueue()].
+%% Quorum queues whose Raft leader is on this node.
+list_local_leaders() ->
+ [ Q || Q <- list(),
+ amqqueue:is_quorum(Q),
+ amqqueue:get_state(Q) =/= crashed, amqqueue:get_leader(Q) =:= node()].
+
+-spec list_local_followers() -> [amqqueue:amqqueue()].
+%% Quorum queues for which this node is a recoverable follower.
+list_local_followers() ->
+ [Q
+ || Q <- list(),
+ amqqueue:is_quorum(Q),
+ amqqueue:get_state(Q) =/= crashed,
+ amqqueue:get_leader(Q) =/= node(),
+ rabbit_quorum_queue:is_recoverable(Q)
+ ].
+
+-spec list_local_mirrored_classic_queues() -> [amqqueue:amqqueue()].
+list_local_mirrored_classic_queues() ->
+ [ Q || Q <- list(),
+ amqqueue:get_state(Q) =/= crashed,
+ amqqueue:is_classic(Q),
+ is_local_to_node(amqqueue:get_pid(Q), node()),
+ is_replicated(Q)].
+
+-spec list_local_mirrored_classic_names() -> [rabbit_amqqueue:name()].
+list_local_mirrored_classic_names() ->
+ [ amqqueue:get_name(Q) || Q <- list(),
+ amqqueue:get_state(Q) =/= crashed,
+ amqqueue:is_classic(Q),
+ is_local_to_node(amqqueue:get_pid(Q), node()),
+ is_replicated(Q)].
+
+-spec list_local_mirrored_classic_without_synchronised_mirrors() ->
+ [amqqueue:amqqueue()].
+list_local_mirrored_classic_without_synchronised_mirrors() ->
+ [ Q || Q <- list(),
+ amqqueue:get_state(Q) =/= crashed,
+ amqqueue:is_classic(Q),
+ %% filter out exclusive queues as they won't actually be mirrored
+ is_not_exclusive(Q),
+ is_local_to_node(amqqueue:get_pid(Q), node()),
+ is_replicated(Q),
+ not has_synchronised_mirrors_online(Q)].
+
+-spec list_local_mirrored_classic_without_synchronised_mirrors_for_cli() ->
+ [#{binary => any()}].
+%% Same as above but shaped as maps for CLI/JSON output.
+list_local_mirrored_classic_without_synchronised_mirrors_for_cli() ->
+ ClassicQs = list_local_mirrored_classic_without_synchronised_mirrors(),
+ [begin
+ #resource{name = Name} = amqqueue:get_name(Q),
+ #{
+ <<"readable_name">> => rabbit_data_coercion:to_binary(rabbit_misc:rs(amqqueue:get_name(Q))),
+ <<"name">> => Name,
+ <<"virtual_host">> => amqqueue:get_vhost(Q),
+ <<"type">> => <<"classic">>
+ }
+ end || Q <- ClassicQs].
+
+%% Classic queues are identified by their pid's node; quorum queues by the
+%% {Name, Leader} tuple's leader node.
+is_local_to_node(QPid, Node) when ?IS_CLASSIC(QPid) ->
+ Node =:= node(QPid);
+is_local_to_node({_, Leader} = QPid, Node) when ?IS_QUORUM(QPid) ->
+ Node =:= Leader.
+
+-spec list(rabbit_types:vhost()) -> [amqqueue:amqqueue()].
+
+list(VHostPath) ->
+ list(VHostPath, rabbit_queue).
+
+list(VHostPath, TableName) ->
+ list_with_possible_retry(fun() -> do_list(VHostPath, TableName) end).
+
+%% Not dirty_match_object since that would not be transactional when used in a
+%% tx context
+do_list(VHostPath, TableName) ->
+ mnesia:async_dirty(
+ fun () ->
+ mnesia:match_object(
+ TableName,
+ amqqueue:pattern_match_on_name(rabbit_misc:r(VHostPath, queue)),
+ read)
+ end).
+
+list_with_possible_retry(Fun) ->
+ %% amqqueue migration:
+ %% The `rabbit_queue` or `rabbit_durable_queue` tables
+ %% might be migrated between the time we query the pattern
+ %% (with the `amqqueue` module) and the time we call
+ %% `mnesia:dirty_match_object()`. This would lead to an empty list
+ %% (no object matching the now incorrect pattern), not a Mnesia
+ %% error.
+ %%
+ %% So if the result is an empty list and the version of the
+ %% `amqqueue` record changed in between, we retry the operation.
+ %%
+ %% However, we don't do this if inside a Mnesia transaction: we
+ %% could end up with a live lock between this started transaction
+ %% and the Mnesia table migration which is blocked (but the
+ %% rabbit_feature_flags lock is held).
+ AmqqueueRecordVersion = amqqueue:record_version_to_use(),
+ case Fun() of
+ [] ->
+ case mnesia:is_transaction() of
+ true ->
+ [];
+ false ->
+ %% retry once if the record version changed under us
+ case amqqueue:record_version_to_use() of
+ AmqqueueRecordVersion -> [];
+ _ -> Fun()
+ end
+ end;
+ Ret ->
+ Ret
+ end.
+
+-spec list_down(rabbit_types:vhost()) -> [amqqueue:amqqueue()].
+
+%% Durable queues of the vhost that are not currently present (running),
+%% i.e. in rabbit_durable_queue but absent from rabbit_queue.
+list_down(VHostPath) ->
+ case rabbit_vhost:exists(VHostPath) of
+ false -> [];
+ true ->
+ Present = list(VHostPath),
+ Durable = list(VHostPath, rabbit_durable_queue),
+ PresentS = sets:from_list([amqqueue:get_name(Q) || Q <- Present]),
+ sets:to_list(sets:filter(fun (Q) ->
+ N = amqqueue:get_name(Q),
+ not sets:is_element(N, PresentS)
+ end, sets:from_list(Durable))) 
+ end.
+
+count(VHost) ->
+ try
+ %% this is certainly suboptimal but there is no way to count
+ %% things using a secondary index in Mnesia. Our counter-table-per-node
+ %% won't work here because with master migration of mirrored queues
+ %% the "ownership" of queues by nodes becomes a non-trivial problem
+ %% that requires a proper consensus algorithm.
+ length(list_for_count(VHost))
+ catch _:Err ->
+ %% counting is best-effort: log and report zero instead of failing
+ rabbit_log:error("Failed to fetch number of queues in vhost ~p:~n~p~n",
+ [VHost, Err]),
+ 0
+ end.
+
+list_for_count(VHost) ->
+ list_with_possible_retry(
+ fun() ->
+ mnesia:dirty_index_read(rabbit_queue,
+ VHost,
+ amqqueue:field_vhost())
+ end).
+
+-spec info_keys() -> rabbit_types:info_keys().
+
+%% It should no default to classic queue keys, but a subset of those that must be shared
+%% by all queue types. Not sure this is even being used, so will leave it here for backwards
+%% compatibility. Each queue type handles now info(Q, all_keys) with the keys it supports.
+info_keys() -> rabbit_amqqueue_process:info_keys().
+
+%% Map F over Qs, dropping entries whose evaluation exits.
+map(Qs, F) -> rabbit_misc:filter_exit_map(F, Qs).
+
+%% A crashed queue is by definition not "unresponsive" (it is known-down);
+%% otherwise probe the queue process/leader with Timeout and report true
+%% only on timeout.
+is_unresponsive(Q, _Timeout) when ?amqqueue_state_is(Q, crashed) ->
+ false;
+is_unresponsive(Q, Timeout) when ?amqqueue_is_classic(Q) ->
+ QPid = amqqueue:get_pid(Q),
+ try
+ delegate:invoke(QPid, {gen_server2, call, [{info, [name]}, Timeout]}),
+ false
+ catch
+ %% TODO catch any exit??
+ exit:{timeout, _} ->
+ true
+ end;
+is_unresponsive(Q, Timeout) when ?amqqueue_is_quorum(Q) ->
+ try
+ Leader = amqqueue:get_pid(Q),
+ case rabbit_fifo_client:stat(Leader, Timeout) of
+ {ok, _, _} -> false;
+ {timeout, _} -> true;
+ {error, _} -> true
+ end
+ catch
+ exit:{timeout, _} ->
+ true
+ end.
+
+format(Q) when ?amqqueue_is_quorum(Q) -> rabbit_quorum_queue:format(Q);
+format(Q) -> rabbit_amqqueue_process:format(Q).
+
+-spec info(amqqueue:amqqueue()) -> rabbit_types:infos().
+
+%% Full info for a queue; the queue type module decides the key set.
+info(Q) when ?is_amqqueue(Q) -> rabbit_queue_type:info(Q, all_keys).
+
+
+-spec info(amqqueue:amqqueue(), rabbit_types:info_keys()) ->
+ rabbit_types:infos().
+
+info(Q, Items) when ?is_amqqueue(Q) ->
+ rabbit_queue_type:info(Q, Items).
+
+info_down(Q, DownReason) ->
+ rabbit_queue_type:info_down(Q, DownReason).
+
+info_down(Q, Items, DownReason) ->
+ rabbit_queue_type:info_down(Q, Items, DownReason).
+
+-spec info_all(rabbit_types:vhost()) -> [rabbit_types:infos()].
+
+%% Info for running queues plus down-annotated info for absent durables.
+info_all(VHostPath) ->
+ map(list(VHostPath), fun (Q) -> info(Q) end) ++
+ map(list_down(VHostPath), fun (Q) -> info_down(Q, down) end).
+
+-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys()) ->
+ [rabbit_types:infos()].
+
+info_all(VHostPath, Items) ->
+ map(list(VHostPath), fun (Q) -> info(Q, Items) end) ++
+ map(list_down(VHostPath), fun (Q) -> info_down(Q, Items, down) end).
+
+emit_info_local(VHostPath, Items, Ref, AggregatorPid) ->
+ rabbit_control_misc:emitting_map_with_exit_handler(
+ AggregatorPid, Ref, fun(Q) -> info(Q, Items) end, list_local(VHostPath)).
+
+%% Spawn an emit_info_local/4 worker on every node and wait for all of
+%% them to finish; results stream to AggregatorPid tagged with Ref.
+emit_info_all(Nodes, VHostPath, Items, Ref, AggregatorPid) ->
+ Pids = [ spawn_link(Node, rabbit_amqqueue, emit_info_local, [VHostPath, Items, Ref, AggregatorPid]) || Node <- Nodes ],
+ rabbit_control_misc:await_emitters_termination(Pids).
+
+%% Like emit_info_all/5 but collects the emitted items in this process.
+collect_info_all(VHostPath, Items) ->
+ Nodes = rabbit_nodes:all_running(),
+ Ref = make_ref(),
+ Pids = [ spawn_link(Node, rabbit_amqqueue, emit_info_local, [VHostPath, Items, Ref, self()]) || Node <- Nodes ],
+ rabbit_control_misc:await_emitters_termination(Pids),
+ wait_for_queues(Ref, length(Pids), []).
+
+%% Drain {Ref, ...} messages until every emitter reported 'finished';
+%% gives up (returning what was gathered) after 1s of silence.
+wait_for_queues(Ref, N, Acc) ->
+ receive
+ {Ref, finished} when N == 1 ->
+ Acc;
+ {Ref, finished} ->
+ wait_for_queues(Ref, N - 1, Acc);
+ {Ref, Items, continue} ->
+ wait_for_queues(Ref, N, [Items | Acc])
+ after
+ 1000 ->
+ Acc
+ end.
+
+emit_info_down(VHostPath, Items, Ref, AggregatorPid) ->
+ rabbit_control_misc:emitting_map_with_exit_handler(
+ AggregatorPid, Ref, fun(Q) -> info_down(Q, Items, down) end,
+ list_down(VHostPath)).
+
+emit_unresponsive_local(VHostPath, Items, Timeout, Ref, AggregatorPid) ->
+ rabbit_control_misc:emitting_map_with_exit_handler(
+ AggregatorPid, Ref, fun(Q) -> case is_unresponsive(Q, Timeout) of
+ true -> info_down(Q, Items, unresponsive);
+ false -> []
+ end
+ end, list_local(VHostPath)
+ ).
+
+emit_unresponsive(Nodes, VHostPath, Items, Timeout, Ref, AggregatorPid) ->
+ Pids = [ spawn_link(Node, rabbit_amqqueue, emit_unresponsive_local,
+ [VHostPath, Items, Timeout, Ref, AggregatorPid]) || Node <- Nodes ],
+ rabbit_control_misc:await_emitters_termination(Pids).
+
+info_local(VHostPath) ->
+ map(list_local(VHostPath), fun (Q) -> info(Q, [name]) end).
+
+%% Non-crashed queues of the vhost hosted on this node.
+list_local(VHostPath) ->
+ [Q || Q <- list(VHostPath),
+ amqqueue:get_state(Q) =/= crashed, is_local_to_node(amqqueue:get_pid(Q), node())].
+
+-spec force_event_refresh(reference()) -> 'ok'.
+
+% Note: https://www.pivotaltracker.com/story/show/166962656
+% This event is necessary for the stats timer to be initialized with
+% the correct values once the management agent has started
+force_event_refresh(Ref) ->
+ %% note: quorum queues emit stats on periodic ticks that run unconditionally,
+ %% so force_event_refresh is unnecessary (and, in fact, would only produce log noise) for QQs.
+ ClassicQs = list_by_type(rabbit_classic_queue),
+ [gen_server2:cast(amqqueue:get_pid(Q),
+ {force_event_refresh, Ref}) || Q <- ClassicQs],
+ ok.
+
+-spec notify_policy_changed(amqqueue:amqqueue()) -> 'ok'.
+notify_policy_changed(Q) when ?is_amqqueue(Q) ->
+ rabbit_queue_type:policy_changed(Q).
+
+-spec consumers(amqqueue:amqqueue()) ->
+ [{pid(), rabbit_types:ctag(), boolean(), non_neg_integer(),
+ boolean(), atom(),
+ rabbit_framing:amqp_table(), rabbit_types:username()}].
+
+%% Consumer list per queue type: classic asks the queue process, quorum
+%% runs a local Ra query, streams currently cannot report consumers.
+consumers(Q) when ?amqqueue_is_classic(Q) ->
+ QPid = amqqueue:get_pid(Q),
+ delegate:invoke(QPid, {gen_server2, call, [consumers, infinity]});
+consumers(Q) when ?amqqueue_is_quorum(Q) ->
+ QPid = amqqueue:get_pid(Q),
+ case ra:local_query(QPid, fun rabbit_fifo:query_consumers/1) of
+ {ok, {_, Result}, _} -> maps:values(Result);
+ _ -> []
+ end;
+consumers(Q) when ?amqqueue_is_stream(Q) ->
+ %% TODO how??? they only exist on the channel
+ %% we could list the offset listener on the writer but we don't even have a consumer tag,
+ %% only a (channel) pid and offset
+ [].
+
+-spec consumer_info_keys() -> rabbit_types:info_keys().
+
+consumer_info_keys() -> ?CONSUMER_INFO_KEYS.
+
+-spec consumers_all(rabbit_types:vhost()) ->
+ [{name(), pid(), rabbit_types:ctag(), boolean(),
+ non_neg_integer(), rabbit_framing:amqp_table()}].
+
+consumers_all(VHostPath) ->
+ ConsumerInfoKeys = consumer_info_keys(),
+ lists:append(
+ map(list(VHostPath),
+ fun(Q) -> get_queue_consumer_info(Q, ConsumerInfoKeys) end)).
+
+%% Spawn an emit_consumers_local/3 worker per node; results stream to
+%% AggregatorPid tagged with Ref.
+emit_consumers_all(Nodes, VHostPath, Ref, AggregatorPid) ->
+ Pids = [ spawn_link(Node, rabbit_amqqueue, emit_consumers_local, [VHostPath, Ref, AggregatorPid]) || Node <- Nodes ],
+ rabbit_control_misc:await_emitters_termination(Pids),
+ ok.
+
+emit_consumers_local(VHostPath, Ref, AggregatorPid) ->
+ ConsumerInfoKeys = consumer_info_keys(),
+ rabbit_control_misc:emitting_map(
+ AggregatorPid, Ref,
+ fun(Q) -> get_queue_consumer_info(Q, ConsumerInfoKeys) end,
+ list_local(VHostPath)).
+
+%% Zip each consumer tuple from consumers/1 with the info key names.
+get_queue_consumer_info(Q, ConsumerInfoKeys) ->
+ [lists:zip(ConsumerInfoKeys,
+ [amqqueue:get_name(Q), ChPid, CTag,
+ AckRequired, Prefetch, Active, ActivityStatus, Args]) ||
+ {ChPid, CTag, AckRequired, Prefetch, Active, ActivityStatus, Args, _} <- consumers(Q)].
+
+-spec stat(amqqueue:amqqueue()) ->
+ {'ok', non_neg_integer(), non_neg_integer()}.
+stat(Q) ->
+ rabbit_queue_type:stat(Q).
+
+-spec pid_of(amqqueue:amqqueue()) ->
+ pid().
+
+pid_of(Q) -> amqqueue:get_pid(Q).
+
+-spec pid_of(rabbit_types:vhost(), rabbit_misc:resource_name()) ->
+ pid() | rabbit_types:error('not_found').
+
+pid_of(VHost, QueueName) ->
+ case lookup(rabbit_misc:r(VHost, queue, QueueName)) of
+ {ok, Q} -> pid_of(Q);
+ {error, not_found} = E -> E
+ end.
+
+-spec delete_exclusive(qpids(), pid()) -> 'ok'.
+
+delete_exclusive(QPids, ConnId) ->
+ rabbit_amqqueue_common:delete_exclusive(QPids, ConnId).
+
+-spec delete_immediately(qpids()) -> 'ok'.
+
+%% Immediate deletion only applies to classic queue pids; quorum queues
+%% cannot be deleted this way and are reported back as an error.
+delete_immediately(QPids) ->
+ {Classic, Quorum} = filter_pid_per_type(QPids),
+ [gen_server2:cast(QPid, delete_immediately) || QPid <- Classic],
+ case Quorum of
+ [] -> ok;
+ _ -> {error, cannot_delete_quorum_queues, Quorum}
+ end.
+
+delete_immediately_by_resource(Resources) ->
+ {Classic, Quorum} = filter_resource_per_type(Resources),
+ [gen_server2:cast(QPid, delete_immediately) || {_, QPid} <- Classic],
+ [rabbit_quorum_queue:delete_immediately(Resource, QPid)
+ || {Resource, QPid} <- Quorum],
+ ok.
+
+-spec delete
+ (amqqueue:amqqueue(), 'false', 'false', rabbit_types:username()) ->
+ qlen() |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()};
+ (amqqueue:amqqueue(), 'true' , 'false', rabbit_types:username()) ->
+ qlen() | rabbit_types:error('in_use') |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()};
+ (amqqueue:amqqueue(), 'false', 'true', rabbit_types:username()) ->
+ qlen() | rabbit_types:error('not_empty') |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()};
+ (amqqueue:amqqueue(), 'true' , 'true', rabbit_types:username()) ->
+ qlen() |
+ rabbit_types:error('in_use') |
+ rabbit_types:error('not_empty') |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+%% Delete the queue, honouring the if-unused / if-empty conditions;
+%% dispatched to the queue type implementation.
+delete(Q, IfUnused, IfEmpty, ActingUser) ->
+ rabbit_queue_type:delete(Q, IfUnused, IfEmpty, ActingUser).
+
+%% delete_crashed* INCLUDED FOR BACKWARDS COMPATBILITY REASONS
+delete_crashed(Q) when ?amqqueue_is_classic(Q) ->
+ rabbit_classic_queue:delete_crashed(Q).
+
+delete_crashed(Q, ActingUser) when ?amqqueue_is_classic(Q) ->
+ rabbit_classic_queue:delete_crashed(Q, ActingUser).
+
+-spec delete_crashed_internal(amqqueue:amqqueue(), rabbit_types:username()) -> 'ok'.
+delete_crashed_internal(Q, ActingUser) when ?amqqueue_is_classic(Q) ->
+ rabbit_classic_queue:delete_crashed_internal(Q, ActingUser).
+
+-spec purge(amqqueue:amqqueue()) -> qlen().
+purge(Q) when ?is_amqqueue(Q) ->
+ rabbit_queue_type:purge(Q).
+
+-spec requeue(name(),
+ {rabbit_fifo:consumer_tag(), [msg_id()]},
+ rabbit_queue_type:state()) ->
+ {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()}.
+%% Requeue is reject-with-requeue.
+requeue(QRef, {CTag, MsgIds}, QStates) ->
+ reject(QRef, true, {CTag, MsgIds}, QStates).
+
+-spec ack(name(),
+ {rabbit_fifo:consumer_tag(), [msg_id()]},
+ rabbit_queue_type:state()) ->
+ {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()}.
+%% Acknowledge: settle the message ids as 'complete'.
+ack(QPid, {CTag, MsgIds}, QueueStates) ->
+ rabbit_queue_type:settle(QPid, complete, CTag, MsgIds, QueueStates).
+
+
+-spec reject(name(),
+ boolean(),
+ {rabbit_fifo:consumer_tag(), [msg_id()]},
+ rabbit_queue_type:state()) ->
+ {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()}.
+%% Reject: settle as 'requeue' or 'discard' depending on the flag.
+reject(QRef, Requeue, {CTag, MsgIds}, QStates) ->
+ Op = case Requeue of
+ true -> requeue;
+ false -> discard
+ end,
+ rabbit_queue_type:settle(QRef, Op, CTag, MsgIds, QStates).
+
+-spec notify_down_all(qpids(), pid()) -> ok_or_errors().
+notify_down_all(QPids, ChPid) ->
+ notify_down_all(QPids, ChPid, ?CHANNEL_OPERATION_TIMEOUT).
+
+-spec notify_down_all(qpids(), pid(), non_neg_integer()) ->
+ ok_or_errors().
+%% Tell all queue processes that channel ChPid went down. The rpc:call to
+%% the local node is used purely to bound the whole operation by Timeout.
+%% Normal queue shutdowns among the failures are tolerated; only abnormal
+%% exits are reported as errors.
+notify_down_all(QPids, ChPid, Timeout) ->
+ case rpc:call(node(), delegate, invoke,
+ [QPids, {gen_server2, call, [{notify_down, ChPid}, infinity]}], Timeout) of
+ {badrpc, timeout} -> {error, {channel_operation_timeout, Timeout}};
+ {badrpc, Reason} -> {error, Reason};
+ {_, Bads} ->
+ case lists:filter(
+ fun ({_Pid, {exit, {R, _}, _}}) ->
+ rabbit_misc:is_abnormal_exit(R);
+ ({_Pid, _}) -> false
+ end, Bads) of
+ [] -> ok;
+ Bads1 -> {error, Bads1}
+ end;
+ Error -> {error, Error}
+ end.
+
+-spec activate_limit_all(qpids(), pid()) -> ok.
+
+%% Only classic queue pids understand the activate_limit cast.
+activate_limit_all(QRefs, ChPid) ->
+ QPids = [P || P <- QRefs, ?IS_CLASSIC(P)],
+ delegate:invoke_no_result(QPids, {gen_server2, cast,
+ [{activate_limit, ChPid}]}).
+
+-spec credit(amqqueue:amqqueue(),
+ rabbit_types:ctag(),
+ non_neg_integer(),
+ boolean(),
+ rabbit_queue_type:state()) ->
+ {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()}.
+credit(Q, CTag, Credit, Drain, QStates) ->
+ rabbit_queue_type:credit(Q, CTag, Credit, Drain, QStates).
+
+-spec basic_get(amqqueue:amqqueue(), boolean(), pid(), rabbit_types:ctag(),
+ rabbit_queue_type:state()) ->
+ {'ok', non_neg_integer(), qmsg(), rabbit_queue_type:state()} |
+ {'empty', rabbit_queue_type:state()} |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+basic_get(Q, NoAck, LimiterPid, CTag, QStates0) ->
+ rabbit_queue_type:dequeue(Q, NoAck, LimiterPid, CTag, QStates0).
+
+
+-spec basic_consume(amqqueue:amqqueue(), boolean(), pid(), pid(), boolean(),
+ non_neg_integer(), rabbit_types:ctag(), boolean(),
+ rabbit_framing:amqp_table(), any(), rabbit_types:username(),
+ rabbit_queue_type:state()) ->
+ {ok, rabbit_queue_type:state(), rabbit_queue_type:actions()} |
+ {error, term()} |
+ {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+%% Validate the consume x-arguments, then hand a consume spec map to the
+%% queue type implementation.
+basic_consume(Q, NoAck, ChPid, LimiterPid,
+ LimiterActive, ConsumerPrefetchCount, ConsumerTag,
+ ExclusiveConsume, Args, OkMsg, ActingUser, Contexts) ->
+
+ QName = amqqueue:get_name(Q),
+ %% first phase argument validation
+ %% each queue type may do further validations
+ ok = check_consume_arguments(QName, Args),
+ Spec = #{no_ack => NoAck,
+ channel_pid => ChPid,
+ limiter_pid => LimiterPid,
+ limiter_active => LimiterActive,
+ prefetch_count => ConsumerPrefetchCount,
+ consumer_tag => ConsumerTag,
+ exclusive_consume => ExclusiveConsume,
+ args => Args,
+ ok_msg => OkMsg,
+ acting_user => ActingUser},
+ rabbit_queue_type:consume(Q, Spec, Contexts).
+
+-spec basic_cancel(amqqueue:amqqueue(), rabbit_types:ctag(), any(),
+ rabbit_types:username(),
+ rabbit_queue_type:state()) ->
+ {ok, rabbit_queue_type:state()} | {error, term()}.
+basic_cancel(Q, ConsumerTag, OkMsg, ActingUser, QStates) ->
+ rabbit_queue_type:cancel(Q, ConsumerTag,
+ OkMsg, ActingUser, QStates).
+
+-spec notify_decorators(amqqueue:amqqueue()) -> 'ok'.
+
+%% Fire-and-forget cast to the queue process to re-run its decorators.
+notify_decorators(Q) ->
+ QPid = amqqueue:get_pid(Q),
+ delegate:invoke_no_result(QPid, {gen_server2, cast, [notify_decorators]}).
+
+notify_sent(QPid, ChPid) ->
+ rabbit_amqqueue_common:notify_sent(QPid, ChPid).
+
+notify_sent_queue_down(QPid) ->
+ rabbit_amqqueue_common:notify_sent_queue_down(QPid).
+
+-spec resume(pid(), pid()) -> 'ok'.
+
+resume(QPid, ChPid) -> delegate:invoke_no_result(QPid, {gen_server2, cast,
+ [{resume, ChPid}]}).
+
+internal_delete1(QueueName, OnlyDurable) ->
+ internal_delete1(QueueName, OnlyDurable, normal).
+
+%% Remove the queue from the transient table and, conditionally, the
+%% durable table (for auto_delete the durable record is only removed if it
+%% exists). Must run inside an mnesia transaction; returns the binding
+%% deletions to be processed after the transaction.
+internal_delete1(QueueName, OnlyDurable, Reason) ->
+ ok = mnesia:delete({rabbit_queue, QueueName}),
+ case Reason of
+ auto_delete ->
+ case mnesia:wread({rabbit_durable_queue, QueueName}) of
+ [] -> ok;
+ [_] -> ok = mnesia:delete({rabbit_durable_queue, QueueName})
+ end;
+ _ ->
+ mnesia:delete({rabbit_durable_queue, QueueName})
+ end,
+ %% we want to execute some things, as decided by rabbit_exchange,
+ %% after the transaction.
+ rabbit_binding:remove_for_destination(QueueName, OnlyDurable).
+
+-spec internal_delete(name(), rabbit_types:username()) -> 'ok'.
+
+internal_delete(QueueName, ActingUser) ->
+ internal_delete(QueueName, ActingUser, normal).
+
+%% Transactionally delete the queue record(s); the returned tail fun (run
+%% after the transaction) processes binding deletions, updates metrics and
+%% emits the queue_deleted event. A no-op if the queue no longer exists.
+internal_delete(QueueName, ActingUser, Reason) ->
+ rabbit_misc:execute_mnesia_tx_with_tail(
+ fun () ->
+ case {mnesia:wread({rabbit_queue, QueueName}),
+ mnesia:wread({rabbit_durable_queue, QueueName})} of
+ {[], []} ->
+ rabbit_misc:const(ok);
+ _ ->
+ Deletions = internal_delete1(QueueName, false, Reason),
+ T = rabbit_binding:process_deletions(Deletions,
+ ?INTERNAL_USER),
+ fun() ->
+ ok = T(),
+ rabbit_core_metrics:queue_deleted(QueueName),
+ ok = rabbit_event:notify(queue_deleted,
+ [{name, QueueName},
+ {user_who_performed_action, ActingUser}])
+ end
+ end
+ end).
+
+-spec forget_all_durable(node()) -> 'ok'.
+
+forget_all_durable(Node) ->
+ %% Note rabbit is not running so we avoid e.g. the worker pool. Also why
+ %% we don't invoke the return from rabbit_binding:process_deletions/1.
+ {atomic, ok} =
+ mnesia:sync_transaction(
+ fun () ->
+ Qs = mnesia:match_object(rabbit_durable_queue,
+ amqqueue:pattern_match_all(), write),
+ [forget_node_for_queue(Node, Q) ||
+ Q <- Qs,
+ is_local_to_node(amqqueue:get_pid(Q), Node)],
+ ok
+ end),
+ ok.
+
+%% Try to promote a mirror while down - it should recover as a
+%% master. We try to take the oldest mirror here for best chance of
+%% recovery.
+forget_node_for_queue(_DeadNode, Q)
+ when ?amqqueue_is_quorum(Q) ->
+ ok;
+forget_node_for_queue(DeadNode, Q) ->
+ RS = amqqueue:get_recoverable_slaves(Q),
+ forget_node_for_queue(DeadNode, RS, Q).
+
+forget_node_for_queue(_DeadNode, [], Q) ->
+ %% No mirrors to recover from, queue is gone.
+ %% Don't process_deletions since that just calls callbacks and we
+ %% are not really up.
+ Name = amqqueue:get_name(Q),
+ internal_delete1(Name, true);
+
+%% Should not happen, but let's be conservative.
+forget_node_for_queue(DeadNode, [DeadNode | T], Q) ->
+ forget_node_for_queue(DeadNode, T, Q);
+
+%% Walk the recoverable mirrors until we find a node eligible for offline
+%% promotion; for classic queues, point the durable record's pid at a fake
+%% pid on that node so it recovers as master there.
+forget_node_for_queue(DeadNode, [H|T], Q) when ?is_amqqueue(Q) ->
+ Type = amqqueue:get_type(Q),
+ case {node_permits_offline_promotion(H), Type} of
+ {false, _} -> forget_node_for_queue(DeadNode, T, Q);
+ {true, rabbit_classic_queue} ->
+ Q1 = amqqueue:set_pid(Q, rabbit_misc:node_to_fake_pid(H)),
+ ok = mnesia:write(rabbit_durable_queue, Q1, write);
+ {true, rabbit_quorum_queue} ->
+ ok
+ end.
+
+node_permits_offline_promotion(Node) ->
+ case node() of
+ Node -> not rabbit:is_running(); %% [1]
+ _ -> All = rabbit_mnesia:cluster_nodes(all),
+ Running = rabbit_nodes:all_running(),
+ lists:member(Node, All) andalso
+ not lists:member(Node, Running) %% [2]
+ end.
+%% [1] In this case if we are a real running node (i.e. rabbitmqctl
+%% has RPCed into us) then we cannot allow promotion. If on the other
+%% hand we *are* rabbitmqctl impersonating the node for offline
+%% node-forgetting then we can.
+%%
+%% [2] This is simpler; as long as it's down that's OK
+
+-spec run_backing_queue
+ (pid(), atom(), (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) ->
+ 'ok'.
+
+run_backing_queue(QPid, Mod, Fun) ->
+ gen_server2:cast(QPid, {run_backing_queue, Mod, Fun}).
+
+-spec set_ram_duration_target(pid(), number() | 'infinity') -> 'ok'.
+
+set_ram_duration_target(QPid, Duration) ->
+ gen_server2:cast(QPid, {set_ram_duration_target, Duration}).
+
+-spec set_maximum_since_use(pid(), non_neg_integer()) -> 'ok'.
+
+set_maximum_since_use(QPid, Age) ->
+ gen_server2:cast(QPid, {set_maximum_since_use, Age}).
+
+-spec update_mirroring(pid()) -> 'ok'.
+
+update_mirroring(QPid) ->
+ ok = delegate:invoke_no_result(QPid, {gen_server2, cast, [update_mirroring]}).
+
+-spec sync_mirrors(amqqueue:amqqueue() | pid()) ->
+ 'ok' | rabbit_types:error('not_mirrored').
+
+%% Accepts either a queue record or a bare queue pid.
+sync_mirrors(Q) when ?is_amqqueue(Q) ->
+ QPid = amqqueue:get_pid(Q),
+ delegate:invoke(QPid, {gen_server2, call, [sync_mirrors, infinity]});
+sync_mirrors(QPid) ->
+ delegate:invoke(QPid, {gen_server2, call, [sync_mirrors, infinity]}).
+
+-spec cancel_sync_mirrors(amqqueue:amqqueue() | pid()) ->
+ 'ok' | {'ok', 'not_syncing'}.
+
+cancel_sync_mirrors(Q) when ?is_amqqueue(Q) ->
+ QPid = amqqueue:get_pid(Q),
+ delegate:invoke(QPid, {gen_server2, call, [cancel_sync_mirrors, infinity]});
+cancel_sync_mirrors(QPid) ->
+ delegate:invoke(QPid, {gen_server2, call, [cancel_sync_mirrors, infinity]}).
+
+-spec is_replicated(amqqueue:amqqueue()) -> boolean().
+
+%% Quorum queues are replicated by definition; classic queues only when
+%% mirrored by policy.
+is_replicated(Q) when ?amqqueue_is_quorum(Q) ->
+ true;
+is_replicated(Q) ->
+ rabbit_mirror_queue_misc:is_mirrored(Q).
+
+is_exclusive(Q) when ?amqqueue_exclusive_owner_is(Q, none) ->
+ false;
+is_exclusive(Q) when ?amqqueue_exclusive_owner_is_pid(Q) ->
+ true.
+
+is_not_exclusive(Q) ->
+ not is_exclusive(Q).
+
+%% True for an exclusive queue whose owner process is no longer alive.
+is_dead_exclusive(Q) when ?amqqueue_exclusive_owner_is(Q, none) ->
+ false;
+is_dead_exclusive(Q) when ?amqqueue_exclusive_owner_is_pid(Q) ->
+ Pid = amqqueue:get_pid(Q),
+ not rabbit_mnesia:is_process_alive(Pid).
+
+-spec has_synchronised_mirrors_online(amqqueue:amqqueue()) -> boolean().
+has_synchronised_mirrors_online(Q) ->
+ %% a queue with all mirrors down would have no mirror pids.
+ %% We treat these as in sync intentionally to avoid false positives.
+ MirrorPids = amqqueue:get_sync_slave_pids(Q),
+ MirrorPids =/= [] andalso lists:any(fun rabbit_misc:is_process_alive/1, MirrorPids).
+
+-spec on_node_up(node()) -> 'ok'.
+
+%% When Node comes back, clear it from recoverable-mirror lists where
+%% appropriate (see maybe_clear_recoverable_node/2).
+on_node_up(Node) ->
+ ok = rabbit_misc:execute_mnesia_transaction(
+ fun () ->
+ Qs = mnesia:match_object(rabbit_queue,
+ amqqueue:pattern_match_all(), write),
+ [maybe_clear_recoverable_node(Node, Q) || Q <- Qs],
+ ok
+ end).
+
+maybe_clear_recoverable_node(Node, Q) ->
+ SPids = amqqueue:get_sync_slave_pids(Q),
+ RSs = amqqueue:get_recoverable_slaves(Q),
+ case lists:member(Node, RSs) of
+ true ->
+ %% There is a race with
+ %% rabbit_mirror_queue_slave:record_synchronised/1 called
+ %% by the incoming mirror node and this function, called
+ %% by the master node. If this function is executed after
+ %% record_synchronised/1, the node is erroneously removed
+ %% from the recoverable mirrors list.
+ %%
+ %% We check if the mirror node's queue PID is alive. If it is
+ %% the case, then this function is executed after. In this
+ %% situation, we don't touch the queue record, it is already
+ %% correct.
+ DoClearNode =
+ case [SP || SP <- SPids, node(SP) =:= Node] of
+ [SPid] -> not rabbit_misc:is_process_alive(SPid);
+ _ -> true
+ end,
+ if
+ DoClearNode -> RSs1 = RSs -- [Node],
+ store_queue(
+ amqqueue:set_recoverable_slaves(Q, RSs1));
+ true -> ok
+ end;
+ false ->
+ ok
+ end.
+
+-spec on_node_down(node()) -> 'ok'.

%% Cluster event hook: Node went down. Delete the queue records that
%% lived on it (transient, non-replicated or dead-exclusive ones),
%% then fire binding-deletion notifications, metrics and queue_deleted
%% events for everything removed.
on_node_down(Node) ->
    {QueueNames, QueueDeletions} = delete_queues_on_node_down(Node),
    notify_queue_binding_deletions(QueueDeletions),
    rabbit_core_metrics:queues_deleted(QueueNames),
    notify_queues_deleted(QueueNames),
    ok.

%% Delete the doomed queues in batches (see partition_queues/1), one
%% mnesia transaction per batch. Returns {Names, BindingDeletions}
%% unzipped from the per-queue results.
delete_queues_on_node_down(Node) ->
    lists:unzip(lists:flatten([
        rabbit_misc:execute_mnesia_transaction(
          fun () -> [{Queue, delete_queue(Queue)} || Queue <- Queues] end
        ) || Queues <- partition_queues(queues_to_delete_when_node_down(Node))
    ])).

%% Remove one queue record plus its transient bindings; returns the
%% binding-deletion structure for later notification. Must run inside
%% a mnesia transaction.
delete_queue(QueueName) ->
    ok = mnesia:delete({rabbit_queue, QueueName}),
    rabbit_binding:remove_transient_for_destination(QueueName).

% If there are many queues and we delete them all in a single Mnesia transaction,
% this can block all other Mnesia operations for a really long time.
% In situations where a node wants to (re-)join a cluster,
% Mnesia won't be able to sync on the new node until this operation finishes.
% As a result, we want to have multiple Mnesia transactions so that other
% operations can make progress in between these queue delete transactions.
%
% 10 queues per Mnesia transaction is an arbitrary number, but it seems to work OK with 50k queues per node.
partition_queues([Q0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9 | T]) ->
    [[Q0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9] | partition_queues(T)];
partition_queues(T) ->
    [T].

%% Queues homed on NodeDown whose pid is dead and which are either not
%% replicated at all or are exclusive with a dead owner — i.e. queues
%% that cannot come back and must be removed.
queues_to_delete_when_node_down(NodeDown) ->
    rabbit_misc:execute_mnesia_transaction(fun () ->
        qlc:e(qlc:q([amqqueue:get_name(Q) ||
            Q <- mnesia:table(rabbit_queue),
            amqqueue:qnode(Q) == NodeDown andalso
            not rabbit_mnesia:is_process_alive(amqqueue:get_pid(Q)) andalso
            (not rabbit_amqqueue:is_replicated(Q) orelse
             rabbit_amqqueue:is_dead_exclusive(Q))]
        ))
    end).
+ +notify_queue_binding_deletions(QueueDeletions) -> + rabbit_misc:execute_mnesia_tx_with_tail( + fun() -> + rabbit_binding:process_deletions( + lists:foldl( + fun rabbit_binding:combine_deletions/2, + rabbit_binding:new_deletions(), + QueueDeletions + ), + ?INTERNAL_USER + ) + end + ). + +notify_queues_deleted(QueueDeletions) -> + lists:foreach( + fun(Queue) -> + ok = rabbit_event:notify(queue_deleted, + [{name, Queue}, + {user, ?INTERNAL_USER}]) + end, + QueueDeletions). + +-spec pseudo_queue(name(), pid()) -> amqqueue:amqqueue(). + +pseudo_queue(QueueName, Pid) -> + pseudo_queue(QueueName, Pid, false). + +-spec pseudo_queue(name(), pid(), boolean()) -> amqqueue:amqqueue(). + +pseudo_queue(#resource{kind = queue} = QueueName, Pid, Durable) + when is_pid(Pid) andalso + is_boolean(Durable) -> + amqqueue:new(QueueName, + Pid, + Durable, + false, + none, % Owner, + [], + undefined, % VHost, + #{user => undefined}, % ActingUser + rabbit_classic_queue % Type + ). + +-spec immutable(amqqueue:amqqueue()) -> amqqueue:amqqueue(). + +immutable(Q) -> amqqueue:set_immutable(Q). + +-spec deliver([amqqueue:amqqueue()], rabbit_types:delivery()) -> 'ok'. + +deliver(Qs, Delivery) -> + _ = rabbit_queue_type:deliver(Qs, Delivery, stateless), + ok. + +get_quorum_nodes(Q) -> + case amqqueue:get_type_state(Q) of + #{nodes := Nodes} -> + Nodes; + _ -> + [] + end. diff --git a/deps/rabbit/src/rabbit_amqqueue_process.erl b/deps/rabbit/src/rabbit_amqqueue_process.erl new file mode 100644 index 0000000000..abad3b5ad4 --- /dev/null +++ b/deps/rabbit/src/rabbit_amqqueue_process.erl @@ -0,0 +1,1849 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_amqqueue_process). +-include_lib("rabbit_common/include/rabbit.hrl"). 
+-include_lib("rabbit_common/include/rabbit_framing.hrl"). +-include("amqqueue.hrl"). + +-behaviour(gen_server2). + +-define(SYNC_INTERVAL, 200). %% milliseconds +-define(RAM_DURATION_UPDATE_INTERVAL, 5000). +-define(CONSUMER_BIAS_RATIO, 2.0). %% i.e. consume 100% faster + +-export([info_keys/0]). + +-export([init_with_backing_queue_state/7]). + +-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, + handle_info/2, handle_pre_hibernate/1, prioritise_call/4, + prioritise_cast/3, prioritise_info/3, format_message_queue/2]). +-export([format/1]). +-export([is_policy_applicable/2]). + +%% Queue's state +-record(q, { + %% an #amqqueue record + q :: amqqueue:amqqueue(), + %% none | {exclusive consumer channel PID, consumer tag} | {single active consumer channel PID, consumer} + active_consumer, + %% Set to true if a queue has ever had a consumer. + %% This is used to determine when to delete auto-delete queues. + has_had_consumers, + %% backing queue module. + %% for mirrored queues, this will be rabbit_mirror_queue_master. + %% for non-priority and non-mirrored queues, rabbit_variable_queue. + %% see rabbit_backing_queue. + backing_queue, + %% backing queue state. + %% see rabbit_backing_queue, rabbit_variable_queue. + backing_queue_state, + %% consumers state, see rabbit_queue_consumers + consumers, + %% queue expiration value + expires, + %% timer used to periodically sync (flush) queue index + sync_timer_ref, + %% timer used to update ingress/egress rates and queue RAM duration target + rate_timer_ref, + %% timer used to clean up this queue due to TTL (on when unused) + expiry_timer_ref, + %% stats emission timer + stats_timer, + %% maps message IDs to {channel pid, MsgSeqNo} + %% pairs + msg_id_to_channel, + %% message TTL value + ttl, + %% timer used to delete expired messages + ttl_timer_ref, + ttl_timer_expiry, + %% Keeps track of channels that publish to this queue. 
+ %% When channel process goes down, queues have to perform + %% certain cleanup. + senders, + %% dead letter exchange as a #resource record, if any + dlx, + dlx_routing_key, + %% max length in messages, if configured + max_length, + %% max length in bytes, if configured + max_bytes, + %% an action to perform if queue is to be over a limit, + %% can be either drop-head (default), reject-publish or reject-publish-dlx + overflow, + %% when policies change, this version helps queue + %% determine what previously scheduled/set up state to ignore, + %% e.g. message expiration messages from previously set up timers + %% that may or may not be still valid + args_policy_version, + %% used to discard outdated/superseded policy updates, + %% e.g. when policies are applied concurrently. See + %% https://github.com/rabbitmq/rabbitmq-server/issues/803 for one + %% example. + mirroring_policy_version = 0, + %% running | flow | idle + status, + %% true | false + single_active_consumer_on + }). + +%%---------------------------------------------------------------------------- + +-define(STATISTICS_KEYS, + [messages_ready, + messages_unacknowledged, + messages, + reductions, + name, + policy, + operator_policy, + effective_policy_definition, + exclusive_consumer_pid, + exclusive_consumer_tag, + single_active_consumer_pid, + single_active_consumer_tag, + consumers, + consumer_utilisation, + memory, + slave_pids, + synchronised_slave_pids, + recoverable_slaves, + state, + garbage_collection + ]). + +-define(CREATION_EVENT_KEYS, + [name, + durable, + auto_delete, + arguments, + owner_pid, + exclusive, + user_who_performed_action + ]). + +-define(INFO_KEYS, [pid | ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [name]]). + +%%---------------------------------------------------------------------------- + +-spec info_keys() -> rabbit_types:info_keys(). + +info_keys() -> ?INFO_KEYS ++ rabbit_backing_queue:info_keys(). +statistics_keys() -> ?STATISTICS_KEYS ++ rabbit_backing_queue:info_keys(). 

%%----------------------------------------------------------------------------

%% gen_server2 init callback. Traps exits so terminate/2 always runs,
%% records the queue name as this process's name, and starts hibernated
%% with a backoff hint. The pid stored in the amqqueue record is
%% rewritten to self() before the state is built.
init(Q) ->
    process_flag(trap_exit, true),
    ?store_proc_name(amqqueue:get_name(Q)),
    {ok, init_state(amqqueue:set_pid(Q, self())), hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE},
     ?MODULE}.

%% Build the initial #q state from the queue record. Single-active-
%% consumer mode is latched here from the x-single-active-consumer
%% queue argument; the backing queue is initialised later (init_it2).
init_state(Q) ->
    SingleActiveConsumerOn = case rabbit_misc:table_lookup(amqqueue:get_arguments(Q), <<"x-single-active-consumer">>) of
        {bool, true} -> true;
        _            -> false
    end,
    State = #q{q                         = Q,
               active_consumer           = none,
               has_had_consumers         = false,
               consumers                 = rabbit_queue_consumers:new(),
               senders                   = pmon:new(delegate),
               msg_id_to_channel         = #{},
               status                    = running,
               args_policy_version       = 0,
               overflow                  = 'drop-head',
               single_active_consumer_on = SingleActiveConsumerOn},
    rabbit_event:init_stats_timer(State, #q.stats_timer).

%% Deferred initialisation (driven by the first message the process
%% handles). Non-exclusive queues go straight to init_it2/3.
init_it(Recover, From, State = #q{q = Q})
  when ?amqqueue_exclusive_owner_is(Q, none) ->
    init_it2(Recover, From, State);

%% You used to be able to declare an exclusive durable queue. Sadly we
%% need to still tidy up after that case, there could be the remnants
%% of one left over from an upgrade. So that's why we don't enforce
%% Recover = new here.
init_it(Recover, From, State = #q{q = Q0}) ->
    Owner = amqqueue:get_exclusive_owner(Q0),
    case rabbit_misc:is_process_alive(Owner) of
        true  -> erlang:monitor(process, Owner),
                 init_it2(Recover, From, State);
        false -> %% Owner is gone: reply owner_died, initialise the BQ
                 %% just enough for terminate to clean it up, then stop.
                 #q{backing_queue       = undefined,
                    backing_queue_state = undefined,
                    q                   = Q} = State,
                 send_reply(From, {owner_died, Q}),
                 BQ = backing_queue_module(Q),
                 {_, Terms} = recovery_status(Recover),
                 BQS = bq_init(BQ, Q, Terms),
                 %% Rely on terminate to delete the queue.
                 log_delete_exclusive(Owner, State),
                 {stop, {shutdown, missing_owner},
                  State#q{backing_queue = BQ, backing_queue_state = BQS}}
    end.

%% Second stage of deferred init: (re-)declare the queue record in
%% mnesia, initialise the backing queue, apply args/policies, and emit
%% the queue_created event. If an equivalent queue already exists the
%% declarer gets {existing, Q1} and this process stops normally.
init_it2(Recover, From, State = #q{q                   = Q,
                                   backing_queue       = undefined,
                                   backing_queue_state = undefined}) ->
    {Barrier, TermsOrNew} = recovery_status(Recover),
    case rabbit_amqqueue:internal_declare(Q, Recover /= new) of
        {Res, Q1}
          when ?is_amqqueue(Q1) andalso
               (Res == created orelse Res == existing) ->
            case matches(Recover, Q, Q1) of
                true ->
                    %% Register with the fd cache and memory monitor so
                    %% we get set_maximum_since_use / ram-duration casts.
                    ok = file_handle_cache:register_callback(
                           rabbit_amqqueue, set_maximum_since_use, [self()]),
                    ok = rabbit_memory_monitor:register(
                           self(), {rabbit_amqqueue,
                                    set_ram_duration_target, [self()]}),
                    BQ = backing_queue_module(Q1),
                    BQS = bq_init(BQ, Q, TermsOrNew),
                    send_reply(From, {new, Q}),
                    %% Wait for the recovery barrier (if any) before
                    %% processing anything else.
                    recovery_barrier(Barrier),
                    State1 = process_args_policy(
                               State#q{backing_queue       = BQ,
                                       backing_queue_state = BQS}),
                    notify_decorators(startup, State),
                    rabbit_event:notify(queue_created,
                                        infos(?CREATION_EVENT_KEYS, State1)),
                    rabbit_event:if_enabled(State1, #q.stats_timer,
                                            fun() -> emit_stats(State1) end),
                    noreply(State1);
                false ->
                    {stop, normal, {existing, Q1}, State}
            end;
        Err ->
            {stop, normal, Err, State}
    end.

%% Normalise the Recover argument into {Barrier, TermsOrNew}.
recovery_status(new)              -> {no_barrier, new};
recovery_status({Recover, Terms}) -> {Recover, Terms}.

%% Reply to the deferred-init caller, if there is one ('none' when the
%% queue is being recovered rather than declared).
send_reply(none, _Q) -> ok;
send_reply(From, Q)  -> gen_server2:reply(From, Q).

%% Does the declared queue Q1 match what we asked for (Q2)? On a fresh
%% declare all identity fields must agree; on recovery an exact record
%% match is required.
matches(new, Q1, Q2) ->
    %% i.e. not policy
    amqqueue:get_name(Q1)            =:= amqqueue:get_name(Q2)            andalso
    amqqueue:is_durable(Q1)          =:= amqqueue:is_durable(Q2)          andalso
    amqqueue:is_auto_delete(Q1)      =:= amqqueue:is_auto_delete(Q2)      andalso
    amqqueue:get_exclusive_owner(Q1) =:= amqqueue:get_exclusive_owner(Q2) andalso
    amqqueue:get_arguments(Q1)       =:= amqqueue:get_arguments(Q2)       andalso
    amqqueue:get_pid(Q1)             =:= amqqueue:get_pid(Q2)             andalso
    amqqueue:get_slave_pids(Q1)      =:= amqqueue:get_slave_pids(Q2);
%% FIXME: Should v1 vs. v2 of the same record match?
matches(_,  Q,   Q) -> true;
matches(_, _Q, _Q1) -> false.
+ +recovery_barrier(no_barrier) -> + ok; +recovery_barrier(BarrierPid) -> + MRef = erlang:monitor(process, BarrierPid), + receive + {BarrierPid, go} -> erlang:demonitor(MRef, [flush]); + {'DOWN', MRef, process, _, _} -> ok + end. + +-spec init_with_backing_queue_state + (amqqueue:amqqueue(), atom(), tuple(), any(), + [rabbit_types:delivery()], pmon:pmon(), map()) -> + #q{}. + +init_with_backing_queue_state(Q, BQ, BQS, + RateTRef, Deliveries, Senders, MTC) -> + Owner = amqqueue:get_exclusive_owner(Q), + case Owner of + none -> ok; + _ -> erlang:monitor(process, Owner) + end, + State = init_state(Q), + State1 = State#q{backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = RateTRef, + senders = Senders, + msg_id_to_channel = MTC}, + State2 = process_args_policy(State1), + State3 = lists:foldl(fun (Delivery, StateN) -> + maybe_deliver_or_enqueue(Delivery, true, StateN) + end, State2, Deliveries), + notify_decorators(startup, State3), + State3. + +terminate(shutdown = R, State = #q{backing_queue = BQ, q = Q0}) -> + QName = amqqueue:get_name(Q0), + rabbit_core_metrics:queue_deleted(qname(State)), + terminate_shutdown( + fun (BQS) -> + rabbit_misc:execute_mnesia_transaction( + fun() -> + [Q] = mnesia:read({rabbit_queue, QName}), + Q2 = amqqueue:set_state(Q, stopped), + %% amqqueue migration: + %% The amqqueue was read from this transaction, no need + %% to handle migration. 
+ rabbit_amqqueue:store_queue(Q2) + end), + BQ:terminate(R, BQS) + end, State); +terminate({shutdown, missing_owner} = Reason, State) -> + %% if the owner was missing then there will be no queue, so don't emit stats + terminate_shutdown(terminate_delete(false, Reason, State), State); +terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) -> + rabbit_core_metrics:queue_deleted(qname(State)), + terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); +terminate(normal, State = #q{status = {terminated_by, auto_delete}}) -> + %% auto_delete case + %% To increase performance we want to avoid a mnesia_sync:sync call + %% after every transaction, as we could be deleting simultaneously + %% thousands of queues. A optimisation introduced by server#1513 + %% needs to be reverted by this case, avoiding to guard the delete + %% operation on `rabbit_durable_queue` + terminate_shutdown(terminate_delete(true, auto_delete, State), State); +terminate(normal, State) -> %% delete case + terminate_shutdown(terminate_delete(true, normal, State), State); +%% If we crashed don't try to clean up the BQS, probably best to leave it. +terminate(_Reason, State = #q{q = Q}) -> + terminate_shutdown(fun (BQS) -> + Q2 = amqqueue:set_state(Q, crashed), + rabbit_misc:execute_mnesia_transaction( + fun() -> + ?try_mnesia_tx_or_upgrade_amqqueue_and_retry( + rabbit_amqqueue:store_queue(Q2), + begin + Q3 = amqqueue:upgrade(Q2), + rabbit_amqqueue:store_queue(Q3) + end) + end), + BQS + end, State). 

%% Build the fun terminate_shutdown/2 applies to the backing-queue
%% state when the queue record must be removed: delete-and-terminate
%% the BQ, optionally emit a final stats event, then delete the queue
%% from mnesia. Returns a fun(BQS) -> BQS1.
terminate_delete(EmitStats, Reason0,
                 State = #q{q = Q,
                            backing_queue = BQ,
                            status = Status}) ->
    QName = amqqueue:get_name(Q),
    ActingUser = terminated_by(Status),
    fun (BQS) ->
        %% auto_delete is reported to callers as a 'normal' BQ reason.
        Reason = case Reason0 of
                     auto_delete -> normal;
                     Any -> Any
                 end,
        BQS1 = BQ:delete_and_terminate(Reason, BQS),
        if EmitStats -> rabbit_event:if_enabled(State, #q.stats_timer,
                                                fun() -> emit_stats(State) end);
           true      -> ok
        end,
        %% This try-catch block transforms throws to errors since throws are not
        %% logged.
        try
            %% don't care if the internal delete doesn't return 'ok'.
            rabbit_amqqueue:internal_delete(QName, ActingUser, Reason0)
        catch
            {error, ReasonE} -> error(ReasonE)
        end,
        BQS1
    end.

%% Map the #q.status field to the user credited with the termination;
%% anything other than an explicit {terminated_by, User} is attributed
%% to the internal user.
terminated_by({terminated_by, auto_delete}) ->
    ?INTERNAL_USER;
terminated_by({terminated_by, ActingUser}) ->
    ActingUser;
terminated_by(_) ->
    ?INTERNAL_USER.

%% Common teardown: stop all timers, deregister from the memory
%% monitor, notify decorators and emit consumer_deleted events, then
%% apply Fun to the backing-queue state (if the BQ was ever
%% initialised; a queue that never finished init has BQS = undefined).
terminate_shutdown(Fun, #q{status = Status} = State) ->
    ActingUser = terminated_by(Status),
    State1 = #q{backing_queue_state = BQS, consumers = Consumers} =
        lists:foldl(fun (F, S) -> F(S) end, State,
                    [fun stop_sync_timer/1,
                     fun stop_rate_timer/1,
                     fun stop_expiry_timer/1,
                     fun stop_ttl_timer/1]),
    case BQS of
        undefined -> State1;
        _         -> ok = rabbit_memory_monitor:deregister(self()),
                     QName = qname(State),
                     notify_decorators(shutdown, State),
                     [emit_consumer_deleted(Ch, CTag, QName, ActingUser) ||
                         {Ch, CTag, _, _, _, _, _, _} <-
                             rabbit_queue_consumers:all(Consumers)],
                     State1#q{backing_queue_state = Fun(BQS)}
    end.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%----------------------------------------------------------------------------

%% Only ping decorators when the active-consumer set actually changed.
maybe_notify_decorators(false, State) -> State;
maybe_notify_decorators(true,  State) -> notify_decorators(State), State.

%% Fire a named decorator event (startup/shutdown/...) for this queue.
notify_decorators(Event, State) -> decorator_callback(qname(State), Event, []).
+ +notify_decorators(State = #q{consumers = Consumers, + backing_queue = BQ, + backing_queue_state = BQS}) -> + P = rabbit_queue_consumers:max_active_priority(Consumers), + decorator_callback(qname(State), consumer_state_changed, + [P, BQ:is_empty(BQS)]). + +decorator_callback(QName, F, A) -> + %% Look up again in case policy and hence decorators have changed + case rabbit_amqqueue:lookup(QName) of + {ok, Q} -> + Ds = amqqueue:get_decorators(Q), + [ok = apply(M, F, [Q|A]) || M <- rabbit_queue_decorator:select(Ds)]; + {error, not_found} -> + ok + end. + +bq_init(BQ, Q, Recover) -> + Self = self(), + BQ:init(Q, Recover, + fun (Mod, Fun) -> + rabbit_amqqueue:run_backing_queue(Self, Mod, Fun) + end). + +process_args_policy(State = #q{q = Q, + args_policy_version = N}) -> + ArgsTable = + [{<<"expires">>, fun res_min/2, fun init_exp/2}, + {<<"dead-letter-exchange">>, fun res_arg/2, fun init_dlx/2}, + {<<"dead-letter-routing-key">>, fun res_arg/2, fun init_dlx_rkey/2}, + {<<"message-ttl">>, fun res_min/2, fun init_ttl/2}, + {<<"max-length">>, fun res_min/2, fun init_max_length/2}, + {<<"max-length-bytes">>, fun res_min/2, fun init_max_bytes/2}, + {<<"overflow">>, fun res_arg/2, fun init_overflow/2}, + {<<"queue-mode">>, fun res_arg/2, fun init_queue_mode/2}], + drop_expired_msgs( + lists:foldl(fun({Name, Resolve, Fun}, StateN) -> + Fun(rabbit_queue_type_util:args_policy_lookup(Name, Resolve, Q), StateN) + end, State#q{args_policy_version = N + 1}, ArgsTable)). + +res_arg(_PolVal, ArgVal) -> ArgVal. +res_min(PolVal, ArgVal) -> erlang:min(PolVal, ArgVal). + +%% In both these we init with the undefined variant first to stop any +%% existing timer, then start a new one which may fire after a +%% different time. +init_exp(undefined, State) -> stop_expiry_timer(State#q{expires = undefined}); +init_exp(Expires, State) -> State1 = init_exp(undefined, State), + ensure_expiry_timer(State1#q{expires = Expires}). 

%% Apply the message-ttl arg/policy. Clearing first (undefined pass)
%% stops any previously scheduled TTL timer.
init_ttl(undefined, State) -> stop_ttl_timer(State#q{ttl = undefined});
init_ttl(TTL,       State) -> (init_ttl(undefined, State))#q{ttl = TTL}.

%% Apply the dead-letter-exchange arg/policy; the DLX is stored as a
%% #resource in the queue's own vhost.
init_dlx(undefined, State) ->
    State#q{dlx = undefined};
init_dlx(DLX, State = #q{q = Q}) ->
    QName = amqqueue:get_name(Q),
    State#q{dlx = rabbit_misc:r(QName, exchange, DLX)}.

%% Apply the dead-letter-routing-key arg/policy.
init_dlx_rkey(RoutingKey, State) -> State#q{dlx_routing_key = RoutingKey}.

%% Apply max-length; a tighter limit may require dropping from the head
%% immediately.
init_max_length(MaxLen, State) ->
    {_Dropped, State1} = maybe_drop_head(State#q{max_length = MaxLen}),
    State1.

%% Apply max-length-bytes; same immediate-drop consideration as above.
init_max_bytes(MaxBytes, State) ->
    {_Dropped, State1} = maybe_drop_head(State#q{max_bytes = MaxBytes}),
    State1.

%% Reset overflow to default 'drop-head' value if it's undefined.
init_overflow(undefined, #q{overflow = 'drop-head'} = State) ->
    State;
init_overflow(undefined, State) ->
    {_Dropped, State1} = maybe_drop_head(State#q{overflow = 'drop-head'}),
    State1;
init_overflow(Overflow, State) ->
    %% existing_atom: the value was validated at declare/policy time,
    %% so the atom must already exist.
    OverflowVal = binary_to_existing_atom(Overflow, utf8),
    case OverflowVal of
        'drop-head' ->
            %% Switching to drop-head may need an immediate head drop.
            {_Dropped, State1} = maybe_drop_head(State#q{overflow = OverflowVal}),
            State1;
        _ ->
            State#q{overflow = OverflowVal}
    end.

%% Apply queue-mode (e.g. lazy) by delegating to the backing queue.
init_queue_mode(undefined, State) ->
    State;
init_queue_mode(Mode, State = #q {backing_queue = BQ,
                                  backing_queue_state = BQS}) ->
    BQS1 = BQ:set_queue_mode(binary_to_existing_atom(Mode, utf8), BQS),
    State#q{backing_queue_state = BQS1}.

%% gen_server2 reply helper: drain confirms / compute the next timeout
%% via next_state/1 and make sure the stats and rate timers are armed.
reply(Reply, NewState) ->
    {NewState1, Timeout} = next_state(NewState),
    {reply, Reply, ensure_stats_timer(ensure_rate_timer(NewState1)), Timeout}.

%% As reply/2 but for the noreply return shape.
noreply(NewState) ->
    {NewState1, Timeout} = next_state(NewState),
    {noreply, ensure_stats_timer(ensure_rate_timer(NewState1)), Timeout}.

%% Common per-callback epilogue: assert the consumer/queue invariant,
%% drain confirms from the backing queue and send them, then decide the
%% gen_server2 timeout from BQ:needs_timeout/1 — hibernate when the BQ
%% needs nothing, a sync-interval timeout when idle, 0 when timed work
%% is pending.
next_state(State = #q{q = Q,
                      backing_queue       = BQ,
                      backing_queue_state = BQS,
                      msg_id_to_channel   = MTC}) ->
    assert_invariant(State),
    {MsgIds, BQS1} = BQ:drain_confirmed(BQS),
    MTC1 = confirm_messages(MsgIds, MTC, amqqueue:get_name(Q)),
    State1 = State#q{backing_queue_state = BQS1, msg_id_to_channel = MTC1},
    case BQ:needs_timeout(BQS1) of
        false -> {stop_sync_timer(State1),   hibernate     };
        idle  -> {stop_sync_timer(State1),   ?SYNC_INTERVAL};
        timed -> {ensure_sync_timer(State1), 0             }
    end.

%% Select the backing-queue implementation: mirrored queues go through
%% the mirror master; otherwise the configured module (normally
%% rabbit_variable_queue) is used.
backing_queue_module(Q) ->
    case rabbit_mirror_queue_misc:is_mirrored(Q) of
        false -> {ok, BQM} = application:get_env(backing_queue_module),
                 BQM;
        true  -> rabbit_mirror_queue_master
    end.

%% Arm the periodic index-sync timer (idempotent via ensure_timer).
ensure_sync_timer(State) ->
    rabbit_misc:ensure_timer(State, #q.sync_timer_ref,
                             ?SYNC_INTERVAL, sync_timeout).

stop_sync_timer(State) -> rabbit_misc:stop_timer(State, #q.sync_timer_ref).

%% Arm the rates / RAM-duration update timer.
ensure_rate_timer(State) ->
    rabbit_misc:ensure_timer(State, #q.rate_timer_ref,
                             ?RAM_DURATION_UPDATE_INTERVAL,
                             update_ram_duration).

stop_rate_timer(State) -> rabbit_misc:stop_timer(State, #q.rate_timer_ref).

%% We wish to expire only when there are no consumers *and* the expiry
%% hasn't been refreshed (by queue.declare or basic.get) for the
%% configured period.
ensure_expiry_timer(State = #q{expires = undefined}) ->
    State;
ensure_expiry_timer(State = #q{expires = Expires,
                               args_policy_version = Version}) ->
    case is_unused(State) of
        true  -> %% Restart the countdown; Version lets a later policy
                 %% change invalidate an already-queued maybe_expire.
                 NewState = stop_expiry_timer(State),
                 rabbit_misc:ensure_timer(NewState, #q.expiry_timer_ref,
                                          Expires, {maybe_expire, Version});
        false -> State
    end.

stop_expiry_timer(State) -> rabbit_misc:stop_timer(State, #q.expiry_timer_ref).
+ +ensure_ttl_timer(undefined, State) -> + State; +ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = undefined, + args_policy_version = Version}) -> + After = (case Expiry - os:system_time(micro_seconds) of + V when V > 0 -> V + 999; %% always fire later + _ -> 0 + end) div 1000, + TRef = rabbit_misc:send_after(After, self(), {drop_expired, Version}), + State#q{ttl_timer_ref = TRef, ttl_timer_expiry = Expiry}; +ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = TRef, + ttl_timer_expiry = TExpiry}) + when Expiry + 1000 < TExpiry -> + rabbit_misc:cancel_timer(TRef), + ensure_ttl_timer(Expiry, State#q{ttl_timer_ref = undefined}); +ensure_ttl_timer(_Expiry, State) -> + State. + +stop_ttl_timer(State) -> rabbit_misc:stop_timer(State, #q.ttl_timer_ref). + +ensure_stats_timer(State) -> + rabbit_event:ensure_stats_timer(State, #q.stats_timer, emit_stats). + +assert_invariant(#q{single_active_consumer_on = true}) -> + %% queue may contain messages and have available consumers with exclusive consumer + ok; +assert_invariant(State = #q{consumers = Consumers, single_active_consumer_on = false}) -> + true = (rabbit_queue_consumers:inactive(Consumers) orelse is_empty(State)). + +is_empty(#q{backing_queue = BQ, backing_queue_state = BQS}) -> BQ:is_empty(BQS). + +maybe_send_drained(WasEmpty, State) -> + case (not WasEmpty) andalso is_empty(State) of + true -> notify_decorators(State), + rabbit_queue_consumers:send_drained(); + false -> ok + end, + State. + +confirm_messages([], MTC, _QName) -> + MTC; +confirm_messages(MsgIds, MTC, QName) -> + {CMs, MTC1} = + lists:foldl( + fun(MsgId, {CMs, MTC0}) -> + case maps:get(MsgId, MTC0, none) of + none -> + {CMs, MTC0}; + {SenderPid, MsgSeqNo} -> + {maps:update_with(SenderPid, + fun(MsgSeqNos) -> + [MsgSeqNo | MsgSeqNos] + end, + [MsgSeqNo], + CMs), + maps:remove(MsgId, MTC0)} + + end + end, {#{}, MTC}, MsgIds), + maps:fold( + fun(Pid, MsgSeqNos, _) -> + confirm_to_sender(Pid, QName, MsgSeqNos) + end, + ok, + CMs), + MTC1. 

%% Decide how a publisher confirm for this delivery is handled:
%%   never       - confirms not requested;
%%   eventually  - persistent message on a durable queue: remember the
%%                 {sender, seqno} by message id and confirm once the
%%                 backing queue reports it persisted;
%%   immediately - anything else can be confirmed right away.
send_or_record_confirm(#delivery{confirm = false}, State) ->
    {never, State};
send_or_record_confirm(#delivery{confirm    = true,
                                 sender     = SenderPid,
                                 msg_seq_no = MsgSeqNo,
                                 message    = #basic_message {
                                   is_persistent = true,
                                   id            = MsgId}},
                       State = #q{q                 = Q,
                                  msg_id_to_channel = MTC})
  when ?amqqueue_is_durable(Q) ->
    MTC1 = maps:put(MsgId, {SenderPid, MsgSeqNo}, MTC),
    {eventually, State#q{msg_id_to_channel = MTC1}};
send_or_record_confirm(#delivery{confirm    = true,
                                 sender     = SenderPid,
                                 msg_seq_no = MsgSeqNo},
                       #q{q = Q} = State) ->
    confirm_to_sender(SenderPid, amqqueue:get_name(Q), [MsgSeqNo]),
    {immediately, State}.

%% This feature was used by `rabbit_amqqueue_process` and
%% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x. It is
%% unused in 3.8.x and thus deprecated. We keep it to support in-place
%% upgrades to 3.8.x (i.e. mixed-version clusters), but it is a no-op
%% starting with that version.
send_mandatory(#delivery{mandatory = false}) ->
    ok;
send_mandatory(#delivery{mandatory  = true,
                         sender     = SenderPid,
                         msg_seq_no = MsgSeqNo}) ->
    gen_server2:cast(SenderPid, {mandatory_received, MsgSeqNo}).

%% Drop a delivery without enqueueing it: confirm it to the publisher
%% if requested (a discarded message counts as handled), then let the
%% backing queue account for the discard.
discard(#delivery{confirm = Confirm,
                  sender  = SenderPid,
                  flow    = Flow,
                  message = #basic_message{id = MsgId}}, BQ, BQS, MTC, QName) ->
    MTC1 = case Confirm of
               true  -> confirm_messages([MsgId], MTC, QName);
               false -> MTC
           end,
    BQS1 = BQ:discard(MsgId, SenderPid, Flow, BQS),
    {BQS1, MTC1}.

%% Entry point: deliver as much of the queue as possible to the
%% currently unblocked consumers (no consumer-set change assumed yet).
run_message_queue(State) -> run_message_queue(false, State).
+ +run_message_queue(ActiveConsumersChanged, State) -> + case is_empty(State) of + true -> maybe_notify_decorators(ActiveConsumersChanged, State); + false -> case rabbit_queue_consumers:deliver( + fun(AckRequired) -> fetch(AckRequired, State) end, + qname(State), State#q.consumers, + State#q.single_active_consumer_on, State#q.active_consumer) of + {delivered, ActiveConsumersChanged1, State1, Consumers} -> + run_message_queue( + ActiveConsumersChanged or ActiveConsumersChanged1, + State1#q{consumers = Consumers}); + {undelivered, ActiveConsumersChanged1, Consumers} -> + maybe_notify_decorators( + ActiveConsumersChanged or ActiveConsumersChanged1, + State#q{consumers = Consumers}) + end + end. + +attempt_delivery(Delivery = #delivery{sender = SenderPid, + flow = Flow, + message = Message}, + Props, Delivered, State = #q{q = Q, + backing_queue = BQ, + backing_queue_state = BQS, + msg_id_to_channel = MTC}) -> + case rabbit_queue_consumers:deliver( + fun (true) -> true = BQ:is_empty(BQS), + {AckTag, BQS1} = + BQ:publish_delivered( + Message, Props, SenderPid, Flow, BQS), + {{Message, Delivered, AckTag}, {BQS1, MTC}}; + (false) -> {{Message, Delivered, undefined}, + discard(Delivery, BQ, BQS, MTC, amqqueue:get_name(Q))} + end, qname(State), State#q.consumers, State#q.single_active_consumer_on, State#q.active_consumer) of + {delivered, ActiveConsumersChanged, {BQS1, MTC1}, Consumers} -> + {delivered, maybe_notify_decorators( + ActiveConsumersChanged, + State#q{backing_queue_state = BQS1, + msg_id_to_channel = MTC1, + consumers = Consumers})}; + {undelivered, ActiveConsumersChanged, Consumers} -> + {undelivered, maybe_notify_decorators( + ActiveConsumersChanged, + State#q{consumers = Consumers})} + end. 
+ +maybe_deliver_or_enqueue(Delivery = #delivery{message = Message}, + Delivered, + State = #q{overflow = Overflow, + backing_queue = BQ, + backing_queue_state = BQS, + dlx = DLX, + dlx_routing_key = RK}) -> + send_mandatory(Delivery), %% must do this before confirms + case {will_overflow(Delivery, State), Overflow} of + {true, 'reject-publish'} -> + %% Drop publish and nack to publisher + send_reject_publish(Delivery, Delivered, State); + {true, 'reject-publish-dlx'} -> + %% Publish to DLX + with_dlx( + DLX, + fun (X) -> + QName = qname(State), + rabbit_dead_letter:publish(Message, maxlen, X, RK, QName) + end, + fun () -> ok end), + %% Drop publish and nack to publisher + send_reject_publish(Delivery, Delivered, State); + _ -> + {IsDuplicate, BQS1} = BQ:is_duplicate(Message, BQS), + State1 = State#q{backing_queue_state = BQS1}, + case IsDuplicate of + true -> State1; + {true, drop} -> State1; + %% Drop publish and nack to publisher + {true, reject} -> + send_reject_publish(Delivery, Delivered, State1); + %% Enqueue and maybe drop head later + false -> + deliver_or_enqueue(Delivery, Delivered, State1) + end + end. 
+ +deliver_or_enqueue(Delivery = #delivery{message = Message, + sender = SenderPid, + flow = Flow}, + Delivered, + State = #q{q = Q, backing_queue = BQ}) -> + {Confirm, State1} = send_or_record_confirm(Delivery, State), + Props = message_properties(Message, Confirm, State1), + case attempt_delivery(Delivery, Props, Delivered, State1) of + {delivered, State2} -> + State2; + %% The next one is an optimisation + {undelivered, State2 = #q{ttl = 0, dlx = undefined, + backing_queue_state = BQS, + msg_id_to_channel = MTC}} -> + {BQS1, MTC1} = discard(Delivery, BQ, BQS, MTC, amqqueue:get_name(Q)), + State2#q{backing_queue_state = BQS1, msg_id_to_channel = MTC1}; + {undelivered, State2 = #q{backing_queue_state = BQS}} -> + + BQS1 = BQ:publish(Message, Props, Delivered, SenderPid, Flow, BQS), + {Dropped, State3 = #q{backing_queue_state = BQS2}} = + maybe_drop_head(State2#q{backing_queue_state = BQS1}), + QLen = BQ:len(BQS2), + %% optimisation: it would be perfectly safe to always + %% invoke drop_expired_msgs here, but that is expensive so + %% we only do that if a new message that might have an + %% expiry ends up at the head of the queue. If the head + %% remains unchanged, or if the newly published message + %% has no expiry and becomes the head of the queue then + %% the call is unnecessary. + case {Dropped, QLen =:= 1, Props#message_properties.expiry} of + {false, false, _} -> State3; + {true, true, undefined} -> State3; + {_, _, _} -> drop_expired_msgs(State3) + end + end. + +maybe_drop_head(State = #q{max_length = undefined, + max_bytes = undefined}) -> + {false, State}; +maybe_drop_head(State = #q{overflow = 'reject-publish'}) -> + {false, State}; +maybe_drop_head(State = #q{overflow = 'reject-publish-dlx'}) -> + {false, State}; +maybe_drop_head(State = #q{overflow = 'drop-head'}) -> + maybe_drop_head(false, State). 

%% Recursively drop (or dead-letter, when a DLX is set) messages from
%% the head until the queue is back under its max-length / max-bytes
%% limits. AlreadyDropped threads through whether any drop happened so
%% callers know to re-check expiry.
maybe_drop_head(AlreadyDropped, State = #q{backing_queue       = BQ,
                                           backing_queue_state = BQS}) ->
    case over_max_length(State) of
        true ->
            maybe_drop_head(true,
                            with_dlx(
                              State#q.dlx,
                              fun (X) -> dead_letter_maxlen_msg(X, State) end,
                              fun () ->
                                      %% No DLX: drop the head outright.
                                      {_, BQS1} = BQ:drop(false, BQS),
                                      State#q{backing_queue_state = BQS1}
                              end));
        false ->
            {AlreadyDropped, State}
    end.

%% reject-publish overflow path: nack the publisher (when confirms are
%% on) and let the backing queue account for the discarded message.
%% Without confirms the message is silently dropped.
send_reject_publish(#delivery{confirm    = true,
                              sender     = SenderPid,
                              flow       = Flow,
                              msg_seq_no = MsgSeqNo,
                              message    = #basic_message{id = MsgId}},
                    _Delivered,
                    State = #q{ q = Q,
                                backing_queue       = BQ,
                                backing_queue_state = BQS,
                                msg_id_to_channel   = MTC}) ->
    ok = rabbit_classic_queue:send_rejection(SenderPid,
                                             amqqueue:get_name(Q), MsgSeqNo),

    MTC1 = maps:remove(MsgId, MTC),
    BQS1 = BQ:discard(MsgId, SenderPid, Flow, BQS),
    State#q{ backing_queue_state = BQS1, msg_id_to_channel = MTC1 };
send_reject_publish(#delivery{confirm = false},
                    _Delivered, State) ->
    State.

%% Would accepting this delivery push the queue over max-length or
%% max-bytes? Cheap false when neither limit is configured.
will_overflow(_, #q{max_length = undefined,
                    max_bytes  = undefined}) -> false;
will_overflow(#delivery{message = Message},
              #q{max_length = MaxLen,
                 max_bytes  = MaxBytes,
                 backing_queue       = BQ,
                 backing_queue_state = BQS}) ->
    ExpectedQueueLength = BQ:len(BQS) + 1,

    #basic_message{content = #content{payload_fragments_rev = PFR}} = Message,
    MessageSize = iolist_size(PFR),
    ExpectedQueueSizeBytes = BQ:info(message_bytes_ready, BQS) + MessageSize,

    ExpectedQueueLength > MaxLen orelse ExpectedQueueSizeBytes > MaxBytes.

%% Is the queue currently over either configured limit? Relies on
%% Erlang term order: when a limit is undefined (an atom) any integer
%% compares below it, so the check is effectively disabled.
over_max_length(#q{max_length = MaxLen,
                   max_bytes  = MaxBytes,
                   backing_queue       = BQ,
                   backing_queue_state = BQS}) ->
    BQ:len(BQS) > MaxLen orelse BQ:info(message_bytes_ready, BQS) > MaxBytes.
+ +requeue_and_run(AckTags, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + WasEmpty = BQ:is_empty(BQS), + {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), + {_Dropped, State1} = maybe_drop_head(State#q{backing_queue_state = BQS1}), + run_message_queue(maybe_send_drained(WasEmpty, drop_expired_msgs(State1))). + +fetch(AckRequired, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {Result, BQS1} = BQ:fetch(AckRequired, BQS), + State1 = drop_expired_msgs(State#q{backing_queue_state = BQS1}), + {Result, maybe_send_drained(Result =:= empty, State1)}. + +ack(AckTags, ChPid, State) -> + subtract_acks(ChPid, AckTags, State, + fun (State1 = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {_Guids, BQS1} = BQ:ack(AckTags, BQS), + State1#q{backing_queue_state = BQS1} + end). + +requeue(AckTags, ChPid, State) -> + subtract_acks(ChPid, AckTags, State, + fun (State1) -> requeue_and_run(AckTags, State1) end). + +possibly_unblock(Update, ChPid, State = #q{consumers = Consumers}) -> + case rabbit_queue_consumers:possibly_unblock(Update, ChPid, Consumers) of + unchanged -> State; + {unblocked, Consumers1} -> State1 = State#q{consumers = Consumers1}, + run_message_queue(true, State1) + end. + +should_auto_delete(#q{q = Q}) + when not ?amqqueue_is_auto_delete(Q) -> false; +should_auto_delete(#q{has_had_consumers = false}) -> false; +should_auto_delete(State) -> is_unused(State). + +handle_ch_down(DownPid, State = #q{consumers = Consumers, + active_consumer = Holder, + single_active_consumer_on = SingleActiveConsumerOn, + senders = Senders}) -> + State1 = State#q{senders = case pmon:is_monitored(DownPid, Senders) of + false -> + Senders; + true -> + %% A rabbit_channel process died. Here credit_flow will take care + %% of cleaning up the rabbit_amqqueue_process process dictionary + %% with regards to the credit we were tracking for the channel + %% process. See handle_cast({deliver, Deliver}, State) in this + %% module. 
In that cast function we process deliveries from the + %% channel, which means we credit_flow:ack/1 said + %% messages. credit_flow:ack'ing messages means we are increasing + %% a counter to know when we need to send MoreCreditAfter. Since + %% the process died, the credit_flow flow module will clean up + %% that for us. + credit_flow:peer_down(DownPid), + pmon:demonitor(DownPid, Senders) + end}, + case rabbit_queue_consumers:erase_ch(DownPid, Consumers) of + not_found -> + {ok, State1}; + {ChAckTags, ChCTags, Consumers1} -> + QName = qname(State1), + [emit_consumer_deleted(DownPid, CTag, QName, ?INTERNAL_USER) || CTag <- ChCTags], + Holder1 = new_single_active_consumer_after_channel_down(DownPid, Holder, SingleActiveConsumerOn, Consumers1), + State2 = State1#q{consumers = Consumers1, + active_consumer = Holder1}, + maybe_notify_consumer_updated(State2, Holder, Holder1), + notify_decorators(State2), + case should_auto_delete(State2) of + true -> + log_auto_delete( + io_lib:format( + "because all of its consumers (~p) were on a channel that was closed", + [length(ChCTags)]), + State), + {stop, State2}; + false -> {ok, requeue_and_run(ChAckTags, + ensure_expiry_timer(State2))} + end + end. + +new_single_active_consumer_after_channel_down(DownChPid, CurrentSingleActiveConsumer, _SingleActiveConsumerIsOn = true, Consumers) -> + case CurrentSingleActiveConsumer of + {DownChPid, _} -> + % the single active consumer is on the down channel, we have to replace it + case rabbit_queue_consumers:get_consumer(Consumers) of + undefined -> none; + Consumer -> Consumer + end; + _ -> + CurrentSingleActiveConsumer + end; +new_single_active_consumer_after_channel_down(DownChPid, CurrentSingleActiveConsumer, _SingleActiveConsumerIsOn = false, _Consumers) -> + case CurrentSingleActiveConsumer of + {DownChPid, _} -> none; + Other -> Other + end. 
+ +check_exclusive_access({_ChPid, _ConsumerTag}, _ExclusiveConsume, _State) -> + in_use; +check_exclusive_access(none, false, _State) -> + ok; +check_exclusive_access(none, true, State) -> + case is_unused(State) of + true -> ok; + false -> in_use + end. + +is_unused(_State) -> rabbit_queue_consumers:count() == 0. + +maybe_send_reply(_ChPid, undefined) -> ok; +maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). + +qname(#q{q = Q}) -> amqqueue:get_name(Q). + +backing_queue_timeout(State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + State#q{backing_queue_state = BQ:timeout(BQS)}. + +subtract_acks(ChPid, AckTags, State = #q{consumers = Consumers}, Fun) -> + case rabbit_queue_consumers:subtract_acks(ChPid, AckTags, Consumers) of + not_found -> State; + unchanged -> Fun(State); + {unblocked, Consumers1} -> State1 = State#q{consumers = Consumers1}, + run_message_queue(true, Fun(State1)) + end. + +message_properties(Message = #basic_message{content = Content}, + Confirm, #q{ttl = TTL}) -> + #content{payload_fragments_rev = PFR} = Content, + #message_properties{expiry = calculate_msg_expiry(Message, TTL), + needs_confirming = Confirm == eventually, + size = iolist_size(PFR)}. + +calculate_msg_expiry(#basic_message{content = Content}, TTL) -> + #content{properties = Props} = + rabbit_binary_parser:ensure_content_decoded(Content), + %% We assert that the expiration must be valid - we check in the channel. + {ok, MsgTTL} = rabbit_basic:parse_expiration(Props), + case lists:min([TTL, MsgTTL]) of + undefined -> undefined; + T -> os:system_time(micro_seconds) + T * 1000 + end. + +%% Logically this function should invoke maybe_send_drained/2. +%% However, that is expensive. Since some frequent callers of +%% drop_expired_msgs/1, in particular deliver_or_enqueue/3, cannot +%% possibly cause the queue to become empty, we push the +%% responsibility to the callers. So be cautious when adding new ones. 
+drop_expired_msgs(State) -> + case is_empty(State) of + true -> State; + false -> drop_expired_msgs(os:system_time(micro_seconds), + State) + end. + +drop_expired_msgs(Now, State = #q{backing_queue_state = BQS, + backing_queue = BQ }) -> + ExpirePred = fun (#message_properties{expiry = Exp}) -> Now >= Exp end, + {Props, State1} = + with_dlx( + State#q.dlx, + fun (X) -> dead_letter_expired_msgs(ExpirePred, X, State) end, + fun () -> {Next, BQS1} = BQ:dropwhile(ExpirePred, BQS), + {Next, State#q{backing_queue_state = BQS1}} end), + ensure_ttl_timer(case Props of + undefined -> undefined; + #message_properties{expiry = Exp} -> Exp + end, State1). + +with_dlx(undefined, _With, Without) -> Without(); +with_dlx(DLX, With, Without) -> case rabbit_exchange:lookup(DLX) of + {ok, X} -> With(X); + {error, not_found} -> Without() + end. + +dead_letter_expired_msgs(ExpirePred, X, State = #q{backing_queue = BQ}) -> + dead_letter_msgs(fun (DLFun, Acc, BQS1) -> + BQ:fetchwhile(ExpirePred, DLFun, Acc, BQS1) + end, expired, X, State). + +dead_letter_rejected_msgs(AckTags, X, State = #q{backing_queue = BQ}) -> + {ok, State1} = + dead_letter_msgs( + fun (DLFun, Acc, BQS) -> + {Acc1, BQS1} = BQ:ackfold(DLFun, Acc, BQS, AckTags), + {ok, Acc1, BQS1} + end, rejected, X, State), + State1. + +dead_letter_maxlen_msg(X, State = #q{backing_queue = BQ}) -> + {ok, State1} = + dead_letter_msgs( + fun (DLFun, Acc, BQS) -> + {{Msg, _, AckTag}, BQS1} = BQ:fetch(true, BQS), + {ok, DLFun(Msg, AckTag, Acc), BQS1} + end, maxlen, X, State), + State1. + +dead_letter_msgs(Fun, Reason, X, State = #q{dlx_routing_key = RK, + backing_queue_state = BQS, + backing_queue = BQ}) -> + QName = qname(State), + {Res, Acks1, BQS1} = + Fun(fun (Msg, AckTag, Acks) -> + rabbit_dead_letter:publish(Msg, Reason, X, RK, QName), + [AckTag | Acks] + end, [], BQS), + {_Guids, BQS2} = BQ:ack(Acks1, BQS1), + {Res, State#q{backing_queue_state = BQS2}}. + +stop(State) -> stop(noreply, State). 
+ +stop(noreply, State) -> {stop, normal, State}; +stop(Reply, State) -> {stop, normal, Reply, State}. + +infos(Items, #q{q = Q} = State) -> + lists:foldr(fun(totals, Acc) -> + [{messages_ready, i(messages_ready, State)}, + {messages, i(messages, State)}, + {messages_unacknowledged, i(messages_unacknowledged, State)}] ++ Acc; + (type_specific, Acc) -> + format(Q) ++ Acc; + (Item, Acc) -> + [{Item, i(Item, State)} | Acc] + end, [], Items). + +i(name, #q{q = Q}) -> amqqueue:get_name(Q); +i(durable, #q{q = Q}) -> amqqueue:is_durable(Q); +i(auto_delete, #q{q = Q}) -> amqqueue:is_auto_delete(Q); +i(arguments, #q{q = Q}) -> amqqueue:get_arguments(Q); +i(pid, _) -> + self(); +i(owner_pid, #q{q = Q}) when ?amqqueue_exclusive_owner_is(Q, none) -> + ''; +i(owner_pid, #q{q = Q}) -> + amqqueue:get_exclusive_owner(Q); +i(exclusive, #q{q = Q}) -> + ExclusiveOwner = amqqueue:get_exclusive_owner(Q), + is_pid(ExclusiveOwner); +i(policy, #q{q = Q}) -> + case rabbit_policy:name(Q) of + none -> ''; + Policy -> Policy + end; +i(operator_policy, #q{q = Q}) -> + case rabbit_policy:name_op(Q) of + none -> ''; + Policy -> Policy + end; +i(effective_policy_definition, #q{q = Q}) -> + case rabbit_policy:effective_definition(Q) of + undefined -> []; + Def -> Def + end; +i(exclusive_consumer_pid, #q{active_consumer = {ChPid, _ConsumerTag}, single_active_consumer_on = false}) -> + ChPid; +i(exclusive_consumer_pid, _) -> + ''; +i(exclusive_consumer_tag, #q{active_consumer = {_ChPid, ConsumerTag}, single_active_consumer_on = false}) -> + ConsumerTag; +i(exclusive_consumer_tag, _) -> + ''; +i(single_active_consumer_pid, #q{active_consumer = {ChPid, _Consumer}, single_active_consumer_on = true}) -> + ChPid; +i(single_active_consumer_pid, _) -> + ''; +i(single_active_consumer_tag, #q{active_consumer = {_ChPid, Consumer}, single_active_consumer_on = true}) -> + rabbit_queue_consumers:consumer_tag(Consumer); +i(single_active_consumer_tag, _) -> + ''; +i(messages_ready, #q{backing_queue_state = BQS, 
backing_queue = BQ}) -> + BQ:len(BQS); +i(messages_unacknowledged, _) -> + rabbit_queue_consumers:unacknowledged_message_count(); +i(messages, State) -> + lists:sum([i(Item, State) || Item <- [messages_ready, + messages_unacknowledged]]); +i(consumers, _) -> + rabbit_queue_consumers:count(); +i(consumer_utilisation, #q{consumers = Consumers}) -> + case rabbit_queue_consumers:count() of + 0 -> ''; + _ -> rabbit_queue_consumers:utilisation(Consumers) + end; +i(memory, _) -> + {memory, M} = process_info(self(), memory), + M; +i(slave_pids, #q{q = Q0}) -> + Name = amqqueue:get_name(Q0), + {ok, Q} = rabbit_amqqueue:lookup(Name), + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> amqqueue:get_slave_pids(Q) + end; +i(synchronised_slave_pids, #q{q = Q0}) -> + Name = amqqueue:get_name(Q0), + {ok, Q} = rabbit_amqqueue:lookup(Name), + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> amqqueue:get_sync_slave_pids(Q) + end; +i(recoverable_slaves, #q{q = Q0}) -> + Name = amqqueue:get_name(Q0), + Durable = amqqueue:is_durable(Q0), + {ok, Q} = rabbit_amqqueue:lookup(Name), + case Durable andalso rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> ''; + true -> amqqueue:get_recoverable_slaves(Q) + end; +i(state, #q{status = running}) -> credit_flow:state(); +i(state, #q{status = State}) -> State; +i(garbage_collection, _State) -> + rabbit_misc:get_gc_info(self()); +i(reductions, _State) -> + {reductions, Reductions} = erlang:process_info(self(), reductions), + Reductions; +i(user_who_performed_action, #q{q = Q}) -> + Opts = amqqueue:get_options(Q), + maps:get(user, Opts, ?UNKNOWN_USER); +i(type, _) -> classic; +i(Item, #q{backing_queue_state = BQS, backing_queue = BQ}) -> + BQ:info(Item, BQS). + +emit_stats(State) -> + emit_stats(State, []). 
+ +emit_stats(State, Extra) -> + ExtraKs = [K || {K, _} <- Extra], + [{messages_ready, MR}, {messages_unacknowledged, MU}, {messages, M}, + {reductions, R}, {name, Name} | Infos] = All + = [{K, V} || {K, V} <- infos(statistics_keys(), State), + not lists:member(K, ExtraKs)], + rabbit_core_metrics:queue_stats(Name, Extra ++ Infos), + rabbit_core_metrics:queue_stats(Name, MR, MU, M, R), + rabbit_event:notify(queue_stats, Extra ++ All). + +emit_consumer_created(ChPid, CTag, Exclusive, AckRequired, QName, + PrefetchCount, Args, Ref, ActingUser) -> + rabbit_event:notify(consumer_created, + [{consumer_tag, CTag}, + {exclusive, Exclusive}, + {ack_required, AckRequired}, + {channel, ChPid}, + {queue, QName}, + {prefetch_count, PrefetchCount}, + {arguments, Args}, + {user_who_performed_action, ActingUser}], + Ref). + +emit_consumer_deleted(ChPid, ConsumerTag, QName, ActingUser) -> + rabbit_core_metrics:consumer_deleted(ChPid, ConsumerTag, QName), + rabbit_event:notify(consumer_deleted, + [{consumer_tag, ConsumerTag}, + {channel, ChPid}, + {queue, QName}, + {user_who_performed_action, ActingUser}]). + +%%---------------------------------------------------------------------------- + +prioritise_call(Msg, _From, _Len, State) -> + case Msg of + info -> 9; + {info, _Items} -> 9; + consumers -> 9; + stat -> 7; + {basic_consume, _, _, _, _, _, _, _, _, _} -> consumer_bias(State, 0, 2); + {basic_cancel, _, _, _} -> consumer_bias(State, 0, 2); + _ -> 0 + end. + +prioritise_cast(Msg, _Len, State) -> + case Msg of + delete_immediately -> 8; + {delete_exclusive, _Pid} -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + {run_backing_queue, _Mod, _Fun} -> 6; + {ack, _AckTags, _ChPid} -> 4; %% [1] + {resume, _ChPid} -> 3; + {notify_sent, _ChPid, _Credit} -> consumer_bias(State, 0, 2); + _ -> 0 + end. + +%% [1] It should be safe to always prioritise ack / resume since they +%% will be rate limited by how fast consumers receive messages - +%% i.e. 
by notify_sent. We prioritise ack and resume to discourage +%% starvation caused by prioritising notify_sent. We don't vary their +%% priority since acks should stay in order (some parts of the queue +%% stack are optimised for that) and to make things easier to reason +%% about. Finally, we prioritise ack over resume since it should +%% always reduce memory use. +%% bump_reduce_memory_use is prioritised over publishes, because sending +%% credit to self is hard to reason about. Consumers can continue while +%% reduce_memory_use is in progress. + +consumer_bias(#q{backing_queue = BQ, backing_queue_state = BQS}, Low, High) -> + case BQ:msg_rates(BQS) of + {0.0, _} -> Low; + {Ingress, Egress} when Egress / Ingress < ?CONSUMER_BIAS_RATIO -> High; + {_, _} -> Low + end. + +prioritise_info(Msg, _Len, #q{q = Q}) -> + DownPid = amqqueue:get_exclusive_owner(Q), + case Msg of + {'DOWN', _, process, DownPid, _} -> 8; + update_ram_duration -> 8; + {maybe_expire, _Version} -> 8; + {drop_expired, _Version} -> 8; + emit_stats -> 7; + sync_timeout -> 6; + bump_reduce_memory_use -> 1; + _ -> 0 + end. + +handle_call({init, Recover}, From, State) -> + try + init_it(Recover, From, State) + catch + {coordinator_not_started, Reason} -> + %% The GM can shutdown before the coordinator has started up + %% (lost membership or missing group), thus the start_link of + %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process + %% is trapping exists. The master captures this return value and + %% throws the current exception. 
+ {stop, Reason, State} + end; + +handle_call(info, _From, State) -> + reply({ok, infos(info_keys(), State)}, State); + +handle_call({info, Items}, _From, State) -> + try + reply({ok, infos(Items, State)}, State) + catch Error -> reply({error, Error}, State) + end; + +handle_call(consumers, _From, State = #q{consumers = Consumers, single_active_consumer_on = false}) -> + reply(rabbit_queue_consumers:all(Consumers), State); +handle_call(consumers, _From, State = #q{consumers = Consumers, active_consumer = ActiveConsumer}) -> + reply(rabbit_queue_consumers:all(Consumers, ActiveConsumer, true), State); + +handle_call({notify_down, ChPid}, _From, State) -> + %% we want to do this synchronously, so that auto_deleted queues + %% are no longer visible by the time we send a response to the + %% client. The queue is ultimately deleted in terminate/2; if we + %% return stop with a reply, terminate/2 will be called by + %% gen_server2 *before* the reply is sent. + case handle_ch_down(ChPid, State) of + {ok, State1} -> reply(ok, State1); + {stop, State1} -> stop(ok, State1#q{status = {terminated_by, auto_delete}}) + end; + +handle_call({basic_get, ChPid, NoAck, LimiterPid}, _From, + State = #q{q = Q}) -> + QName = amqqueue:get_name(Q), + AckRequired = not NoAck, + State1 = ensure_expiry_timer(State), + case fetch(AckRequired, State1) of + {empty, State2} -> + reply(empty, State2); + {{Message, IsDelivered, AckTag}, + #q{backing_queue = BQ, backing_queue_state = BQS} = State2} -> + case AckRequired of + true -> ok = rabbit_queue_consumers:record_ack( + ChPid, LimiterPid, AckTag); + false -> ok + end, + Msg = {QName, self(), AckTag, IsDelivered, Message}, + reply({ok, BQ:len(BQS), Msg}, State2) + end; + +handle_call({basic_consume, NoAck, ChPid, LimiterPid, LimiterActive, + PrefetchCount, ConsumerTag, ExclusiveConsume, Args, OkMsg, ActingUser}, + _From, State = #q{consumers = Consumers, + active_consumer = Holder, + single_active_consumer_on = SingleActiveConsumerOn}) -> + 
ConsumerRegistration = case SingleActiveConsumerOn of + true -> + case ExclusiveConsume of + true -> + {error, reply({error, exclusive_consume_unavailable}, State)}; + false -> + Consumers1 = rabbit_queue_consumers:add( + ChPid, ConsumerTag, NoAck, + LimiterPid, LimiterActive, + PrefetchCount, Args, is_empty(State), + ActingUser, Consumers), + + case Holder of + none -> + NewConsumer = rabbit_queue_consumers:get(ChPid, ConsumerTag, Consumers1), + {state, State#q{consumers = Consumers1, + has_had_consumers = true, + active_consumer = NewConsumer}}; + _ -> + {state, State#q{consumers = Consumers1, + has_had_consumers = true}} + end + end; + false -> + case check_exclusive_access(Holder, ExclusiveConsume, State) of + in_use -> {error, reply({error, exclusive_consume_unavailable}, State)}; + ok -> + Consumers1 = rabbit_queue_consumers:add( + ChPid, ConsumerTag, NoAck, + LimiterPid, LimiterActive, + PrefetchCount, Args, is_empty(State), + ActingUser, Consumers), + ExclusiveConsumer = + if ExclusiveConsume -> {ChPid, ConsumerTag}; + true -> Holder + end, + {state, State#q{consumers = Consumers1, + has_had_consumers = true, + active_consumer = ExclusiveConsumer}} + end + end, + case ConsumerRegistration of + {error, Reply} -> + Reply; + {state, State1} -> + ok = maybe_send_reply(ChPid, OkMsg), + QName = qname(State1), + AckRequired = not NoAck, + TheConsumer = rabbit_queue_consumers:get(ChPid, ConsumerTag, State1#q.consumers), + {ConsumerIsActive, ActivityStatus} = + case {SingleActiveConsumerOn, State1#q.active_consumer} of + {true, TheConsumer} -> + {true, single_active}; + {true, _} -> + {false, waiting}; + {false, _} -> + {true, up} + end, + rabbit_core_metrics:consumer_created( + ChPid, ConsumerTag, ExclusiveConsume, AckRequired, QName, + PrefetchCount, ConsumerIsActive, ActivityStatus, Args), + emit_consumer_created(ChPid, ConsumerTag, ExclusiveConsume, + AckRequired, QName, PrefetchCount, + Args, none, ActingUser), + notify_decorators(State1), + reply(ok, 
run_message_queue(State1)) + end; + +handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg, ActingUser}, _From, + State = #q{consumers = Consumers, + active_consumer = Holder, + single_active_consumer_on = SingleActiveConsumerOn }) -> + ok = maybe_send_reply(ChPid, OkMsg), + case rabbit_queue_consumers:remove(ChPid, ConsumerTag, Consumers) of + not_found -> + reply(ok, State); + Consumers1 -> + Holder1 = new_single_active_consumer_after_basic_cancel(ChPid, ConsumerTag, + Holder, SingleActiveConsumerOn, Consumers1 + ), + State1 = State#q{consumers = Consumers1, + active_consumer = Holder1}, + maybe_notify_consumer_updated(State1, Holder, Holder1), + emit_consumer_deleted(ChPid, ConsumerTag, qname(State1), ActingUser), + notify_decorators(State1), + case should_auto_delete(State1) of + false -> reply(ok, ensure_expiry_timer(State1)); + true -> + log_auto_delete( + io_lib:format( + "because its last consumer with tag '~s' was cancelled", + [ConsumerTag]), + State), + stop(ok, State1) + end + end; + +handle_call(stat, _From, State) -> + State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = + ensure_expiry_timer(State), + reply({ok, BQ:len(BQS), rabbit_queue_consumers:count()}, State1); + +handle_call({delete, IfUnused, IfEmpty, ActingUser}, _From, + State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> + IsEmpty = BQ:is_empty(BQS), + IsUnused = is_unused(State), + if + IfEmpty and not(IsEmpty) -> reply({error, not_empty}, State); + IfUnused and not(IsUnused) -> reply({error, in_use}, State); + true -> stop({ok, BQ:len(BQS)}, + State#q{status = {terminated_by, ActingUser}}) + end; + +handle_call(purge, _From, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {Count, BQS1} = BQ:purge(BQS), + State1 = State#q{backing_queue_state = BQS1}, + reply({ok, Count}, maybe_send_drained(Count =:= 0, State1)); + +handle_call({requeue, AckTags, ChPid}, From, State) -> + gen_server2:reply(From, ok), + noreply(requeue(AckTags, ChPid, State)); + 
+handle_call(sync_mirrors, _From, + State = #q{backing_queue = rabbit_mirror_queue_master, + backing_queue_state = BQS}) -> + S = fun(BQSN) -> State#q{backing_queue_state = BQSN} end, + HandleInfo = fun (Status) -> + receive {'$gen_call', From, {info, Items}} -> + Infos = infos(Items, State#q{status = Status}), + gen_server2:reply(From, {ok, Infos}) + after 0 -> + ok + end + end, + EmitStats = fun (Status) -> + rabbit_event:if_enabled( + State, #q.stats_timer, + fun() -> emit_stats(State#q{status = Status}) end) + end, + case rabbit_mirror_queue_master:sync_mirrors(HandleInfo, EmitStats, BQS) of + {ok, BQS1} -> reply(ok, S(BQS1)); + {stop, Reason, BQS1} -> {stop, Reason, S(BQS1)} + end; + +handle_call(sync_mirrors, _From, State) -> + reply({error, not_mirrored}, State); + +%% By definition if we get this message here we do not have to do anything. +handle_call(cancel_sync_mirrors, _From, State) -> + reply({ok, not_syncing}, State). + +new_single_active_consumer_after_basic_cancel(ChPid, ConsumerTag, CurrentSingleActiveConsumer, + _SingleActiveConsumerIsOn = true, Consumers) -> + case rabbit_queue_consumers:is_same(ChPid, ConsumerTag, CurrentSingleActiveConsumer) of + true -> + case rabbit_queue_consumers:get_consumer(Consumers) of + undefined -> none; + Consumer -> Consumer + end; + false -> + CurrentSingleActiveConsumer + end; +new_single_active_consumer_after_basic_cancel(ChPid, ConsumerTag, CurrentSingleActiveConsumer, + _SingleActiveConsumerIsOn = false, _Consumers) -> + case CurrentSingleActiveConsumer of + {ChPid, ConsumerTag} -> none; + _ -> CurrentSingleActiveConsumer + end. 
+ +maybe_notify_consumer_updated(#q{single_active_consumer_on = false}, _, _) -> + ok; +maybe_notify_consumer_updated(#q{single_active_consumer_on = true}, SingleActiveConsumer, SingleActiveConsumer) -> + % the single active consumer didn't change, nothing to do + ok; +maybe_notify_consumer_updated(#q{single_active_consumer_on = true} = State, _PreviousConsumer, NewConsumer) -> + case NewConsumer of + {ChPid, Consumer} -> + {Tag, Ack, Prefetch, Args} = rabbit_queue_consumers:get_infos(Consumer), + rabbit_core_metrics:consumer_updated( + ChPid, Tag, false, Ack, qname(State), + Prefetch, true, single_active, Args + ), + ok; + _ -> + ok + end. + +handle_cast(init, State) -> + try + init_it({no_barrier, non_clean_shutdown}, none, State) + catch + {coordinator_not_started, Reason} -> + %% The GM can shutdown before the coordinator has started up + %% (lost membership or missing group), thus the start_link of + %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process + %% is trapping exists. The master captures this return value and + %% throws the current exception. + {stop, Reason, State} + end; + +handle_cast({run_backing_queue, Mod, Fun}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + noreply(State#q{backing_queue_state = BQ:invoke(Mod, Fun, BQS)}); + +handle_cast({deliver, + Delivery = #delivery{sender = Sender, + flow = Flow}, + SlaveWhenPublished}, + State = #q{senders = Senders}) -> + Senders1 = case Flow of + %% In both credit_flow:ack/1 we are acking messages to the channel + %% process that sent us the message delivery. See handle_ch_down + %% for more info. 
+ flow -> credit_flow:ack(Sender), + case SlaveWhenPublished of + true -> credit_flow:ack(Sender); %% [0] + false -> ok + end, + pmon:monitor(Sender, Senders); + noflow -> Senders + end, + State1 = State#q{senders = Senders1}, + noreply(maybe_deliver_or_enqueue(Delivery, SlaveWhenPublished, State1)); +%% [0] The second ack is since the channel thought we were a mirror at +%% the time it published this message, so it used two credits (see +%% rabbit_queue_type:deliver/2). + +handle_cast({ack, AckTags, ChPid}, State) -> + noreply(ack(AckTags, ChPid, State)); + +handle_cast({reject, true, AckTags, ChPid}, State) -> + noreply(requeue(AckTags, ChPid, State)); + +handle_cast({reject, false, AckTags, ChPid}, State) -> + noreply(with_dlx( + State#q.dlx, + fun (X) -> subtract_acks(ChPid, AckTags, State, + fun (State1) -> + dead_letter_rejected_msgs( + AckTags, X, State1) + end) end, + fun () -> ack(AckTags, ChPid, State) end)); + +handle_cast({delete_exclusive, ConnPid}, State) -> + log_delete_exclusive(ConnPid, State), + stop(State); + +handle_cast(delete_immediately, State) -> + stop(State); + +handle_cast({resume, ChPid}, State) -> + noreply(possibly_unblock(rabbit_queue_consumers:resume_fun(), + ChPid, State)); + +handle_cast({notify_sent, ChPid, Credit}, State) -> + noreply(possibly_unblock(rabbit_queue_consumers:notify_sent_fun(Credit), + ChPid, State)); + +handle_cast({activate_limit, ChPid}, State) -> + noreply(possibly_unblock(rabbit_queue_consumers:activate_limit_fun(), + ChPid, State)); + +handle_cast({set_ram_duration_target, Duration}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> + BQS1 = BQ:set_ram_duration_target(Duration, BQS), + noreply(State#q{backing_queue_state = BQS1}); + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State); + +handle_cast(update_mirroring, State = #q{q = Q, + mirroring_policy_version = Version}) -> + case needs_update_mirroring(Q, Version) of + 
false -> + noreply(State); + {Policy, NewVersion} -> + State1 = State#q{mirroring_policy_version = NewVersion}, + noreply(update_mirroring(Policy, State1)) + end; + +handle_cast({credit, ChPid, CTag, Credit, Drain}, + State = #q{consumers = Consumers, + backing_queue = BQ, + backing_queue_state = BQS, + q = Q}) -> + Len = BQ:len(BQS), + rabbit_classic_queue:send_queue_event(ChPid, amqqueue:get_name(Q), {send_credit_reply, Len}), + noreply( + case rabbit_queue_consumers:credit(Len == 0, Credit, Drain, ChPid, CTag, + Consumers) of + unchanged -> State; + {unblocked, Consumers1} -> State1 = State#q{consumers = Consumers1}, + run_message_queue(true, State1) + end); + +% Note: https://www.pivotaltracker.com/story/show/166962656 +% This event is necessary for the stats timer to be initialized with +% the correct values once the management agent has started +handle_cast({force_event_refresh, Ref}, + State = #q{consumers = Consumers, + active_consumer = Holder}) -> + rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State), Ref), + QName = qname(State), + AllConsumers = rabbit_queue_consumers:all(Consumers), + case Holder of + none -> + [emit_consumer_created( + Ch, CTag, false, AckRequired, QName, Prefetch, + Args, Ref, ActingUser) || + {Ch, CTag, AckRequired, Prefetch, _, _, Args, ActingUser} + <- AllConsumers]; + {Ch, CTag} -> + [{Ch, CTag, AckRequired, Prefetch, _, _, Args, ActingUser}] = AllConsumers, + emit_consumer_created( + Ch, CTag, true, AckRequired, QName, Prefetch, Args, Ref, ActingUser) + end, + noreply(rabbit_event:init_stats_timer(State, #q.stats_timer)); + +handle_cast(notify_decorators, State) -> + notify_decorators(State), + noreply(State); + +handle_cast(policy_changed, State = #q{q = Q0}) -> + Name = amqqueue:get_name(Q0), + %% We depend on the #q.q field being up to date at least WRT + %% policy (but not mirror pids) in various places, so when it + %% changes we go and read it from Mnesia again. 
+ %% + %% This also has the side effect of waking us up so we emit a + %% stats event - so event consumers see the changed policy. + {ok, Q} = rabbit_amqqueue:lookup(Name), + noreply(process_args_policy(State#q{q = Q})); + +handle_cast({sync_start, _, _}, State = #q{q = Q}) -> + Name = amqqueue:get_name(Q), + %% Only a mirror should receive this, it means we are a duplicated master + rabbit_mirror_queue_misc:log_warning( + Name, "Stopping after receiving sync_start from another master", []), + stop(State). + +handle_info({maybe_expire, Vsn}, State = #q{args_policy_version = Vsn}) -> + case is_unused(State) of + true -> stop(State); + false -> noreply(State#q{expiry_timer_ref = undefined}) + end; + +handle_info({maybe_expire, _Vsn}, State) -> + noreply(State); + +handle_info({drop_expired, Vsn}, State = #q{args_policy_version = Vsn}) -> + WasEmpty = is_empty(State), + State1 = drop_expired_msgs(State#q{ttl_timer_ref = undefined}), + noreply(maybe_send_drained(WasEmpty, State1)); + +handle_info({drop_expired, _Vsn}, State) -> + noreply(State); + +handle_info(emit_stats, State) -> + emit_stats(State), + %% Don't call noreply/1, we don't want to set timers + {State1, Timeout} = next_state(rabbit_event:reset_stats_timer( + State, #q.stats_timer)), + {noreply, State1, Timeout}; + +handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, + State = #q{q = Q}) when ?amqqueue_exclusive_owner_is(Q, DownPid) -> + %% Exclusively owned queues must disappear with their owner. In + %% the case of clean shutdown we delete the queue synchronously in + %% the reader - although not required by the spec this seems to + %% match what people expect (see bug 21824). However we need this + %% monitor-and-async- delete in case the connection goes away + %% unexpectedly. 
+ log_delete_exclusive(DownPid, State), + stop(State); + +handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> + case handle_ch_down(DownPid, State) of + {ok, State1} -> noreply(State1); + {stop, State1} -> stop(State1) + end; + +handle_info(update_ram_duration, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + %% Don't call noreply/1, we don't want to set timers + {State1, Timeout} = next_state(State#q{rate_timer_ref = undefined, + backing_queue_state = BQS2}), + {noreply, State1, Timeout}; + +handle_info(sync_timeout, State) -> + noreply(backing_queue_timeout(State#q{sync_timer_ref = undefined})); + +handle_info(timeout, State) -> + noreply(backing_queue_timeout(State)); + +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}; + +handle_info({bump_credit, Msg}, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + %% The message_store is granting us more credit. This means the + %% backing queue (for the rabbit_variable_queue case) might + %% continue paging messages to disk if it still needs to. We + %% consume credits from the message_store whenever we need to + %% persist a message to disk. See: + %% rabbit_variable_queue:msg_store_write/4. + credit_flow:handle_bump_msg(Msg), + noreply(State#q{backing_queue_state = BQ:resume(BQS)}); +handle_info(bump_reduce_memory_use, State = #q{backing_queue = BQ, + backing_queue_state = BQS0}) -> + BQS1 = BQ:handle_info(bump_reduce_memory_use, BQS0), + noreply(State#q{backing_queue_state = BQ:resume(BQS1)}); + +handle_info(Info, State) -> + {stop, {unhandled_info, Info}, State}. 
+ +handle_pre_hibernate(State = #q{backing_queue_state = undefined}) -> + {hibernate, State}; +handle_pre_hibernate(State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + BQS3 = BQ:handle_pre_hibernate(BQS2), + rabbit_event:if_enabled( + State, #q.stats_timer, + fun () -> emit_stats(State, + [{idle_since, + os:system_time(milli_seconds)}, + {consumer_utilisation, ''}]) + end), + State1 = rabbit_event:stop_stats_timer(State#q{backing_queue_state = BQS3}, + #q.stats_timer), + {hibernate, stop_rate_timer(State1)}. + +format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). + +format(Q) when ?is_amqqueue(Q) -> + case rabbit_mirror_queue_misc:is_mirrored(Q) of + false -> + [{node, node(amqqueue:get_pid(Q))}]; + true -> + Slaves = amqqueue:get_slave_pids(Q), + SSlaves = amqqueue:get_sync_slave_pids(Q), + [{slave_nodes, [node(S) || S <- Slaves]}, + {synchronised_slave_nodes, [node(S) || S <- SSlaves]}, + {node, node(amqqueue:get_pid(Q))}] + end. + +-spec is_policy_applicable(amqqueue:amqqueue(), any()) -> boolean(). +is_policy_applicable(_Q, _Policy) -> + true. + +log_delete_exclusive({ConPid, _ConRef}, State) -> + log_delete_exclusive(ConPid, State); +log_delete_exclusive(ConPid, #q{ q = Q }) -> + Resource = amqqueue:get_name(Q), + #resource{ name = QName, virtual_host = VHost } = Resource, + rabbit_log_queue:debug("Deleting exclusive queue '~s' in vhost '~s' " ++ + "because its declaring connection ~p was closed", + [QName, VHost, ConPid]). + +log_auto_delete(Reason, #q{ q = Q }) -> + Resource = amqqueue:get_name(Q), + #resource{ name = QName, virtual_host = VHost } = Resource, + rabbit_log_queue:debug("Deleting auto-delete queue '~s' in vhost '~s' " ++ + Reason, + [QName, VHost]). 
+ +needs_update_mirroring(Q, Version) -> + {ok, UpQ} = rabbit_amqqueue:lookup(amqqueue:get_name(Q)), + DBVersion = amqqueue:get_policy_version(UpQ), + case DBVersion > Version of + true -> {rabbit_policy:get(<<"ha-mode">>, UpQ), DBVersion}; + false -> false + end. + + +update_mirroring(Policy, State = #q{backing_queue = BQ}) -> + case update_to(Policy, BQ) of + start_mirroring -> + start_mirroring(State); + stop_mirroring -> + stop_mirroring(State); + ignore -> + State; + update_ha_mode -> + update_ha_mode(State) + end. + +update_to(undefined, rabbit_mirror_queue_master) -> + stop_mirroring; +update_to(_, rabbit_mirror_queue_master) -> + update_ha_mode; +update_to(undefined, BQ) when BQ =/= rabbit_mirror_queue_master -> + ignore; +update_to(_, BQ) when BQ =/= rabbit_mirror_queue_master -> + start_mirroring. + +start_mirroring(State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + %% lookup again to get policy for init_with_existing_bq + {ok, Q} = rabbit_amqqueue:lookup(qname(State)), + true = BQ =/= rabbit_mirror_queue_master, %% assertion + BQ1 = rabbit_mirror_queue_master, + BQS1 = BQ1:init_with_existing_bq(Q, BQ, BQS), + State#q{backing_queue = BQ1, + backing_queue_state = BQS1}. + +stop_mirroring(State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + BQ = rabbit_mirror_queue_master, %% assertion + {BQ1, BQS1} = BQ:stop_mirroring(BQS), + State#q{backing_queue = BQ1, + backing_queue_state = BQS1}. + +update_ha_mode(State) -> + {ok, Q} = rabbit_amqqueue:lookup(qname(State)), + ok = rabbit_mirror_queue_misc:update_mirrors(Q), + State. + +confirm_to_sender(Pid, QName, MsgSeqNos) -> + rabbit_classic_queue:confirm_to_sender(Pid, QName, MsgSeqNos). 
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Per-queue supervisor: supervises a single queue (prequeue) worker.
-module(rabbit_amqqueue_sup).

-behaviour(supervisor2).

-export([start_link/2]).

-export([init/1]).

-include("rabbit.hrl").

%%----------------------------------------------------------------------------

-spec start_link(amqqueue:amqqueue(), rabbit_prequeue:start_mode()) ->
          {'ok', pid(), pid()}.

%% Start the supervisor and its single queue-process child, returning both
%% pids. Marker is a short-lived process linked to the caller and handed to
%% rabbit_prequeue; it is unlinked and stopped once the child has started —
%% presumably it lets the queue process detect whether the declaring caller
%% is still alive during startup (NOTE(review): confirm against
%% rabbit_prequeue).
start_link(Q, StartMode) ->
    Marker = spawn_link(fun() -> receive stop -> ok end end),
    ChildSpec = {rabbit_amqqueue,
                 {rabbit_prequeue, start_link, [Q, StartMode, Marker]},
                 intrinsic, ?WORKER_WAIT, worker, [rabbit_amqqueue_process,
                                                   rabbit_mirror_queue_slave]},
    {ok, SupPid} = supervisor2:start_link(?MODULE, []),
    {ok, QPid} = supervisor2:start_child(SupPid, ChildSpec),
    unlink(Marker),
    Marker ! stop,
    {ok, SupPid, QPid}.

%% Children are added dynamically in start_link/2.
init([]) -> {ok, {{one_for_one, 5, 10}, []}}.

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Per-vhost supervisor of rabbit_amqqueue_sup instances (one per queue).
-module(rabbit_amqqueue_sup_sup).

-behaviour(supervisor2).

-export([start_link/0, start_queue_process/3]).
-export([start_for_vhost/1, stop_for_vhost/1,
         find_for_vhost/2, find_for_vhost/1]).

-export([init/1]).

-include("rabbit.hrl").

-define(SERVER, ?MODULE).

%%----------------------------------------------------------------------------

-spec start_link() -> rabbit_types:ok_pid_or_error().

start_link() ->
    supervisor2:start_link(?MODULE, []).

-spec start_queue_process
        (node(), amqqueue:amqqueue(), 'declare' | 'recovery' | 'slave') ->
            pid().

%% Start a queue process for Q on Node, under that node's per-vhost queue
%% supervisor. Returns the queue process pid (not the intermediate sup).
start_queue_process(Node, Q, StartMode) ->
    #resource{virtual_host = VHost} = amqqueue:get_name(Q),
    {ok, Sup} = find_for_vhost(VHost, Node),
    {ok, _SupPid, QPid} = supervisor2:start_child(Sup, [Q, StartMode]),
    QPid.

%% simple_one_for_one: each dynamically-added child is a rabbit_amqqueue_sup.
init([]) ->
    {ok, {{simple_one_for_one, 10, 10},
          [{rabbit_amqqueue_sup, {rabbit_amqqueue_sup, start_link, []},
            temporary, ?SUPERVISOR_WAIT, supervisor, [rabbit_amqqueue_sup]}]}}.

-spec find_for_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}.
%% Locate this vhost's queue supervisor on the local node.
find_for_vhost(VHost) ->
    find_for_vhost(VHost, node()).

-spec find_for_vhost(rabbit_types:vhost(), atom()) -> {ok, pid()} | {error, term()}.
%% Locate the queue supervisor for VHost on Node by asking the vhost
%% supervisor tree for its rabbit_amqqueue_sup_sup child.
find_for_vhost(VHost, Node) ->
    {ok, VHostSup} = rabbit_vhost_sup_sup:get_vhost_sup(VHost, Node),
    case supervisor2:find_child(VHostSup, rabbit_amqqueue_sup_sup) of
        [QSup] -> {ok, QSup};
        Result -> {error, {queue_supervisor_not_found, Result}}
    end.

-spec start_for_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}.
%% Start the queue supervisor for a (newly added) vhost.
start_for_vhost(VHost) ->
    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
        {ok, VHostSup} ->
            supervisor2:start_child(
              VHostSup,
              {rabbit_amqqueue_sup_sup,
               {rabbit_amqqueue_sup_sup, start_link, []},
               transient, infinity, supervisor, [rabbit_amqqueue_sup_sup]});
        %% we can get here if a vhost is added and removed concurrently
        %% e.g. some integration tests do it
        {error, {no_such_vhost, VHost}} ->
            rabbit_log:error("Failed to start a queue process supervisor for vhost ~s: vhost no longer exists!",
                             [VHost]),
            {error, {no_such_vhost, VHost}}
    end.

-spec stop_for_vhost(rabbit_types:vhost()) -> ok.
%% Stop and remove the queue supervisor for a vhost; tolerant of the vhost
%% having already disappeared (see start_for_vhost/1).
stop_for_vhost(VHost) ->
    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
        {ok, VHostSup} ->
            ok = supervisor2:terminate_child(VHostSup, rabbit_amqqueue_sup_sup),
            ok = supervisor2:delete_child(VHostSup, rabbit_amqqueue_sup_sup);
        %% see start/1
        {error, {no_such_vhost, VHost}} ->
            rabbit_log:error("Failed to stop a queue process supervisor for vhost ~s: vhost no longer exists!",
                             [VHost]),
            ok
    end.

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Internal (Mnesia-backed) authentication and authorisation backend.
-module(rabbit_auth_backend_internal).
-include("rabbit.hrl").

-behaviour(rabbit_authn_backend).
-behaviour(rabbit_authz_backend).

-export([user_login_authentication/2, user_login_authorization/2,
         check_vhost_access/3, check_resource_access/4, check_topic_access/4]).

-export([add_user/3, delete_user/2, lookup_user/1, exists/1,
         change_password/3, clear_password/2,
         hash_password/2, change_password_hash/2, change_password_hash/3,
         set_tags/3, set_permissions/6, clear_permissions/3,
         set_topic_permissions/6, clear_topic_permissions/3, clear_topic_permissions/4,
         add_user_sans_validation/3, put_user/2, put_user/3]).
-export([set_user_limits/3, clear_user_limits/3, is_over_connection_limit/1,
         is_over_channel_limit/1, get_user_limits/0, get_user_limits/1]).

-export([user_info_keys/0, perms_info_keys/0,
         user_perms_info_keys/0, vhost_perms_info_keys/0,
         user_vhost_perms_info_keys/0, all_users/0,
         list_users/0, list_users/2, list_permissions/0,
         list_user_permissions/1, list_user_permissions/3,
         list_topic_permissions/0,
         list_vhost_permissions/1, list_vhost_permissions/3,
         list_user_vhost_permissions/2,
         list_user_topic_permissions/1, list_vhost_topic_permissions/1, list_user_vhost_topic_permissions/2]).

-export([state_can_expire/0]).

%% for testing
-export([hashing_module_for_user/1, expand_topic_permission/2]).

%%----------------------------------------------------------------------------

-type regexp() :: binary().

%%----------------------------------------------------------------------------
%% Implementation of rabbit_auth_backend

%% Returns a password hashing module for the user record provided. If
%% there is no information in the record, we consider it to be legacy
%% (inserted by a version older than 3.6.0) and fall back to MD5, the
%% now obsolete hashing function.
hashing_module_for_user(User) ->
    ModOrUndefined = internal_user:get_hashing_algorithm(User),
    rabbit_password:hashing_mod(ModOrUndefined).

-define(BLANK_PASSWORD_REJECTION_MESSAGE,
        "user '~s' attempted to log in with a blank password, which is prohibited by the internal authN backend. "
        "To use TLS/x509 certificate-based authentication, see the rabbitmq_auth_mechanism_ssl plugin and configure the client to use the EXTERNAL authentication mechanism. "
        "Alternatively change the password for the user to be non-blank.").

%% For cases when we do not have a set of credentials,
%% namely when x509 (TLS) certificates are used. This should only be
%% possible when the EXTERNAL authentication mechanism is used, see
%% rabbit_auth_mechanism_plain:handle_response/2 and rabbit_reader:auth_phase/2.
user_login_authentication(Username, []) ->
    internal_check_user_login(Username, fun(_) -> true end);
%% For cases when we do have a set of credentials. rabbit_auth_mechanism_plain:handle_response/2
%% performs initial validation.
user_login_authentication(Username, AuthProps) ->
    case lists:keyfind(password, 1, AuthProps) of
        %% Blank passwords (binary or list form) are rejected outright.
        {password, <<"">>} ->
            {refused, ?BLANK_PASSWORD_REJECTION_MESSAGE,
             [Username]};
        {password, ""} ->
            {refused, ?BLANK_PASSWORD_REJECTION_MESSAGE,
             [Username]};
        {password, Cleartext} ->
            internal_check_user_login(
              Username,
              fun(User) ->
                      %% Stored hash is a 4-byte salt followed by the salted
                      %% hash; recompute with the user's hashing module and
                      %% compare. Any other stored shape fails the check.
                      case internal_user:get_password_hash(User) of
                          <<Salt:4/binary, Hash/binary>> ->
                              Hash =:= rabbit_password:salted_hash(
                                         hashing_module_for_user(User), Salt, Cleartext);
                          _ ->
                              false
                      end
              end);
        false -> exit({unknown_auth_props, Username, AuthProps})
    end.

%% Internal-backend auth state never expires.
state_can_expire() -> false.

%% Authorization reuses the credential-less authentication path.
user_login_authorization(Username, _AuthProps) ->
    case user_login_authentication(Username, []) of
        {ok, #auth_user{impl = Impl, tags = Tags}} -> {ok, Impl, Tags};
        Else -> Else
    end.

%% Look the user up and apply the supplied predicate; both a missing user
%% and a failed predicate produce the same generic refusal (no user-
%% existence oracle).
internal_check_user_login(Username, Fun) ->
    Refused = {refused, "user '~s' - invalid credentials", [Username]},
    case lookup_user(Username) of
        {ok, User} ->
            Tags = internal_user:get_tags(User),
            case Fun(User) of
                true -> {ok, #auth_user{username = Username,
                                        tags = Tags,
                                        impl = none}};
                _ -> Refused
            end;
        {error, not_found} ->
            Refused
    end.

%% A user may access a vhost iff a permission record exists for the pair.
check_vhost_access(#auth_user{username = Username}, VHostPath, _AuthzData) ->
    case mnesia:dirty_read({rabbit_user_permission,
                            #user_vhost{username = Username,
                                        virtual_host = VHostPath}}) of
        [] -> false;
        [_R] -> true
    end.
%% Resource access: deny when no permission record exists for the
%% user/vhost pair; otherwise match the resource name against the regexp
%% for the requested permission kind (configure/write/read).
check_resource_access(#auth_user{username = Username},
                      #resource{virtual_host = VHostPath, name = Name},
                      Permission,
                      _AuthContext) ->
    case mnesia:dirty_read({rabbit_user_permission,
                            #user_vhost{username = Username,
                                        virtual_host = VHostPath}}) of
        [] ->
            false;
        [#user_permission{permission = P}] ->
            PermRegexp = case element(permission_index(Permission), P) of
                             %% <<"^$">> breaks Emacs' erlang mode
                             <<"">> -> <<$^, $$>>;
                             RE -> RE
                         end,
            case re:run(Name, PermRegexp, [{capture, none}]) of
                match -> true;
                nomatch -> false
            end
    end.

%% Topic access: note the asymmetry with check_resource_access/4 — absence
%% of a topic permission record means ALLOW ([] -> true), since topic
%% permissions are opt-in restrictions. The stored regexp may contain
%% {placeholder} variables which are expanded from the authz context
%% before matching against the routing key.
check_topic_access(#auth_user{username = Username},
                   #resource{virtual_host = VHostPath, name = Name, kind = topic},
                   Permission,
                   Context) ->
    case mnesia:dirty_read({rabbit_topic_permission,
                            #topic_permission_key{user_vhost = #user_vhost{username = Username,
                                                                           virtual_host = VHostPath},
                                                  exchange = Name
                                                 }}) of
        [] ->
            true;
        [#topic_permission{permission = P}] ->
            PermRegexp = case element(permission_index(Permission), P) of
                             %% <<"^$">> breaks Emacs' erlang mode
                             <<"">> -> <<$^, $$>>;
                             RE -> RE
                         end,
            PermRegexpExpanded = expand_topic_permission(
                                   PermRegexp,
                                   maps:get(variable_map, Context, undefined)
                                  ),
            case re:run(maps:get(routing_key, Context), PermRegexpExpanded, [{capture, none}]) of
                match -> true;
                nomatch -> false
            end
    end.

%% Replace every {Key} placeholder in Permission with its value from
%% ToExpand (a map of binaries). With no map, return the pattern as-is.
expand_topic_permission(Permission, ToExpand) when is_map(ToExpand) ->
    Opening = <<"{">>,
    Closing = <<"}">>,
    ReplaceFun = fun(K, V, Acc) ->
                         Placeholder = <<Opening/binary, K/binary, Closing/binary>>,
                         binary:replace(Acc, Placeholder, V, [global])
                 end,
    maps:fold(ReplaceFun, Permission, ToExpand);
expand_topic_permission(Permission, _ToExpand) ->
    Permission.

%% Map a permission kind to its field index in the #permission record.
permission_index(configure) -> #permission.configure;
permission_index(write)     -> #permission.write;
permission_index(read)      -> #permission.read.
%%----------------------------------------------------------------------------
%% Manipulation of the user database

%% Validate a username/password pair against the configured credential
%% validator.
validate_credentials(Username, Password) ->
    rabbit_credential_validation:validate(Username, Password).

%% Run Fun(Username, Password, ActingUser) only when the credentials pass
%% validation; otherwise log and return the validator's error.
validate_and_alternate_credentials(Username, Password, ActingUser, Fun) ->
    case validate_credentials(Username, Password) of
        ok ->
            Fun(Username, Password, ActingUser);
        {error, Err} ->
            rabbit_log:error("Credential validation for '~s' failed!~n", [Username]),
            {error, Err}
    end.

-spec add_user(rabbit_types:username(), rabbit_types:password(),
               rabbit_types:username()) -> 'ok' | {'error', string()}.

%% Create a user after validating the credentials.
add_user(Username, Password, ActingUser) ->
    validate_and_alternate_credentials(Username, Password, ActingUser,
                                       fun add_user_sans_validation/3).

%% Create a user WITHOUT credential validation. Throws if the user already
%% exists. Emits a user_created event on success.
add_user_sans_validation(Username, Password, ActingUser) ->
    %% Fix: the message advertises bytes, so use byte_size/1 —
    %% bit_size/1 reports bits (8x the stated value).
    rabbit_log:debug("Asked to create a new user '~s', password length in bytes: ~p", [Username, byte_size(Password)]),
    %% hash_password will pick the hashing function configured for us
    %% but we also need to store a hint as part of the record, so we
    %% retrieve it here one more time
    HashingMod = rabbit_password:hashing_mod(),
    PasswordHash = hash_password(HashingMod, Password),
    User = internal_user:create_user(Username, PasswordHash, HashingMod),
    try
        R = rabbit_misc:execute_mnesia_transaction(
              fun () ->
                      case mnesia:wread({rabbit_user, Username}) of
                          [] ->
                              ok = mnesia:write(rabbit_user, User, write);
                          _ ->
                              mnesia:abort({user_already_exists, Username})
                      end
              end),
        rabbit_log:info("Created user '~s'", [Username]),
        rabbit_event:notify(user_created, [{name, Username},
                                           {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {user_already_exists, _}} = Error ->
            rabbit_log:warning("Failed to add user '~s': the user already exists", [Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to add user '~s': ~p", [Username, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to add user '~s': ~p", [Username, Error]),
            exit(Error)
    end .

-spec delete_user(rabbit_types:username(), rabbit_types:username()) -> 'ok'.

%% Delete a user together with all their vhost and topic permissions.
%% Throws if the user does not exist. Emits a user_deleted event.
delete_user(Username, ActingUser) ->
    rabbit_log:debug("Asked to delete user '~s'", [Username]),
    try
        R = rabbit_misc:execute_mnesia_transaction(
              rabbit_misc:with_user(
                Username,
                fun () ->
                        ok = mnesia:delete({rabbit_user, Username}),
                        %% renamed the comprehension variable (was R, shadow-
                        %% prone with the outer transaction result binding)
                        [ok = mnesia:delete_object(
                                rabbit_user_permission, Perm, write) ||
                            Perm <- mnesia:match_object(
                                      rabbit_user_permission,
                                      #user_permission{user_vhost = #user_vhost{
                                                         username     = Username,
                                                         virtual_host = '_'},
                                                       permission = '_'},
                                      write)],
                        UserTopicPermissionsQuery = match_user_vhost_topic_permission(Username, '_'),
                        UserTopicPermissions = UserTopicPermissionsQuery(),
                        [ok = mnesia:delete_object(rabbit_topic_permission, TP, write) ||
                            TP <- UserTopicPermissions],
                        ok
                end)),
        rabbit_log:info("Deleted user '~s'", [Username]),
        rabbit_event:notify(user_deleted,
                            [{name, Username},
                             {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {no_such_user, _}} = Error ->
            rabbit_log:warning("Failed to delete user '~s': the user does not exist", [Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to delete user '~s': ~p", [Username, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to delete user '~s': ~p", [Username, Error]),
            exit(Error)
    end .

-spec lookup_user
        (rabbit_types:username()) ->
            rabbit_types:ok(internal_user:internal_user()) |
            rabbit_types:error('not_found').

%% Dirty-read the user record; {error, not_found} when absent.
lookup_user(Username) ->
    rabbit_misc:dirty_read({rabbit_user, Username}).

-spec exists(rabbit_types:username()) -> boolean().

exists(Username) ->
    case lookup_user(Username) of
        {error, not_found} -> false;
        _                  -> true
    end.

-spec change_password
        (rabbit_types:username(), rabbit_types:password(), rabbit_types:username()) -> 'ok'.
%% Change a user's password after validating the new credentials.
change_password(Username, Password, ActingUser) ->
    validate_and_alternate_credentials(Username, Password, ActingUser,
                                       fun change_password_sans_validation/3).

%% Hash and store a new password without credential validation. Throws if
%% the user does not exist. Emits a user_password_changed event.
change_password_sans_validation(Username, Password, ActingUser) ->
    try
        %% Fix: the message advertises bytes, so use byte_size/1 —
        %% bit_size/1 reports bits (8x the stated value).
        rabbit_log:debug("Asked to change password of user '~s', new password length in bytes: ~p", [Username, byte_size(Password)]),
        HashingAlgorithm = rabbit_password:hashing_mod(),
        %% reuse HashingAlgorithm instead of calling hashing_mod() twice
        R = change_password_hash(Username,
                                 hash_password(HashingAlgorithm, Password),
                                 HashingAlgorithm),
        rabbit_log:info("Successfully changed password for user '~s'", [Username]),
        rabbit_event:notify(user_password_changed,
                            [{name, Username},
                             {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {no_such_user, _}} = Error ->
            rabbit_log:warning("Failed to change password for user '~s': the user does not exist", [Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to change password for user '~s': ~p", [Username, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to change password for user '~s': ~p", [Username, Error]),
            exit(Error)
    end.

-spec clear_password(rabbit_types:username(), rabbit_types:username()) -> 'ok'.

%% Set an empty password hash, disabling password login for the user.
clear_password(Username, ActingUser) ->
    rabbit_log:info("Clearing password for '~s'~n", [Username]),
    R = change_password_hash(Username, <<"">>),
    rabbit_event:notify(user_password_cleared,
                        [{name, Username},
                         {user_who_performed_action, ActingUser}]),
    R.

-spec hash_password
        (module(), rabbit_types:password()) -> rabbit_types:password_hash().

hash_password(HashingMod, Cleartext) ->
    rabbit_password:hash(HashingMod, Cleartext).

-spec change_password_hash
        (rabbit_types:username(), rabbit_types:password_hash()) -> 'ok'.

%% Store a pre-computed hash using the currently configured algorithm hint.
change_password_hash(Username, PasswordHash) ->
    change_password_hash(Username, PasswordHash, rabbit_password:hashing_mod()).
%% Store a pre-computed password hash plus the hashing-algorithm hint on
%% the user record.
change_password_hash(Username, PasswordHash, HashingAlgorithm) ->
    update_user(Username, fun(User) ->
                                  internal_user:set_password_hash(User,
                                                                  PasswordHash, HashingAlgorithm)
                          end).

-spec set_tags(rabbit_types:username(), [atom()], rabbit_types:username()) -> 'ok'.

%% Replace the user's tag list (tags are coerced to atoms). Throws if the
%% user does not exist. Emits a user_tags_set event.
set_tags(Username, Tags, ActingUser) ->
    ConvertedTags = [rabbit_data_coercion:to_atom(I) || I <- Tags],
    rabbit_log:debug("Asked to set user tags for user '~s' to ~p", [Username, ConvertedTags]),
    try
        R = update_user(Username, fun(User) ->
                                          internal_user:set_tags(User, ConvertedTags)
                                  end),
        rabbit_log:info("Successfully set user tags for user '~s' to ~p", [Username, ConvertedTags]),
        rabbit_event:notify(user_tags_set, [{name, Username}, {tags, ConvertedTags},
                                            {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {no_such_user, _}} = Error ->
            rabbit_log:warning("Failed to set tags for user '~s': the user does not exist", [Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to set tags for user '~s': ~p", [Username, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to set tags for user '~s': ~p", [Username, Error]),
            exit(Error)
    end .

-spec set_permissions
        (rabbit_types:username(), rabbit_types:vhost(), regexp(), regexp(),
         regexp(), rabbit_types:username()) ->
            'ok'.
%% Grant configure/write/read permission regexps to a user on a vhost.
%% All three regexps are compile-checked up front; invalid input throws
%% {error, {invalid_regexp, _, _}}. Emits a permission_created event.
set_permissions(Username, VirtualHost, ConfigurePerm, WritePerm, ReadPerm, ActingUser) ->
    rabbit_log:debug("Asked to set permissions for "
                     "'~s' in virtual host '~s' to '~s', '~s', '~s'",
                     [Username, VirtualHost, ConfigurePerm, WritePerm, ReadPerm]),
    %% lists:foreach/2, not lists:map/2: this loop is for its side effect
    %% (validation-or-throw) only; the mapped result was discarded.
    lists:foreach(
      fun (RegexpBin) ->
              Regexp = binary_to_list(RegexpBin),
              case re:compile(Regexp) of
                  {ok, _} -> ok;
                  {error, Reason} ->
                      rabbit_log:warning("Failed to set permissions for '~s' in virtual host '~s': "
                                         "regular expression '~s' is invalid",
                                         [Username, VirtualHost, RegexpBin]),
                      throw({error, {invalid_regexp, Regexp, Reason}})
              end
      end, [ConfigurePerm, WritePerm, ReadPerm]),
    try
        R = rabbit_misc:execute_mnesia_transaction(
              rabbit_vhost:with_user_and_vhost(
                Username, VirtualHost,
                fun () -> ok = mnesia:write(
                                 rabbit_user_permission,
                                 #user_permission{user_vhost = #user_vhost{
                                                    username     = Username,
                                                    virtual_host = VirtualHost},
                                                  permission = #permission{
                                                    configure = ConfigurePerm,
                                                    write     = WritePerm,
                                                    read      = ReadPerm}},
                                 write)
                end)),
        rabbit_log:info("Successfully set permissions for "
                        "'~s' in virtual host '~s' to '~s', '~s', '~s'",
                        [Username, VirtualHost, ConfigurePerm, WritePerm, ReadPerm]),
        rabbit_event:notify(permission_created, [{user,      Username},
                                                 {vhost,     VirtualHost},
                                                 {configure, ConfigurePerm},
                                                 {write,     WritePerm},
                                                 {read,      ReadPerm},
                                                 {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {no_such_vhost, _}} = Error ->
            rabbit_log:warning("Failed to set permissions for '~s': virtual host '~s' does not exist",
                               [Username, VirtualHost]),
            throw(Error);
        throw:{error, {no_such_user, _}} = Error ->
            rabbit_log:warning("Failed to set permissions for '~s': the user does not exist",
                               [Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to set permissions for '~s' in virtual host '~s': ~p",
                               [Username, VirtualHost, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to set permissions for '~s' in virtual host '~s': ~p",
                               [Username, VirtualHost, Error]),
            exit(Error)
    end.

-spec clear_permissions
        (rabbit_types:username(), rabbit_types:vhost(), rabbit_types:username()) -> 'ok'.

%% Remove the user's permission record for a vhost. Emits a
%% permission_deleted event.
clear_permissions(Username, VirtualHost, ActingUser) ->
    rabbit_log:debug("Asked to clear permissions for '~s' in virtual host '~s'",
                     [Username, VirtualHost]),
    try
        R = rabbit_misc:execute_mnesia_transaction(
              rabbit_vhost:with_user_and_vhost(
                Username, VirtualHost,
                fun () ->
                        ok = mnesia:delete({rabbit_user_permission,
                                            #user_vhost{username     = Username,
                                                        virtual_host = VirtualHost}})
                end)),
        rabbit_log:info("Successfully cleared permissions for '~s' in virtual host '~s'",
                        [Username, VirtualHost]),
        rabbit_event:notify(permission_deleted, [{user,  Username},
                                                 {vhost, VirtualHost},
                                                 {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {no_such_vhost, _}} = Error ->
            rabbit_log:warning("Failed to clear permissions for '~s': virtual host '~s' does not exist",
                               [Username, VirtualHost]),
            throw(Error);
        throw:{error, {no_such_user, _}} = Error ->
            rabbit_log:warning("Failed to clear permissions for '~s': the user does not exist",
                               [Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to clear permissions for '~s' in virtual host '~s': ~p",
                               [Username, VirtualHost, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to clear permissions for '~s' in virtual host '~s': ~p",
                               [Username, VirtualHost, Error]),
            exit(Error)
    end.


%% Apply Fun to the stored user record inside a transaction and write the
%% result back. Throws via with_user if the user does not exist.
update_user(Username, Fun) ->
    rabbit_misc:execute_mnesia_transaction(
      rabbit_misc:with_user(
        Username,
        fun () ->
                {ok, User} = lookup_user(Username),
                ok = mnesia:write(rabbit_user, Fun(User), write)
        end)).
%% Grant write/read topic-permission regexps on an exchange for a user in
%% a vhost. Regexps are compile-checked up front. Emits a
%% topic_permission_created event.
set_topic_permissions(Username, VirtualHost, Exchange, WritePerm, ReadPerm, ActingUser) ->
    rabbit_log:debug("Asked to set topic permissions on exchange '~s' for "
                     "user '~s' in virtual host '~s' to '~s', '~s'",
                     [Exchange, Username, VirtualHost, WritePerm, ReadPerm]),
    WritePermRegex = rabbit_data_coercion:to_binary(WritePerm),
    ReadPermRegex  = rabbit_data_coercion:to_binary(ReadPerm),
    %% lists:foreach/2, not lists:map/2: this loop is for its side effect
    %% (validation-or-throw) only; the mapped result was discarded.
    lists:foreach(
      fun (RegexpBin) ->
              case re:compile(RegexpBin) of
                  {ok, _} -> ok;
                  {error, Reason} ->
                      rabbit_log:warning("Failed to set topic permissions on exchange '~s' for "
                                         "'~s' in virtual host '~s': regular expression '~s' is invalid",
                                         [Exchange, Username, VirtualHost, RegexpBin]),
                      throw({error, {invalid_regexp, RegexpBin, Reason}})
              end
      end, [WritePerm, ReadPerm]),
    try
        R = rabbit_misc:execute_mnesia_transaction(
              rabbit_vhost:with_user_and_vhost(
                Username, VirtualHost,
                fun () -> ok = mnesia:write(
                                 rabbit_topic_permission,
                                 #topic_permission{
                                    topic_permission_key = #topic_permission_key{
                                      user_vhost = #user_vhost{
                                        username     = Username,
                                        virtual_host = VirtualHost},
                                      exchange = Exchange
                                     },
                                    permission = #permission{
                                      write = WritePermRegex,
                                      read  = ReadPermRegex
                                     }
                                   },
                                 write)
                end)),
        rabbit_log:info("Successfully set topic permissions on exchange '~s' for "
                        "'~s' in virtual host '~s' to '~s', '~s'",
                        [Exchange, Username, VirtualHost, WritePerm, ReadPerm]),
        rabbit_event:notify(topic_permission_created, [
                                                       {user,     Username},
                                                       {vhost,    VirtualHost},
                                                       {exchange, Exchange},
                                                       {write,    WritePermRegex},
                                                       {read,     ReadPermRegex},
                                                       {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {no_such_vhost, _}} = Error ->
            rabbit_log:warning("Failed to set topic permissions on exchange '~s' for '~s': virtual host '~s' does not exist.",
                               [Exchange, Username, VirtualHost]),
            throw(Error);
        throw:{error, {no_such_user, _}} = Error ->
            rabbit_log:warning("Failed to set topic permissions on exchange '~s' for '~s': the user does not exist.",
                               [Exchange, Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to set topic permissions on exchange '~s' for '~s' in virtual host '~s': ~p.",
                               [Exchange, Username, VirtualHost, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to set topic permissions on exchange '~s' for '~s' in virtual host '~s': ~p.",
                               [Exchange, Username, VirtualHost, Error]),
            exit(Error)
    end .

%% Remove ALL topic permissions for the user in a vhost. Emits a
%% topic_permission_deleted event.
clear_topic_permissions(Username, VirtualHost, ActingUser) ->
    rabbit_log:debug("Asked to clear topic permissions for '~s' in virtual host '~s'",
                     [Username, VirtualHost]),
    try
        R = rabbit_misc:execute_mnesia_transaction(
              rabbit_vhost:with_user_and_vhost(
                Username, VirtualHost,
                fun () ->
                        ListFunction = match_user_vhost_topic_permission(Username, VirtualHost),
                        List = ListFunction(),
                        lists:foreach(fun(X) ->
                                              ok = mnesia:delete_object(rabbit_topic_permission, X, write)
                                      end, List)
                end)),
        rabbit_log:info("Successfully cleared topic permissions for '~s' in virtual host '~s'",
                        [Username, VirtualHost]),
        rabbit_event:notify(topic_permission_deleted, [{user,  Username},
                                                       {vhost, VirtualHost},
                                                       {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {no_such_vhost, _}} = Error ->
            rabbit_log:warning("Failed to clear topic permissions for '~s': virtual host '~s' does not exist",
                               [Username, VirtualHost]),
            throw(Error);
        throw:{error, {no_such_user, _}} = Error ->
            rabbit_log:warning("Failed to clear topic permissions for '~s': the user does not exist",
                               [Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to clear topic permissions for '~s' in virtual host '~s': ~p",
                               [Username, VirtualHost, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to clear topic permissions for '~s' in virtual host '~s': ~p",
                               [Username, VirtualHost, Error]),
            exit(Error)
    end.
%% Remove the topic permission for one exchange for the user in a vhost.
%% Emits a topic_permission_deleted event.
clear_topic_permissions(Username, VirtualHost, Exchange, ActingUser) ->
    rabbit_log:debug("Asked to clear topic permissions on exchange '~s' for '~s' in virtual host '~s'",
                     [Exchange, Username, VirtualHost]),
    try
        R = rabbit_misc:execute_mnesia_transaction(
              rabbit_vhost:with_user_and_vhost(
                Username, VirtualHost,
                fun () ->
                        ok = mnesia:delete(rabbit_topic_permission,
                                           #topic_permission_key{
                                              user_vhost = #user_vhost{
                                                username     = Username,
                                                virtual_host = VirtualHost},
                                              exchange = Exchange
                                             }, write)
                end)),
        rabbit_log:info("Successfully cleared topic permissions on exchange '~s' for '~s' in virtual host '~s'",
                        [Exchange, Username, VirtualHost]),
        %% Fix: emit topic_permission_deleted, matching the 3-arity clause;
        %% 'permission_deleted' here was a copy-paste slip from
        %% clear_permissions/3 and mislabelled the event.
        rabbit_event:notify(topic_permission_deleted, [{user,  Username},
                                                       {vhost, VirtualHost},
                                                       {user_who_performed_action, ActingUser}]),
        R
    catch
        throw:{error, {no_such_vhost, _}} = Error ->
            rabbit_log:warning("Failed to clear topic permissions on exchange '~s' for '~s': virtual host '~s' does not exist",
                               [Exchange, Username, VirtualHost]),
            throw(Error);
        throw:{error, {no_such_user, _}} = Error ->
            rabbit_log:warning("Failed to clear topic permissions on exchange '~s' for '~s': the user does not exist",
                               [Exchange, Username]),
            throw(Error);
        throw:Error ->
            rabbit_log:warning("Failed to clear topic permissions on exchange '~s' for '~s' in virtual host '~s': ~p",
                               [Exchange, Username, VirtualHost, Error]),
            throw(Error);
        exit:Error ->
            rabbit_log:warning("Failed to clear topic permissions on exchange '~s' for '~s' in virtual host '~s': ~p",
                               [Exchange, Username, VirtualHost, Error]),
            exit(Error)
    end.

%% Create-or-update a user from a definitions map, with no expected
%% definitions file version.
put_user(User, ActingUser) -> put_user(User, undefined, ActingUser).
%% Create-or-update a user from a definitions map (as imported from a
%% definitions file or the HTTP API). Exactly one of `password` /
%% `password_hash` may be supplied; supplying both throws. Tags come from
%% either the `tags` string or the legacy `administrator` boolean.
put_user(User, Version, ActingUser) ->
    Username        = maps:get(name, User),
    HasPassword     = maps:is_key(password, User),
    HasPasswordHash = maps:is_key(password_hash, User),
    Password        = maps:get(password, User, undefined),
    PasswordHash    = maps:get(password_hash, User, undefined),

    Tags            = case {maps:get(tags, User, undefined), maps:get(administrator, User, undefined)} of
                          {undefined, undefined} ->
                              throw({error, tags_not_present});
                          {undefined, AdminS} ->
                              case rabbit_misc:parse_bool(AdminS) of
                                  true  -> [administrator];
                                  false -> []
                              end;
                          {TagsS, _} ->
                              %% NOTE(review): list_to_atom/1 on input-derived
                              %% tag names grows the atom table; definitions
                              %% are operator-supplied, but consider
                              %% to_existing_atom if this path ever takes
                              %% untrusted input.
                              [list_to_atom(string:strip(T)) ||
                                  T <- string:tokens(binary_to_list(TagsS), ",")]
                      end,

    %% pre-configured, only applies to newly created users
    Permissions     = maps:get(permissions, User, undefined),

    PassedCredentialValidation =
        case {HasPassword, HasPasswordHash} of
            {true, false} ->
                rabbit_credential_validation:validate(Username, Password) =:= ok;
            {false, true} -> true;
            _             ->
                rabbit_credential_validation:validate(Username, Password) =:= ok
        end,

    case exists(Username) of
        true ->
            case {HasPassword, HasPasswordHash} of
                {true, false} ->
                    update_user_password(PassedCredentialValidation, Username, Password, Tags, ActingUser);
                {false, true} ->
                    update_user_password_hash(Username, PasswordHash, Tags, User, Version, ActingUser);
                {true, true} ->
                    throw({error, both_password_and_password_hash_are_provided});
                %% clear password, update tags if needed
                _ ->
                    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser),
                    rabbit_auth_backend_internal:clear_password(Username, ActingUser)
            end;
        false ->
            case {HasPassword, HasPasswordHash} of
                {true, false} ->
                    create_user_with_password(PassedCredentialValidation, Username, Password, Tags, Permissions, ActingUser);
                {false, true} ->
                    create_user_with_password_hash(Username, PasswordHash, Tags, User, Version, Permissions, ActingUser);
                {true, true} ->
                    throw({error, both_password_and_password_hash_are_provided});
                {false, false} ->
                    %% this user won't be able to sign in using
                    %% a username/password pair but can be used for x509 certificate authentication,
                    %% with authn backends such as HTTP or LDAP and so on.
                    create_user_with_password(PassedCredentialValidation, Username, <<"">>, Tags, Permissions, ActingUser)
            end
    end.

%% Change password and tags for an existing user; throws if the new
%% password failed credential validation.
update_user_password(_PassedCredentialValidation = true, Username, Password, Tags, ActingUser) ->
    rabbit_auth_backend_internal:change_password(Username, Password, ActingUser),
    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser);
update_user_password(_PassedCredentialValidation = false, _Username, _Password, _Tags, _ActingUser) ->
    %% we don't log here because
    %% rabbit_auth_backend_internal will do it
    throw({error, credential_validation_failed}).

%% Set a pre-computed (base64-encoded) password hash and tags for an
%% existing user.
update_user_password_hash(Username, PasswordHash, Tags, User, Version, ActingUser) ->
    %% when a hash is provided, credential validation
    %% is not applied
    HashingAlgorithm = hashing_algorithm(User, Version),

    Hash = rabbit_misc:b64decode_or_throw(PasswordHash),
    rabbit_auth_backend_internal:change_password_hash(
      Username, Hash, HashingAlgorithm),
    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser).
%% Create a new user with a cleartext password, set tags, and (when given)
%% apply pre-configured per-vhost permissions. Throws if the password
%% failed credential validation.
create_user_with_password(_PassedCredentialValidation = true, Username, Password, Tags, undefined, ActingUser) ->
    rabbit_auth_backend_internal:add_user(Username, Password, ActingUser),
    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser);
create_user_with_password(_PassedCredentialValidation = true, Username, Password, Tags, PreconfiguredPermissions, ActingUser) ->
    rabbit_auth_backend_internal:add_user(Username, Password, ActingUser),
    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser),
    preconfigure_permissions(Username, PreconfiguredPermissions, ActingUser);
create_user_with_password(_PassedCredentialValidation = false, _Username, _Password, _Tags, _, _) ->
    %% we don't log here because
    %% rabbit_auth_backend_internal will do it
    throw({error, credential_validation_failed}).

%% Create a new user from a pre-computed (base64-encoded) password hash:
%% create with a throwaway random password first, then overwrite the hash.
create_user_with_password_hash(Username, PasswordHash, Tags, User, Version, PreconfiguredPermissions, ActingUser) ->
    %% when a hash is provided, credential validation
    %% is not applied
    HashingAlgorithm = hashing_algorithm(User, Version),
    Hash             = rabbit_misc:b64decode_or_throw(PasswordHash),

    %% first we create a user with dummy credentials and no
    %% validation applied, then we update password hash
    TmpPassword = rabbit_guid:binary(rabbit_guid:gen_secure(), "tmp"),
    rabbit_auth_backend_internal:add_user_sans_validation(Username, TmpPassword, ActingUser),

    rabbit_auth_backend_internal:change_password_hash(
      Username, Hash, HashingAlgorithm),
    rabbit_auth_backend_internal:set_tags(Username, Tags, ActingUser),
    preconfigure_permissions(Username, PreconfiguredPermissions, ActingUser).
+
+%% Applies a map of vhost => permission map (with <<"configure">>,
+%% <<"write">>, <<"read">> keys) for a freshly imported user.
+%% `undefined` means no permissions were provided. Always returns `ok`;
+%% the maps:map/2 result is intentionally discarded (used for side
+%% effects only).
+preconfigure_permissions(_Username, undefined, _ActingUser) ->
+    ok;
+preconfigure_permissions(Username, Map, ActingUser) when is_map(Map) ->
+    maps:map(fun(VHost, M) ->
+                     rabbit_auth_backend_internal:set_permissions(Username, VHost,
+                                                                  maps:get(<<"configure">>, M),
+                                                                  maps:get(<<"write">>, M),
+                                                                  maps:get(<<"read">>, M),
+                                                                  ActingUser)
+             end,
+             Map),
+    ok.
+
+%% Sets per-user limits from either a JSON string/binary or an
+%% already-decoded map. Refuses to do anything unless the `user_limits`
+%% feature flag is enabled. Returns an {error_string, _} tuple on any
+%% failure (JSON decode error, validation error, flag disabled).
+set_user_limits(Username, Definition, ActingUser) when is_list(Definition); is_binary(Definition) ->
+    case rabbit_feature_flags:is_enabled(user_limits) of
+        true ->
+            case rabbit_json:try_decode(rabbit_data_coercion:to_binary(Definition)) of
+                {ok, Term} ->
+                    validate_parameters_and_update_limit(Username, Term, ActingUser);
+                {error, Reason} ->
+                    {error_string, rabbit_misc:format(
+                                     "JSON decoding error. Reason: ~ts", [Reason])}
+            end;
+        false -> {error_string, "cannot set any user limits: the user_limits feature flag is not enabled"}
+    end;
+set_user_limits(Username, Definition, ActingUser) when is_map(Definition) ->
+    case rabbit_feature_flags:is_enabled(user_limits) of
+        true -> validate_parameters_and_update_limit(Username, Definition, ActingUser);
+        false -> {error_string, "cannot set any user limits: the user_limits feature flag is not enabled"}
+    end.
+
+%% Validates the limit proplist/map against user_limit_validation/0 and,
+%% on success, merges it into the stored internal user record and emits
+%% a user_limits_set event.
+validate_parameters_and_update_limit(Username, Term, ActingUser) ->
+    case flatten_errors(rabbit_parameter_validation:proplist(
+                          <<"user-limits">>, user_limit_validation(), Term)) of
+        ok ->
+            update_user(Username, fun(User) ->
+                                          internal_user:update_limits(add, User, Term)
+                                  end),
+            notify_limit_set(Username, ActingUser, Term);
+        {errors, [{Reason, Arguments}]} ->
+            {error_string, rabbit_misc:format(Reason, Arguments)}
+    end.
+
+%% Validation spec: both supported limits are optional integers.
+user_limit_validation() ->
+    [{<<"max-connections">>, fun rabbit_parameter_validation:integer/2, optional},
+     {<<"max-channels">>, fun rabbit_parameter_validation:integer/2, optional}].
+ +clear_user_limits(Username, <<"all">>, ActingUser) -> + update_user(Username, fun(User) -> + internal_user:clear_limits(User) + end), + notify_limit_clear(Username, ActingUser); +clear_user_limits(Username, LimitType, ActingUser) -> + update_user(Username, fun(User) -> + internal_user:update_limits(remove, User, LimitType) + end), + notify_limit_clear(Username, ActingUser). + +flatten_errors(L) -> + case [{F, A} || I <- lists:flatten([L]), {error, F, A} <- [I]] of + [] -> ok; + E -> {errors, E} + end. + +%%---------------------------------------------------------------------------- +%% Listing + +-define(PERMS_INFO_KEYS, [configure, write, read]). +-define(USER_INFO_KEYS, [user, tags]). + +-spec user_info_keys() -> rabbit_types:info_keys(). + +user_info_keys() -> ?USER_INFO_KEYS. + +-spec perms_info_keys() -> rabbit_types:info_keys(). + +perms_info_keys() -> [user, vhost | ?PERMS_INFO_KEYS]. + +-spec vhost_perms_info_keys() -> rabbit_types:info_keys(). + +vhost_perms_info_keys() -> [user | ?PERMS_INFO_KEYS]. + +-spec user_perms_info_keys() -> rabbit_types:info_keys(). + +user_perms_info_keys() -> [vhost | ?PERMS_INFO_KEYS]. + +-spec user_vhost_perms_info_keys() -> rabbit_types:info_keys(). + +user_vhost_perms_info_keys() -> ?PERMS_INFO_KEYS. + +topic_perms_info_keys() -> [user, vhost, exchange, write, read]. +user_topic_perms_info_keys() -> [vhost, exchange, write, read]. +vhost_topic_perms_info_keys() -> [user, exchange, write, read]. +user_vhost_topic_perms_info_keys() -> [exchange, write, read]. + +all_users() -> mnesia:dirty_match_object(rabbit_user, internal_user:pattern_match_all()). + +-spec list_users() -> [rabbit_types:infos()]. + +list_users() -> + [extract_internal_user_params(U) || + U <- all_users()]. + +-spec list_users(reference(), pid()) -> 'ok'. + +list_users(Ref, AggregatorPid) -> + rabbit_control_misc:emitting_map( + AggregatorPid, Ref, + fun(U) -> extract_internal_user_params(U) end, + all_users()). 
+ +-spec list_permissions() -> [rabbit_types:infos()]. + +list_permissions() -> + list_permissions(perms_info_keys(), match_user_vhost('_', '_')). + +list_permissions(Keys, QueryThunk) -> + [extract_user_permission_params(Keys, U) || + U <- rabbit_misc:execute_mnesia_transaction(QueryThunk)]. + +list_permissions(Keys, QueryThunk, Ref, AggregatorPid) -> + rabbit_control_misc:emitting_map( + AggregatorPid, Ref, fun(U) -> extract_user_permission_params(Keys, U) end, + rabbit_misc:execute_mnesia_transaction(QueryThunk)). + +filter_props(Keys, Props) -> [T || T = {K, _} <- Props, lists:member(K, Keys)]. + +-spec list_user_permissions + (rabbit_types:username()) -> [rabbit_types:infos()]. + +list_user_permissions(Username) -> + list_permissions( + user_perms_info_keys(), + rabbit_misc:with_user(Username, match_user_vhost(Username, '_'))). + +-spec list_user_permissions + (rabbit_types:username(), reference(), pid()) -> 'ok'. + +list_user_permissions(Username, Ref, AggregatorPid) -> + list_permissions( + user_perms_info_keys(), + rabbit_misc:with_user(Username, match_user_vhost(Username, '_')), + Ref, AggregatorPid). + +-spec list_vhost_permissions + (rabbit_types:vhost()) -> [rabbit_types:infos()]. + +list_vhost_permissions(VHostPath) -> + list_permissions( + vhost_perms_info_keys(), + rabbit_vhost:with(VHostPath, match_user_vhost('_', VHostPath))). + +-spec list_vhost_permissions + (rabbit_types:vhost(), reference(), pid()) -> 'ok'. + +list_vhost_permissions(VHostPath, Ref, AggregatorPid) -> + list_permissions( + vhost_perms_info_keys(), + rabbit_vhost:with(VHostPath, match_user_vhost('_', VHostPath)), + Ref, AggregatorPid). + +-spec list_user_vhost_permissions + (rabbit_types:username(), rabbit_types:vhost()) -> [rabbit_types:infos()]. + +list_user_vhost_permissions(Username, VHostPath) -> + list_permissions( + user_vhost_perms_info_keys(), + rabbit_vhost:with_user_and_vhost( + Username, VHostPath, match_user_vhost(Username, VHostPath))). 
+ +extract_user_permission_params(Keys, #user_permission{ + user_vhost = + #user_vhost{username = Username, + virtual_host = VHostPath}, + permission = #permission{ + configure = ConfigurePerm, + write = WritePerm, + read = ReadPerm}}) -> + filter_props(Keys, [{user, Username}, + {vhost, VHostPath}, + {configure, ConfigurePerm}, + {write, WritePerm}, + {read, ReadPerm}]). + +extract_internal_user_params(User) -> + [{user, internal_user:get_username(User)}, + {tags, internal_user:get_tags(User)}]. + +match_user_vhost(Username, VHostPath) -> + fun () -> mnesia:match_object( + rabbit_user_permission, + #user_permission{user_vhost = #user_vhost{ + username = Username, + virtual_host = VHostPath}, + permission = '_'}, + read) + end. + +list_topic_permissions() -> + list_topic_permissions(topic_perms_info_keys(), match_user_vhost_topic_permission('_', '_')). + +list_user_topic_permissions(Username) -> + list_topic_permissions(user_topic_perms_info_keys(), + rabbit_misc:with_user(Username, match_user_vhost_topic_permission(Username, '_'))). + +list_vhost_topic_permissions(VHost) -> + list_topic_permissions(vhost_topic_perms_info_keys(), + rabbit_vhost:with(VHost, match_user_vhost_topic_permission('_', VHost))). + +list_user_vhost_topic_permissions(Username, VHost) -> + list_topic_permissions(user_vhost_topic_perms_info_keys(), + rabbit_vhost:with_user_and_vhost(Username, VHost, match_user_vhost_topic_permission(Username, VHost))). + +list_topic_permissions(Keys, QueryThunk) -> + [extract_topic_permission_params(Keys, U) || + U <- rabbit_misc:execute_mnesia_transaction(QueryThunk)]. + +match_user_vhost_topic_permission(Username, VHostPath) -> + match_user_vhost_topic_permission(Username, VHostPath, '_'). 
+ +match_user_vhost_topic_permission(Username, VHostPath, Exchange) -> + fun () -> mnesia:match_object( + rabbit_topic_permission, + #topic_permission{topic_permission_key = #topic_permission_key{ + user_vhost = #user_vhost{ + username = Username, + virtual_host = VHostPath}, + exchange = Exchange}, + permission = '_'}, + read) + end. + +extract_topic_permission_params(Keys, #topic_permission{ + topic_permission_key = #topic_permission_key{ + user_vhost = #user_vhost{username = Username, + virtual_host = VHostPath}, + exchange = Exchange}, + permission = #permission{ + write = WritePerm, + read = ReadPerm}}) -> + filter_props(Keys, [{user, Username}, + {vhost, VHostPath}, + {exchange, Exchange}, + {write, WritePerm}, + {read, ReadPerm}]). + +hashing_algorithm(User, Version) -> + case maps:get(hashing_algorithm, User, undefined) of + undefined -> + case Version of + %% 3.6.1 and later versions are supposed to have + %% the algorithm exported and thus not need a default + <<"3.6.0">> -> rabbit_password_hashing_sha256; + <<"3.5.", _/binary>> -> rabbit_password_hashing_md5; + <<"3.4.", _/binary>> -> rabbit_password_hashing_md5; + <<"3.3.", _/binary>> -> rabbit_password_hashing_md5; + <<"3.2.", _/binary>> -> rabbit_password_hashing_md5; + <<"3.1.", _/binary>> -> rabbit_password_hashing_md5; + <<"3.0.", _/binary>> -> rabbit_password_hashing_md5; + _ -> rabbit_password:hashing_mod() + end; + Alg -> rabbit_data_coercion:to_atom(Alg, utf8) + end. + +is_over_connection_limit(Username) -> + Fun = fun() -> + rabbit_connection_tracking:count_tracked_items_in({user, Username}) + end, + is_over_limit(Username, <<"max-connections">>, Fun). + +is_over_channel_limit(Username) -> + Fun = fun() -> + rabbit_channel_tracking:count_tracked_items_in({user, Username}) + end, + is_over_limit(Username, <<"max-channels">>, Fun). 
+ +is_over_limit(Username, LimitType, Fun) -> + case get_user_limit(Username, LimitType) of + undefined -> false; + {ok, 0} -> {true, 0}; + {ok, Limit} -> + case Fun() >= Limit of + false -> false; + true -> {true, Limit} + end + end. + +get_user_limit(Username, LimitType) -> + case lookup_user(Username) of + {ok, User} -> + case rabbit_misc:pget(LimitType, internal_user:get_limits(User)) of + undefined -> undefined; + N when N < 0 -> undefined; + N when N >= 0 -> {ok, N} + end; + _ -> + undefined + end. + +get_user_limits() -> + [{internal_user:get_username(U), internal_user:get_limits(U)} || + U <- all_users(), + internal_user:get_limits(U) =/= #{}]. + +get_user_limits(Username) -> + case lookup_user(Username) of + {ok, User} -> internal_user:get_limits(User); + _ -> undefined + end. + +notify_limit_set(Username, ActingUser, Term) -> + rabbit_event:notify(user_limits_set, + [{name, <<"limits">>}, {user_who_performed_action, ActingUser}, + {username, Username} | maps:to_list(Term)]). + +notify_limit_clear(Username, ActingUser) -> + rabbit_event:notify(user_limits_cleared, + [{name, <<"limits">>}, {user_who_performed_action, ActingUser}, + {username, Username}]). diff --git a/deps/rabbit/src/rabbit_auth_mechanism_amqplain.erl b/deps/rabbit/src/rabbit_auth_mechanism_amqplain.erl new file mode 100644 index 0000000000..c81a337153 --- /dev/null +++ b/deps/rabbit/src/rabbit_auth_mechanism_amqplain.erl @@ -0,0 +1,54 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_auth_mechanism_amqplain). +-include("rabbit.hrl"). + +-behaviour(rabbit_auth_mechanism). + +-export([description/0, should_offer/1, init/1, handle_response/2]). 
+
+-rabbit_boot_step({?MODULE,
+                   [{description, "auth mechanism amqplain"},
+                    {mfa, {rabbit_registry, register,
+                           [auth_mechanism, <<"AMQPLAIN">>, ?MODULE]}},
+                    {requires, rabbit_registry},
+                    {enables, kernel_ready}]}).
+
+%% AMQPLAIN, as used by Qpid Python test suite. The 0-8 spec actually
+%% defines this as PLAIN, but in 0-9 that definition is gone, instead
+%% referring generically to "SASL security mechanism", i.e. the above.
+
+description() ->
+    [{description, <<"QPid AMQPLAIN mechanism">>}].
+
+should_offer(_Sock) ->
+    true.
+
+init(_Sock) ->
+    [].
+
+-define(IS_STRING_TYPE(Type), Type =:= longstr orelse Type =:= shortstr).
+
+%% Decodes the SASL response as an AMQP 0-9-1 field table and
+%% authenticates using its LOGIN and PASSWORD entries.
+%%
+%% BUG FIX: the guard previously joined the two type checks with `;`
+%% (OR between guard sequences), so a response where only ONE of
+%% LOGIN/PASSWORD was string-typed matched the success clause and a
+%% non-string value was handed to check_user_pass_login/2, while the
+%% "unsupported type" clause below was unreachable for that input.
+%% Both fields must be string-typed, hence `,` (AND).
+handle_response(Response, _State) ->
+    LoginTable = rabbit_binary_parser:parse_table(Response),
+    case {lists:keysearch(<<"LOGIN">>, 1, LoginTable),
+          lists:keysearch(<<"PASSWORD">>, 1, LoginTable)} of
+        {{value, {_, UserType, User}},
+         {value, {_, PassType, Pass}}} when ?IS_STRING_TYPE(UserType),
+                                            ?IS_STRING_TYPE(PassType) ->
+            rabbit_access_control:check_user_pass_login(User, Pass);
+        {{value, {_, _UserType, _User}},
+         {value, {_, _PassType, _Pass}}} ->
+            {protocol_error,
+             "AMQPLAIN auth info ~w uses unsupported type for LOGIN or PASSWORD field",
+             [LoginTable]};
+        _ ->
+            {protocol_error,
+             "AMQPLAIN auth info ~w is missing LOGIN or PASSWORD field",
+             [LoginTable]}
+    end.
diff --git a/deps/rabbit/src/rabbit_auth_mechanism_cr_demo.erl b/deps/rabbit/src/rabbit_auth_mechanism_cr_demo.erl
new file mode 100644
index 0000000000..15439c461f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_auth_mechanism_cr_demo.erl
@@ -0,0 +1,48 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_auth_mechanism_cr_demo).
+-include("rabbit.hrl").
+
+-behaviour(rabbit_auth_mechanism).
+
+-export([description/0, should_offer/1, init/1, handle_response/2]).
+
+-rabbit_boot_step({?MODULE,
+                   [{description, "auth mechanism cr-demo"},
+                    {mfa, {rabbit_registry, register,
+                           [auth_mechanism, <<"RABBIT-CR-DEMO">>,
+                            ?MODULE]}},
+                    {requires, rabbit_registry},
+                    {enables, kernel_ready}]}).
+
+%% Mechanism state: the username captured from the first response,
+%% `undefined` until then.
+-record(state, {username = undefined}).
+
+%% Provides equivalent security to PLAIN but demos use of Connection.Secure(Ok)
+%% START-OK: Username
+%% SECURE: "Please tell me your password"
+%% SECURE-OK: "My password is ~s", [Password]
+
+description() ->
+    [{description, <<"RabbitMQ Demo challenge-response authentication "
+                     "mechanism">>}].
+
+should_offer(_Sock) ->
+    true.
+
+init(_Sock) ->
+    #state{}.
+
+%% First round: whatever the client sent is taken as the username and a
+%% password challenge is issued. Second round: the response must be the
+%% literal prefix "My password is " followed by the password; anything
+%% else is a protocol error.
+handle_response(Response, State = #state{username = undefined}) ->
+    {challenge, <<"Please tell me your password">>,
+     State#state{username = Response}};
+
+handle_response(<<"My password is ", Password/binary>>,
+                #state{username = Username}) ->
+    rabbit_access_control:check_user_pass_login(Username, Password);
+handle_response(Response, _State) ->
+    {protocol_error, "Invalid response '~s'", [Response]}.
diff --git a/deps/rabbit/src/rabbit_auth_mechanism_plain.erl b/deps/rabbit/src/rabbit_auth_mechanism_plain.erl
new file mode 100644
index 0000000000..d704c72400
--- /dev/null
+++ b/deps/rabbit/src/rabbit_auth_mechanism_plain.erl
@@ -0,0 +1,60 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_auth_mechanism_plain).
+-include("rabbit.hrl").
+
+-behaviour(rabbit_auth_mechanism).
+
+-export([description/0, should_offer/1, init/1, handle_response/2]).
+
+-rabbit_boot_step({?MODULE,
+                   [{description, "auth mechanism plain"},
+                    {mfa, {rabbit_registry, register,
+                           [auth_mechanism, <<"PLAIN">>, ?MODULE]}},
+                    {requires, rabbit_registry},
+                    {enables, kernel_ready}]}).
+
+%% SASL PLAIN, as used by the Qpid Java client and our clients. Also,
+%% apparently, by OpenAMQ.
+
+description() ->
+    [{description, <<"SASL PLAIN authentication mechanism">>}].
+
+should_offer(_Sock) ->
+    true.
+
+init(_Sock) ->
+    [].
+
+%% Parses the NUL-delimited PLAIN response and authenticates; a
+%% malformed response yields a protocol error.
+handle_response(Response, _State) ->
+    case extract_user_pass(Response) of
+        {ok, User, Pass} ->
+            rabbit_access_control:check_user_pass_login(User, Pass);
+        error ->
+            {protocol_error, "response ~p invalid", [Response]}
+    end.
+
+%% Expects exactly <<0, User/..., 0, Pass/...>>: the response must start
+%% with a NUL (i.e. an empty leading element — presumably the SASL
+%% authorization identity; confirm against the mechanism spec), and the
+%% second element must consume the remainder ({ok, Pass, <<>>}); any
+%% trailing bytes make the whole response invalid.
+extract_user_pass(Response) ->
+    case extract_elem(Response) of
+        {ok, User, Response1} -> case extract_elem(Response1) of
+                                     {ok, Pass, <<>>} -> {ok, User, Pass};
+                                     _ -> error
+                                 end;
+        error -> error
+    end.
+
+%% Consumes one leading NUL, then returns the bytes up to (but not
+%% including) the next NUL or end of input, plus the unconsumed rest.
+extract_elem(<<0:8, Rest/binary>>) ->
+    Count = next_null_pos(Rest, 0),
+    <<Elem:Count/binary, Rest1/binary>> = Rest,
+    {ok, Elem, Rest1};
+extract_elem(_) ->
+    error.
+
+%% Index of the next NUL byte, or the binary's length if none.
+next_null_pos(<<>>, Count) -> Count;
+next_null_pos(<<0:8, _Rest/binary>>, Count) -> Count;
+next_null_pos(<<_:8, Rest/binary>>, Count) -> next_null_pos(Rest, Count + 1).
diff --git a/deps/rabbit/src/rabbit_autoheal.erl b/deps/rabbit/src/rabbit_autoheal.erl
new file mode 100644
index 0000000000..6380d71895
--- /dev/null
+++ b/deps/rabbit/src/rabbit_autoheal.erl
@@ -0,0 +1,456 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_autoheal).
+
+-export([init/0, enabled/0, maybe_start/1, rabbit_down/2, node_down/2,
+         handle_msg/3, process_down/2]).
+
+%% The named process we are running in.
+-define(SERVER, rabbit_node_monitor).
+ +-define(MNESIA_STOPPED_PING_INTERNAL, 200). + +-define(AUTOHEAL_STATE_AFTER_RESTART, rabbit_autoheal_state_after_restart). + +%%---------------------------------------------------------------------------- + +%% In order to autoheal we want to: +%% +%% * Find the winning partition +%% * Stop all nodes in other partitions +%% * Wait for them all to be stopped +%% * Start them again +%% +%% To keep things simple, we assume all nodes are up. We don't start +%% unless all nodes are up, and if a node goes down we abandon the +%% whole process. To further keep things simple we also defer the +%% decision as to the winning node to the "leader" - arbitrarily +%% selected as the first node in the cluster. +%% +%% To coordinate the restarting nodes we pick a special node from the +%% winning partition - the "winner". Restarting nodes then stop, and +%% wait for it to tell them it is safe to start again. The winner +%% determines that a node has stopped just by seeing if its rabbit app +%% stops - if a node stops for any other reason it just gets a message +%% it will ignore, and otherwise we carry on. +%% +%% Meanwhile, the leader may continue to receive new autoheal requests: +%% all of them are ignored. The winner notifies the leader when the +%% current autoheal process is finished (ie. when all losers stopped and +%% were asked to start again) or was aborted. When the leader receives +%% the notification or if it looses contact with the winner, it can +%% accept new autoheal requests. +%% +%% The winner and the leader are not necessarily the same node. +%% +%% The leader can be a loser and will restart in this case. It remembers +%% there is an autoheal in progress by temporarily saving the autoheal +%% state to the application environment. 
+%% +%% == Possible states == +%% +%% not_healing +%% - the default +%% +%% {winner_waiting, OutstandingStops, Notify} +%% - we are the winner and are waiting for all losing nodes to stop +%% before telling them they can restart +%% +%% {leader_waiting, Winner, Notify} +%% - we are the leader, and have already assigned the winner and losers. +%% We are waiting for a confirmation from the winner that the autoheal +%% process has ended. Meanwhile we can ignore autoheal requests. +%% Because we may be a loser too, this state is saved to the application +%% environment and restored on startup. +%% +%% restarting +%% - we are restarting. Of course the node monitor immediately dies +%% then so this state does not last long. We therefore send the +%% autoheal_safe_to_start message to the rabbit_outside_app_process +%% instead. +%% +%% == Message flow == +%% +%% 1. Any node (leader included) >> {request_start, node()} >> Leader +%% When Mnesia detects it is running partitioned or +%% when a remote node starts, rabbit_node_monitor calls +%% rabbit_autoheal:maybe_start/1. The message above is sent to the +%% leader so the leader can take a decision. +%% +%% 2. Leader >> {become_winner, Losers} >> Winner +%% The leader notifies the winner so the latter can proceed with +%% the autoheal. +%% +%% 3. Winner >> {winner_is, Winner} >> All losers +%% The winner notifies losers they must stop. +%% +%% 4. Winner >> autoheal_safe_to_start >> All losers +%% When either all losers stopped or the autoheal process was +%% aborted, the winner notifies losers they can start again. +%% +%% 5. Leader >> report_autoheal_status >> Winner +%% The leader asks the autoheal status to the winner. This only +%% happens when the leader is a loser too. If this is not the case, +%% this message is never sent. +%% +%% 6. Winner >> {autoheal_finished, Winner} >> Leader +%% The winner notifies the leader that the autoheal process was +%% either finished or aborted (ie. 
autoheal_safe_to_start was sent +%% to losers). + +%%---------------------------------------------------------------------------- + +init() -> + %% We check the application environment for a saved autoheal state + %% saved during a restart. If this node is a leader, it is used + %% to determine if it needs to ask the winner to report about the + %% autoheal progress. + State = case application:get_env(rabbit, ?AUTOHEAL_STATE_AFTER_RESTART) of + {ok, S} -> S; + undefined -> not_healing + end, + ok = application:unset_env(rabbit, ?AUTOHEAL_STATE_AFTER_RESTART), + case State of + {leader_waiting, Winner, _} -> + rabbit_log:info( + "Autoheal: in progress, requesting report from ~p~n", [Winner]), + send(Winner, report_autoheal_status); + _ -> + ok + end, + State. + +maybe_start(not_healing) -> + case enabled() of + true -> Leader = leader(), + send(Leader, {request_start, node()}), + rabbit_log:info("Autoheal request sent to ~p~n", [Leader]), + not_healing; + false -> not_healing + end; +maybe_start(State) -> + State. + +enabled() -> + case application:get_env(rabbit, cluster_partition_handling) of + {ok, autoheal} -> true; + {ok, {pause_if_all_down, _, autoheal}} -> true; + _ -> false + end. + +leader() -> + [Leader | _] = lists:usort(rabbit_mnesia:cluster_nodes(all)), + Leader. + +%% This is the winner receiving its last notification that a node has +%% stopped - all nodes can now start again +rabbit_down(Node, {winner_waiting, [Node], Notify}) -> + rabbit_log:info("Autoheal: final node has stopped, starting...~n",[]), + winner_finish(Notify); + +rabbit_down(Node, {winner_waiting, WaitFor, Notify}) -> + {winner_waiting, WaitFor -- [Node], Notify}; + +rabbit_down(Winner, {leader_waiting, Winner, Losers}) -> + abort([Winner], Losers); + +rabbit_down(_Node, State) -> + %% Ignore. Either: + %% o we already cancelled the autoheal process; + %% o we are still waiting the winner's report. + State. 
+
+%% Reacts to a node going down (as seen by the node monitor) according
+%% to the current autoheal state; most transitions abort the healing
+%% process since it assumes all nodes stay up.
+node_down(_Node, not_healing) ->
+    not_healing;
+
+node_down(Node, {winner_waiting, _, Notify}) ->
+    abort([Node], Notify);
+
+node_down(Node, {leader_waiting, Node, _Notify}) ->
+    %% The winner went down, we don't know what to do so we simply abort.
+    rabbit_log:info("Autoheal: aborting - winner ~p went down~n", [Node]),
+    not_healing;
+
+node_down(Node, {leader_waiting, _, _} = St) ->
+    %% If it is a partial partition, the winner might continue with the
+    %% healing process. If it is a full partition, the winner will also
+    %% see it and abort. Let's wait for it.
+    rabbit_log:info("Autoheal: ~p went down, waiting for winner decision ~n", [Node]),
+    St;
+
+node_down(Node, _State) ->
+    rabbit_log:info("Autoheal: aborting - ~p went down~n", [Node]),
+    not_healing.
+
+%% If the process that has to restart the node crashes for an unexpected reason,
+%% we go back to a not healing state so the node is able to recover.
+process_down({'EXIT', Pid, Reason}, {restarting, Pid}) when Reason =/= normal ->
+    rabbit_log:info("Autoheal: aborting - the process responsible for restarting the "
+                    "node terminated with reason: ~p~n", [Reason]),
+    not_healing;
+
+process_down(_, State) ->
+    State.
+
+%% By receiving this message we become the leader
+%% TODO should we try to debounce this?
+handle_msg({request_start, Node}, + not_healing, Partitions) -> + rabbit_log:info("Autoheal request received from ~p~n", [Node]), + case check_other_nodes(Partitions) of + {error, E} -> + rabbit_log:info("Autoheal request denied: ~s~n", [fmt_error(E)]), + not_healing; + {ok, AllPartitions} -> + {Winner, Losers} = make_decision(AllPartitions), + rabbit_log:info("Autoheal decision~n" + " * Partitions: ~p~n" + " * Winner: ~p~n" + " * Losers: ~p~n", + [AllPartitions, Winner, Losers]), + case node() =:= Winner of + true -> handle_msg({become_winner, Losers}, + not_healing, Partitions); + false -> send(Winner, {become_winner, Losers}), + {leader_waiting, Winner, Losers} + end + end; + +handle_msg({request_start, Node}, + State, _Partitions) -> + rabbit_log:info("Autoheal request received from ~p when healing; " + "ignoring~n", [Node]), + State; + +handle_msg({become_winner, Losers}, + not_healing, _Partitions) -> + rabbit_log:info("Autoheal: I am the winner, waiting for ~p to stop~n", + [Losers]), + stop_partition(Losers); + +handle_msg({become_winner, Losers}, + {winner_waiting, _, Losers}, _Partitions) -> + %% The leader has aborted the healing, might have seen us down but + %% we didn't see the same. Let's try again as it is the same partition. + rabbit_log:info("Autoheal: I am the winner and received a duplicated " + "request, waiting again for ~p to stop~n", [Losers]), + stop_partition(Losers); + +handle_msg({become_winner, _}, + {winner_waiting, _, Losers}, _Partitions) -> + %% Something has happened to the leader, it might have seen us down but we + %% are still alive. Partitions have changed, cannot continue. + rabbit_log:info("Autoheal: I am the winner and received another healing " + "request, partitions have changed to ~p. Aborting ~n", [Losers]), + winner_finish(Losers), + not_healing; + +handle_msg({winner_is, Winner}, State = not_healing, + _Partitions) -> + %% This node is a loser, nothing else. 
+ Pid = restart_loser(State, Winner), + {restarting, Pid}; +handle_msg({winner_is, Winner}, State = {leader_waiting, Winner, _}, + _Partitions) -> + %% This node is the leader and a loser at the same time. + Pid = restart_loser(State, Winner), + {restarting, Pid}; + +handle_msg(Request, {restarting, Pid} = St, _Partitions) -> + %% ignore, we can contribute no further + rabbit_log:info("Autoheal: Received the request ~p while waiting for ~p " + "to restart the node. Ignoring it ~n", [Request, Pid]), + St; + +handle_msg(report_autoheal_status, not_healing, _Partitions) -> + %% The leader is asking about the autoheal status to us (the + %% winner). This happens when the leader is a loser and it just + %% restarted. We are in the "not_healing" state, so the previous + %% autoheal process ended: let's tell this to the leader. + send(leader(), {autoheal_finished, node()}), + not_healing; + +handle_msg(report_autoheal_status, State, _Partitions) -> + %% Like above, the leader is asking about the autoheal status. We + %% are not finished with it. There is no need to send anything yet + %% to the leader: we will send the notification when it is over. + State; + +handle_msg({autoheal_finished, Winner}, + {leader_waiting, Winner, _}, _Partitions) -> + %% The winner is finished with the autoheal process and notified us + %% (the leader). We can transition to the "not_healing" state and + %% accept new requests. + rabbit_log:info("Autoheal finished according to winner ~p~n", [Winner]), + not_healing; + +handle_msg({autoheal_finished, Winner}, not_healing, _Partitions) + when Winner =:= node() -> + %% We are the leader and the winner. The state already transitioned + %% to "not_healing" at the end of the autoheal process. 
+ rabbit_log:info("Autoheal finished according to winner ~p~n", [node()]), + not_healing; + +handle_msg({autoheal_finished, Winner}, not_healing, _Partitions) -> + %% We might have seen the winner down during a partial partition and + %% transitioned to not_healing. However, the winner was still able + %% to finish. Let it pass. + rabbit_log:info("Autoheal finished according to winner ~p." + " Unexpected, I might have previously seen the winner down~n", [Winner]), + not_healing. + +%%---------------------------------------------------------------------------- + +send(Node, Msg) -> {?SERVER, Node} ! {autoheal_msg, Msg}. + +abort(Down, Notify) -> + rabbit_log:info("Autoheal: aborting - ~p down~n", [Down]), + %% Make sure any nodes waiting for us start - it won't necessarily + %% heal the partition but at least they won't get stuck. + %% If we are executing this, we are not stopping. Thus, don't wait + %% for ourselves! + winner_finish(Notify -- [node()]). + +winner_finish(Notify) -> + %% There is a race in Mnesia causing a starting loser to hang + %% forever if another loser stops at the same time: the starting + %% node connects to the other node, negotiates the protocol and + %% attempts to acquire a write lock on the schema on the other node. + %% If the other node stops between the protocol negotiation and lock + %% request, the starting node never gets an answer to its lock + %% request. + %% + %% To work around the problem, we make sure Mnesia is stopped on all + %% losing nodes before sending the "autoheal_safe_to_start" signal. + wait_for_mnesia_shutdown(Notify), + [{rabbit_outside_app_process, N} ! autoheal_safe_to_start || N <- Notify], + send(leader(), {autoheal_finished, node()}), + not_healing. + +%% This improves the previous implementation, but could still potentially enter an infinity +%% loop. If it also possible that for when it finishes some of the nodes have been +%% manually restarted, but we can't do much more (apart from stop them again). 
So let it +%% continue and notify all the losers to restart. +wait_for_mnesia_shutdown(AllNodes) -> + Monitors = lists:foldl(fun(Node, Monitors0) -> + pmon:monitor({mnesia_sup, Node}, Monitors0) + end, pmon:new(), AllNodes), + wait_for_supervisors(Monitors). + +wait_for_supervisors(Monitors) -> + case pmon:is_empty(Monitors) of + true -> + ok; + false -> + receive + {'DOWN', _MRef, process, {mnesia_sup, _} = I, _Reason} -> + wait_for_supervisors(pmon:erase(I, Monitors)) + after + 60000 -> + AliveLosers = [Node || {_, Node} <- pmon:monitored(Monitors)], + rabbit_log:info("Autoheal: mnesia in nodes ~p is still up, sending " + "winner notification again to these ~n", [AliveLosers]), + [send(L, {winner_is, node()}) || L <- AliveLosers], + wait_for_mnesia_shutdown(AliveLosers) + end + end. + +restart_loser(State, Winner) -> + rabbit_log:warning( + "Autoheal: we were selected to restart; winner is ~p~n", [Winner]), + NextStateTimeout = application:get_env(rabbit, autoheal_state_transition_timeout, 60000), + rabbit_node_monitor:run_outside_applications( + fun () -> + MRef = erlang:monitor(process, {?SERVER, Winner}), + rabbit:stop(), + NextState = receive + {'DOWN', MRef, process, {?SERVER, Winner}, _Reason} -> + not_healing; + autoheal_safe_to_start -> + State + after NextStateTimeout -> + rabbit_log:warning( + "Autoheal: timed out waiting for a safe-to-start message from the winner (~p); will retry", + [Winner]), + not_healing + end, + erlang:demonitor(MRef, [flush]), + %% During the restart, the autoheal state is lost so we + %% store it in the application environment temporarily so + %% init/0 can pick it up. + %% + %% This is useful to the leader which is a loser at the + %% same time: because the leader is restarting, there + %% is a great chance it misses the "autoheal finished!" + %% notification from the winner. Thanks to the saved + %% state, it knows it needs to ask the winner if the + %% autoheal process is finished or not. 
+ application:set_env(rabbit, + ?AUTOHEAL_STATE_AFTER_RESTART, NextState), + rabbit:start() + end, true). + +make_decision(AllPartitions) -> + Sorted = lists:sort([{partition_value(P), P} || P <- AllPartitions]), + [[Winner | _] | Rest] = lists:reverse([P || {_, P} <- Sorted]), + {Winner, lists:append(Rest)}. + +partition_value(Partition) -> + Connections = [Res || Node <- Partition, + Res <- [rpc:call(Node, rabbit_networking, + connections_local, [])], + is_list(Res)], + {length(lists:append(Connections)), length(Partition)}. + +%% We have our local understanding of what partitions exist; but we +%% only know which nodes we have been partitioned from, not which +%% nodes are partitioned from each other. +check_other_nodes(LocalPartitions) -> + Nodes = rabbit_mnesia:cluster_nodes(all), + {Results, Bad} = rabbit_node_monitor:status(Nodes -- [node()]), + RemotePartitions = [{Node, proplists:get_value(partitions, Res)} + || {Node, Res} <- Results], + RemoteDown = [{Node, Down} + || {Node, Res} <- Results, + Down <- [Nodes -- proplists:get_value(nodes, Res)], + Down =/= []], + case {Bad, RemoteDown} of + {[], []} -> Partitions = [{node(), LocalPartitions} | RemotePartitions], + {ok, all_partitions(Partitions, [Nodes])}; + {[], _} -> {error, {remote_down, RemoteDown}}; + {_, _} -> {error, {nodes_down, Bad}} + end. + +all_partitions([], Partitions) -> + Partitions; +all_partitions([{Node, CantSee} | Rest], Partitions) -> + {[Containing], Others} = + lists:partition(fun (Part) -> lists:member(Node, Part) end, Partitions), + A = Containing -- CantSee, + B = Containing -- A, + Partitions1 = case {A, B} of + {[], _} -> Partitions; + {_, []} -> Partitions; + _ -> [A, B | Others] + end, + all_partitions(Rest, Partitions1). + +fmt_error({remote_down, RemoteDown}) -> + rabbit_misc:format("Remote nodes disconnected:~n ~p", [RemoteDown]); +fmt_error({nodes_down, NodesDown}) -> + rabbit_misc:format("Local nodes down: ~p", [NodesDown]). 
+ +stop_partition(Losers) -> + %% The leader said everything was ready - do we agree? If not then + %% give up. + Down = Losers -- rabbit_node_monitor:alive_rabbit_nodes(Losers), + case Down of + [] -> [send(L, {winner_is, node()}) || L <- Losers], + {winner_waiting, Losers, Losers}; + _ -> abort(Down, Losers) + end. diff --git a/deps/rabbit/src/rabbit_backing_queue.erl b/deps/rabbit/src/rabbit_backing_queue.erl new file mode 100644 index 0000000000..4d709e14d0 --- /dev/null +++ b/deps/rabbit/src/rabbit_backing_queue.erl @@ -0,0 +1,264 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_backing_queue). + +-export([info_keys/0]). + +-define(INFO_KEYS, [messages_ram, messages_ready_ram, + messages_unacknowledged_ram, messages_persistent, + message_bytes, message_bytes_ready, + message_bytes_unacknowledged, message_bytes_ram, + message_bytes_persistent, head_message_timestamp, + disk_reads, disk_writes, backing_queue_status, + messages_paged_out, message_bytes_paged_out]). + +%% We can't specify a per-queue ack/state with callback signatures +-type ack() :: any(). +-type state() :: any(). + +-type flow() :: 'flow' | 'noflow'. +-type msg_ids() :: [rabbit_types:msg_id()]. +-type publish() :: {rabbit_types:basic_message(), + rabbit_types:message_properties(), boolean()}. +-type delivered_publish() :: {rabbit_types:basic_message(), + rabbit_types:message_properties()}. +-type fetch_result(Ack) :: + ('empty' | {rabbit_types:basic_message(), boolean(), Ack}). +-type drop_result(Ack) :: + ('empty' | {rabbit_types:msg_id(), Ack}). +-type recovery_terms() :: [term()] | 'non_clean_shutdown'. +-type recovery_info() :: 'new' | recovery_terms(). +-type purged_msg_count() :: non_neg_integer(). 
+-type async_callback() :: + fun ((atom(), fun ((atom(), state()) -> state())) -> 'ok'). +-type duration() :: ('undefined' | 'infinity' | number()). + +-type msg_fun(A) :: fun ((rabbit_types:basic_message(), ack(), A) -> A). +-type msg_pred() :: fun ((rabbit_types:message_properties()) -> boolean()). + +-type queue_mode() :: atom(). + +%% Called on startup with a vhost and a list of durable queue names on this vhost. +%% The queues aren't being started at this point, but this call allows the +%% backing queue to perform any checking necessary for the consistency +%% of those queues, or initialise any other shared resources. +%% +%% The list of queue recovery terms returned as {ok, Terms} must be given +%% in the same order as the list of queue names supplied. +-callback start(rabbit_types:vhost(), [rabbit_amqqueue:name()]) -> rabbit_types:ok(recovery_terms()). + +%% Called to tear down any state/resources for vhost. NB: Implementations should +%% not depend on this function being called on shutdown and instead +%% should hook into the rabbit supervision hierarchy. +-callback stop(rabbit_types:vhost()) -> 'ok'. + +%% Initialise the backing queue and its state. +%% +%% Takes +%% 1. the amqqueue record +%% 2. a term indicating whether the queue is an existing queue that +%% should be recovered or not. When 'new' is given, no recovery is +%% taking place, otherwise a list of recovery terms is given, or +%% the atom 'non_clean_shutdown' if no recovery terms are available. +%% 3. an asynchronous callback which accepts a function of type +%% backing-queue-state to backing-queue-state. This callback +%% function can be safely invoked from any process, which makes it +%% useful for passing messages back into the backing queue, +%% especially as the backing queue does not have control of its own +%% mailbox. +-callback init(amqqueue:amqqueue(), recovery_info(), + async_callback()) -> state(). + +%% Called on queue shutdown when queue isn't being deleted. 
+-callback terminate(any(), state()) -> state(). + +%% Called when the queue is terminating and needs to delete all its +%% content. +-callback delete_and_terminate(any(), state()) -> state(). + +%% Called to clean up after a crashed queue. In this case we don't +%% have a process and thus a state(), we are just removing on-disk data. +-callback delete_crashed(amqqueue:amqqueue()) -> 'ok'. + +%% Remove all 'fetchable' messages from the queue, i.e. all messages +%% except those that have been fetched already and are pending acks. +-callback purge(state()) -> {purged_msg_count(), state()}. + +%% Remove all messages in the queue which have been fetched and are +%% pending acks. +-callback purge_acks(state()) -> state(). + +%% Publish a message. +-callback publish(rabbit_types:basic_message(), + rabbit_types:message_properties(), boolean(), pid(), flow(), + state()) -> state(). + +%% Like publish/6 but for batches of publishes. +-callback batch_publish([publish()], pid(), flow(), state()) -> state(). + +%% Called for messages which have already been passed straight +%% out to a client. The queue will be empty for these calls +%% (i.e. saves the round trip through the backing queue). +-callback publish_delivered(rabbit_types:basic_message(), + rabbit_types:message_properties(), pid(), flow(), + state()) + -> {ack(), state()}. + +%% Like publish_delivered/5 but for batches of publishes. +-callback batch_publish_delivered([delivered_publish()], pid(), flow(), + state()) + -> {[ack()], state()}. + +%% Called to inform the BQ about messages which have reached the +%% queue, but are not going to be further passed to BQ. +-callback discard(rabbit_types:msg_id(), pid(), flow(), state()) -> state(). + +%% Return ids of messages which have been confirmed since the last +%% invocation of this function (or initialisation). +%% +%% Message ids should only appear in the result of drain_confirmed +%% under the following circumstances: +%% +%% 1. 
The message appears in a call to publish_delivered/4 and the +%% first argument (ack_required) is false; or +%% 2. The message is fetched from the queue with fetch/2 and the first +%% argument (ack_required) is false; or +%% 3. The message is acked (ack/2 is called for the message); or +%% 4. The message is fully fsync'd to disk in such a way that the +%% recovery of the message is guaranteed in the event of a crash of +%% this rabbit node (excluding hardware failure). +%% +%% In addition to the above conditions, a message id may only appear +%% in the result of drain_confirmed if +%% #message_properties.needs_confirming = true when the msg was +%% published (through whichever means) to the backing queue. +%% +%% It is legal for the same message id to appear in the results of +%% multiple calls to drain_confirmed, which means that the backing +%% queue is not required to keep track of which messages it has +%% already confirmed. The confirm will be issued to the publisher the +%% first time the message id appears in the result of +%% drain_confirmed. All subsequent appearances of that message id will +%% be ignored. +-callback drain_confirmed(state()) -> {msg_ids(), state()}. + +%% Drop messages from the head of the queue while the supplied +%% predicate on message properties returns true. Returns the first +%% message properties for which the predicate returned false, or +%% 'undefined' if the whole backing queue was traversed w/o the +%% predicate ever returning false. +-callback dropwhile(msg_pred(), state()) + -> {rabbit_types:message_properties() | undefined, state()}. + +%% Like dropwhile, except messages are fetched in "require +%% acknowledgement" mode and are passed, together with their ack tag, +%% to the supplied function. The function is also fed an +%% accumulator. The result of fetchwhile is as for dropwhile plus the +%% accumulator. 
+-callback fetchwhile(msg_pred(), msg_fun(A), A, state()) + -> {rabbit_types:message_properties() | undefined, + A, state()}. + +%% Produce the next message. +-callback fetch(true, state()) -> {fetch_result(ack()), state()}; + (false, state()) -> {fetch_result(undefined), state()}. + +%% Remove the next message. +-callback drop(true, state()) -> {drop_result(ack()), state()}; + (false, state()) -> {drop_result(undefined), state()}. + +%% Acktags supplied are for messages which can now be forgotten +%% about. Must return 1 msg_id per Ack, in the same order as Acks. +-callback ack([ack()], state()) -> {msg_ids(), state()}. + +%% Reinsert messages into the queue which have already been delivered +%% and were pending acknowledgement. +-callback requeue([ack()], state()) -> {msg_ids(), state()}. + +%% Fold over messages by ack tag. The supplied function is called with +%% each message, its ack tag, and an accumulator. +-callback ackfold(msg_fun(A), A, state(), [ack()]) -> {A, state()}. + +%% Fold over all the messages in a queue and return the accumulated +%% results, leaving the queue undisturbed. +-callback fold(fun((rabbit_types:basic_message(), + rabbit_types:message_properties(), + boolean(), A) -> {('stop' | 'cont'), A}), + A, state()) -> {A, state()}. + +%% How long is my queue? +-callback len(state()) -> non_neg_integer(). + +%% Is my queue empty? +-callback is_empty(state()) -> boolean(). + +%% What's the queue depth, where depth = length + number of pending acks +-callback depth(state()) -> non_neg_integer(). + +%% For the next three functions, the assumption is that you're +%% monitoring something like the ingress and egress rates of the +%% queue. The RAM duration is thus the length of time represented by +%% the messages held in RAM given the current rates. If you want to +%% ignore all of this stuff, then do so, and return 0 in +%% ram_duration/1. 
+ +%% The target is to have no more messages in RAM than indicated by the +%% duration and the current queue rates. +-callback set_ram_duration_target(duration(), state()) -> state(). + +%% Optionally recalculate the duration internally (likely to be just +%% update your internal rates), and report how many seconds the +%% messages in RAM represent given the current rates of the queue. +-callback ram_duration(state()) -> {duration(), state()}. + +%% Should 'timeout' be called as soon as the queue process can manage +%% (either on an empty mailbox, or when a timer fires)? +-callback needs_timeout(state()) -> 'false' | 'timed' | 'idle'. + +%% Called (eventually) after needs_timeout returns 'idle' or 'timed'. +%% Note this may be called more than once for each 'idle' or 'timed' +%% returned from needs_timeout +-callback timeout(state()) -> state(). + +%% Called immediately before the queue hibernates. +-callback handle_pre_hibernate(state()) -> state(). + +%% Called when more credit has become available for credit_flow. +-callback resume(state()) -> state(). + +%% Used to help prioritisation in rabbit_amqqueue_process. The rate of +%% inbound messages and outbound messages at the moment. +-callback msg_rates(state()) -> {float(), float()}. + +-callback info(atom(), state()) -> any(). + +%% Passed a function to be invoked with the relevant backing queue's +%% state. Useful for when the backing queue or other components need +%% to pass functions into the backing queue. +-callback invoke(atom(), fun ((atom(), A) -> A), state()) -> state(). + +%% Called prior to a publish or publish_delivered call. Allows the BQ +%% to signal that it's already seen this message, (e.g. it was published +%% or discarded previously) specifying whether to drop the message or reject it. +-callback is_duplicate(rabbit_types:basic_message(), state()) + -> {{true, drop} | {true, reject} | boolean(), state()}. + +-callback set_queue_mode(queue_mode(), state()) -> state(). 
+ +-callback zip_msgs_and_acks([delivered_publish()], + [ack()], Acc, state()) + -> Acc. + +%% Called when rabbit_amqqueue_process receives a message via +%% handle_info and it should be processed by the backing +%% queue +-callback handle_info(term(), state()) -> state(). + +-spec info_keys() -> rabbit_types:info_keys(). + +info_keys() -> ?INFO_KEYS. diff --git a/deps/rabbit/src/rabbit_basic.erl b/deps/rabbit/src/rabbit_basic.erl new file mode 100644 index 0000000000..cdc9e082e4 --- /dev/null +++ b/deps/rabbit/src/rabbit_basic.erl @@ -0,0 +1,354 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_basic). +-include("rabbit.hrl"). +-include("rabbit_framing.hrl"). + +-export([publish/4, publish/5, publish/1, + message/3, message/4, properties/1, prepend_table_header/3, + extract_headers/1, extract_timestamp/1, map_headers/2, delivery/4, + header_routes/1, parse_expiration/1, header/2, header/3]). +-export([build_content/2, from_content/1, msg_size/1, + maybe_gc_large_msg/1, maybe_gc_large_msg/2]). +-export([add_header/4, + peek_fmt_message/1]). + +%%---------------------------------------------------------------------------- + +-type properties_input() :: + rabbit_framing:amqp_property_record() | [{atom(), any()}]. +-type publish_result() :: + ok | rabbit_types:error('not_found'). +-type header() :: any(). +-type headers() :: rabbit_framing:amqp_table() | 'undefined'. + +-type exchange_input() :: rabbit_types:exchange() | rabbit_exchange:name(). +-type body_input() :: binary() | [binary()]. + +%%---------------------------------------------------------------------------- + +%% Convenience function, for avoiding round-trips in calls across the +%% erlang distributed network. 
+ +-spec publish + (exchange_input(), rabbit_router:routing_key(), properties_input(), + body_input()) -> + publish_result(). + +publish(Exchange, RoutingKeyBin, Properties, Body) -> + publish(Exchange, RoutingKeyBin, false, Properties, Body). + +%% Convenience function, for avoiding round-trips in calls across the +%% erlang distributed network. + +-spec publish + (exchange_input(), rabbit_router:routing_key(), boolean(), + properties_input(), body_input()) -> + publish_result(). + +publish(X = #exchange{name = XName}, RKey, Mandatory, Props, Body) -> + Message = message(XName, RKey, properties(Props), Body), + publish(X, delivery(Mandatory, false, Message, undefined)); +publish(XName, RKey, Mandatory, Props, Body) -> + Message = message(XName, RKey, properties(Props), Body), + publish(delivery(Mandatory, false, Message, undefined)). + +-spec publish(rabbit_types:delivery()) -> publish_result(). + +publish(Delivery = #delivery{ + message = #basic_message{exchange_name = XName}}) -> + case rabbit_exchange:lookup(XName) of + {ok, X} -> publish(X, Delivery); + Err -> Err + end. + +publish(X, Delivery) -> + Qs = rabbit_amqqueue:lookup(rabbit_exchange:route(X, Delivery)), + _ = rabbit_queue_type:deliver(Qs, Delivery, stateless), + ok. + +-spec delivery + (boolean(), boolean(), rabbit_types:message(), undefined | integer()) -> + rabbit_types:delivery(). + +delivery(Mandatory, Confirm, Message, MsgSeqNo) -> + #delivery{mandatory = Mandatory, confirm = Confirm, sender = self(), + message = Message, msg_seq_no = MsgSeqNo, flow = noflow}. + +-spec build_content + (rabbit_framing:amqp_property_record(), binary() | [binary()]) -> + rabbit_types:content(). 
+ +build_content(Properties, BodyBin) when is_binary(BodyBin) -> + build_content(Properties, [BodyBin]); + +build_content(Properties, PFR) -> + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), + #content{class_id = ClassId, + properties = Properties, + properties_bin = none, + protocol = none, + payload_fragments_rev = PFR}. + +-spec from_content + (rabbit_types:content()) -> + {rabbit_framing:amqp_property_record(), binary()}. + +from_content(Content) -> + #content{class_id = ClassId, + properties = Props, + payload_fragments_rev = FragmentsRev} = + rabbit_binary_parser:ensure_content_decoded(Content), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), + {Props, list_to_binary(lists:reverse(FragmentsRev))}. + +%% This breaks the spec rule forbidding message modification +strip_header(#content{properties = #'P_basic'{headers = undefined}} + = DecodedContent, _Key) -> + DecodedContent; +strip_header(#content{properties = Props = #'P_basic'{headers = Headers}} + = DecodedContent, Key) -> + case lists:keysearch(Key, 1, Headers) of + false -> DecodedContent; + {value, Found} -> Headers0 = lists:delete(Found, Headers), + rabbit_binary_generator:clear_encoded_content( + DecodedContent#content{ + properties = Props#'P_basic'{ + headers = Headers0}}) + end. + +-spec message + (rabbit_exchange:name(), rabbit_router:routing_key(), + rabbit_types:decoded_content()) -> + rabbit_types:ok_or_error2(rabbit_types:message(), any()). 
+ +message(XName, RoutingKey, #content{properties = Props} = DecodedContent) -> + try + {ok, #basic_message{ + exchange_name = XName, + content = strip_header(DecodedContent, ?DELETED_HEADER), + id = rabbit_guid:gen(), + is_persistent = is_message_persistent(DecodedContent), + routing_keys = [RoutingKey | + header_routes(Props#'P_basic'.headers)]}} + catch + {error, _Reason} = Error -> Error + end. + +-spec message + (rabbit_exchange:name(), rabbit_router:routing_key(), properties_input(), + binary()) -> + rabbit_types:message(). + +message(XName, RoutingKey, RawProperties, Body) -> + Properties = properties(RawProperties), + Content = build_content(Properties, Body), + {ok, Msg} = message(XName, RoutingKey, Content), + Msg. + +-spec properties + (properties_input()) -> rabbit_framing:amqp_property_record(). + +properties(P = #'P_basic'{}) -> + P; +properties(P) when is_list(P) -> + %% Yes, this is O(length(P) * record_info(size, 'P_basic') / 2), + %% i.e. slow. Use the definition of 'P_basic' directly if + %% possible! + lists:foldl(fun ({Key, Value}, Acc) -> + case indexof(record_info(fields, 'P_basic'), Key) of + 0 -> throw({unknown_basic_property, Key}); + N -> setelement(N + 1, Acc, Value) + end + end, #'P_basic'{}, P). + +-spec prepend_table_header + (binary(), rabbit_framing:amqp_table(), headers()) -> headers(). + +prepend_table_header(Name, Info, undefined) -> + prepend_table_header(Name, Info, []); +prepend_table_header(Name, Info, Headers) -> + case rabbit_misc:table_lookup(Headers, Name) of + {array, Existing} -> + prepend_table(Name, Info, Existing, Headers); + undefined -> + prepend_table(Name, Info, [], Headers); + Other -> + Headers2 = prepend_table(Name, Info, [], Headers), + set_invalid_header(Name, Other, Headers2) + end. + +prepend_table(Name, Info, Prior, Headers) -> + rabbit_misc:set_table_value(Headers, Name, array, [{table, Info} | Prior]). 
+ +set_invalid_header(Name, {_, _}=Value, Headers) when is_list(Headers) -> + case rabbit_misc:table_lookup(Headers, ?INVALID_HEADERS_KEY) of + undefined -> + set_invalid([{Name, array, [Value]}], Headers); + {table, ExistingHdr} -> + update_invalid(Name, Value, ExistingHdr, Headers); + Other -> + %% somehow the x-invalid-headers header is corrupt + Invalid = [{?INVALID_HEADERS_KEY, array, [Other]}], + set_invalid_header(Name, Value, set_invalid(Invalid, Headers)) + end. + +set_invalid(NewHdr, Headers) -> + rabbit_misc:set_table_value(Headers, ?INVALID_HEADERS_KEY, table, NewHdr). + +update_invalid(Name, Value, ExistingHdr, Header) -> + Values = case rabbit_misc:table_lookup(ExistingHdr, Name) of + undefined -> [Value]; + {array, Prior} -> [Value | Prior] + end, + NewHdr = rabbit_misc:set_table_value(ExistingHdr, Name, array, Values), + set_invalid(NewHdr, Header). + +-spec header(header(), headers()) -> 'undefined' | any(). + +header(_Header, undefined) -> + undefined; +header(_Header, []) -> + undefined; +header(Header, Headers) -> + header(Header, Headers, undefined). + +-spec header(header(), headers(), any()) -> 'undefined' | any(). + +header(Header, Headers, Default) -> + case lists:keysearch(Header, 1, Headers) of + false -> Default; + {value, Val} -> Val + end. + +-spec extract_headers(rabbit_types:content()) -> headers(). + +extract_headers(Content) -> + #content{properties = #'P_basic'{headers = Headers}} = + rabbit_binary_parser:ensure_content_decoded(Content), + Headers. + +extract_timestamp(Content) -> + #content{properties = #'P_basic'{timestamp = Timestamp}} = + rabbit_binary_parser:ensure_content_decoded(Content), + Timestamp. + +-spec map_headers + (fun((headers()) -> headers()), rabbit_types:content()) -> + rabbit_types:content(). 
+ +map_headers(F, Content) -> + Content1 = rabbit_binary_parser:ensure_content_decoded(Content), + #content{properties = #'P_basic'{headers = Headers} = Props} = Content1, + Headers1 = F(Headers), + rabbit_binary_generator:clear_encoded_content( + Content1#content{properties = Props#'P_basic'{headers = Headers1}}). + +indexof(L, Element) -> indexof(L, Element, 1). + +indexof([], _Element, _N) -> 0; +indexof([Element | _Rest], Element, N) -> N; +indexof([_ | Rest], Element, N) -> indexof(Rest, Element, N + 1). + +is_message_persistent(#content{properties = #'P_basic'{ + delivery_mode = Mode}}) -> + case Mode of + 1 -> false; + 2 -> true; + undefined -> false; + Other -> throw({error, {delivery_mode_unknown, Other}}) + end. + +%% Extract CC routes from headers + +-spec header_routes(undefined | rabbit_framing:amqp_table()) -> [string()]. + +header_routes(undefined) -> + []; +header_routes(HeadersTable) -> + lists:append( + [case rabbit_misc:table_lookup(HeadersTable, HeaderKey) of + {array, Routes} -> [Route || {longstr, Route} <- Routes]; + undefined -> []; + {Type, _Val} -> throw({error, {unacceptable_type_in_header, + binary_to_list(HeaderKey), Type}}) + end || HeaderKey <- ?ROUTING_HEADERS]). + +-spec parse_expiration + (rabbit_framing:amqp_property_record()) -> + rabbit_types:ok_or_error2('undefined' | non_neg_integer(), any()). + +parse_expiration(#'P_basic'{expiration = undefined}) -> + {ok, undefined}; +parse_expiration(#'P_basic'{expiration = Expiration}) -> + case string:to_integer(binary_to_list(Expiration)) of + {error, no_integer} = E -> + E; + {N, ""} -> + case rabbit_misc:check_expiry(N) of + ok -> {ok, N}; + E = {error, _} -> E + end; + {_, S} -> + {error, {leftover_string, S}} + end. + +maybe_gc_large_msg(Content) -> + rabbit_writer:maybe_gc_large_msg(Content). + +maybe_gc_large_msg(Content, undefined) -> + rabbit_writer:msg_size(Content); +maybe_gc_large_msg(Content, GCThreshold) -> + rabbit_writer:maybe_gc_large_msg(Content, GCThreshold). 
+
+msg_size(Content) ->
+    rabbit_writer:msg_size(Content).
+
+add_header(Name, Type, Value, #basic_message{content = Content0} = Msg) ->
+    Content = rabbit_basic:map_headers(
+                fun(undefined) ->
+                        rabbit_misc:set_table_value([], Name, Type, Value);
+                   (Headers) ->
+                        rabbit_misc:set_table_value(Headers, Name, Type, Value)
+                end, Content0),
+    Msg#basic_message{content = Content}.
+
+peek_fmt_message(#basic_message{exchange_name = Ex,
+                                routing_keys = RKeys,
+                                content =
+                                #content{payload_fragments_rev = Payl0,
+                                         properties = Props}}) ->
+    Fields = [atom_to_binary(F, utf8) || F <- record_info(fields, 'P_basic')],
+    T = lists:zip(Fields, tl(tuple_to_list(Props))),
+    lists:foldl(
+      fun ({<<"headers">>, Hdrs}, Acc) ->
+              case Hdrs of
+                  [] ->
+                      Acc;
+                  _ ->
+                      Acc ++ [{header_key(H), V} || {H, _T, V} <- Hdrs]
+              end;
+          ({_, undefined}, Acc) ->
+              Acc;
+          (KV, Acc) ->
+              [KV | Acc]
+      end, [], [{<<"payload (max 64 bytes)">>,
+                 %% restrict payload to 64 bytes
+                 binary_prefix_64(iolist_to_binary(lists:reverse(Payl0)), 64)},
+                {<<"exchange">>, Ex#resource.name},
+                {<<"routing_keys">>, RKeys} | T]).
+
+header_key(A) ->
+    <<"header.", A/binary>>.
+
+binary_prefix_64(Bin, Len) ->
+    binary:part(Bin, 0, min(byte_size(Bin), Len)).
diff --git a/deps/rabbit/src/rabbit_binding.erl b/deps/rabbit/src/rabbit_binding.erl
new file mode 100644
index 0000000000..6ef25c4e60
--- /dev/null
+++ b/deps/rabbit/src/rabbit_binding.erl
@@ -0,0 +1,691 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_binding).
+-include_lib("rabbit_common/include/rabbit.hrl").
+-include("amqqueue.hrl").
+
+-export([recover/0, recover/2, exists/1, add/2, add/3, remove/1, remove/2, remove/3, remove/4]).
+-export([list/1, list_for_source/1, list_for_destination/1, + list_for_source_and_destination/2, list_explicit/0]). +-export([new_deletions/0, combine_deletions/2, add_deletion/3, + process_deletions/2, binding_action/3]). +-export([info_keys/0, info/1, info/2, info_all/1, info_all/2, info_all/4]). +%% these must all be run inside a mnesia tx +-export([has_for_source/1, remove_for_source/1, + remove_for_destination/2, remove_transient_for_destination/1, + remove_default_exchange_binding_rows_of/1]). + +-export([implicit_for_destination/1, reverse_binding/1]). +-export([new/4]). + +-define(DEFAULT_EXCHANGE(VHostPath), #resource{virtual_host = VHostPath, + kind = exchange, + name = <<>>}). + +%%---------------------------------------------------------------------------- + +-export_type([key/0, deletions/0]). + +-type key() :: binary(). + +-type bind_errors() :: rabbit_types:error( + {'resources_missing', + [{'not_found', (rabbit_types:binding_source() | + rabbit_types:binding_destination())} | + {'absent', amqqueue:amqqueue()}]}). + +-type bind_ok_or_error() :: 'ok' | bind_errors() | + rabbit_types:error( + {'binding_invalid', string(), [any()]}). +-type bind_res() :: bind_ok_or_error() | rabbit_misc:thunk(bind_ok_or_error()). +-type inner_fun() :: + fun((rabbit_types:exchange(), + rabbit_types:exchange() | amqqueue:amqqueue()) -> + rabbit_types:ok_or_error(rabbit_types:amqp_error())). +-type bindings() :: [rabbit_types:binding()]. + +%% TODO this should really be opaque but that seems to confuse 17.1's +%% dialyzer into objecting to everything that uses it. +-type deletions() :: dict:dict(). + +%%---------------------------------------------------------------------------- + +-spec new(rabbit_types:exchange(), + key(), + rabbit_types:exchange() | amqqueue:amqqueue(), + rabbit_framing:amqp_table()) -> + rabbit_types:binding(). 
+ +new(Src, RoutingKey, Dst, #{}) -> + new(Src, RoutingKey, Dst, []); +new(Src, RoutingKey, Dst, Arguments) when is_map(Arguments) -> + new(Src, RoutingKey, Dst, maps:to_list(Arguments)); +new(Src, RoutingKey, Dst, Arguments) -> + #binding{source = Src, key = RoutingKey, destination = Dst, args = Arguments}. + + +-define(INFO_KEYS, [source_name, source_kind, + destination_name, destination_kind, + routing_key, arguments, + vhost]). + +%% Global table recovery + +-spec recover([rabbit_exchange:name()], [rabbit_amqqueue:name()]) -> + 'ok'. + +recover() -> + rabbit_misc:table_filter( + fun (Route) -> + mnesia:read({rabbit_semi_durable_route, Route}) =:= [] + end, + fun (Route, true) -> + ok = mnesia:write(rabbit_semi_durable_route, Route, write); + (_Route, false) -> + ok + end, rabbit_durable_route). + +%% Virtual host-specific recovery +recover(XNames, QNames) -> + XNameSet = sets:from_list(XNames), + QNameSet = sets:from_list(QNames), + SelectSet = fun (#resource{kind = exchange}) -> XNameSet; + (#resource{kind = queue}) -> QNameSet + end, + {ok, Gatherer} = gatherer:start_link(), + [recover_semi_durable_route(Gatherer, R, SelectSet(Dst)) || + R = #route{binding = #binding{destination = Dst}} <- + rabbit_misc:dirty_read_all(rabbit_semi_durable_route)], + empty = gatherer:out(Gatherer), + ok = gatherer:stop(Gatherer), + ok. + +recover_semi_durable_route(Gatherer, R = #route{binding = B}, ToRecover) -> + #binding{source = Src, destination = Dst} = B, + case sets:is_element(Dst, ToRecover) of + true -> {ok, X} = rabbit_exchange:lookup(Src), + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> + recover_semi_durable_route_txn(R, X), + gatherer:finish(Gatherer) + end); + false -> ok + end. 
+ +recover_semi_durable_route_txn(R = #route{binding = B}, X) -> + rabbit_misc:execute_mnesia_transaction( + fun () -> + case mnesia:read(rabbit_semi_durable_route, B, read) of + [] -> no_recover; + _ -> ok = sync_transient_route(R, fun mnesia:write/3), + rabbit_exchange:serial(X) + end + end, + fun (no_recover, _) -> ok; + (_Serial, true) -> x_callback(transaction, X, add_binding, B); + (Serial, false) -> x_callback(Serial, X, add_binding, B) + end). + +-spec exists(rabbit_types:binding()) -> boolean() | bind_errors(). + +exists(#binding{source = ?DEFAULT_EXCHANGE(_), + destination = #resource{kind = queue, name = QName} = Queue, + key = QName, + args = []}) -> + case rabbit_amqqueue:lookup(Queue) of + {ok, _} -> true; + {error, not_found} -> false + end; +exists(Binding) -> + binding_action( + Binding, fun (_Src, _Dst, B) -> + rabbit_misc:const(mnesia:read({rabbit_route, B}) /= []) + end, fun not_found_or_absent_errs/1). + +-spec add(rabbit_types:binding(), rabbit_types:username()) -> bind_res(). + +add(Binding, ActingUser) -> add(Binding, fun (_Src, _Dst) -> ok end, ActingUser). + +-spec add(rabbit_types:binding(), inner_fun(), rabbit_types:username()) -> bind_res(). + +add(Binding, InnerFun, ActingUser) -> + binding_action( + Binding, + fun (Src, Dst, B) -> + case rabbit_exchange:validate_binding(Src, B) of + ok -> + lock_resource(Src, read), + lock_resource(Dst, read), + %% this argument is used to check queue exclusivity; + %% in general, we want to fail on that in preference to + %% anything else + case InnerFun(Src, Dst) of + ok -> + case mnesia:read({rabbit_route, B}) of + [] -> add(Src, Dst, B, ActingUser); + [_] -> fun () -> ok end + end; + {error, _} = Err -> + rabbit_misc:const(Err) + end; + {error, _} = Err -> + rabbit_misc:const(Err) + end + end, fun not_found_or_absent_errs/1). 
+
+add(Src, Dst, B, ActingUser) ->
+    [SrcDurable, DstDurable] = [durable(E) || E <- [Src, Dst]],
+    ok = sync_route(#route{binding = B}, SrcDurable, DstDurable,
+                    fun mnesia:write/3),
+    x_callback(transaction, Src, add_binding, B),
+    Serial = rabbit_exchange:serial(Src),
+    fun () ->
+        x_callback(Serial, Src, add_binding, B),
+        ok = rabbit_event:notify(
+               binding_created,
+               info(B) ++ [{user_who_performed_action, ActingUser}])
+    end.
+
+-spec remove(rabbit_types:binding()) -> bind_res().
+remove(Binding) -> remove(Binding, fun (_Src, _Dst) -> ok end, ?INTERNAL_USER).
+
+-spec remove(rabbit_types:binding(), rabbit_types:username()) -> bind_res().
+remove(Binding, ActingUser) -> remove(Binding, fun (_Src, _Dst) -> ok end, ActingUser).
+
+
+-spec remove(rabbit_types:binding(), inner_fun(), rabbit_types:username()) -> bind_res().
+remove(Binding, InnerFun, ActingUser) ->
+    binding_action(
+      Binding,
+      fun (Src, Dst, B) ->
+              lock_resource(Src, read),
+              lock_resource(Dst, read),
+              case mnesia:read(rabbit_route, B, write) of
+                  [] -> case mnesia:read(rabbit_durable_route, B, write) of
+                            [] -> rabbit_misc:const(ok);
+                            %% We still delete the binding and run
+                            %% all post-delete functions if there is only
+                            %% a durable route in the database
+                            _  -> remove(Src, Dst, B, ActingUser)
+                        end;
+                  _  -> case InnerFun(Src, Dst) of
+                            ok               -> remove(Src, Dst, B, ActingUser);
+                            {error, _} = Err -> rabbit_misc:const(Err)
+                        end
+              end
+      end, fun absent_errs_only/1).
+
+remove(Src, Dst, B, ActingUser) ->
+    ok = sync_route(#route{binding = B}, durable(Src), durable(Dst),
+                    fun delete/3),
+    Deletions = maybe_auto_delete(
+                  B#binding.source, [B], new_deletions(), false),
+    process_deletions(Deletions, ActingUser).
+
+%% Bindings to the default exchange are not stored as routing rows; they are implicit as of rabbitmq/rabbitmq-server#1721.
+remove_default_exchange_binding_rows_of(Dst = #resource{}) -> + case implicit_for_destination(Dst) of + [Binding] -> + mnesia:dirty_delete(rabbit_durable_route, Binding), + mnesia:dirty_delete(rabbit_semi_durable_route, Binding), + mnesia:dirty_delete(rabbit_reverse_route, + reverse_binding(Binding)), + mnesia:dirty_delete(rabbit_route, Binding); + _ -> + %% no binding to remove or + %% a competing tx has beaten us to it? + ok + end, + ok. + +-spec list_explicit() -> bindings(). + +list_explicit() -> + mnesia:async_dirty( + fun () -> + AllRoutes = mnesia:dirty_match_object(rabbit_route, #route{_ = '_'}), + %% if there are any default exchange bindings left after an upgrade + %% of a pre-3.8 database, filter them out + AllBindings = [B || #route{binding = B} <- AllRoutes], + lists:filter(fun(#binding{source = S}) -> + not (S#resource.kind =:= exchange andalso S#resource.name =:= <<>>) + end, AllBindings) + end). + +-spec list(rabbit_types:vhost()) -> bindings(). + +list(VHostPath) -> + VHostResource = rabbit_misc:r(VHostPath, '_'), + Route = #route{binding = #binding{source = VHostResource, + destination = VHostResource, + _ = '_'}, + _ = '_'}, + %% if there are any default exchange bindings left after an upgrade + %% of a pre-3.8 database, filter them out + AllBindings = [B || #route{binding = B} <- mnesia:dirty_match_object(rabbit_route, + Route)], + Filtered = lists:filter(fun(#binding{source = S}) -> + S =/= ?DEFAULT_EXCHANGE(VHostPath) + end, AllBindings), + implicit_bindings(VHostPath) ++ Filtered. + +-spec list_for_source + (rabbit_types:binding_source()) -> bindings(). + +list_for_source(?DEFAULT_EXCHANGE(VHostPath)) -> + implicit_bindings(VHostPath); +list_for_source(SrcName) -> + mnesia:async_dirty( + fun() -> + Route = #route{binding = #binding{source = SrcName, _ = '_'}}, + [B || #route{binding = B} + <- mnesia:match_object(rabbit_route, Route, read)] + end). + +-spec list_for_destination + (rabbit_types:binding_destination()) -> bindings(). 
+ +list_for_destination(DstName = #resource{virtual_host = VHostPath}) -> + AllBindings = mnesia:async_dirty( + fun() -> + Route = #route{binding = #binding{destination = DstName, + _ = '_'}}, + [reverse_binding(B) || + #reverse_route{reverse_binding = B} <- + mnesia:match_object(rabbit_reverse_route, + reverse_route(Route), read)] + end), + Filtered = lists:filter(fun(#binding{source = S}) -> + S =/= ?DEFAULT_EXCHANGE(VHostPath) + end, AllBindings), + implicit_for_destination(DstName) ++ Filtered. + +implicit_bindings(VHostPath) -> + DstQueues = rabbit_amqqueue:list_names(VHostPath), + [ #binding{source = ?DEFAULT_EXCHANGE(VHostPath), + destination = DstQueue, + key = QName, + args = []} + || DstQueue = #resource{name = QName} <- DstQueues ]. + +implicit_for_destination(DstQueue = #resource{kind = queue, + virtual_host = VHostPath, + name = QName}) -> + [#binding{source = ?DEFAULT_EXCHANGE(VHostPath), + destination = DstQueue, + key = QName, + args = []}]; +implicit_for_destination(_) -> + []. + +-spec list_for_source_and_destination + (rabbit_types:binding_source(), rabbit_types:binding_destination()) -> + bindings(). + +list_for_source_and_destination(?DEFAULT_EXCHANGE(VHostPath), + #resource{kind = queue, + virtual_host = VHostPath, + name = QName} = DstQueue) -> + [#binding{source = ?DEFAULT_EXCHANGE(VHostPath), + destination = DstQueue, + key = QName, + args = []}]; +list_for_source_and_destination(SrcName, DstName) -> + mnesia:async_dirty( + fun() -> + Route = #route{binding = #binding{source = SrcName, + destination = DstName, + _ = '_'}}, + [B || #route{binding = B} <- mnesia:match_object(rabbit_route, + Route, read)] + end). + +-spec info_keys() -> rabbit_types:info_keys(). + +info_keys() -> ?INFO_KEYS. + +map(VHostPath, F) -> + %% TODO: there is scope for optimisation here, e.g. using a + %% cursor, parallelising the function invocation + lists:map(F, list(VHostPath)). + +infos(Items, B) -> [{Item, i(Item, B)} || Item <- Items]. 
+ +i(source_name, #binding{source = SrcName}) -> SrcName#resource.name; +i(source_kind, #binding{source = SrcName}) -> SrcName#resource.kind; +i(vhost, #binding{source = SrcName}) -> SrcName#resource.virtual_host; +i(destination_name, #binding{destination = DstName}) -> DstName#resource.name; +i(destination_kind, #binding{destination = DstName}) -> DstName#resource.kind; +i(routing_key, #binding{key = RoutingKey}) -> RoutingKey; +i(arguments, #binding{args = Arguments}) -> Arguments; +i(Item, _) -> throw({bad_argument, Item}). + +-spec info(rabbit_types:binding()) -> rabbit_types:infos(). + +info(B = #binding{}) -> infos(?INFO_KEYS, B). + +-spec info(rabbit_types:binding(), rabbit_types:info_keys()) -> + rabbit_types:infos(). + +info(B = #binding{}, Items) -> infos(Items, B). + +-spec info_all(rabbit_types:vhost()) -> [rabbit_types:infos()]. + +info_all(VHostPath) -> map(VHostPath, fun (B) -> info(B) end). + +-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys()) -> + [rabbit_types:infos()]. + +info_all(VHostPath, Items) -> map(VHostPath, fun (B) -> info(B, Items) end). + +-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys(), + reference(), pid()) -> 'ok'. + +info_all(VHostPath, Items, Ref, AggregatorPid) -> + rabbit_control_misc:emitting_map( + AggregatorPid, Ref, fun(B) -> info(B, Items) end, list(VHostPath)). + +-spec has_for_source(rabbit_types:binding_source()) -> boolean(). + +has_for_source(SrcName) -> + Match = #route{binding = #binding{source = SrcName, _ = '_'}}, + %% we need to check for semi-durable routes (which subsumes + %% durable routes) here too in case a bunch of routes to durable + %% queues have been removed temporarily as a result of a node + %% failure + contains(rabbit_route, Match) orelse + contains(rabbit_semi_durable_route, Match). + +-spec remove_for_source(rabbit_types:binding_source()) -> bindings(). 
+ +remove_for_source(SrcName) -> + lock_resource(SrcName), + Match = #route{binding = #binding{source = SrcName, _ = '_'}}, + remove_routes( + lists:usort( + mnesia:dirty_match_object(rabbit_route, Match) ++ + mnesia:dirty_match_object(rabbit_semi_durable_route, Match))). + +-spec remove_for_destination + (rabbit_types:binding_destination(), boolean()) -> deletions(). + +remove_for_destination(DstName, OnlyDurable) -> + remove_for_destination(DstName, OnlyDurable, fun remove_routes/1). + +-spec remove_transient_for_destination + (rabbit_types:binding_destination()) -> deletions(). + +remove_transient_for_destination(DstName) -> + remove_for_destination(DstName, false, fun remove_transient_routes/1). + +%%---------------------------------------------------------------------------- + +durable(#exchange{durable = D}) -> D; +durable(Q) when ?is_amqqueue(Q) -> + amqqueue:is_durable(Q). + +binding_action(Binding = #binding{source = SrcName, + destination = DstName, + args = Arguments}, Fun, ErrFun) -> + call_with_source_and_destination( + SrcName, DstName, + fun (Src, Dst) -> + SortedArgs = rabbit_misc:sort_field_table(Arguments), + Fun(Src, Dst, Binding#binding{args = SortedArgs}) + end, ErrFun). + +sync_route(Route, true, true, Fun) -> + ok = Fun(rabbit_durable_route, Route, write), + sync_route(Route, false, true, Fun); + +sync_route(Route, false, true, Fun) -> + ok = Fun(rabbit_semi_durable_route, Route, write), + sync_route(Route, false, false, Fun); + +sync_route(Route, _SrcDurable, false, Fun) -> + sync_transient_route(Route, Fun). + +sync_transient_route(Route, Fun) -> + ok = Fun(rabbit_route, Route, write), + ok = Fun(rabbit_reverse_route, reverse_route(Route), write). 
+ +call_with_source_and_destination(SrcName, DstName, Fun, ErrFun) -> + SrcTable = table_for_resource(SrcName), + DstTable = table_for_resource(DstName), + rabbit_misc:execute_mnesia_tx_with_tail( + fun () -> + case {mnesia:read({SrcTable, SrcName}), + mnesia:read({DstTable, DstName})} of + {[Src], [Dst]} -> Fun(Src, Dst); + {[], [_] } -> ErrFun([SrcName]); + {[_], [] } -> ErrFun([DstName]); + {[], [] } -> ErrFun([SrcName, DstName]) + end + end). + +not_found_or_absent_errs(Names) -> + Errs = [not_found_or_absent(Name) || Name <- Names], + rabbit_misc:const({error, {resources_missing, Errs}}). + +absent_errs_only(Names) -> + Errs = [E || Name <- Names, + {absent, _Q, _Reason} = E <- [not_found_or_absent(Name)]], + rabbit_misc:const(case Errs of + [] -> ok; + _ -> {error, {resources_missing, Errs}} + end). + +table_for_resource(#resource{kind = exchange}) -> rabbit_exchange; +table_for_resource(#resource{kind = queue}) -> rabbit_queue. + +not_found_or_absent(#resource{kind = exchange} = Name) -> + {not_found, Name}; +not_found_or_absent(#resource{kind = queue} = Name) -> + case rabbit_amqqueue:not_found_or_absent(Name) of + not_found -> {not_found, Name}; + {absent, _Q, _Reason} = R -> R + end. + +contains(Table, MatchHead) -> + continue(mnesia:select(Table, [{MatchHead, [], ['$_']}], 1, read)). + +continue('$end_of_table') -> false; +continue({[_|_], _}) -> true; +continue({[], Continuation}) -> continue(mnesia:select(Continuation)). + +remove_routes(Routes) -> + %% This partitioning allows us to suppress unnecessary delete + %% operations on disk tables, which require an fsync. 
+ {RamRoutes, DiskRoutes} = + lists:partition(fun (R) -> mnesia:read( + rabbit_durable_route, R#route.binding, read) == [] end, + Routes), + {RamOnlyRoutes, SemiDurableRoutes} = + lists:partition(fun (R) -> mnesia:read( + rabbit_semi_durable_route, R#route.binding, read) == [] end, + RamRoutes), + %% Of course the destination might not really be durable but it's + %% just as easy to try to delete it from the semi-durable table + %% than check first + [ok = sync_route(R, true, true, fun delete/3) || + R <- DiskRoutes], + [ok = sync_route(R, false, true, fun delete/3) || + R <- SemiDurableRoutes], + [ok = sync_route(R, false, false, fun delete/3) || + R <- RamOnlyRoutes], + [R#route.binding || R <- Routes]. + + +delete(Tab, #route{binding = B}, LockKind) -> + mnesia:delete(Tab, B, LockKind); +delete(Tab, #reverse_route{reverse_binding = B}, LockKind) -> + mnesia:delete(Tab, B, LockKind). + +remove_transient_routes(Routes) -> + [begin + ok = sync_transient_route(R, fun delete/3), + R#route.binding + end || R <- Routes]. + +remove_for_destination(DstName, OnlyDurable, Fun) -> + lock_resource(DstName), + MatchFwd = #route{binding = #binding{destination = DstName, _ = '_'}}, + MatchRev = reverse_route(MatchFwd), + Routes = case OnlyDurable of + false -> + [reverse_route(R) || + R <- mnesia:dirty_match_object( + rabbit_reverse_route, MatchRev)]; + true -> lists:usort( + mnesia:dirty_match_object( + rabbit_durable_route, MatchFwd) ++ + mnesia:dirty_match_object( + rabbit_semi_durable_route, MatchFwd)) + end, + Bindings = Fun(Routes), + group_bindings_fold(fun maybe_auto_delete/4, new_deletions(), + lists:keysort(#binding.source, Bindings), OnlyDurable). + +%% Instead of locking entire table on remove operations we can lock the +%% affected resource only. +lock_resource(Name) -> lock_resource(Name, write). + +lock_resource(Name, LockKind) -> + mnesia:lock({global, Name, mnesia:table_info(rabbit_route, where_to_write)}, + LockKind). 
+ +%% Requires that its input binding list is sorted in exchange-name +%% order, so that the grouping of bindings (for passing to +%% group_bindings_and_auto_delete1) works properly. +group_bindings_fold(_Fun, Acc, [], _OnlyDurable) -> + Acc; +group_bindings_fold(Fun, Acc, [B = #binding{source = SrcName} | Bs], + OnlyDurable) -> + group_bindings_fold(Fun, SrcName, Acc, Bs, [B], OnlyDurable). + +group_bindings_fold( + Fun, SrcName, Acc, [B = #binding{source = SrcName} | Bs], Bindings, + OnlyDurable) -> + group_bindings_fold(Fun, SrcName, Acc, Bs, [B | Bindings], OnlyDurable); +group_bindings_fold(Fun, SrcName, Acc, Removed, Bindings, OnlyDurable) -> + %% Either Removed is [], or its head has a non-matching SrcName. + group_bindings_fold(Fun, Fun(SrcName, Bindings, Acc, OnlyDurable), Removed, + OnlyDurable). + +maybe_auto_delete(XName, Bindings, Deletions, OnlyDurable) -> + {Entry, Deletions1} = + case mnesia:read({case OnlyDurable of + true -> rabbit_durable_exchange; + false -> rabbit_exchange + end, XName}) of + [] -> {{undefined, not_deleted, Bindings}, Deletions}; + [X] -> case rabbit_exchange:maybe_auto_delete(X, OnlyDurable) of + not_deleted -> + {{X, not_deleted, Bindings}, Deletions}; + {deleted, Deletions2} -> + {{X, deleted, Bindings}, + combine_deletions(Deletions, Deletions2)} + end + end, + add_deletion(XName, Entry, Deletions1). + +reverse_route(#route{binding = Binding}) -> + #reverse_route{reverse_binding = reverse_binding(Binding)}; + +reverse_route(#reverse_route{reverse_binding = Binding}) -> + #route{binding = reverse_binding(Binding)}. + +reverse_binding(#reverse_binding{source = SrcName, + destination = DstName, + key = Key, + args = Args}) -> + #binding{source = SrcName, + destination = DstName, + key = Key, + args = Args}; + +reverse_binding(#binding{source = SrcName, + destination = DstName, + key = Key, + args = Args}) -> + #reverse_binding{source = SrcName, + destination = DstName, + key = Key, + args = Args}. 
+ +%% ---------------------------------------------------------------------------- +%% Binding / exchange deletion abstraction API +%% ---------------------------------------------------------------------------- + +anything_but( NotThis, NotThis, NotThis) -> NotThis; +anything_but( NotThis, NotThis, This) -> This; +anything_but( NotThis, This, NotThis) -> This; +anything_but(_NotThis, This, This) -> This. + +-spec new_deletions() -> deletions(). + +new_deletions() -> dict:new(). + +-spec add_deletion + (rabbit_exchange:name(), + {'undefined' | rabbit_types:exchange(), + 'deleted' | 'not_deleted', + bindings()}, + deletions()) -> + deletions(). + +add_deletion(XName, Entry, Deletions) -> + dict:update(XName, fun (Entry1) -> merge_entry(Entry1, Entry) end, + Entry, Deletions). + +-spec combine_deletions(deletions(), deletions()) -> deletions(). + +combine_deletions(Deletions1, Deletions2) -> + dict:merge(fun (_XName, Entry1, Entry2) -> merge_entry(Entry1, Entry2) end, + Deletions1, Deletions2). + +merge_entry({X1, Deleted1, Bindings1}, {X2, Deleted2, Bindings2}) -> + {anything_but(undefined, X1, X2), + anything_but(not_deleted, Deleted1, Deleted2), + [Bindings1 | Bindings2]}. + +-spec process_deletions(deletions(), rabbit_types:username()) -> rabbit_misc:thunk('ok'). 
+ +process_deletions(Deletions, ActingUser) -> + AugmentedDeletions = + dict:map(fun (_XName, {X, deleted, Bindings}) -> + Bs = lists:flatten(Bindings), + x_callback(transaction, X, delete, Bs), + {X, deleted, Bs, none}; + (_XName, {X, not_deleted, Bindings}) -> + Bs = lists:flatten(Bindings), + x_callback(transaction, X, remove_bindings, Bs), + {X, not_deleted, Bs, rabbit_exchange:serial(X)} + end, Deletions), + fun() -> + dict:fold(fun (XName, {X, deleted, Bs, Serial}, ok) -> + ok = rabbit_event:notify( + exchange_deleted, + [{name, XName}, + {user_who_performed_action, ActingUser}]), + del_notify(Bs, ActingUser), + x_callback(Serial, X, delete, Bs); + (_XName, {X, not_deleted, Bs, Serial}, ok) -> + del_notify(Bs, ActingUser), + x_callback(Serial, X, remove_bindings, Bs) + end, ok, AugmentedDeletions) + end. + +del_notify(Bs, ActingUser) -> [rabbit_event:notify( + binding_deleted, + info(B) ++ [{user_who_performed_action, ActingUser}]) + || B <- Bs]. + +x_callback(Serial, X, F, Bs) -> + ok = rabbit_exchange:callback(X, F, Serial, [X, Bs]). diff --git a/deps/rabbit/src/rabbit_boot_steps.erl b/deps/rabbit/src/rabbit_boot_steps.erl new file mode 100644 index 0000000000..f87448edb7 --- /dev/null +++ b/deps/rabbit/src/rabbit_boot_steps.erl @@ -0,0 +1,91 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_boot_steps). + +-export([run_boot_steps/0, run_boot_steps/1, run_cleanup_steps/1]). +-export([find_steps/0, find_steps/1]). + +run_boot_steps() -> + run_boot_steps(loaded_applications()). + +run_boot_steps(Apps) -> + [begin + rabbit_log:info("Running boot step ~s defined by app ~s", [Step, App]), + ok = run_step(Attrs, mfa) + end || {App, Step, Attrs} <- find_steps(Apps)], + ok. 
+ +run_cleanup_steps(Apps) -> + [run_step(Attrs, cleanup) || {_, _, Attrs} <- find_steps(Apps)], + ok. + +loaded_applications() -> + [App || {App, _, _} <- application:loaded_applications()]. + +find_steps() -> + find_steps(loaded_applications()). + +find_steps(Apps) -> + All = sort_boot_steps(rabbit_misc:all_module_attributes(rabbit_boot_step)), + [Step || {App, _, _} = Step <- All, lists:member(App, Apps)]. + +run_step(Attributes, AttributeName) -> + [begin + rabbit_log:debug("Applying MFA: M = ~s, F = ~s, A = ~p", + [M, F, A]), + case apply(M,F,A) of + ok -> ok; + {error, Reason} -> exit({error, Reason}) + end + end + || {Key, {M,F,A}} <- Attributes, + Key =:= AttributeName], + ok. + +vertices({AppName, _Module, Steps}) -> + [{StepName, {AppName, StepName, Atts}} || {StepName, Atts} <- Steps]. + +edges({_AppName, _Module, Steps}) -> + EnsureList = fun (L) when is_list(L) -> L; + (T) -> [T] + end, + [case Key of + requires -> {StepName, OtherStep}; + enables -> {OtherStep, StepName} + end || {StepName, Atts} <- Steps, + {Key, OtherStepOrSteps} <- Atts, + OtherStep <- EnsureList(OtherStepOrSteps), + Key =:= requires orelse Key =:= enables]. + +sort_boot_steps(UnsortedSteps) -> + case rabbit_misc:build_acyclic_graph(fun vertices/1, fun edges/1, + UnsortedSteps) of + {ok, G} -> + %% Use topological sort to find a consistent ordering (if + %% there is one, otherwise fail). + SortedSteps = lists:reverse( + [begin + {StepName, Step} = digraph:vertex(G, + StepName), + Step + end || StepName <- digraph_utils:topsort(G)]), + digraph:delete(G), + %% Check that all mentioned {M,F,A} triples are exported. 
+ case [{StepName, {M,F,A}} || + {_App, StepName, Attributes} <- SortedSteps, + {mfa, {M,F,A}} <- Attributes, + code:ensure_loaded(M) =/= {module, M} orelse + not erlang:function_exported(M, F, length(A))] of + [] -> SortedSteps; + MissingFns -> exit({boot_functions_not_exported, MissingFns}) + end; + {error, {vertex, duplicate, StepName}} -> + exit({duplicate_boot_step, StepName}); + {error, {edge, Reason, From, To}} -> + exit({invalid_boot_step_dependency, From, To, Reason}) + end. diff --git a/deps/rabbit/src/rabbit_channel.erl b/deps/rabbit/src/rabbit_channel.erl new file mode 100644 index 0000000000..8e7828a7c0 --- /dev/null +++ b/deps/rabbit/src/rabbit_channel.erl @@ -0,0 +1,2797 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_channel). + +%% Transitional step until we can require Erlang/OTP 21 and +%% use the now recommended try/catch syntax for obtaining the stack trace. +-compile(nowarn_deprecated_function). + +%% rabbit_channel processes represent an AMQP 0-9-1 channels. +%% +%% Connections parse protocol frames coming from clients and +%% dispatch them to channel processes. +%% Channels are responsible for implementing the logic behind +%% the various protocol methods, involving other processes as +%% needed: +%% +%% * Routing messages (using functions in various exchange type +%% modules) to queue processes. +%% * Managing queues, exchanges, and bindings. 
+%% * Keeping track of consumers +%% * Keeping track of unacknowledged deliveries to consumers +%% * Keeping track of publisher confirms +%% * Transaction management +%% * Authorisation (enforcing permissions) +%% * Publishing trace events if tracing is enabled +%% +%% Every channel has a number of dependent processes: +%% +%% * A writer which is responsible for sending frames to clients. +%% * A limiter which controls how many messages can be delivered +%% to consumers according to active QoS prefetch and internal +%% flow control logic. +%% +%% Channels are also aware of their connection's queue collector. +%% When a queue is declared as exclusive on a channel, the channel +%% will notify queue collector of that queue. + +-include_lib("rabbit_common/include/rabbit_framing.hrl"). +-include_lib("rabbit_common/include/rabbit.hrl"). +-include_lib("rabbit_common/include/rabbit_misc.hrl"). + +-include("amqqueue.hrl"). + +-behaviour(gen_server2). + +-export([start_link/11, start_link/12, do/2, do/3, do_flow/3, flush/1, shutdown/1]). +-export([send_command/2, deliver/4, deliver_reply/2, + send_credit_reply/2, send_drained/2]). +-export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1, + emit_info_all/4, info_local/1]). +-export([refresh_config_local/0, ready_for_close/1]). +-export([refresh_interceptors/0]). +-export([force_event_refresh/1]). +-export([update_user_state/2]). + +-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, + handle_info/2, handle_pre_hibernate/1, handle_post_hibernate/1, + prioritise_call/4, prioritise_cast/3, prioritise_info/3, + format_message_queue/2]). + +%% Internal +-export([list_local/0, emit_info_local/3, deliver_reply_local/3]). +-export([get_vhost/1, get_user/1]). +%% For testing +-export([build_topic_variable_map/3]). +-export([list_queue_states/1, get_max_message_size/0]). + +%% Mgmt HTTP API refactor +-export([handle_method/6]). 
+ +-record(conf, { + %% starting | running | flow | closing + state, + %% same as reader's protocol. Used when instantiating + %% (protocol) exceptions. + protocol, + %% channel number + channel, + %% reader process + reader_pid, + %% writer process + writer_pid, + %% + conn_pid, + %% same as reader's name, see #v1.name + %% in rabbit_reader + conn_name, + %% channel's originating source e.g. rabbit_reader | rabbit_direct | undefined + %% or any other channel creating/spawning entity + source, + %% same as #v1.user in the reader, used in + %% authorisation checks + user, + %% same as #v1.user in the reader + virtual_host, + %% when queue.bind's queue field is empty, + %% this name will be used instead + most_recently_declared_queue, + %% when a queue is declared as exclusive, queue + %% collector must be notified. + %% see rabbit_queue_collector for more info. + queue_collector_pid, + + %% same as capabilities in the reader + capabilities, + %% tracing exchange resource if tracing is enabled, + %% 'none' otherwise + trace_state, + consumer_prefetch, + %% Message content size limit + max_message_size, + consumer_timeout, + authz_context, + %% defines how ofter gc will be executed + writer_gc_threshold + }). + +-record(pending_ack, {delivery_tag, + tag, + delivered_at, + queue, %% queue name + msg_id}). + +-record(ch, {cfg :: #conf{}, + %% limiter state, see rabbit_limiter + limiter, + %% none | {Msgs, Acks} | committing | failed | + tx, + %% (consumer) delivery tag sequence + next_tag, + %% messages pending consumer acknowledgement + unacked_message_q, + %% queue processes are monitored to update + %% queue names + queue_monitors, + %% a map of consumer tags to + %% consumer details: #amqqueue record, acknowledgement mode, + %% consumer exclusivity, etc + consumer_mapping, + %% a map of queue names to consumer tag lists + queue_consumers, + %% timer used to emit statistics + stats_timer, + %% are publisher confirms enabled for this channel? 
+ confirm_enabled, + %% publisher confirm delivery tag sequence + publish_seqno, + %% an unconfirmed_messages data structure used to track unconfirmed + %% (to publishers) messages + unconfirmed, + %% a list of tags for published messages that were + %% delivered but are yet to be confirmed to the client + confirmed, + %% a list of tags for published messages that were + %% rejected but are yet to be sent to the client + rejected, + %% used by "one shot RPC" (amq. + reply_consumer, + %% flow | noflow, see rabbitmq-server#114 + delivery_flow, + interceptor_state, + queue_states, + tick_timer + }). + +-define(QUEUE, lqueue). + +-define(MAX_PERMISSION_CACHE_SIZE, 12). + +-define(REFRESH_TIMEOUT, 15000). + +-define(STATISTICS_KEYS, + [reductions, + pid, + transactional, + confirm, + consumer_count, + messages_unacknowledged, + messages_unconfirmed, + messages_uncommitted, + acks_uncommitted, + pending_raft_commands, + prefetch_count, + global_prefetch_count, + state, + garbage_collection]). + + +-define(CREATION_EVENT_KEYS, + [pid, + name, + connection, + number, + user, + vhost, + user_who_performed_action]). + +-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). + +-define(INCR_STATS(Type, Key, Inc, Measure, State), + case rabbit_event:stats_level(State, #ch.stats_timer) of + fine -> + rabbit_core_metrics:channel_stats(Type, Measure, {self(), Key}, Inc), + %% Keys in the process dictionary are used to clean up the core metrics + put({Type, Key}, none); + _ -> + ok + end). + +-define(INCR_STATS(Type, Key, Inc, Measure), + begin + rabbit_core_metrics:channel_stats(Type, Measure, {self(), Key}, Inc), + %% Keys in the process dictionary are used to clean up the core metrics + put({Type, Key}, none) + end). + +%%---------------------------------------------------------------------------- + +-export_type([channel_number/0]). + +-type channel_number() :: non_neg_integer(). + +-export_type([channel/0]). + +-type channel() :: #ch{}. 
+ +%%---------------------------------------------------------------------------- + +-spec start_link + (channel_number(), pid(), pid(), pid(), string(), rabbit_types:protocol(), + rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), + pid(), pid()) -> + rabbit_types:ok_pid_or_error(). + +start_link(Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, + VHost, Capabilities, CollectorPid, Limiter) -> + start_link(Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, + VHost, Capabilities, CollectorPid, Limiter, undefined). + +-spec start_link + (channel_number(), pid(), pid(), pid(), string(), rabbit_types:protocol(), + rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), + pid(), pid(), any()) -> + rabbit_types:ok_pid_or_error(). + +start_link(Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, + VHost, Capabilities, CollectorPid, Limiter, AmqpParams) -> + gen_server2:start_link( + ?MODULE, [Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, + User, VHost, Capabilities, CollectorPid, Limiter, AmqpParams], []). + +-spec do(pid(), rabbit_framing:amqp_method_record()) -> 'ok'. + +do(Pid, Method) -> + rabbit_channel_common:do(Pid, Method). + +-spec do + (pid(), rabbit_framing:amqp_method_record(), + rabbit_types:maybe(rabbit_types:content())) -> + 'ok'. + +do(Pid, Method, Content) -> + rabbit_channel_common:do(Pid, Method, Content). + +-spec do_flow + (pid(), rabbit_framing:amqp_method_record(), + rabbit_types:maybe(rabbit_types:content())) -> + 'ok'. + +do_flow(Pid, Method, Content) -> + rabbit_channel_common:do_flow(Pid, Method, Content). + +-spec flush(pid()) -> 'ok'. + +flush(Pid) -> + gen_server2:call(Pid, flush, infinity). + +-spec shutdown(pid()) -> 'ok'. + +shutdown(Pid) -> + gen_server2:cast(Pid, terminate). + +-spec send_command(pid(), rabbit_framing:amqp_method_record()) -> 'ok'. + +send_command(Pid, Msg) -> + gen_server2:cast(Pid, {command, Msg}). 
+ +-spec deliver + (pid(), rabbit_types:ctag(), boolean(), rabbit_amqqueue:qmsg()) -> 'ok'. + +deliver(Pid, ConsumerTag, AckRequired, Msg) -> + gen_server2:cast(Pid, {deliver, ConsumerTag, AckRequired, Msg}). + +-spec deliver_reply(binary(), rabbit_types:delivery()) -> 'ok'. + +deliver_reply(<<"amq.rabbitmq.reply-to.", Rest/binary>>, Delivery) -> + case decode_fast_reply_to(Rest) of + {ok, Pid, Key} -> + delegate:invoke_no_result( + Pid, {?MODULE, deliver_reply_local, [Key, Delivery]}); + error -> + ok + end. + +%% We want to ensure people can't use this mechanism to send a message +%% to an arbitrary process and kill it! + +-spec deliver_reply_local(pid(), binary(), rabbit_types:delivery()) -> 'ok'. + +deliver_reply_local(Pid, Key, Delivery) -> + case pg_local:in_group(rabbit_channels, Pid) of + true -> gen_server2:cast(Pid, {deliver_reply, Key, Delivery}); + false -> ok + end. + +declare_fast_reply_to(<<"amq.rabbitmq.reply-to">>) -> + exists; +declare_fast_reply_to(<<"amq.rabbitmq.reply-to.", Rest/binary>>) -> + case decode_fast_reply_to(Rest) of + {ok, Pid, Key} -> + Msg = {declare_fast_reply_to, Key}, + rabbit_misc:with_exit_handler( + rabbit_misc:const(not_found), + fun() -> gen_server2:call(Pid, Msg, infinity) end); + error -> + not_found + end; +declare_fast_reply_to(_) -> + not_found. + +decode_fast_reply_to(Rest) -> + case string:tokens(binary_to_list(Rest), ".") of + [PidEnc, Key] -> Pid = binary_to_term(base64:decode(PidEnc)), + {ok, Pid, Key}; + _ -> error + end. + +-spec send_credit_reply(pid(), non_neg_integer()) -> 'ok'. + +send_credit_reply(Pid, Len) -> + gen_server2:cast(Pid, {send_credit_reply, Len}). + +-spec send_drained(pid(), [{rabbit_types:ctag(), non_neg_integer()}]) -> 'ok'. + +send_drained(Pid, CTagCredit) -> + gen_server2:cast(Pid, {send_drained, CTagCredit}). + +-spec list() -> [pid()]. + +list() -> + Nodes = rabbit_nodes:all_running(), + rabbit_misc:append_rpc_all_nodes(Nodes, rabbit_channel, list_local, [], ?RPC_TIMEOUT). 
+ +-spec list_local() -> [pid()]. + +list_local() -> + pg_local:get_members(rabbit_channels). + +-spec info_keys() -> rabbit_types:info_keys(). + +info_keys() -> ?INFO_KEYS. + +-spec info(pid()) -> rabbit_types:infos(). + +info(Pid) -> + {Timeout, Deadline} = get_operation_timeout_and_deadline(), + try + case gen_server2:call(Pid, {info, Deadline}, Timeout) of + {ok, Res} -> Res; + {error, Error} -> throw(Error) + end + catch + exit:{timeout, _} -> + rabbit_log:error("Timed out getting channel ~p info", [Pid]), + throw(timeout) + end. + +-spec info(pid(), rabbit_types:info_keys()) -> rabbit_types:infos(). + +info(Pid, Items) -> + {Timeout, Deadline} = get_operation_timeout_and_deadline(), + try + case gen_server2:call(Pid, {{info, Items}, Deadline}, Timeout) of + {ok, Res} -> Res; + {error, Error} -> throw(Error) + end + catch + exit:{timeout, _} -> + rabbit_log:error("Timed out getting channel ~p info", [Pid]), + throw(timeout) + end. + +-spec info_all() -> [rabbit_types:infos()]. + +info_all() -> + rabbit_misc:filter_exit_map(fun (C) -> info(C) end, list()). + +-spec info_all(rabbit_types:info_keys()) -> [rabbit_types:infos()]. + +info_all(Items) -> + rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list()). + +info_local(Items) -> + rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list_local()). + +emit_info_all(Nodes, Items, Ref, AggregatorPid) -> + Pids = [ spawn_link(Node, rabbit_channel, emit_info_local, [Items, Ref, AggregatorPid]) || Node <- Nodes ], + rabbit_control_misc:await_emitters_termination(Pids). + +emit_info_local(Items, Ref, AggregatorPid) -> + emit_info(list_local(), Items, Ref, AggregatorPid). + +emit_info(PidList, InfoItems, Ref, AggregatorPid) -> + rabbit_control_misc:emitting_map_with_exit_handler( + AggregatorPid, Ref, fun(C) -> info(C, InfoItems) end, PidList). + +-spec refresh_config_local() -> 'ok'. 
+ +refresh_config_local() -> + rabbit_misc:upmap( + fun (C) -> + try + gen_server2:call(C, refresh_config, infinity) + catch _:Reason -> + rabbit_log:error("Failed to refresh channel config " + "for channel ~p. Reason ~p", + [C, Reason]) + end + end, + list_local()), + ok. + +refresh_interceptors() -> + rabbit_misc:upmap( + fun (C) -> + try + gen_server2:call(C, refresh_interceptors, ?REFRESH_TIMEOUT) + catch _:Reason -> + rabbit_log:error("Failed to refresh channel interceptors " + "for channel ~p. Reason ~p", + [C, Reason]) + end + end, + list_local()), + ok. + +-spec ready_for_close(pid()) -> 'ok'. + +ready_for_close(Pid) -> + rabbit_channel_common:ready_for_close(Pid). + +-spec force_event_refresh(reference()) -> 'ok'. + +% Note: https://www.pivotaltracker.com/story/show/166962656 +% This event is necessary for the stats timer to be initialized with +% the correct values once the management agent has started +force_event_refresh(Ref) -> + [gen_server2:cast(C, {force_event_refresh, Ref}) || C <- list()], + ok. + +list_queue_states(Pid) -> + gen_server2:call(Pid, list_queue_states). + +-spec update_user_state(pid(), rabbit_types:auth_user()) -> 'ok' | {error, channel_terminated}. + +update_user_state(Pid, UserState) when is_pid(Pid) -> + case erlang:is_process_alive(Pid) of + true -> Pid ! {update_user_state, UserState}, + ok; + false -> {error, channel_terminated} + end. 

%%---------------------------------------------------------------------------

%% gen_server2 callback: build the initial #ch{} state from the
%% arguments supplied at channel start. Registers the process in the
%% `rabbit_channels' pg_local group and traps exits so terminate/2 runs.
init([Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, VHost,
      Capabilities, CollectorPid, LimiterPid, AmqpParams]) ->
    process_flag(trap_exit, true),
    ?LG_PROCESS_TYPE(channel),
    ?store_proc_name({ConnName, Channel}),
    ok = pg_local:join(rabbit_channels, self()),
    %% Whether deliveries to this channel participate in credit flow;
    %% controlled by the `mirroring_flow_control' application env key.
    Flow = case rabbit_misc:get_env(rabbit, mirroring_flow_control, true) of
             true   -> flow;
             false  -> noflow
           end,
    {ok, {Global, Prefetch}} = application:get_env(rabbit, default_consumer_prefetch),
    Limiter0 = rabbit_limiter:new(LimiterPid),
    %% Apply the configured default prefetch; a global prefetch of 0
    %% means "unlimited".
    Limiter = case {Global, Prefetch} of
                  {true, 0} ->
                      rabbit_limiter:unlimit_prefetch(Limiter0);
                  {true, _} ->
                      rabbit_limiter:limit_prefetch(Limiter0, Prefetch, 0);
                  _ ->
                      Limiter0
              end,
    %% Process dictionary is used here because permission cache already uses it. MK.
    put(permission_cache_can_expire, rabbit_access_control:permission_cache_can_expire(User)),
    MaxMessageSize = get_max_message_size(),
    ConsumerTimeout = get_consumer_timeout(),
    OptionalVariables = extract_variable_map_from_amqp_params(AmqpParams),
    {ok, GCThreshold} = application:get_env(rabbit, writer_gc_threshold),
    %% #conf{} holds the (mostly) immutable per-channel configuration;
    %% the mutable protocol state lives in the other #ch{} fields.
    State = #ch{cfg = #conf{state = starting,
                            protocol = Protocol,
                            channel = Channel,
                            reader_pid = ReaderPid,
                            writer_pid = WriterPid,
                            conn_pid = ConnPid,
                            conn_name = ConnName,
                            user = User,
                            virtual_host = VHost,
                            most_recently_declared_queue = <<>>,
                            queue_collector_pid = CollectorPid,
                            capabilities = Capabilities,
                            trace_state = rabbit_trace:init(VHost),
                            consumer_prefetch = Prefetch,
                            max_message_size = MaxMessageSize,
                            consumer_timeout = ConsumerTimeout,
                            authz_context = OptionalVariables,
                            writer_gc_threshold = GCThreshold
                           },
                limiter = Limiter,
                tx = none,
                next_tag = 1,
                unacked_message_q = ?QUEUE:new(),
                queue_monitors = pmon:new(),
                consumer_mapping = #{},
                queue_consumers = #{},
                confirm_enabled = false,
                publish_seqno = 1,
                unconfirmed = rabbit_confirms:init(),
                rejected = [],
                confirmed = [],
                reply_consumer = none,
                delivery_flow = Flow,
                interceptor_state = undefined,
                queue_states = rabbit_queue_type:init()
               },
    %% Interceptors are initialised from the otherwise-complete state.
    State1 = State#ch{
               interceptor_state = rabbit_channel_interceptor:init(State)},
    State2 = rabbit_event:init_stats_timer(State1, #ch.stats_timer),
    Infos = infos(?CREATION_EVENT_KEYS, State2),
    rabbit_core_metrics:channel_created(self(), Infos),
    rabbit_event:notify(channel_created, Infos),
    rabbit_event:if_enabled(State2, #ch.stats_timer,
                            fun() -> emit_stats(State2) end),
    put_operation_timeout(),
    State3 = init_tick_timer(State2),
    {ok, State3, hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.

%% gen_server2 priority hooks: info queries jump the queue so `rabbitmqctl
%% list_channels' stays responsive under load.
prioritise_call(Msg, _From, _Len, _State) ->
    case Msg of
        info           -> 9;
        {info, _Items} -> 9;
        _              -> 0
    end.

%% Confirm/reject notifications from queues are processed ahead of
%% ordinary casts so publishers get confirms promptly.
prioritise_cast(Msg, _Len, _State) ->
    case Msg of
        {confirm,            _MsgSeqNos, _QPid} -> 5;
        {reject_publish,     _MsgSeqNos, _QPid} -> 5;
        {queue_event, _, {confirm,        _MsgSeqNos, _QPid}} -> 5;
        {queue_event, _, {reject_publish, _MsgSeqNos, _QPid}} -> 5;
        _                                       -> 0
    end.

prioritise_info(Msg, _Len, _State) ->
    case Msg of
        emit_stats -> 7;
        _          -> 0
    end.

handle_call(flush, _From, State) ->
    reply(ok, State);

%% Info queries carry a Deadline so slow queues cannot stall listings;
%% note the bare `catch Error ->' only traps throws from infos/3.
handle_call({info, Deadline}, _From, State) ->
    try
        reply({ok, infos(?INFO_KEYS, Deadline, State)}, State)
    catch
        Error ->
            reply({error, Error}, State)
    end;

handle_call({{info, Items}, Deadline}, _From, State) ->
    try
        reply({ok, infos(Items, Deadline, State)}, State)
    catch
        Error ->
            reply({error, Error}, State)
    end;

%% Re-read tracing state for this channel's vhost (see
%% refresh_config_local/0).
handle_call(refresh_config, _From,
            State = #ch{cfg = #conf{virtual_host = VHost} = Cfg}) ->
    reply(ok, State#ch{cfg = Cfg#conf{trace_state = rabbit_trace:init(VHost)}});

handle_call(refresh_interceptors, _From, State) ->
    IState = rabbit_channel_interceptor:init(State),
    reply(ok, State#ch{interceptor_state = IState});

%% Direct-reply-to: does this channel own the fast-reply key?
handle_call({declare_fast_reply_to, Key}, _From,
            State = #ch{reply_consumer = Consumer}) ->
    reply(case Consumer of
              {_, _, Key} -> exists;
              _           -> not_found
          end, State);

handle_call(list_queue_states, _From, State = #ch{queue_states = QueueStates}) ->
    %% For testing of cleanup only
    %% HACK
    {reply, maps:keys(element(2, QueueStates)), State};

%% Unknown calls are deliberately not replied to (caller times out)
%% rather than crashing the channel.
handle_call(_Request, _From, State) ->
    noreply(State).

%% A protocol method forwarded by the reader. Flow tells us whether the
%% reader used credit flow for this message.
handle_cast({method, Method, Content, Flow},
            State = #ch{cfg = #conf{reader_pid = Reader},
                        interceptor_state = IState}) ->
    case Flow of
        %% We are going to process a message from the rabbit_reader
        %% process, so here we ack it. In this case we are accessing
        %% the rabbit_channel process dictionary.
        flow   -> credit_flow:ack(Reader);
        noflow -> ok
    end,
    %% Expand empty-name shortcuts, run interceptors, then dispatch to
    %% handle_method/3. A thrown #amqp_error{} becomes a soft (channel)
    %% or hard (connection) error via handle_exception/2; anything else
    %% crashes the channel with the stacktrace preserved.
    try handle_method(rabbit_channel_interceptor:intercept_in(
                        expand_shortcuts(Method, State), Content, IState),
                      State) of
        {reply, Reply, NewState} ->
            ok = send(Reply, NewState),
            noreply(NewState);
        {noreply, NewState} ->
            noreply(NewState);
        stop ->
            {stop, normal, State}
    catch
        exit:Reason = #amqp_error{} ->
            MethodName = rabbit_misc:method_record_type(Method),
            handle_exception(Reason#amqp_error{method = MethodName}, State);
        _:Reason:Stacktrace ->
            {stop, {Reason, Stacktrace}, State}
    end;

%% Second half of the close handshake: the reader has forgotten us, so
%% we may now send close_ok and stop (see the channel.close clause).
handle_cast(ready_for_close,
            State = #ch{cfg = #conf{state = closing,
                                    writer_pid = WriterPid}}) ->
    ok = rabbit_writer:send_command_sync(WriterPid, #'channel.close_ok'{}),
    {stop, normal, State};

handle_cast(terminate, State = #ch{cfg = #conf{writer_pid = WriterPid}}) ->
    ok = rabbit_writer:flush(WriterPid),
    {stop, normal, State};

%% consume_ok must also start monitoring the consumed-from queue.
handle_cast({command, #'basic.consume_ok'{consumer_tag = CTag} = Msg}, State) ->
    ok = send(Msg, State),
    noreply(consumer_monitor(CTag, State));

handle_cast({command, Msg}, State) ->
    ok = send(Msg, State),
    noreply(State);

%% Deliveries racing with channel closure are dropped.
handle_cast({deliver, _CTag, _AckReq, _Msg},
            State = #ch{cfg = #conf{state = closing}}) ->
    noreply(State);
handle_cast({deliver, ConsumerTag, AckRequired, Msg}, State) ->
    % TODO: handle as action
    noreply(handle_deliver(ConsumerTag, AckRequired, Msg, State));

%% Direct-reply-to deliveries: dropped when closing or when no reply
%% consumer is registered; forwarded when the key matches ours.
handle_cast({deliver_reply, _K, _Del},
            State = #ch{cfg = #conf{state = closing}}) ->
    noreply(State);
handle_cast({deliver_reply, _K, _Del}, State = #ch{reply_consumer = none}) ->
    noreply(State);
handle_cast({deliver_reply, Key, #delivery{message =
                    #basic_message{exchange_name = ExchangeName,
                                   routing_keys = [RoutingKey | _CcRoutes],
                                   content = Content}}},
            State = #ch{cfg = #conf{writer_pid = WriterPid},
                        next_tag = DeliveryTag,
                        reply_consumer = {ConsumerTag, _Suffix, Key}}) ->
    ok = rabbit_writer:send_command(
           WriterPid,
           #'basic.deliver'{consumer_tag = ConsumerTag,
                            delivery_tag = DeliveryTag,
                            redelivered = false,
                            exchange = ExchangeName#resource.name,
                            routing_key = RoutingKey},
           Content),
    noreply(State);
%% Reply key does not match the registered consumer: drop silently.
handle_cast({deliver_reply, _K1, _}, State=#ch{reply_consumer = {_, _, _K2}}) ->
    noreply(State);

%% basic.credit responses from a queue, relayed to the client.
handle_cast({send_credit_reply, Len},
            State = #ch{cfg = #conf{writer_pid = WriterPid}}) ->
    ok = rabbit_writer:send_command(
           WriterPid, #'basic.credit_ok'{available = Len}),
    noreply(State);

handle_cast({send_drained, CTagCredit},
            State = #ch{cfg = #conf{writer_pid = WriterPid}}) ->
    [ok = rabbit_writer:send_command(
            WriterPid, #'basic.credit_drained'{consumer_tag = ConsumerTag,
                                               credit_drained = CreditDrained})
     || {ConsumerTag, CreditDrained} <- CTagCredit],
    noreply(State);

% Note: https://www.pivotaltracker.com/story/show/166962656
% This event is necessary for the stats timer to be initialized with
% the correct values once the management agent has started
handle_cast({force_event_refresh, Ref}, State) ->
    rabbit_event:notify(channel_created, infos(?CREATION_EVENT_KEYS, State),
                        Ref),
    noreply(rabbit_event:init_stats_timer(State, #ch.stats_timer));

handle_cast({mandatory_received, _MsgSeqNo}, State) ->
    %% This feature was used by `rabbit_amqqueue_process` and
    %% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x.
    %% It is unused in 3.8.x and thus deprecated. We keep it to support
    %% in-place upgrades to 3.8.x (i.e. mixed-version clusters), but it
    %% is a no-op starting with that version.
    %%
    %% NB: don't call noreply/1 since we don't want to send confirms.
    noreply_coalesce(State);

%% Pre-queue-type API notification: map the queue pid back to its name
%% and re-dispatch as a queue_event.
handle_cast({reject_publish, _MsgSeqNo, QPid} = Evt, State) ->
    %% For backwards compatibility
    QRef = find_queue_name_from_pid(QPid, State#ch.queue_states),
    case QRef of
        undefined ->
            %% ignore if no queue could be found for the given pid
            noreply(State);
        _ ->
            handle_cast({queue_event, QRef, Evt}, State)
    end;

handle_cast({confirm, _MsgSeqNo, QPid} = Evt, State) ->
    %% For backwards compatibility
    QRef = find_queue_name_from_pid(QPid, State#ch.queue_states),
    case QRef of
        undefined ->
            %% ignore if no queue could be found for the given pid
            noreply(State);
        _ ->
            handle_cast({queue_event, QRef, Evt}, State)
    end;
%% Generic queue-type event dispatch. `eol' means the queue is gone:
%% confirm what can be confirmed, clean up per-queue state and stats.
handle_cast({queue_event, QRef, Evt},
            #ch{queue_states = QueueStates0} = State0) ->
    case rabbit_queue_type:handle_event(QRef, Evt, QueueStates0) of
        {ok, QState1, Actions} ->
            State1 = State0#ch{queue_states = QState1},
            State = handle_queue_actions(Actions, State1),
            noreply_coalesce(State);
        eol ->
            State1 = handle_consuming_queue_down_or_eol(QRef, State0),
            {ConfirmMXs, UC1} =
                rabbit_confirms:remove_queue(QRef, State1#ch.unconfirmed),
            %% Deleted queue is a special case.
            %% Do not nack the "rejected" messages.
            State2 = record_confirms(ConfirmMXs,
                                     State1#ch{unconfirmed = UC1}),
            erase_queue_stats(QRef),
            noreply_coalesce(
              State2#ch{queue_states = rabbit_queue_type:remove(QRef, QueueStates0)});
        {protocol_error, Type, Reason, ReasonArgs} ->
            rabbit_misc:protocol_error(Type, Reason, ReasonArgs)
    end.

%% Raw Ra (quorum queue) events: map the Ra server name back to a queue
%% name and re-dispatch as a queue_event.
handle_info({ra_event, {Name, _} = From, Evt}, State) ->
    %% For backwards compatibility
    QRef = find_queue_name_from_quorum_name(Name, State#ch.queue_states),
    handle_cast({queue_event, QRef, {From, Evt}}, State);

handle_info({bump_credit, Msg}, State) ->
    %% A rabbit_amqqueue_process is granting credit to our channel. If
    %% our channel was being blocked by this process, and no other
    %% process is blocking our channel, then this channel will be
    %% unblocked. This means that any credit that was deferred will be
    %% sent to the rabbit_reader process that might be blocked by this
    %% particular channel.
    credit_flow:handle_bump_msg(Msg),
    noreply(State);

handle_info(timeout, State) ->
    noreply(State);

handle_info(emit_stats, State) ->
    emit_stats(State),
    State1 = rabbit_event:reset_stats_timer(State, #ch.stats_timer),
    %% NB: don't call noreply/1 since we don't want to kick off the
    %% stats timer.
    {noreply, send_confirms_and_nacks(State1), hibernate};

%% A monitored queue process died; the queue type decides whether this
%% is recoverable ({ok, ...}) or the end of the queue ({eol, ...}).
handle_info({'DOWN', _MRef, process, QPid, Reason},
            #ch{queue_states = QStates0,
                queue_monitors = _QMons} = State0) ->
    credit_flow:peer_down(QPid),
    case rabbit_queue_type:handle_down(QPid, Reason, QStates0) of
        {ok, QState1, Actions} ->
            State1 = State0#ch{queue_states = QState1},
            State = handle_queue_actions(Actions, State1),
            noreply_coalesce(State);
        {eol, QRef} ->
            State1 = handle_consuming_queue_down_or_eol(QRef, State0),
            {ConfirmMXs, UC1} =
                rabbit_confirms:remove_queue(QRef, State1#ch.unconfirmed),
            %% Deleted queue is a special case.
            %% Do not nack the "rejected" messages.
            State2 = record_confirms(ConfirmMXs,
                                     State1#ch{unconfirmed = UC1}),
            erase_queue_stats(QRef),
            noreply_coalesce(
              State2#ch{queue_states = rabbit_queue_type:remove(QRef, QStates0)})
    end;

%% trap_exit is set in init/1, so linked-process exits arrive here and
%% take the channel down with the same reason.
handle_info({'EXIT', _Pid, Reason}, State) ->
    {stop, Reason, State};

%% A gen_server call we made earlier timed out and the answer arrived
%% after we gave up; log and discard it.
handle_info({{Ref, Node}, LateAnswer},
            State = #ch{cfg = #conf{channel = Channel}})
  when is_reference(Ref) ->
    rabbit_log_channel:warning("Channel ~p ignoring late answer ~p from ~p",
                               [Channel, LateAnswer, Node]),
    noreply(State);

%% Periodic housekeeping: expire the permission cache when credentials
%% can expire, and evaluate consumer ack timeouts; then re-arm the tick.
handle_info(tick, State0 = #ch{queue_states = QueueStates0}) ->
    case get(permission_cache_can_expire) of
        true -> ok = clear_permission_cache();
        _    -> ok
    end,
    case evaluate_consumer_timeout(State0#ch{queue_states = QueueStates0}) of
        {noreply, State} ->
            noreply(init_tick_timer(reset_tick_timer(State)));
        Return ->
            Return
    end;
%% See update_user_state/2 in the API section.
handle_info({update_user_state, User}, State = #ch{cfg = Cfg}) ->
    noreply(State#ch{cfg = Cfg#conf{user = User}}).


handle_pre_hibernate(State0) ->
    ok = clear_permission_cache(),
    State = maybe_cancel_tick_timer(State0),
    rabbit_event:if_enabled(
      State, #ch.stats_timer,
      fun () -> emit_stats(State,
                           [{idle_since,
                             os:system_time(milli_seconds)}])
      end),
    {hibernate, rabbit_event:stop_stats_timer(State, #ch.stats_timer)}.

handle_post_hibernate(State0) ->
    State = init_tick_timer(State0),
    {noreply, State}.

%% Best-effort teardown: close queue-type state, notify queues, leave
%% the pg_local group and emit final stats/events.
terminate(_Reason,
          State = #ch{cfg = #conf{user = #user{username = Username}},
                      queue_states = QueueCtxs}) ->
    _ = rabbit_queue_type:close(QueueCtxs),
    {_Res, _State1} = notify_queues(State),
    pg_local:leave(rabbit_channels, self()),
    rabbit_event:if_enabled(State, #ch.stats_timer,
                            fun() -> emit_stats(State) end),
    %% Per-queue/exchange stats live in the process dictionary.
    [delete_stats(Tag) || {Tag, _} <- get()],
    rabbit_core_metrics:channel_closed(self()),
    rabbit_event:notify(channel_closed, [{pid, self()},
                                         {user_who_performed_action, Username}]).

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% Pretty-print the process mailbox for sys/debug output.
format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).

-spec get_max_message_size() -> non_neg_integer().

%% Effective per-message size limit: the configured integer value,
%% clamped to the protocol maximum ?MAX_MSG_SIZE; the maximum itself
%% when unset or malformed.
get_max_message_size() ->
    case application:get_env(rabbit, max_message_size) of
        {ok, Bytes} when is_integer(Bytes) -> erlang:min(Bytes, ?MAX_MSG_SIZE);
        _                                  -> ?MAX_MSG_SIZE
    end.

%% Configured consumer ack timeout (milliseconds), or 'undefined' when
%% not configured.
get_consumer_timeout() ->
    case application:get_env(rabbit, consumer_timeout) of
        {ok, Millis} when is_integer(Millis) -> Millis;
        _                                    -> undefined
    end.
%%---------------------------------------------------------------------------

%% gen_server2 return helpers: run per-callback bookkeeping (flush
%% pending confirms/nacks, arm the stats timer) and request hibernation.
reply(Reply, State0) -> {reply, Reply, next_state(State0), hibernate}.

noreply(State0) -> {noreply, next_state(State0), hibernate}.

next_state(State) ->
    State1 = send_confirms_and_nacks(State),
    ensure_stats_timer(State1).

%% Like noreply/1 but defers confirm/nack sending: with anything
%% pending we ask for an immediate (0) timeout so they are flushed on
%% the next wakeup; otherwise we hibernate.
noreply_coalesce(State = #ch{confirmed = Confirmed, rejected = Rejected}) ->
    Timeout = case {Confirmed, Rejected} of
                  {[], []} -> hibernate;
                  _        -> 0
              end,
    {noreply, ensure_stats_timer(State), Timeout}.

ensure_stats_timer(State) ->
    rabbit_event:ensure_stats_timer(State, #ch.stats_timer, emit_stats).

%% nowait-pattern helpers: when the client asked for nowait (true), no
%% reply method is sent back.
return_ok(State, true, _Msg) -> {noreply, State};
return_ok(State, false, Msg) -> {reply, Msg, State}.

ok_msg(true, _Msg) -> undefined;
ok_msg(false, Msg) -> Msg.

%% Send a protocol method to the client unless the channel is already
%% closing, in which case it is silently dropped.
send(_Command, #ch{cfg = #conf{state = closing}}) ->
    ok;
send(Command, #ch{cfg = #conf{writer_pid = WriterPid}}) ->
    ok = rabbit_writer:send_command(WriterPid, Command).

%% Human-readable rendering of a soft (channel-level) error for logging.
format_soft_error(#amqp_error{name = N, explanation = E, method = M}) ->
    io_lib:format("operation ~s caused a channel exception ~s: ~ts", [M, N, E]).

%% Turn an #amqp_error{} raised during method processing into protocol
%% behaviour: a channel-level error keeps the channel's number after
%% mapping and results in channel.close being sent to the client; a
%% connection-level error maps to channel 0 and is handed to the
%% reader, which will close the whole connection.
handle_exception(Reason, State = #ch{cfg = #conf{protocol = Protocol,
                                                 channel = Channel,
                                                 writer_pid = WriterPid,
                                                 reader_pid = ReaderPid,
                                                 conn_pid = ConnPid,
                                                 conn_name = ConnName,
                                                 virtual_host = VHost,
                                                 user = User
                                                }}) ->
    %% something bad's happened: notify_queues may not be 'ok'
    {_Result, State1} = notify_queues(State),
    case rabbit_binary_generator:map_exception(Channel, Reason, Protocol) of
        {Channel, CloseMethod} ->
            rabbit_log_channel:error(
              "Channel error on connection ~p (~s, vhost: '~s',"
              " user: '~s'), channel ~p:~n~s~n",
              [ConnPid, ConnName, VHost, User#user.username,
               Channel, format_soft_error(Reason)]),
            ok = rabbit_writer:send_command(WriterPid, CloseMethod),
            {noreply, State1};
        {0, _} ->
            ReaderPid ! {channel_exit, Channel, Reason},
            {stop, normal, State1}
    end.

-spec precondition_failed(string()) -> no_return().

precondition_failed(Format) -> precondition_failed(Format, []).

-spec precondition_failed(string(), [any()]) -> no_return().

%% Raise a precondition_failed protocol error; never returns.
precondition_failed(Format, Params) ->
    rabbit_misc:protocol_error(precondition_failed, Format, Params).

%% Send queue.declare_ok (unless nowait) and remember this queue as the
%% channel's "most recently declared" one, which empty queue-name
%% shortcuts in later methods expand to.
return_queue_declare_ok(#resource{name = ActualName},
                        NoWait, MessageCount, ConsumerCount,
                        #ch{cfg = Cfg} = State) ->
    return_ok(State#ch{cfg = Cfg#conf{most_recently_declared_queue = ActualName}},
              NoWait, #'queue.declare_ok'{queue = ActualName,
                                          message_count = MessageCount,
                                          consumer_count = ConsumerCount}).

%% Check a resource permission, memoising positive results in the
%% process dictionary. The cache is capped at
%% ?MAX_PERMISSION_CACHE_SIZE entries, newest first.
check_resource_access(User, Resource, Perm, Context) ->
    V = {Resource, Context, Perm},

    Cache = case get(permission_cache) of
                undefined -> [];
                Other     -> Other
            end,
    case lists:member(V, Cache) of
        true  -> ok;
        false -> ok = rabbit_access_control:check_resource_access(
                        User, Resource, Perm, Context),
                 CacheTail = lists:sublist(Cache, ?MAX_PERMISSION_CACHE_SIZE-1),
                 put(permission_cache, [V | CacheTail])
    end.

%% Drop both permission caches, e.g. when cached credentials may have
%% expired (see the tick handler).
clear_permission_cache() -> erase(permission_cache),
                            erase(topic_permission_cache),
                            ok.

%% Thin wrappers binding a specific permission kind to
%% check_resource_access/4 / check_topic_authorisation/5.
check_configure_permitted(Resource, User, Context) ->
    check_resource_access(User, Resource, configure, Context).

check_write_permitted(Resource, User, Context) ->
    check_resource_access(User, Resource, write, Context).

check_read_permitted(Resource, User, Context) ->
    check_resource_access(User, Resource, read, Context).

check_write_permitted_on_topic(Resource, User, RoutingKey, AuthzContext) ->
    check_topic_authorisation(Resource, User, RoutingKey, AuthzContext, write).

check_read_permitted_on_topic(Resource, User, RoutingKey, AuthzContext) ->
    check_topic_authorisation(Resource, User, RoutingKey, AuthzContext, read).

%% Validate the user_id message property. Allowed when: absent, equal
%% to the authenticated username, the (trusted) dummy auth backend is
%% in use, or the user carries the 'impersonator' tag. Otherwise raise
%% precondition_failed.
check_user_id_header(#'P_basic'{user_id = undefined}, _) ->
    ok;
check_user_id_header(#'P_basic'{user_id = Username},
                     #ch{cfg = #conf{user = #user{username = Username}}}) ->
    ok;
check_user_id_header(
  #'P_basic'{}, #ch{cfg = #conf{user = #user{authz_backends =
                                                 [{rabbit_auth_backend_dummy, _}]}}}) ->
    ok;
check_user_id_header(#'P_basic'{user_id = Claimed},
                     #ch{cfg = #conf{user = #user{username = Actual,
                                                  tags = Tags}}}) ->
    case lists:member(impersonator, Tags) of
        true  -> ok;
        false -> precondition_failed(
                   "user_id property set to '~s' but authenticated user was "
                   "'~s'", [Claimed, Actual])
    end.

%% Reject messages whose expiration property cannot be parsed.
check_expiration_header(Props) ->
    case rabbit_basic:parse_expiration(Props) of
        {ok, _}    -> ok;
        {error, E} -> precondition_failed("invalid expiration '~s': ~p",
                                          [Props#'P_basic'.expiration, E])
    end.

%% Clients may not publish directly to internal exchanges.
check_internal_exchange(#exchange{name = Name, internal = true}) ->
    rabbit_misc:protocol_error(access_refused,
                               "cannot publish to internal ~s",
                               [rabbit_misc:rs(Name)]);
check_internal_exchange(_) ->
    ok.

%% Topic-exchange authorisation: only applies to exchanges of type
%% `topic' (last clause passes everything else). Positive results are
%% memoised in the process dictionary like check_resource_access/4.
check_topic_authorisation(#exchange{name = Name = #resource{virtual_host = VHost}, type = topic},
                          User = #user{username = Username},
                          RoutingKey, AuthzContext, Permission) ->
    Resource = Name#resource{kind = topic},
    VariableMap = build_topic_variable_map(AuthzContext, VHost, Username),
    Context = #{routing_key  => RoutingKey,
                variable_map => VariableMap},
    Cache = case get(topic_permission_cache) of
                undefined -> [];
                Other     -> Other
            end,
    case lists:member({Resource, Context, Permission}, Cache) of
        true  -> ok;
        false -> ok = rabbit_access_control:check_topic_access(
                        User, Resource, Permission, Context),
                 CacheTail = lists:sublist(Cache, ?MAX_PERMISSION_CACHE_SIZE-1),
                 put(topic_permission_cache, [{Resource, Context, Permission} | CacheTail])
    end;
check_topic_authorisation(_, _, _, _, _) ->
    ok.


%% Merge vhost/username into the caller-supplied variable map; the
%% explicit entries win on key collision (maps:merge right-bias).
build_topic_variable_map(AuthzContext, VHost, Username) when is_map(AuthzContext) ->
    maps:merge(AuthzContext, #{<<"vhost">> => VHost, <<"username">> => Username});
build_topic_variable_map(AuthzContext, VHost, Username) ->
    maps:merge(extract_variable_map_from_amqp_params(AuthzContext), #{<<"vhost">> => VHost, <<"username">> => Username}).

%% Use tuple representation of amqp_params to avoid a dependency on amqp_client.
%% Extracts variable map only from amqp_params_direct, not amqp_params_network.
%% amqp_params_direct records are usually used by plugins (e.g. MQTT, STOMP)
extract_variable_map_from_amqp_params({amqp_params, {amqp_params_direct, _, _, _, _,
                                                     {amqp_adapter_info, _,_,_,_,_,_,AdditionalInfo}, _}}) ->
    proplists:get_value(variable_map, AdditionalInfo, #{});
extract_variable_map_from_amqp_params({amqp_params_direct, _, _, _, _,
                                       {amqp_adapter_info, _,_,_,_,_,_,AdditionalInfo}, _}) ->
    proplists:get_value(variable_map, AdditionalInfo, #{});
extract_variable_map_from_amqp_params([Value]) ->
    extract_variable_map_from_amqp_params(Value);
extract_variable_map_from_amqp_params(_) ->
    #{}.

%% Enforce the per-message size limit. maybe_gc_large_msg/2 returns the
%% content size (and may GC after handling a large message). The error
%% wording distinguishes the protocol ceiling from an operator-
%% configured limit.
check_msg_size(Content, MaxMessageSize, GCThreshold) ->
    Size = rabbit_basic:maybe_gc_large_msg(Content, GCThreshold),
    case Size of
        S when S > MaxMessageSize ->
            ErrorMessage = case MaxMessageSize of
                               ?MAX_MSG_SIZE ->
                                   "message size ~B is larger than max size ~B";
                               _ ->
                                   "message size ~B is larger than configured max size ~B"
                           end,
            precondition_failed(ErrorMessage,
                                [Size, MaxMessageSize]);
        _ -> ok
    end.

%% Refuse new queue declarations once the vhost's queue limit is hit.
check_vhost_queue_limit(#resource{name = QueueName}, VHost) ->
    case rabbit_vhost_limit:is_over_queue_limit(VHost) of
        false         -> ok;
        {true, Limit} -> precondition_failed("cannot declare queue '~s': "
                                             "queue limit in vhost '~s' (~p) is reached",
                                             [QueueName, VHost, Limit])

    end.

%% Build a queue #resource{} from a binary name and vhost.
qbin_to_resource(QueueNameBin, VHostPath) ->
    name_to_resource(queue, QueueNameBin, VHostPath).

name_to_resource(Type, NameBin, VHostPath) ->
    rabbit_misc:r(VHostPath, Type, NameBin).

%% AMQP 0-9-1 shortcut: an empty queue name means "the queue most
%% recently declared on this channel"; error if none was declared.
expand_queue_name_shortcut(<<>>, #ch{cfg = #conf{most_recently_declared_queue = <<>>}}) ->
    rabbit_misc:protocol_error(not_found, "no previously declared queue", []);
expand_queue_name_shortcut(<<>>, #ch{cfg = #conf{most_recently_declared_queue = MRDQ}}) ->
    MRDQ;
expand_queue_name_shortcut(QueueNameBin, _) ->
    QueueNameBin.

%% Companion shortcut: with both queue name and routing key empty, the
%% routing key also defaults to the most recently declared queue name.
expand_routing_key_shortcut(<<>>, <<>>,
                            #ch{cfg = #conf{most_recently_declared_queue = <<>>}}) ->
    rabbit_misc:protocol_error(not_found, "no previously declared queue", []);
expand_routing_key_shortcut(<<>>, <<>>,
                            #ch{cfg = #conf{most_recently_declared_queue = MRDQ}}) ->
    MRDQ;
expand_routing_key_shortcut(_QueueNameBin, RoutingKey, _State) ->
    RoutingKey.

%% Apply the empty-name shortcuts above to the queue-referencing
%% methods before dispatch; all other methods pass through unchanged.
expand_shortcuts(#'basic.get'    {queue = Q} = M, State) ->
    M#'basic.get'    {queue = expand_queue_name_shortcut(Q, State)};
expand_shortcuts(#'basic.consume'{queue = Q} = M, State) ->
    M#'basic.consume'{queue = expand_queue_name_shortcut(Q, State)};
expand_shortcuts(#'queue.delete' {queue = Q} = M, State) ->
    M#'queue.delete' {queue = expand_queue_name_shortcut(Q, State)};
expand_shortcuts(#'queue.purge'  {queue = Q} = M, State) ->
    M#'queue.purge'  {queue = expand_queue_name_shortcut(Q, State)};
expand_shortcuts(#'queue.bind'   {queue = Q, routing_key = K} = M, State) ->
    M#'queue.bind'   {queue       = expand_queue_name_shortcut(Q, State),
                      routing_key = expand_routing_key_shortcut(Q, K, State)};
expand_shortcuts(#'queue.unbind' {queue = Q, routing_key = K} = M, State) ->
    M#'queue.unbind' {queue       = expand_queue_name_shortcut(Q, State),
                      routing_key = expand_routing_key_shortcut(Q, K, State)};
expand_shortcuts(M, _State) ->
    M.

%% The default exchange (empty name) may not be operated on directly.
check_not_default_exchange(#resource{kind = exchange, name = <<"">>}) ->
    rabbit_misc:protocol_error(
      access_refused, "operation not permitted on the default exchange", []);
check_not_default_exchange(_) ->
    ok.

%% System exchanges ("amq." prefix) may not be deleted.
check_exchange_deletion(XName = #resource{name = <<"amq.", _/binary>>,
                                          kind = exchange}) ->
    rabbit_misc:protocol_error(
      access_refused, "deletion of system ~s not allowed",
      [rabbit_misc:rs(XName)]);
check_exchange_deletion(_) ->
    ok.

%% check that an exchange/queue name does not contain the reserved
%% "amq." prefix.
%%
%% As per the AMQP 0-9-1 spec, the exclusion of "amq." prefixed names
%% only applies on actual creation, and not in the cases where the
%% entity already exists or passive=true.
%%
%% NB: We deliberately do not enforce the other constraints on names
%% required by the spec.
check_name(Kind, NameBin = <<"amq.", _/binary>>) ->
    rabbit_misc:protocol_error(
      access_refused,
      "~s name '~s' contains reserved prefix 'amq.*'",[Kind, NameBin]);
check_name(_Kind, NameBin) ->
    NameBin.

%% Remove all CR and LF characters from a binary name.
strip_cr_lf(NameBin) ->
    binary:replace(NameBin, [<<"\n">>, <<"\r">>], <<"">>, [global]).


%% Direct-reply-to: when a published message's reply_to is the
%% pseudo-queue "amq.rabbitmq.reply-to", rewrite it to the suffixed
%% form that encodes this channel's reply consumer, so the eventual
%% reply can be routed straight back to us. Requires a registered
%% reply consumer; otherwise precondition_failed.
maybe_set_fast_reply_to(
  C = #content{properties = P = #'P_basic'{reply_to =
                                               <<"amq.rabbitmq.reply-to">>}},
  #ch{reply_consumer = ReplyConsumer}) ->
    case ReplyConsumer of
        none         -> rabbit_misc:protocol_error(
                          precondition_failed,
                          "fast reply consumer does not exist", []);
        {_, Suf, _K} -> Rep = <<"amq.rabbitmq.reply-to.", Suf/binary>>,
                        rabbit_binary_generator:clear_encoded_content(
                          C#content{properties = P#'P_basic'{reply_to = Rep}})
    end;
maybe_set_fast_reply_to(C, _State) ->
    C.

%% Accumulate rejected publishes; any reject inside an open transaction
%% marks the transaction as failed.
record_rejects([], State) ->
    State;
record_rejects(MXs, State = #ch{rejected = R, tx = Tx}) ->
    Tx1 = case Tx of
              none -> none;
              _    -> failed
          end,
    State#ch{rejected = [MXs | R], tx = Tx1}.

%% Accumulate confirms to be sent on the next noreply/flush cycle.
record_confirms([], State) ->
    State;
record_confirms(MXs, State = #ch{confirmed = C}) ->
    State#ch{confirmed = [MXs | C]}.

%% Uncurry the {Method, Content} pair produced by the interceptor.
handle_method({Method, Content}, State) ->
    handle_method(Method, Content, State).

handle_method(#'channel.open'{}, _,
              State = #ch{cfg = #conf{state = starting} = Cfg}) ->
    %% Don't leave "starting" as the state for 5s. TODO is this TRTTD?
+ State1 = State#ch{cfg = Cfg#conf{state = running}}, + rabbit_event:if_enabled(State1, #ch.stats_timer, + fun() -> emit_stats(State1) end), + {reply, #'channel.open_ok'{}, State1}; + +handle_method(#'channel.open'{}, _, _State) -> + rabbit_misc:protocol_error( + channel_error, "second 'channel.open' seen", []); + +handle_method(_Method, _, #ch{cfg = #conf{state = starting}}) -> + rabbit_misc:protocol_error(channel_error, "expected 'channel.open'", []); + +handle_method(#'channel.close_ok'{}, _, #ch{cfg = #conf{state = closing}}) -> + stop; + +handle_method(#'channel.close'{}, _, + State = #ch{cfg = #conf{state = closing, + writer_pid = WriterPid}}) -> + ok = rabbit_writer:send_command(WriterPid, #'channel.close_ok'{}), + {noreply, State}; + +handle_method(_Method, _, State = #ch{cfg = #conf{state = closing}}) -> + {noreply, State}; + +handle_method(#'channel.close'{}, _, + State = #ch{cfg = #conf{reader_pid = ReaderPid}}) -> + {_Result, State1} = notify_queues(State), + %% We issue the channel.close_ok response after a handshake with + %% the reader, the other half of which is ready_for_close. That + %% way the reader forgets about the channel before we send the + %% response (and this channel process terminates). If we didn't do + %% that, a channel.open for the same channel number, which a + %% client is entitled to send as soon as it has received the + %% close_ok, might be received by the reader before it has seen + %% the termination and hence be sent to the old, now dead/dying + %% channel process, instead of a new process, and thus lost. + ReaderPid ! {channel_closing, self()}, + {noreply, State1}; + +%% Even though the spec prohibits the client from sending commands +%% while waiting for the reply to a synchronous command, we generally +%% do allow this...except in the case of a pending tx.commit, where +%% it could wreak havoc. 
+handle_method(_Method, _, #ch{tx = Tx}) + when Tx =:= committing orelse Tx =:= failed -> + rabbit_misc:protocol_error( + channel_error, "unexpected command while processing 'tx.commit'", []); + +handle_method(#'access.request'{},_, State) -> + {reply, #'access.request_ok'{ticket = 1}, State}; + +handle_method(#'basic.publish'{immediate = true}, _Content, _State) -> + rabbit_misc:protocol_error(not_implemented, "immediate=true", []); + +handle_method(#'basic.publish'{exchange = ExchangeNameBin, + routing_key = RoutingKey, + mandatory = Mandatory}, + Content, State = #ch{cfg = #conf{channel = ChannelNum, + conn_name = ConnName, + virtual_host = VHostPath, + user = #user{username = Username} = User, + trace_state = TraceState, + max_message_size = MaxMessageSize, + authz_context = AuthzContext, + writer_gc_threshold = GCThreshold + }, + tx = Tx, + confirm_enabled = ConfirmEnabled, + delivery_flow = Flow + }) -> + check_msg_size(Content, MaxMessageSize, GCThreshold), + ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), + check_write_permitted(ExchangeName, User, AuthzContext), + Exchange = rabbit_exchange:lookup_or_die(ExchangeName), + check_internal_exchange(Exchange), + check_write_permitted_on_topic(Exchange, User, RoutingKey, AuthzContext), + %% We decode the content's properties here because we're almost + %% certain to want to look at delivery-mode and priority. 
+ DecodedContent = #content {properties = Props} = + maybe_set_fast_reply_to( + rabbit_binary_parser:ensure_content_decoded(Content), State), + check_user_id_header(Props, State), + check_expiration_header(Props), + DoConfirm = Tx =/= none orelse ConfirmEnabled, + {MsgSeqNo, State1} = + case DoConfirm orelse Mandatory of + false -> {undefined, State}; + true -> SeqNo = State#ch.publish_seqno, + {SeqNo, State#ch{publish_seqno = SeqNo + 1}} + end, + case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of + {ok, Message} -> + Delivery = rabbit_basic:delivery( + Mandatory, DoConfirm, Message, MsgSeqNo), + QNames = rabbit_exchange:route(Exchange, Delivery), + rabbit_trace:tap_in(Message, QNames, ConnName, ChannelNum, + Username, TraceState), + DQ = {Delivery#delivery{flow = Flow}, QNames}, + {noreply, case Tx of + none -> deliver_to_queues(DQ, State1); + {Msgs, Acks} -> Msgs1 = ?QUEUE:in(DQ, Msgs), + State1#ch{tx = {Msgs1, Acks}} + end}; + {error, Reason} -> + precondition_failed("invalid message: ~p", [Reason]) + end; + +handle_method(#'basic.nack'{delivery_tag = DeliveryTag, + multiple = Multiple, + requeue = Requeue}, _, State) -> + reject(DeliveryTag, Requeue, Multiple, State); + +handle_method(#'basic.ack'{delivery_tag = DeliveryTag, + multiple = Multiple}, + _, State = #ch{unacked_message_q = UAMQ, tx = Tx}) -> + {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple), + State1 = State#ch{unacked_message_q = Remaining}, + {noreply, case Tx of + none -> {State2, Actions} = ack(Acked, State1), + handle_queue_actions(Actions, State2); + {Msgs, Acks} -> Acks1 = ack_cons(ack, Acked, Acks), + State1#ch{tx = {Msgs, Acks1}} + end}; + +handle_method(#'basic.get'{queue = QueueNameBin, no_ack = NoAck}, + _, State = #ch{cfg = #conf{writer_pid = WriterPid, + conn_pid = ConnPid, + user = User, + virtual_host = VHostPath, + authz_context = AuthzContext + }, + limiter = Limiter, + next_tag = DeliveryTag, + queue_states = QueueStates0}) -> + QueueName = 
qbin_to_resource(QueueNameBin, VHostPath), + check_read_permitted(QueueName, User, AuthzContext), + case rabbit_amqqueue:with_exclusive_access_or_die( + QueueName, ConnPid, + %% Use the delivery tag as consumer tag for quorum queues + fun (Q) -> + rabbit_queue_type:dequeue( + Q, NoAck, rabbit_limiter:pid(Limiter), + DeliveryTag, QueueStates0) + end) of + {ok, MessageCount, Msg, QueueStates} -> + handle_basic_get(WriterPid, DeliveryTag, NoAck, MessageCount, Msg, + State#ch{queue_states = QueueStates}); + {empty, QueueStates} -> + ?INCR_STATS(queue_stats, QueueName, 1, get_empty, State), + {reply, #'basic.get_empty'{}, State#ch{queue_states = QueueStates}}; + empty -> + ?INCR_STATS(queue_stats, QueueName, 1, get_empty, State), + {reply, #'basic.get_empty'{}, State}; + {error, {unsupported, single_active_consumer}} -> + rabbit_misc:protocol_error( + resource_locked, + "cannot obtain access to locked ~s. basic.get operations " + "are not supported by quorum queues with single active consumer", + [rabbit_misc:rs(QueueName)]); + {error, Reason} -> + %% TODO add queue type to error message + rabbit_misc:protocol_error(internal_error, + "Cannot get a message from queue '~s': ~p", + [rabbit_misc:rs(QueueName), Reason]); + {protocol_error, Type, Reason, ReasonArgs} -> + rabbit_misc:protocol_error(Type, Reason, ReasonArgs) + end; + +handle_method(#'basic.consume'{queue = <<"amq.rabbitmq.reply-to">>, + consumer_tag = CTag0, + no_ack = NoAck, + nowait = NoWait}, + _, State = #ch{reply_consumer = ReplyConsumer, + consumer_mapping = ConsumerMapping}) -> + case maps:find(CTag0, ConsumerMapping) of + error -> + case {ReplyConsumer, NoAck} of + {none, true} -> + CTag = case CTag0 of + <<>> -> rabbit_guid:binary( + rabbit_guid:gen_secure(), "amq.ctag"); + Other -> Other + end, + %% Precalculate both suffix and key; base64 encoding is + %% expensive + Key = base64:encode(rabbit_guid:gen_secure()), + PidEnc = base64:encode(term_to_binary(self())), + Suffix = <<PidEnc/binary, ".", 
Key/binary>>, + Consumer = {CTag, Suffix, binary_to_list(Key)}, + State1 = State#ch{reply_consumer = Consumer}, + case NoWait of + true -> {noreply, State1}; + false -> Rep = #'basic.consume_ok'{consumer_tag = CTag}, + {reply, Rep, State1} + end; + {_, false} -> + rabbit_misc:protocol_error( + precondition_failed, + "reply consumer cannot acknowledge", []); + _ -> + rabbit_misc:protocol_error( + precondition_failed, "reply consumer already set", []) + end; + {ok, _} -> + %% Attempted reuse of consumer tag. + rabbit_misc:protocol_error( + not_allowed, "attempt to reuse consumer tag '~s'", [CTag0]) + end; + +handle_method(#'basic.cancel'{consumer_tag = ConsumerTag, nowait = NoWait}, + _, State = #ch{reply_consumer = {ConsumerTag, _, _}}) -> + State1 = State#ch{reply_consumer = none}, + case NoWait of + true -> {noreply, State1}; + false -> Rep = #'basic.cancel_ok'{consumer_tag = ConsumerTag}, + {reply, Rep, State1} + end; + +handle_method(#'basic.consume'{queue = QueueNameBin, + consumer_tag = ConsumerTag, + no_local = _, % FIXME: implement + no_ack = NoAck, + exclusive = ExclusiveConsume, + nowait = NoWait, + arguments = Args}, + _, State = #ch{cfg = #conf{consumer_prefetch = ConsumerPrefetch, + user = User, + virtual_host = VHostPath, + authz_context = AuthzContext}, + consumer_mapping = ConsumerMapping + }) -> + case maps:find(ConsumerTag, ConsumerMapping) of + error -> + QueueName = qbin_to_resource(QueueNameBin, VHostPath), + check_read_permitted(QueueName, User, AuthzContext), + ActualConsumerTag = + case ConsumerTag of + <<>> -> rabbit_guid:binary(rabbit_guid:gen_secure(), + "amq.ctag"); + Other -> Other + end, + case basic_consume( + QueueName, NoAck, ConsumerPrefetch, ActualConsumerTag, + ExclusiveConsume, Args, NoWait, State) of + {ok, State1} -> + {noreply, State1}; + {error, exclusive_consume_unavailable} -> + rabbit_misc:protocol_error( + access_refused, "~s in exclusive use", + [rabbit_misc:rs(QueueName)]); + {error, 
global_qos_not_supported_for_queue_type} -> + rabbit_misc:protocol_error( + not_implemented, "~s does not support global qos", + [rabbit_misc:rs(QueueName)]) + end; + {ok, _} -> + %% Attempted reuse of consumer tag. + rabbit_misc:protocol_error( + not_allowed, "attempt to reuse consumer tag '~s'", [ConsumerTag]) + end; + +handle_method(#'basic.cancel'{consumer_tag = ConsumerTag, nowait = NoWait}, + _, State = #ch{cfg = #conf{user = #user{username = Username}}, + consumer_mapping = ConsumerMapping, + queue_consumers = QCons, + queue_states = QueueStates0}) -> + OkMsg = #'basic.cancel_ok'{consumer_tag = ConsumerTag}, + case maps:find(ConsumerTag, ConsumerMapping) of + error -> + %% Spec requires we ignore this situation. + return_ok(State, NoWait, OkMsg); + {ok, {Q, _CParams}} when ?is_amqqueue(Q) -> + QName = amqqueue:get_name(Q), + + ConsumerMapping1 = maps:remove(ConsumerTag, ConsumerMapping), + QCons1 = + case maps:find(QName, QCons) of + error -> QCons; + {ok, CTags} -> CTags1 = gb_sets:delete(ConsumerTag, CTags), + case gb_sets:is_empty(CTags1) of + true -> maps:remove(QName, QCons); + false -> maps:put(QName, CTags1, QCons) + end + end, + NewState = State#ch{consumer_mapping = ConsumerMapping1, + queue_consumers = QCons1}, + %% In order to ensure that no more messages are sent to + %% the consumer after the cancel_ok has been sent, we get + %% the queue process to send the cancel_ok on our + %% behalf. If we were sending the cancel_ok ourselves it + %% might overtake a message sent previously by the queue. + case rabbit_misc:with_exit_handler( + fun () -> {error, not_found} end, + fun () -> + rabbit_queue_type:cancel( + Q, ConsumerTag, ok_msg(NoWait, OkMsg), + Username, QueueStates0) + end) of + {ok, QueueStates} -> + {noreply, NewState#ch{queue_states = QueueStates}}; + {error, not_found} -> + %% Spec requires we ignore this situation. 
+ return_ok(NewState, NoWait, OkMsg) + end + end; + +handle_method(#'basic.qos'{prefetch_size = Size}, _, _State) when Size /= 0 -> + rabbit_misc:protocol_error(not_implemented, + "prefetch_size!=0 (~w)", [Size]); + +handle_method(#'basic.qos'{global = false, + prefetch_count = PrefetchCount}, + _, State = #ch{cfg = Cfg, + limiter = Limiter}) -> + %% Ensures that if default was set, it's overridden + Limiter1 = rabbit_limiter:unlimit_prefetch(Limiter), + {reply, #'basic.qos_ok'{}, State#ch{cfg = Cfg#conf{consumer_prefetch = PrefetchCount}, + limiter = Limiter1}}; + +handle_method(#'basic.qos'{global = true, + prefetch_count = 0}, + _, State = #ch{limiter = Limiter}) -> + Limiter1 = rabbit_limiter:unlimit_prefetch(Limiter), + {reply, #'basic.qos_ok'{}, State#ch{limiter = Limiter1}}; + +handle_method(#'basic.qos'{global = true, + prefetch_count = PrefetchCount}, + _, State = #ch{limiter = Limiter, unacked_message_q = UAMQ}) -> + %% TODO ?QUEUE:len(UAMQ) is not strictly right since that counts + %% unacked messages from basic.get too. Pretty obscure though. 
+ Limiter1 = rabbit_limiter:limit_prefetch(Limiter, + PrefetchCount, ?QUEUE:len(UAMQ)), + case ((not rabbit_limiter:is_active(Limiter)) andalso + rabbit_limiter:is_active(Limiter1)) of + true -> rabbit_amqqueue:activate_limit_all( + classic_consumer_queue_pids(State#ch.consumer_mapping), self()); + false -> ok + end, + {reply, #'basic.qos_ok'{}, State#ch{limiter = Limiter1}}; + +handle_method(#'basic.recover_async'{requeue = true}, + _, State = #ch{unacked_message_q = UAMQ, + limiter = Limiter, + queue_states = QueueStates0}) -> + OkFun = fun () -> ok end, + UAMQL = ?QUEUE:to_list(UAMQ), + {QueueStates, Actions} = + foreach_per_queue( + fun ({QPid, CTag}, MsgIds, {Acc0, Actions0}) -> + rabbit_misc:with_exit_handler( + OkFun, + fun () -> + {ok, Acc, Act} = rabbit_amqqueue:requeue(QPid, {CTag, MsgIds}, Acc0), + {Acc, Act ++ Actions0} + end) + end, lists:reverse(UAMQL), {QueueStates0, []}), + ok = notify_limiter(Limiter, UAMQL), + State1 = handle_queue_actions(Actions, State#ch{unacked_message_q = ?QUEUE:new(), + queue_states = QueueStates}), + %% No answer required - basic.recover is the newer, synchronous + %% variant of this method + {noreply, State1}; + +handle_method(#'basic.recover_async'{requeue = false}, _, _State) -> + rabbit_misc:protocol_error(not_implemented, "requeue=false", []); + +handle_method(#'basic.recover'{requeue = Requeue}, Content, State) -> + {noreply, State1} = handle_method(#'basic.recover_async'{requeue = Requeue}, + Content, State), + {reply, #'basic.recover_ok'{}, State1}; + +handle_method(#'basic.reject'{delivery_tag = DeliveryTag, requeue = Requeue}, + _, State) -> + reject(DeliveryTag, Requeue, false, State); + +handle_method(#'exchange.declare'{nowait = NoWait} = Method, + _, State = #ch{cfg = #conf{virtual_host = VHostPath, + user = User, + queue_collector_pid = CollectorPid, + conn_pid = ConnPid, + authz_context = AuthzContext}}) -> + handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User), + return_ok(State, 
NoWait, #'exchange.declare_ok'{}); + +handle_method(#'exchange.delete'{nowait = NoWait} = Method, + _, State = #ch{cfg = #conf{conn_pid = ConnPid, + authz_context = AuthzContext, + virtual_host = VHostPath, + queue_collector_pid = CollectorPid, + user = User}}) -> + handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User), + return_ok(State, NoWait, #'exchange.delete_ok'{}); + +handle_method(#'exchange.bind'{nowait = NoWait} = Method, + _, State = #ch{cfg = #conf{virtual_host = VHostPath, + conn_pid = ConnPid, + authz_context = AuthzContext, + queue_collector_pid = CollectorPid, + user = User}}) -> + handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User), + return_ok(State, NoWait, #'exchange.bind_ok'{}); + +handle_method(#'exchange.unbind'{nowait = NoWait} = Method, + _, State = #ch{cfg = #conf{virtual_host = VHostPath, + conn_pid = ConnPid, + authz_context = AuthzContext, + queue_collector_pid = CollectorPid, + user = User}}) -> + handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User), + return_ok(State, NoWait, #'exchange.unbind_ok'{}); + +handle_method(#'queue.declare'{nowait = NoWait} = Method, + _, State = #ch{cfg = #conf{virtual_host = VHostPath, + conn_pid = ConnPid, + authz_context = AuthzContext, + queue_collector_pid = CollectorPid, + user = User}}) -> + {ok, QueueName, MessageCount, ConsumerCount} = + handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User), + return_queue_declare_ok(QueueName, NoWait, MessageCount, + ConsumerCount, State); + +handle_method(#'queue.delete'{nowait = NoWait} = Method, _, + State = #ch{cfg = #conf{conn_pid = ConnPid, + authz_context = AuthzContext, + virtual_host = VHostPath, + queue_collector_pid = CollectorPid, + user = User}}) -> + {ok, PurgedMessageCount} = + handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User), + return_ok(State, NoWait, + #'queue.delete_ok'{message_count = PurgedMessageCount}); + 
+handle_method(#'queue.bind'{nowait = NoWait} = Method, _, + State = #ch{cfg = #conf{conn_pid = ConnPid, + authz_context = AuthzContext, + user = User, + queue_collector_pid = CollectorPid, + virtual_host = VHostPath}}) -> + handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User), + return_ok(State, NoWait, #'queue.bind_ok'{}); + +handle_method(#'queue.unbind'{} = Method, _, + State = #ch{cfg = #conf{conn_pid = ConnPid, + authz_context = AuthzContext, + user = User, + queue_collector_pid = CollectorPid, + virtual_host = VHostPath}}) -> + handle_method(Method, ConnPid, AuthzContext, CollectorPid, VHostPath, User), + return_ok(State, false, #'queue.unbind_ok'{}); + +handle_method(#'queue.purge'{nowait = NoWait} = Method, + _, State = #ch{cfg = #conf{conn_pid = ConnPid, + authz_context = AuthzContext, + user = User, + queue_collector_pid = CollectorPid, + virtual_host = VHostPath}}) -> + case handle_method(Method, ConnPid, AuthzContext, CollectorPid, + VHostPath, User) of + {ok, PurgedMessageCount} -> + return_ok(State, NoWait, + #'queue.purge_ok'{message_count = PurgedMessageCount}) + end; + +handle_method(#'tx.select'{}, _, #ch{confirm_enabled = true}) -> + precondition_failed("cannot switch from confirm to tx mode"); + +handle_method(#'tx.select'{}, _, State = #ch{tx = none}) -> + {reply, #'tx.select_ok'{}, State#ch{tx = new_tx()}}; + +handle_method(#'tx.select'{}, _, State) -> + {reply, #'tx.select_ok'{}, State}; + +handle_method(#'tx.commit'{}, _, #ch{tx = none}) -> + precondition_failed("channel is not transactional"); + +handle_method(#'tx.commit'{}, _, State = #ch{tx = {Msgs, Acks}, + limiter = Limiter}) -> + State1 = queue_fold(fun deliver_to_queues/2, State, Msgs), + Rev = fun (X) -> lists:reverse(lists:sort(X)) end, + {State2, Actions2} = + lists:foldl(fun ({ack, A}, {Acc, Actions}) -> + {Acc0, Actions0} = ack(Rev(A), Acc), + {Acc0, Actions ++ Actions0}; + ({Requeue, A}, {Acc, Actions}) -> + {Acc0, Actions0} = internal_reject(Requeue, 
Rev(A), Limiter, Acc), + {Acc0, Actions ++ Actions0} + end, {State1, []}, lists:reverse(Acks)), + State3 = handle_queue_actions(Actions2, State2), + {noreply, maybe_complete_tx(State3#ch{tx = committing})}; + +handle_method(#'tx.rollback'{}, _, #ch{tx = none}) -> + precondition_failed("channel is not transactional"); + +handle_method(#'tx.rollback'{}, _, State = #ch{unacked_message_q = UAMQ, + tx = {_Msgs, Acks}}) -> + AcksL = lists:append(lists:reverse([lists:reverse(L) || {_, L} <- Acks])), + UAMQ1 = ?QUEUE:from_list(lists:usort(AcksL ++ ?QUEUE:to_list(UAMQ))), + {reply, #'tx.rollback_ok'{}, State#ch{unacked_message_q = UAMQ1, + tx = new_tx()}}; + +handle_method(#'confirm.select'{}, _, #ch{tx = {_, _}}) -> + precondition_failed("cannot switch from tx to confirm mode"); + +handle_method(#'confirm.select'{nowait = NoWait}, _, State) -> + return_ok(State#ch{confirm_enabled = true}, + NoWait, #'confirm.select_ok'{}); + +handle_method(#'channel.flow'{active = true}, _, State) -> + {reply, #'channel.flow_ok'{active = true}, State}; + +handle_method(#'channel.flow'{active = false}, _, _State) -> + rabbit_misc:protocol_error(not_implemented, "active=false", []); + +handle_method(#'basic.credit'{consumer_tag = CTag, + credit = Credit, + drain = Drain}, + _, State = #ch{consumer_mapping = Consumers, + queue_states = QStates0}) -> + case maps:find(CTag, Consumers) of + {ok, {Q, _CParams}} -> + {ok, QStates, Actions} = rabbit_queue_type:credit(Q, CTag, Credit, Drain, QStates0), + {noreply, handle_queue_actions(Actions, State#ch{queue_states = QStates})}; + error -> precondition_failed( + "unknown consumer tag '~s'", [CTag]) + end; + +handle_method(_MethodRecord, _Content, _State) -> + rabbit_misc:protocol_error( + command_invalid, "unimplemented method", []). + +%%---------------------------------------------------------------------------- + +%% We get the queue process to send the consume_ok on our behalf. 
%% ... is for symmetry with basic.cancel - see the comment in that method
%% for why.
%%
%% Registers a consumer on QueueName: asks the queue (via the queue type
%% implementation) to start delivering, then records the consumer in the
%% channel's consumer_mapping together with the parameters needed to
%% re-establish it later (see handle_consuming_queue_down_or_eol).
%% Returns {ok, NewState} | {error, exclusive_consume_unavailable} |
%% {error, global_qos_not_supported_for_queue_type}; protocol errors from
%% the queue type are raised immediately.
basic_consume(QueueName, NoAck, ConsumerPrefetch, ActualConsumerTag,
              ExclusiveConsume, Args, NoWait,
              State = #ch{cfg = #conf{conn_pid = ConnPid,
                                      user = #user{username = Username}},
                          limiter = Limiter,
                          consumer_mapping = ConsumerMapping,
                          queue_states = QueueStates0}) ->
    case rabbit_amqqueue:with_exclusive_access_or_die(
           QueueName, ConnPid,
           fun (Q) ->
                   %% Return Q alongside the result so we can record it in
                   %% the consumer mapping below.
                   {rabbit_amqqueue:basic_consume(
                      Q, NoAck, self(),
                      rabbit_limiter:pid(Limiter),
                      rabbit_limiter:is_active(Limiter),
                      ConsumerPrefetch, ActualConsumerTag,
                      ExclusiveConsume, Args,
                      ok_msg(NoWait, #'basic.consume_ok'{
                                        consumer_tag = ActualConsumerTag}),
                      Username, QueueStates0),
                    Q}
           end) of
        {{ok, QueueStates, Actions}, Q} when ?is_amqqueue(Q) ->
            %% Remember the consume parameters so the consumer can be
            %% recovered after an HA failover.
            CM1 = maps:put(
                    ActualConsumerTag,
                    {Q, {NoAck, ConsumerPrefetch, ExclusiveConsume, Args}},
                    ConsumerMapping),
            State1 = State#ch{consumer_mapping = CM1,
                              queue_states = QueueStates},
            State2 = handle_queue_actions(Actions, State1),
            {ok, case NoWait of
                     true  -> consumer_monitor(ActualConsumerTag, State2);
                     false -> State2
                 end};
        {{error, exclusive_consume_unavailable} = E, _Q} ->
            E;
        {{error, global_qos_not_supported_for_queue_type} = E, _Q} ->
            E;
        {{protocol_error, Type, Reason, ReasonArgs}, _Q} ->
            rabbit_misc:protocol_error(Type, Reason, ReasonArgs)
    end.

%% With nowait (no stat needed) pretend the queue is empty and unused;
%% otherwise ask the queue for its message/consumer counts.
maybe_stat(false, Q) -> rabbit_amqqueue:stat(Q);
maybe_stat(true, _Q) -> {ok, 0, 0}.

%% Records ConsumerTag under its queue's name in queue_consumers so that a
%% queue-down event can find all consumers attached to that queue.
consumer_monitor(ConsumerTag,
                 State = #ch{consumer_mapping = ConsumerMapping,
                             queue_consumers = QCons}) ->
    {Q, _} = maps:get(ConsumerTag, ConsumerMapping),
    QRef = amqqueue:get_name(Q),
    CTags1 = case maps:find(QRef, QCons) of
                 {ok, CTags} -> gb_sets:insert(ConsumerTag, CTags);
                 error -> gb_sets:singleton(ConsumerTag)
             end,
    QCons1 = maps:put(QRef, CTags1, QCons),
    State#ch{queue_consumers = QCons1}.
%% Called when a queue this channel consumes from goes down or reaches
%% end-of-life: every consumer attached to QName is either cancelled or
%% re-attached (HA failover recovery), depending on its consume arguments.
handle_consuming_queue_down_or_eol(QName,
                                   State = #ch{queue_consumers = QCons}) ->
    ConsumerTags = case maps:find(QName, QCons) of
                       error -> gb_sets:new();
                       {ok, CTags} -> CTags
                   end,
    gb_sets:fold(
      fun (CTag, StateN = #ch{consumer_mapping = CMap}) ->
              case queue_down_consumer_action(CTag, CMap) of
                  remove ->
                      cancel_consumer(CTag, QName, StateN);
                  {recover, {NoAck, ConsumerPrefetch, Exclusive, Args}} ->
                      %% Best-effort re-consume; any failure falls back to
                      %% cancelling the consumer. See note [0] below.
                      case catch basic_consume(
                                   QName, NoAck, ConsumerPrefetch, CTag,
                                   Exclusive, Args, true, StateN) of
                          {ok, StateN1} ->
                              StateN1;
                          _Err ->
                              cancel_consumer(CTag, QName, StateN)
                      end
              end
      end, State#ch{queue_consumers = maps:remove(QName, QCons)}, ConsumerTags).

%% [0] There is a slight danger here that if a queue is deleted and
%% then recreated again the reconsume will succeed even though it was
%% not an HA failover. But the likelihood is not great and most users
%% are unlikely to care.

%% Drops CTag from the consumer mapping, emits a consumer_deleted event
%% and, when the client advertised the consumer_cancel_notify capability,
%% sends it an asynchronous basic.cancel notification.
cancel_consumer(CTag, QName,
                State = #ch{cfg = #conf{capabilities = Capabilities},
                            consumer_mapping = CMap}) ->
    case rabbit_misc:table_lookup(
           Capabilities, <<"consumer_cancel_notify">>) of
        {bool, true} -> ok = send(#'basic.cancel'{consumer_tag = CTag,
                                                  nowait = true}, State);
        _            -> ok
    end,
    rabbit_event:notify(consumer_deleted, [{consumer_tag, CTag},
                                           {channel, self()},
                                           {queue, QName}]),
    State#ch{consumer_mapping = maps:remove(CTag, CMap)}.

%% Decide what to do with a consumer whose queue went down: remove it if
%% it asked for x-cancel-on-ha-failover, otherwise recover it with the
%% same consume parameters it was originally created with.
queue_down_consumer_action(CTag, CMap) ->
    {_, {_, _, _, Args} = ConsumeSpec} = maps:get(CTag, CMap),
    case rabbit_misc:table_lookup(Args, <<"x-cancel-on-ha-failover">>) of
        {bool, true} -> remove;
        _            -> {recover, ConsumeSpec}
    end.
%% Common implementation for queue.(un)bind and exchange.(un)bind:
%% validates names and permissions (write on destination, read on source,
%% topic authorisation when the source exchange exists), then applies Fun
%% (rabbit_binding:add/3 or remove/3) and maps its error returns onto
%% protocol errors. The inner fun is the binding module's per-destination
%% check: exclusive access for queues, nothing for exchanges.
binding_action(Fun, SourceNameBin0, DestinationType, DestinationNameBin0,
               RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext,
               #user{username = Username} = User) ->
    ExchangeNameBin = strip_cr_lf(SourceNameBin0),
    DestinationNameBin = strip_cr_lf(DestinationNameBin0),
    DestinationName = name_to_resource(DestinationType, DestinationNameBin, VHostPath),
    check_write_permitted(DestinationName, User, AuthzContext),
    ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
    %% Bindings involving the default exchange are not allowed.
    [check_not_default_exchange(N) || N <- [DestinationName, ExchangeName]],
    check_read_permitted(ExchangeName, User, AuthzContext),
    case rabbit_exchange:lookup(ExchangeName) of
        {error, not_found} ->
            %% Missing source surfaces later as resources_missing.
            ok;
        {ok, Exchange} ->
            check_read_permitted_on_topic(Exchange, User, RoutingKey, AuthzContext)
    end,
    case Fun(#binding{source = ExchangeName,
                      destination = DestinationName,
                      key = RoutingKey,
                      args = Arguments},
             fun (_X, Q) when ?is_amqqueue(Q) ->
                     try rabbit_amqqueue:check_exclusive_access(Q, ConnPid)
                     catch exit:Reason -> {error, Reason}
                     end;
                 (_X, #exchange{}) ->
                     ok
             end,
             Username) of
        {error, {resources_missing, [{not_found, Name} | _]}} ->
            rabbit_amqqueue:not_found(Name);
        {error, {resources_missing, [{absent, Q, Reason} | _]}} ->
            rabbit_amqqueue:absent(Q, Reason);
        {error, binding_not_found} ->
            rabbit_misc:protocol_error(
              not_found, "no binding ~s between ~s and ~s",
              [RoutingKey, rabbit_misc:rs(ExchangeName),
               rabbit_misc:rs(DestinationName)]);
        {error, {binding_invalid, Fmt, Args}} ->
            rabbit_misc:protocol_error(precondition_failed, Fmt, Args);
        {error, #amqp_error{} = Error} ->
            rabbit_misc:protocol_error(Error);
        ok ->
            ok
    end.
%% Sends a basic.return for an unroutable message back to the publisher,
%% translating Reason through the connection's protocol exception table.
basic_return(#basic_message{exchange_name = ExchangeName,
                            routing_keys = [RoutingKey | _CcRoutes],
                            content = Content},
             State = #ch{cfg = #conf{protocol = Protocol,
                                     writer_pid = WriterPid}},
             Reason) ->
    ?INCR_STATS(exchange_stats, ExchangeName, 1, return_unroutable, State),
    {_Close, ReplyCode, ReplyText} = Protocol:lookup_amqp_exception(Reason),
    ok = rabbit_writer:send_command(
           WriterPid,
           #'basic.return'{reply_code = ReplyCode,
                           reply_text = ReplyText,
                           exchange = ExchangeName#resource.name,
                           routing_key = RoutingKey},
           Content).

%% Implements basic.reject / basic.nack: collects the targeted unacked
%% messages and either settles them now (no transaction) or queues them up
%% on the pending transaction's ack list.
reject(DeliveryTag, Requeue, Multiple,
       State = #ch{unacked_message_q = UAMQ, tx = Tx}) ->
    {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple),
    State1 = State#ch{unacked_message_q = Remaining},
    {noreply, case Tx of
                  none ->
                      {State2, Actions} = internal_reject(Requeue, Acked, State1#ch.limiter, State1),
                      handle_queue_actions(Actions, State2);
                  {Msgs, Acks} ->
                      Acks1 = ack_cons(Requeue, Acked, Acks),
                      State1#ch{tx = {Msgs, Acks1}}
              end}.

%% NB: Acked is in youngest-first order
%% Settles the rejected messages per queue (discard or requeue) and
%% notifies the limiter; returns {NewState, Actions} for the caller to
%% process.
internal_reject(Requeue, Acked, Limiter,
                State = #ch{queue_states = QueueStates0}) ->
    {QueueStates, Actions} =
        foreach_per_queue(
          fun({QRef, CTag}, MsgIds, {Acc0, Actions0}) ->
                  Op = case Requeue of
                           false -> discard;
                           true -> requeue
                       end,
                  case rabbit_queue_type:settle(QRef, Op, CTag, MsgIds, Acc0) of
                      {ok, Acc, Actions} ->
                          {Acc, Actions0 ++ Actions};
                      {protocol_error, ErrorType, Reason, ReasonArgs} ->
                          rabbit_misc:protocol_error(ErrorType, Reason, ReasonArgs)
                  end
          end, Acked, {QueueStates0, []}),
    ok = notify_limiter(Limiter, Acked),
    {State#ch{queue_states = QueueStates}, Actions}.
%% Book-keeping for every message delivered to the client (via deliver or
%% basic.get): bumps stats, taps the tracer and, when an ack is expected,
%% appends a #pending_ack{} to the unacked queue. Always advances next_tag.
record_sent(Type, Tag, AckRequired,
            Msg = {QName, _QPid, MsgId, Redelivered, _Message},
            State = #ch{cfg = #conf{channel = ChannelNum,
                                    trace_state = TraceState,
                                    user = #user{username = Username},
                                    conn_name = ConnName
                                   },
                        unacked_message_q = UAMQ,
                        next_tag = DeliveryTag
                       }) ->
    ?INCR_STATS(queue_stats, QName, 1, case {Type, AckRequired} of
                                           {get, true} -> get;
                                           {get, false} -> get_no_ack;
                                           {deliver, true} -> deliver;
                                           {deliver, false} -> deliver_no_ack
                                       end, State),
    case Redelivered of
        true -> ?INCR_STATS(queue_stats, QName, 1, redeliver, State);
        false -> ok
    end,
    DeliveredAt = os:system_time(millisecond),
    rabbit_trace:tap_out(Msg, ConnName, ChannelNum, Username, TraceState),
    UAMQ1 = case AckRequired of
                true ->
                    ?QUEUE:in(#pending_ack{delivery_tag = DeliveryTag,
                                           tag = Tag,
                                           delivered_at = DeliveredAt,
                                           queue = QName,
                                           msg_id = MsgId}, UAMQ);
                false ->
                    UAMQ
            end,
    State#ch{unacked_message_q = UAMQ1, next_tag = DeliveryTag + 1}.

%% NB: returns acks in youngest-first order
%% collect_acks(UAMQ, DeliveryTag, Multiple) -> {Acked, Remaining}:
%% selects the unacked entries addressed by an ack/reject. Tag 0 with
%% multiple=true means "everything outstanding".
collect_acks(Q, 0, true) ->
    {lists:reverse(?QUEUE:to_list(Q)), ?QUEUE:new()};
collect_acks(Q, DeliveryTag, Multiple) ->
    collect_acks([], [], Q, DeliveryTag, Multiple).

%% Walks the unacked queue oldest-first. ToAcc accumulates selected
%% entries (youngest-first); PrefixAcc holds skipped older entries that
%% must be stitched back in front of the remainder.
collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) ->
    case ?QUEUE:out(Q) of
        {{value, UnackedMsg = #pending_ack{delivery_tag = CurrentDeliveryTag}},
         QTail} ->
            if CurrentDeliveryTag == DeliveryTag ->
                   %% Found the addressed tag: stop here.
                   {[UnackedMsg | ToAcc],
                    case PrefixAcc of
                        [] -> QTail;
                        _ -> ?QUEUE:join(
                                ?QUEUE:from_list(lists:reverse(PrefixAcc)),
                                QTail)
                    end};
               Multiple ->
                   %% multiple=true: every tag up to DeliveryTag is included.
                   collect_acks([UnackedMsg | ToAcc], PrefixAcc,
                                QTail, DeliveryTag, Multiple);
               true ->
                   collect_acks(ToAcc, [UnackedMsg | PrefixAcc],
                                QTail, DeliveryTag, Multiple)
            end;
        {empty, _} ->
            precondition_failed("unknown delivery tag ~w", [DeliveryTag])
    end.
%% NB: Acked is in youngest-first order
%% Settles acknowledged messages per queue via the queue type layer,
%% updates ack stats and notifies the limiter. Returns {NewState, Actions}.
ack(Acked, State = #ch{queue_states = QueueStates0}) ->
    {QueueStates, Actions} =
        foreach_per_queue(
          fun ({QRef, CTag}, MsgIds, {Acc0, ActionsAcc0}) ->
                  case rabbit_queue_type:settle(QRef, complete, CTag,
                                                MsgIds, Acc0) of
                      {ok, Acc, ActionsAcc} ->
                          incr_queue_stats(QRef, MsgIds, State),
                          {Acc, ActionsAcc0 ++ ActionsAcc};
                      {protocol_error, ErrorType, Reason, ReasonArgs} ->
                          rabbit_misc:protocol_error(ErrorType, Reason, ReasonArgs)
                  end
          end, Acked, {QueueStates0, []}),
    ok = notify_limiter(State#ch.limiter, Acked),
    {State#ch{queue_states = QueueStates}, Actions}.

%% Bumps the per-queue ack counter by the number of settled message ids.
incr_queue_stats(QName, MsgIds, State) ->
    Count = length(MsgIds),
    ?INCR_STATS(queue_stats, QName, Count, ack, State).

%% {Msgs, Acks}
%%
%% Msgs is a queue.
%%
%% Acks looks s.t. like this:
%% [{false,[5,4]},{true,[3]},{ack,[2,1]}, ...]
%%
%% Each element is a pair consisting of a tag and a list of
%% ack'ed/reject'ed msg ids. The tag is one of 'ack' (to ack), 'true'
%% (reject w requeue), 'false' (reject w/o requeue). The msg ids, as
%% well as the list overall, are in "most-recent (generally youngest)
%% ack first" order.
new_tx() -> {?QUEUE:new(), []}.

%% Tells all classic queues with consumers on this channel that the
%% channel is going down; marks the channel state as closing.
notify_queues(State = #ch{cfg = #conf{state = closing}}) ->
    {ok, State};
notify_queues(State = #ch{consumer_mapping = Consumers,
                          cfg = Cfg}) ->
    QPids = classic_consumer_queue_pids(Consumers),
    Timeout = get_operation_timeout(),
    {rabbit_amqqueue:notify_down_all(QPids, self(), Timeout),
     State#ch{cfg = Cfg#conf{state = closing}}}.
%% Groups a list of #pending_ack{} records by {QueueName, ConsumerTag} and
%% invokes F once per group with the accumulated message ids, threading
%% Acc through. The single-element clause avoids building a tree for the
%% common one-message case.
foreach_per_queue(_F, [], Acc) ->
    Acc;
foreach_per_queue(F, [#pending_ack{tag = CTag,
                                   queue = QName,
                                   msg_id = MsgId}], Acc) ->
    %% quorum queue, needs the consumer tag
    F({QName, CTag}, [MsgId], Acc);
foreach_per_queue(F, UAL, Acc) ->
    T = lists:foldl(fun (#pending_ack{tag = CTag,
                                      queue = QName,
                                      msg_id = MsgId}, T) ->
                            rabbit_misc:gb_trees_cons({QName, CTag}, MsgId, T)
                    end, gb_trees:empty(), UAL),
    rabbit_misc:gb_trees_fold(fun (Key, Val, Acc0) -> F(Key, Val, Acc0) end, Acc, T).

%% hack to patch up missing queue type behaviour for classic queue
classic_consumer_queue_pids(Consumers) ->
    lists:usort([amqqueue:get_pid(Q)
                 || {Q, _CParams} <- maps:values(Consumers),
                    amqqueue:get_type(Q) == rabbit_classic_queue]).

%% tell the limiter about the number of acks that have been received
%% for messages delivered to subscribed consumers, but not acks for
%% messages sent in a response to a basic.get (identified by their
%% consumer tag as an integer (the same as the delivery tag, required
%% quorum queues))
%%
%% BUGFIX: the fold previously matched 4-tuples ({_, CTag, _, _}), but
%% Acked is a list of #pending_ack{} records (see record_sent/4 and
%% collect_acks/5), so with an active limiter every clause failed with
%% function_clause. Match the record instead.
notify_limiter(Limiter, Acked) ->
    %% optimisation: avoid the potentially expensive 'foldl' in the
    %% common case.
    case rabbit_limiter:is_active(Limiter) of
        false -> ok;
        true  -> case lists:foldl(fun (#pending_ack{tag = CTag}, Acc)
                                        when is_integer(CTag) ->
                                          %% Quorum queues use integer CTags
                                          %% classic queues use binaries
                                          %% Quorum queues do not interact
                                          %% with limiters
                                          Acc;
                                      (_, Acc) -> Acc + 1
                                  end, 0, Acked) of
                     0     -> ok;
                     Count -> rabbit_limiter:ack(Limiter, Count)
                 end
    end.
%% Routes a publish to its destination queues via the queue type layer and
%% performs the surrounding book-keeping: mandatory returns, confirm
%% registration, queue actions and fine-grained stats.
deliver_to_queues({#delivery{message = #basic_message{exchange_name = XName},
                             confirm = false,
                             mandatory = false},
                   _RoutedToQs = []}, State) -> %% optimisation
    %% Unroutable, no confirm, not mandatory: count it and drop it.
    ?INCR_STATS(exchange_stats, XName, 1, publish, State),
    ?INCR_STATS(exchange_stats, XName, 1, drop_unroutable, State),
    State;
deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{
                                                               exchange_name = XName},
                                        mandatory = Mandatory,
                                        confirm = Confirm,
                                        msg_seq_no = MsgSeqNo},
                   DelQNames}, State0 = #ch{queue_states = QueueStates0}) ->
    Qs = rabbit_amqqueue:lookup(DelQNames),
    AllQueueNames = lists:foldl(fun (Q, Acc) ->
                                        QRef = amqqueue:get_name(Q),
                                        [QRef | Acc]
                                end, [], Qs),
    {ok, QueueStates, Actions} =
        rabbit_queue_type:deliver(Qs, Delivery, QueueStates0),
    %% NB: the order here is important since basic.returns must be
    %% sent before confirms.
    ok = process_routing_mandatory(Mandatory, Qs, Message, State0),
    State1 = process_routing_confirm(Confirm, AllQueueNames,
                                     MsgSeqNo, XName, State0),
    %% Actions must be processed after registering confirms as actions may
    %% contain rejections of publishes
    State = handle_queue_actions(Actions,
                                 State1#ch{queue_states = QueueStates}),
    case rabbit_event:stats_level(State, #ch.stats_timer) of
        fine ->
            ?INCR_STATS(exchange_stats, XName, 1, publish),
            [?INCR_STATS(queue_exchange_stats,
                         {amqqueue:get_name(Q), XName}, 1, publish)
             || Q <- Qs];
        _ ->
            ok
    end,
    State.

%% Handles the mandatory flag for a publish: an unroutable mandatory
%% message is returned to the publisher; an unroutable non-mandatory one
%% is counted as dropped; routed messages need no action.
process_routing_mandatory(_Mandatory = true,
                          _RoutedToQs = [],
                          Msg, State) ->
    ok = basic_return(Msg, State, no_route),
    ok;
process_routing_mandatory(_Mandatory = false,
                          _RoutedToQs = [],
                          #basic_message{exchange_name = ExchangeName}, State) ->
    ?INCR_STATS(exchange_stats, ExchangeName, 1, drop_unroutable, State),
    ok;
process_routing_mandatory(_, _, _, _) ->
    ok.
%% Registers a publish for confirm tracking: not in confirm mode -> no-op;
%% routed to no queue -> confirm immediately; otherwise record the set of
%% queues that must all settle before the confirm is sent.
process_routing_confirm(false, _, _, _, State) ->
    State;
process_routing_confirm(true, [], MsgSeqNo, XName, State) ->
    record_confirms([{MsgSeqNo, XName}], State);
process_routing_confirm(true, QRefs, MsgSeqNo, XName, State) ->
    State#ch{unconfirmed =
                 rabbit_confirms:insert(MsgSeqNo, QRefs, XName, State#ch.unconfirmed)}.

%% Marks MsgSeqNos as settled by queue QRef; sequence numbers whose last
%% outstanding queue this was become ready to be confirmed to the client.
confirm(MsgSeqNos, QRef, State = #ch{unconfirmed = UC}) ->
    %% NOTE: if queue name does not exist here it's likely that the ref also
    %% does not exist in unconfirmed messages.
    %% Neither does the 'ignore' atom, so it's a reasonable fallback.
    {ConfirmMXs, UC1} = rabbit_confirms:confirm(MsgSeqNos, QRef, UC),
    %% NB: don't call noreply/1 since we don't want to send confirms.
    record_confirms(ConfirmMXs, State#ch{unconfirmed = UC1}).

%% Flushes accumulated confirms and rejections to the client (non-tx
%% channels), or advances transaction completion (tx channels). Suppressed
%% while the node is pausing due to a partition.
send_confirms_and_nacks(State = #ch{tx = none, confirmed = [], rejected = []}) ->
    State;
send_confirms_and_nacks(State = #ch{tx = none, confirmed = C, rejected = R}) ->
    case rabbit_node_monitor:pause_partition_guard() of
        ok ->
            Confirms = lists:append(C),
            Rejects = lists:append(R),
            ConfirmMsgSeqNos =
                lists:foldl(
                  fun ({MsgSeqNo, XName}, MSNs) ->
                          ?INCR_STATS(exchange_stats, XName, 1, confirm, State),
                          [MsgSeqNo | MSNs]
                  end, [], Confirms),
            RejectMsgSeqNos = [MsgSeqNo || {MsgSeqNo, _} <- Rejects],
            State1 = send_confirms(ConfirmMsgSeqNos,
                                   RejectMsgSeqNos,
                                   State#ch{confirmed = []}),
            %% TODO: msg seq nos, same as for confirms. Need to implement
            %% nack rates first.
            send_nacks(RejectMsgSeqNos,
                       ConfirmMsgSeqNos,
                       State1#ch{rejected = []});
        pausing -> State
    end;
send_confirms_and_nacks(State) ->
    case rabbit_node_monitor:pause_partition_guard() of
        ok      -> maybe_complete_tx(State);
        pausing -> State
    end.
%% Sends basic.nack frames for the rejected sequence numbers Rs, using the
%% confirmed numbers Cs to decide how far a multiple=true nack may reach.
send_nacks([], _, State) ->
    State;
send_nacks(_Rs, _, State = #ch{cfg = #conf{state = closing}}) -> %% optimisation
    State;
send_nacks(Rs, Cs, State) ->
    coalesce_and_send(Rs, Cs,
                      fun(MsgSeqNo, Multiple) ->
                              #'basic.nack'{delivery_tag = MsgSeqNo,
                                            multiple = Multiple}
                      end, State).

%% Sends basic.ack frames for the confirmed sequence numbers Cs; the
%% single-confirm case skips coalescing entirely.
send_confirms([], _, State) ->
    State;
send_confirms(_Cs, _, State = #ch{cfg = #conf{state = closing}}) -> %% optimisation
    State;
send_confirms([MsgSeqNo], _, State) ->
    ok = send(#'basic.ack'{delivery_tag = MsgSeqNo}, State),
    State;
send_confirms(Cs, Rs, State) ->
    coalesce_and_send(Cs, Rs,
                      fun(MsgSeqNo, Multiple) ->
                              #'basic.ack'{delivery_tag = MsgSeqNo,
                                           multiple = Multiple}
                      end, State).

%% Compresses a sorted run of sequence numbers into one multiple=true
%% frame plus individual frames. The cutoff is the smallest seq no that
%% must NOT be covered by the multiple frame: either the smallest still
%% unconfirmed number or the smallest number of the opposite kind
%% (NegativeMsgSeqNos), whichever is lower.
coalesce_and_send(MsgSeqNos, NegativeMsgSeqNos, MkMsgFun, State = #ch{unconfirmed = UC}) ->
    SMsgSeqNos = lists:usort(MsgSeqNos),
    UnconfirmedCutoff = case rabbit_confirms:is_empty(UC) of
                            true  -> lists:last(SMsgSeqNos) + 1;
                            false -> rabbit_confirms:smallest(UC)
                        end,
    Cutoff = lists:min([UnconfirmedCutoff | NegativeMsgSeqNos]),
    {Ms, Ss} = lists:splitwith(fun(X) -> X < Cutoff end, SMsgSeqNos),
    case Ms of
        [] -> ok;
        _  -> ok = send(MkMsgFun(lists:last(Ms), true), State)
    end,
    [ok = send(MkMsgFun(SeqNo, false), State) || SeqNo <- Ss],
    State.

%% Prepends Acked onto the transaction ack list, merging with the head
%% entry when it carries the same tag (ack | true | false).
ack_cons(Tag, Acked, [{Tag, Acks} | L]) -> [{Tag, Acked ++ Acks} | L];
ack_cons(Tag, Acked, Acks) -> [{Tag, Acked} | Acks].

%% Counts only positively-acked ids in a transaction ack list.
ack_len(Acks) -> lists:sum([length(L) || {ack, L} <- Acks]).

%% A tx.commit finishes only once every confirm is settled; until then the
%% tx field stays 'committing' (or 'failed') rather than a {Msgs, Acks} pair.
maybe_complete_tx(State = #ch{tx = {_, _}}) ->
    State;
maybe_complete_tx(State = #ch{unconfirmed = UC}) ->
    case rabbit_confirms:is_empty(UC) of
        false -> State;
        true  -> complete_tx(State#ch{confirmed = []})
    end.
%% Finishes a transaction: commit_ok on success, or raise the recorded
%% failure as a channel exception; either way reset to a fresh tx.
complete_tx(State = #ch{tx = committing}) ->
    ok = send(#'tx.commit_ok'{}, State),
    State#ch{tx = new_tx()};
complete_tx(State = #ch{tx = failed}) ->
    {noreply, State1} = handle_exception(
                          rabbit_misc:amqp_error(
                            precondition_failed, "partial tx completion", [],
                            'tx.commit'),
                          State),
    State1#ch{tx = new_tx()}.

%% Collects the requested info items for this channel.
infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items].

%% As infos/2 but throws 'timeout' once the Deadline (milliseconds) has
%% passed, re-checking the clock before each item.
infos(Items, Deadline, State) ->
    [begin
         Now = now_millis(),
         if
             Now > Deadline ->
                 throw(timeout);
             true ->
                 {Item, i(Item, State)}
         end
     end || Item <- Items].

%% Per-item info lookup; unknown items raise {bad_argument, Item}.
i(pid, _) -> self();
i(connection, #ch{cfg = #conf{conn_pid = ConnPid}}) -> ConnPid;
i(number, #ch{cfg = #conf{channel = Channel}}) -> Channel;
i(user, #ch{cfg = #conf{user = User}}) -> User#user.username;
i(user_who_performed_action, Ch) -> i(user, Ch);
i(vhost, #ch{cfg = #conf{virtual_host = VHost}}) -> VHost;
i(transactional, #ch{tx = Tx}) -> Tx =/= none;
i(confirm, #ch{confirm_enabled = CE}) -> CE;
i(name, State) -> name(State);
i(consumer_count, #ch{consumer_mapping = CM}) -> maps:size(CM);
i(messages_unconfirmed, #ch{unconfirmed = UC}) -> rabbit_confirms:size(UC);
i(messages_unacknowledged, #ch{unacked_message_q = UAMQ}) -> ?QUEUE:len(UAMQ);
i(messages_uncommitted, #ch{tx = {Msgs, _Acks}}) -> ?QUEUE:len(Msgs);
i(messages_uncommitted, #ch{}) -> 0;
i(acks_uncommitted, #ch{tx = {_Msgs, Acks}}) -> ack_len(Acks);
i(acks_uncommitted, #ch{}) -> 0;
i(pending_raft_commands, #ch{queue_states = QS}) ->
    pending_raft_commands(QS);
i(state, #ch{cfg = #conf{state = running}}) -> credit_flow:state();
i(state, #ch{cfg = #conf{state = State}}) -> State;
i(prefetch_count, #ch{cfg = #conf{consumer_prefetch = C}}) -> C;
i(global_prefetch_count, #ch{limiter = Limiter}) ->
    rabbit_limiter:get_prefetch_limit(Limiter);
i(interceptors, #ch{interceptor_state = IState}) ->
    IState;
i(garbage_collection, _State) ->
    rabbit_misc:get_gc_info(self());
i(reductions, _State) ->
    {reductions, Reductions} = erlang:process_info(self(), reductions),
    Reductions;
i(Item, _) ->
    throw({bad_argument, Item}).

%% Sums pending Raft commands across all quorum queue states known to
%% this channel.
pending_raft_commands(QStates) ->
    Fun = fun(_, V, Acc) ->
                  case rabbit_queue_type:state_info(V) of
                      #{pending_raft_commands := P} ->
                          Acc + P;
                      _ ->
                          Acc
                  end
          end,
    rabbit_queue_type:fold_state(Fun, 0, QStates).

%% Human-readable channel name, e.g. <<"127.0.0.1:5672 (1)">>.
name(#ch{cfg = #conf{conn_name = ConnName, channel = Channel}}) ->
    list_to_binary(rabbit_misc:format("~s (~p)", [ConnName, Channel])).

emit_stats(State) -> emit_stats(State, []).

%% Pushes the channel's coarse metrics into rabbit_core_metrics.
emit_stats(State, Extra) ->
    [{reductions, Red} | Coarse0] = infos(?STATISTICS_KEYS, State),
    %% First metric must be `idle_since` (if available), as expected by
    %% `rabbit_mgmt_format:format_channel_stats`. This is a performance
    %% optimisation that avoids traversing the whole list when only
    %% one element has to be formatted.
    rabbit_core_metrics:channel_stats(self(), Extra ++ Coarse0),
    rabbit_core_metrics:channel_stats(reductions, self(), Red).

%% Clears all per-queue stats kept in the process dictionary for QName,
%% including queue-exchange pairs, and tears down the core metrics rows.
erase_queue_stats(QName) ->
    rabbit_core_metrics:channel_queue_down({self(), QName}),
    erase({queue_stats, QName}),
    [begin
         rabbit_core_metrics:channel_queue_exchange_down({self(), QX}),
         erase({queue_exchange_stats, QX})
     end || {{queue_exchange_stats, QX = {QName0, _}}, _} <- get(),
            QName0 =:= QName].

get_vhost(#ch{cfg = #conf{virtual_host = VHost}}) -> VHost.

get_user(#ch{cfg = #conf{user = User}}) -> User.

%% Tears down the core-metrics row matching a stats process-dictionary key.
delete_stats({queue_stats, QName}) ->
    rabbit_core_metrics:channel_queue_down({self(), QName});
delete_stats({exchange_stats, XName}) ->
    rabbit_core_metrics:channel_exchange_down({self(), XName});
delete_stats({queue_exchange_stats, QX}) ->
    rabbit_core_metrics:channel_queue_exchange_down({self(), QX});
delete_stats(_) ->
    ok.

%% The operation timeout is cached in the process dictionary at channel
%% start so hot paths need not consult application config.
put_operation_timeout() ->
    put(channel_operation_timeout, ?CHANNEL_OPERATION_TIMEOUT).

get_operation_timeout() ->
    get(channel_operation_timeout).
+ +%% Refactored and exported to allow direct calls from the HTTP API, +%% avoiding the usage of AMQP 0-9-1 from the management. + +handle_method(#'exchange.bind'{destination = DestinationNameBin, + source = SourceNameBin, + routing_key = RoutingKey, + arguments = Arguments}, + ConnPid, AuthzContext, _CollectorId, VHostPath, User) -> + binding_action(fun rabbit_binding:add/3, + SourceNameBin, exchange, DestinationNameBin, + RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext, User); +handle_method(#'exchange.unbind'{destination = DestinationNameBin, + source = SourceNameBin, + routing_key = RoutingKey, + arguments = Arguments}, + ConnPid, AuthzContext, _CollectorId, VHostPath, User) -> + binding_action(fun rabbit_binding:remove/3, + SourceNameBin, exchange, DestinationNameBin, + RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext, User); +handle_method(#'queue.unbind'{queue = QueueNameBin, + exchange = ExchangeNameBin, + routing_key = RoutingKey, + arguments = Arguments}, + ConnPid, AuthzContext, _CollectorId, VHostPath, User) -> + binding_action(fun rabbit_binding:remove/3, + ExchangeNameBin, queue, QueueNameBin, + RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext, User); +handle_method(#'queue.bind'{queue = QueueNameBin, + exchange = ExchangeNameBin, + routing_key = RoutingKey, + arguments = Arguments}, + ConnPid, AuthzContext, _CollectorId, VHostPath, User) -> + binding_action(fun rabbit_binding:add/3, + ExchangeNameBin, queue, QueueNameBin, + RoutingKey, Arguments, VHostPath, ConnPid, AuthzContext, User); +%% Note that all declares to these are effectively passive. If it +%% exists it by definition has one consumer. 
+handle_method(#'queue.declare'{queue = <<"amq.rabbitmq.reply-to", + _/binary>> = QueueNameBin}, + _ConnPid, _AuthzContext, _CollectorPid, VHost, _User) -> + StrippedQueueNameBin = strip_cr_lf(QueueNameBin), + QueueName = rabbit_misc:r(VHost, queue, StrippedQueueNameBin), + case declare_fast_reply_to(StrippedQueueNameBin) of + exists -> {ok, QueueName, 0, 1}; + not_found -> rabbit_amqqueue:not_found(QueueName) + end; +handle_method(#'queue.declare'{queue = QueueNameBin, + passive = false, + durable = DurableDeclare, + exclusive = ExclusiveDeclare, + auto_delete = AutoDelete, + nowait = NoWait, + arguments = Args} = Declare, + ConnPid, AuthzContext, CollectorPid, VHostPath, + #user{username = Username} = User) -> + Owner = case ExclusiveDeclare of + true -> ConnPid; + false -> none + end, + StrippedQueueNameBin = strip_cr_lf(QueueNameBin), + Durable = DurableDeclare andalso not ExclusiveDeclare, + ActualNameBin = case StrippedQueueNameBin of + <<>> -> + case rabbit_amqqueue:is_server_named_allowed(Args) of + true -> + rabbit_guid:binary(rabbit_guid:gen_secure(), "amq.gen"); + false -> + rabbit_misc:protocol_error( + precondition_failed, + "Cannot declare a server-named queue for type ~p", + [rabbit_amqqueue:get_queue_type(Args)]) + end; + Other -> check_name('queue', Other) + end, + QueueName = rabbit_misc:r(VHostPath, queue, ActualNameBin), + check_configure_permitted(QueueName, User, AuthzContext), + rabbit_core_metrics:queue_declared(QueueName), + case rabbit_amqqueue:with( + QueueName, + fun (Q) -> ok = rabbit_amqqueue:assert_equivalence( + Q, Durable, AutoDelete, Args, Owner), + maybe_stat(NoWait, Q) + end) of + {ok, MessageCount, ConsumerCount} -> + {ok, QueueName, MessageCount, ConsumerCount}; + {error, not_found} -> + %% enforce the limit for newly declared queues only + check_vhost_queue_limit(QueueName, VHostPath), + DlxKey = <<"x-dead-letter-exchange">>, + case rabbit_misc:r_arg(VHostPath, exchange, Args, DlxKey) of + undefined -> + ok; + {error, 
{invalid_type, Type}} -> + precondition_failed( + "invalid type '~s' for arg '~s' in ~s", + [Type, DlxKey, rabbit_misc:rs(QueueName)]); + DLX -> + check_read_permitted(QueueName, User, AuthzContext), + check_write_permitted(DLX, User, AuthzContext), + ok + end, + case rabbit_amqqueue:declare(QueueName, Durable, AutoDelete, + Args, Owner, Username) of + {new, Q} when ?is_amqqueue(Q) -> + %% We need to notify the reader within the channel + %% process so that we can be sure there are no + %% outstanding exclusive queues being declared as + %% the connection shuts down. + QPid = amqqueue:get_pid(Q), + ok = case {Owner, CollectorPid} of + {none, _} -> ok; + {_, none} -> ok; %% Supports call from mgmt API + _ -> rabbit_queue_collector:register( + CollectorPid, QPid) + end, + rabbit_core_metrics:queue_created(QueueName), + {ok, QueueName, 0, 0}; + {existing, _Q} -> + %% must have been created between the stat and the + %% declare. Loop around again. + handle_method(Declare, ConnPid, AuthzContext, CollectorPid, VHostPath, + User); + {absent, Q, Reason} -> + rabbit_amqqueue:absent(Q, Reason); + {owner_died, _Q} -> + %% Presumably our own days are numbered since the + %% connection has died. Pretend the queue exists though, + %% just so nothing fails. 
+ {ok, QueueName, 0, 0}; + {protocol_error, ErrorType, Reason, ReasonArgs} -> + rabbit_misc:protocol_error(ErrorType, Reason, ReasonArgs) + end; + {error, {absent, Q, Reason}} -> + rabbit_amqqueue:absent(Q, Reason) + end; +handle_method(#'queue.declare'{queue = QueueNameBin, + nowait = NoWait, + passive = true}, + ConnPid, _AuthzContext, _CollectorPid, VHostPath, _User) -> + StrippedQueueNameBin = strip_cr_lf(QueueNameBin), + QueueName = rabbit_misc:r(VHostPath, queue, StrippedQueueNameBin), + Fun = fun (Q0) -> + QStat = maybe_stat(NoWait, Q0), + {QStat, Q0} + end, + %% Note: no need to check if Q is an #amqqueue, with_or_die does it + {{ok, MessageCount, ConsumerCount}, Q} = rabbit_amqqueue:with_or_die(QueueName, Fun), + ok = rabbit_amqqueue:check_exclusive_access(Q, ConnPid), + {ok, QueueName, MessageCount, ConsumerCount}; +handle_method(#'queue.delete'{queue = QueueNameBin, + if_unused = IfUnused, + if_empty = IfEmpty}, + ConnPid, AuthzContext, _CollectorPid, VHostPath, + User = #user{username = Username}) -> + StrippedQueueNameBin = strip_cr_lf(QueueNameBin), + QueueName = qbin_to_resource(StrippedQueueNameBin, VHostPath), + + check_configure_permitted(QueueName, User, AuthzContext), + case rabbit_amqqueue:with( + QueueName, + fun (Q) -> + rabbit_amqqueue:check_exclusive_access(Q, ConnPid), + rabbit_queue_type:delete(Q, IfUnused, IfEmpty, Username) + end, + fun (not_found) -> + {ok, 0}; + ({absent, Q, crashed}) -> + _ = rabbit_classic_queue:delete_crashed(Q, Username), + {ok, 0}; + ({absent, Q, stopped}) -> + _ = rabbit_classic_queue:delete_crashed(Q, Username), + {ok, 0}; + ({absent, Q, Reason}) -> + rabbit_amqqueue:absent(Q, Reason) + end) of + {error, in_use} -> + precondition_failed("~s in use", [rabbit_misc:rs(QueueName)]); + {error, not_empty} -> + precondition_failed("~s not empty", [rabbit_misc:rs(QueueName)]); + {ok, Count} -> + {ok, Count}; + {protocol_error, Type, Reason, ReasonArgs} -> + rabbit_misc:protocol_error(Type, Reason, ReasonArgs) + end; 
+handle_method(#'exchange.delete'{exchange = ExchangeNameBin, + if_unused = IfUnused}, + _ConnPid, AuthzContext, _CollectorPid, VHostPath, + User = #user{username = Username}) -> + StrippedExchangeNameBin = strip_cr_lf(ExchangeNameBin), + ExchangeName = rabbit_misc:r(VHostPath, exchange, StrippedExchangeNameBin), + check_not_default_exchange(ExchangeName), + check_exchange_deletion(ExchangeName), + check_configure_permitted(ExchangeName, User, AuthzContext), + case rabbit_exchange:delete(ExchangeName, IfUnused, Username) of + {error, not_found} -> + ok; + {error, in_use} -> + precondition_failed("~s in use", [rabbit_misc:rs(ExchangeName)]); + ok -> + ok + end; +handle_method(#'queue.purge'{queue = QueueNameBin}, + ConnPid, AuthzContext, _CollectorPid, VHostPath, User) -> + QueueName = qbin_to_resource(QueueNameBin, VHostPath), + check_read_permitted(QueueName, User, AuthzContext), + rabbit_amqqueue:with_exclusive_access_or_die( + QueueName, ConnPid, + fun (Q) -> + case rabbit_queue_type:purge(Q) of + {ok, _} = Res -> + Res; + {error, not_supported} -> + rabbit_misc:protocol_error( + not_implemented, + "queue.purge not supported by stream queues ~s", + [rabbit_misc:rs(amqqueue:get_name(Q))]) + end + end); +handle_method(#'exchange.declare'{exchange = ExchangeNameBin, + type = TypeNameBin, + passive = false, + durable = Durable, + auto_delete = AutoDelete, + internal = Internal, + arguments = Args}, + _ConnPid, AuthzContext, _CollectorPid, VHostPath, + #user{username = Username} = User) -> + CheckedType = rabbit_exchange:check_type(TypeNameBin), + ExchangeName = rabbit_misc:r(VHostPath, exchange, strip_cr_lf(ExchangeNameBin)), + check_not_default_exchange(ExchangeName), + check_configure_permitted(ExchangeName, User, AuthzContext), + X = case rabbit_exchange:lookup(ExchangeName) of + {ok, FoundX} -> FoundX; + {error, not_found} -> + check_name('exchange', strip_cr_lf(ExchangeNameBin)), + AeKey = <<"alternate-exchange">>, + case rabbit_misc:r_arg(VHostPath, exchange, 
Args, AeKey) of + undefined -> ok; + {error, {invalid_type, Type}} -> + precondition_failed( + "invalid type '~s' for arg '~s' in ~s", + [Type, AeKey, rabbit_misc:rs(ExchangeName)]); + AName -> check_read_permitted(ExchangeName, User, AuthzContext), + check_write_permitted(AName, User, AuthzContext), + ok + end, + rabbit_exchange:declare(ExchangeName, + CheckedType, + Durable, + AutoDelete, + Internal, + Args, + Username) + end, + ok = rabbit_exchange:assert_equivalence(X, CheckedType, Durable, + AutoDelete, Internal, Args); +handle_method(#'exchange.declare'{exchange = ExchangeNameBin, + passive = true}, + _ConnPid, _AuthzContext, _CollectorPid, VHostPath, _User) -> + ExchangeName = rabbit_misc:r(VHostPath, exchange, strip_cr_lf(ExchangeNameBin)), + check_not_default_exchange(ExchangeName), + _ = rabbit_exchange:lookup_or_die(ExchangeName). + +handle_deliver(CTag, Ack, Msgs, State) when is_list(Msgs) -> + lists:foldl(fun(Msg, S) -> + handle_deliver0(CTag, Ack, Msg, S) + end, State, Msgs); +handle_deliver(CTag, Ack, Msg, State) -> + %% backwards compatibility clause + handle_deliver0(CTag, Ack, Msg, State). 
%% Deliver one message to the client on behalf of an active consumer.
%%
%% Msg is the queue-type delivery tuple
%% {QName, QPid, MsgId, Redelivered, #basic_message{}}. Emits a
%% basic.deliver frame (with content) through the channel's writer and
%% then hands the message to record_sent/5 (defined elsewhere in this
%% module) so it can be tracked for acknowledgement when AckRequired.
handle_deliver0(ConsumerTag, AckRequired,
                Msg = {QName, QPid, _MsgId, Redelivered,
                       #basic_message{exchange_name = ExchangeName,
                                      routing_keys = [RoutingKey | _CcRoutes],
                                      content = Content}},
                State = #ch{cfg = #conf{writer_pid = WriterPid,
                                        writer_gc_threshold = GCThreshold},
                            next_tag = DeliveryTag,
                            queue_states = Qs}) ->
    Deliver = #'basic.deliver'{consumer_tag = ConsumerTag,
                               delivery_tag = DeliveryTag,
                               redelivered = Redelivered,
                               exchange = ExchangeName#resource.name,
                               routing_key = RoutingKey},
    case rabbit_queue_type:module(QName, Qs) of
        {ok, rabbit_classic_queue} ->
            %% Classic queues go through send_command_and_notify, which
            %% also involves the queue process QPid — presumably for
            %% flow-control notification once the frame is written.
            %% NOTE(review): confirm against rabbit_writer docs.
            ok = rabbit_writer:send_command_and_notify(
                   WriterPid, QPid, self(), Deliver, Content);
        _ ->
            ok = rabbit_writer:send_command(WriterPid, Deliver, Content)
    end,
    %% Optionally trigger a GC after shipping a large message body so
    %% this process does not keep the large binary alive.
    case GCThreshold of
        undefined -> ok;
        _ -> rabbit_basic:maybe_gc_large_msg(Content, GCThreshold)
    end,
    record_sent(deliver, ConsumerTag, AckRequired, Msg, State).

%% Reply to basic.get with basic.get-ok plus the message content, then
%% record the delivery; ack tracking applies unless NoAck is set.
handle_basic_get(WriterPid, DeliveryTag, NoAck, MessageCount,
                 Msg = {_QName, _QPid, _MsgId, Redelivered,
                        #basic_message{exchange_name = ExchangeName,
                                       routing_keys = [RoutingKey | _CcRoutes],
                                       content = Content}}, State) ->
    ok = rabbit_writer:send_command(
           WriterPid,
           #'basic.get_ok'{delivery_tag = DeliveryTag,
                           redelivered = Redelivered,
                           exchange = ExchangeName#resource.name,
                           routing_key = RoutingKey,
                           message_count = MessageCount},
           Content),
    {noreply, record_sent(get, DeliveryTag, not(NoAck), Msg, State)}.

%% Arm the periodic 'tick' message if no timer is currently armed.
%% Interval comes from the rabbit application environment
%% (channel_tick_interval); a missing key would crash here by design.
init_tick_timer(State = #ch{tick_timer = undefined}) ->
    {ok, Interval} = application:get_env(rabbit, channel_tick_interval),
    State#ch{tick_timer = erlang:send_after(Interval, self(), tick)};
init_tick_timer(State) ->
    State.

%% Forget the current timer reference so the next init_tick_timer/1
%% call arms a fresh one. Note: does not cancel an in-flight timer.
reset_tick_timer(State) ->
    State#ch{tick_timer = undefined}.
%% Cancel the periodic tick timer, but only when the unacked-message
%% queue is empty (per the inline comment below); otherwise the timer
%% is left running.
maybe_cancel_tick_timer(#ch{tick_timer = undefined} = State) ->
    State;
maybe_cancel_tick_timer(#ch{tick_timer = TRef,
                            unacked_message_q = UMQ} = State) ->
    case ?QUEUE:len(UMQ) of
        0 ->
            %% we can only cancel the tick timer if the unacked messages
            %% queue is empty.
            _ = erlang:cancel_timer(TRef),
            State#ch{tick_timer = undefined};
        _ ->
            %% let the timer continue
            State
    end.

%% Monotonic clock in milliseconds — suitable for measuring durations,
%% not for wall-clock timestamps.
now_millis() ->
    erlang:monotonic_time(millisecond).

%% Returns {TimeoutMs, AbsoluteDeadlineMs} for channel operations.
get_operation_timeout_and_deadline() ->
    % NB: can't use get_operation_timeout because
    % this code may not be running via the channel Pid
    Timeout = ?CHANNEL_OPERATION_TIMEOUT,
    Deadline = now_millis() + Timeout,
    {Timeout, Deadline}.

%% Left fold over a ?QUEUE, front to back.
queue_fold(Fun, Init, Q) ->
    case ?QUEUE:out(Q) of
        {empty, _Q} -> Init;
        {{value, V}, Q1} -> queue_fold(Fun, Fun(V, Init), Q1)
    end.

%% Raise precondition_failed on the channel if the OLDEST pending ack
%% (head of unacked_message_q) is older than the configured consumer
%% timeout. Only the head needs inspecting since it is the oldest.
%% NOTE(review): the value bound to 'ConsumerTag' comes from the
%% #pending_ack.delivery_tag field, yet it is logged as the consumer —
%% verify whether the delivery tag or the consumer tag was intended.
evaluate_consumer_timeout(State0 = #ch{cfg = #conf{channel = Channel,
                                                   consumer_timeout = Timeout},
                                       unacked_message_q = UAMQ}) ->
    Now = os:system_time(millisecond),
    case ?QUEUE:peek(UAMQ) of
        {value, #pending_ack{delivery_tag = ConsumerTag,
                             delivered_at = Time}}
          when is_integer(Timeout)
               andalso Time < Now - Timeout ->
            rabbit_log_channel:warning("Consumer ~s on channel ~w has timed out "
                                       "waiting on consumer acknowledgement. Timeout used: ~p ms",
                                       [rabbit_data_coercion:to_binary(ConsumerTag),
                                        Channel, Timeout]),
            Ex = rabbit_misc:amqp_error(precondition_failed,
                                        "consumer ack timed out on channel ~w",
                                        [Channel], none),
            handle_exception(Ex, State0);
        _ ->
            {noreply, State0}
    end.
%% Apply, in order, the action list returned by rabbit_queue_type
%% operations: credit replies, drained notifications, publisher
%% confirms/rejections, deliveries and queue-down events.
handle_queue_actions(Actions, #ch{} = State0) ->
    WriterPid = State0#ch.cfg#conf.writer_pid,
    lists:foldl(
      fun ({send_credit_reply, Avail}, S0) ->
              ok = rabbit_writer:send_command(
                     WriterPid, #'basic.credit_ok'{available = Avail}),
              S0;
          ({send_drained, {CTag, Credit}}, S0) ->
              ok = rabbit_writer:send_command(
                     WriterPid,
                     #'basic.credit_drained'{consumer_tag = CTag,
                                             credit_drained = Credit}),
              S0;
          ({settled, QRef, MsgSeqNos}, S0) ->
              confirm(MsgSeqNos, QRef, S0);
          ({rejected, _QRef, MsgSeqNos}, S0) ->
              %% Remove the rejected sequence numbers from the
              %% unconfirmed set; unknown sequence numbers are ignored.
              {U, Rej} =
                  lists:foldr(
                    fun(SeqNo, {U1, Acc}) ->
                            case rabbit_confirms:reject(SeqNo, U1) of
                                {ok, MX, U2} ->
                                    {U2, [MX | Acc]};
                                {error, not_found} ->
                                    {U1, Acc}
                            end
                    end, {S0#ch.unconfirmed, []}, MsgSeqNos),
              S = S0#ch{unconfirmed = U},
              record_rejects(Rej, S);
          ({deliver, CTag, AckRequired, Msgs}, S0) ->
              handle_deliver(CTag, AckRequired, Msgs, S0);
          ({queue_down, QRef}, S0) ->
              handle_consuming_queue_down_or_eol(QRef, S0)
      end, State0, Actions).

%% Find the resource name of the queue whose queue process (or one of
%% its mirrors) is Pid, searching only the queues this channel holds
%% state for. Returns the name, or 'undefined' when nothing matches.
find_queue_name_from_pid(Pid, QStates) when is_pid(Pid) ->
    Fun = fun(K, _V, undefined) ->
                  case rabbit_amqqueue:lookup(K) of
                      {error, not_found} ->
                          undefined;
                      {ok, Q} ->
                          Pids = get_queue_pids(Q),
                          case lists:member(Pid, Pids) of
                              true ->
                                  K;
                              false ->
                                  undefined
                          end
                  end;
             (_K, _V, Acc) ->
                  %% already found — pass the result through
                  Acc
          end,
    rabbit_queue_type:fold_state(Fun, undefined, QStates).

%% Pids a queue may live on: the leader for quorum queues, the master
%% plus the mirror pids for everything else.
get_queue_pids(Q) when ?amqqueue_is_quorum(Q) ->
    [amqqueue:get_leader(Q)];
get_queue_pids(Q) ->
    [amqqueue:get_pid(Q) | amqqueue:get_slave_pids(Q)].

%% Find the resource name of the queue whose quorum (Ra) server name is
%% Name. Returns the name, or 'undefined' when nothing matches.
find_queue_name_from_quorum_name(Name, QStates) ->
    Fun = fun(K, _V, undefined) ->
                  {ok, Q} = rabbit_amqqueue:lookup(K),
                  case amqqueue:get_pid(Q) of
                      {Name, _} ->
                          amqqueue:get_name(Q);
                      _ ->
                          undefined
                  end;
             (_K, _V, Acc) ->
                  %% Bug fix: this pass-through clause was missing.
                  %% Without it the fold crashed with function_clause as
                  %% soon as a match had been found and further queue
                  %% states remained to be visited (compare
                  %% find_queue_name_from_pid/2 above, which has it).
                  Acc
          end,
    rabbit_queue_type:fold_state(Fun, undefined, QStates).
diff --git a/deps/rabbit/src/rabbit_channel_interceptor.erl b/deps/rabbit/src/rabbit_channel_interceptor.erl new file mode 100644 index 0000000000..c40b437f10 --- /dev/null +++ b/deps/rabbit/src/rabbit_channel_interceptor.erl @@ -0,0 +1,104 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_channel_interceptor). + +-include("rabbit_framing.hrl"). +-include("rabbit.hrl"). + +-export([init/1, intercept_in/3]). + +-behaviour(rabbit_registry_class). + +-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]). + +-type(method_name() :: rabbit_framing:amqp_method_name()). +-type(original_method() :: rabbit_framing:amqp_method_record()). +-type(processed_method() :: rabbit_framing:amqp_method_record()). +-type(original_content() :: rabbit_types:maybe(rabbit_types:content())). +-type(processed_content() :: rabbit_types:maybe(rabbit_types:content())). +-type(interceptor_state() :: term()). + +-callback description() -> [proplists:property()]. +%% Derive some initial state from the channel. This will be passed back +%% as the third argument of intercept/3. +-callback init(rabbit_channel:channel()) -> interceptor_state(). +-callback intercept(original_method(), original_content(), + interceptor_state()) -> + {processed_method(), processed_content()} | + rabbit_misc:channel_or_connection_exit(). +-callback applies_to() -> list(method_name()). + +added_to_rabbit_registry(_Type, _ModuleName) -> + rabbit_channel:refresh_interceptors(). +removed_from_rabbit_registry(_Type) -> + rabbit_channel:refresh_interceptors(). + +init(Ch) -> + Mods = [M || {_, M} <- rabbit_registry:lookup_all(channel_interceptor)], + check_no_overlap(Mods), + [{Mod, Mod:init(Ch)} || Mod <- Mods]. 
%% Ensure no two registered interceptors claim the same AMQP method:
%% raises internal_error if any pair of applies_to() sets overlaps.
check_no_overlap(Mods) ->
    check_no_overlap1([sets:from_list(Mod:applies_to()) || Mod <- Mods]).

%% Check no non-empty pairwise intersection in a list of sets, by
%% folding a running union over the list and intersecting each set
%% against the union built so far.
check_no_overlap1(Sets) ->
    lists:foldl(fun(Set, Union) ->
                        Is = sets:intersection(Set, Union),
                        case sets:size(Is) of
                            0 -> ok;
                            _ ->
                                internal_error("Interceptor: more than one "
                                               "module handles ~p~n", [Is])
                        end,
                        sets:union(Set, Union)
                end,
                sets:new(),
                Sets),
    ok.

%% Thread an inbound method/content pair through every registered
%% interceptor, in list order.
intercept_in(M, C, Mods) ->
    lists:foldl(fun({Mod, ModState}, {M1, C1}) ->
                        call_module(Mod, ModState, M1, C1)
                end,
                {M, C},
                Mods).

%% Invoke one interceptor. Mod might be unloaded at any point (plugins
%% can be disabled at runtime), so a missing Mod:intercept/3 is treated
%% as the identity transformation. Any other error is re-raised with
%% its original stacktrace — the previous old-style `catch` version
%% surfaced such failures as an uninformative case_clause and lost the
%% stacktrace.
call_module(Mod, St, M, C) ->
    try Mod:intercept(M, C, St) of
        R -> validate_response(Mod, M, C, R)
    catch
        error:undef:Stacktrace ->
            case Stacktrace of
                [{Mod, intercept, _, _} | _] -> {M, C};
                _ -> erlang:raise(error, undef, Stacktrace)
            end
    end.

%% An interceptor must return the same method record type it received,
%% and must return content iff it received content; anything else is an
%% internal_error.
validate_response(Mod, M1, C1, R = {M2, C2}) ->
    case {validate_method(M1, M2), validate_content(C1, C2)} of
        {true, true} -> R;
        {false, _} ->
            internal_error("Interceptor: ~p expected to return "
                           "method: ~p but returned: ~p",
                           [Mod, rabbit_misc:method_record_type(M1),
                            rabbit_misc:method_record_type(M2)]);
        {_, false} ->
            internal_error("Interceptor: ~p expected to return "
                           "content iff content is provided but "
                           "content in = ~p; content out = ~p",
                           [Mod, C1, C2])
    end.

%% True iff both methods are the same AMQP method record type.
validate_method(M, M2) ->
    rabbit_misc:method_record_type(M) =:= rabbit_misc:method_record_type(M2).

%% Content must stay present or stay absent across an interceptor call.
validate_content(none, none) -> true;
validate_content(#content{}, #content{}) -> true;
validate_content(_, _) -> false.

%% keep dialyzer happy
-spec internal_error(string(), [any()]) -> no_return().
internal_error(Format, Args) ->
    rabbit_misc:protocol_error(internal_error, Format, Args).
diff --git a/deps/rabbit/src/rabbit_channel_sup.erl b/deps/rabbit/src/rabbit_channel_sup.erl new file mode 100644 index 0000000000..0d405ad3a7 --- /dev/null +++ b/deps/rabbit/src/rabbit_channel_sup.erl @@ -0,0 +1,92 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_channel_sup). + +%% Supervises processes that implement AMQP 0-9-1 channels: +%% +%% * Channel process itself +%% * Network writer (for network connections) +%% * Limiter (handles channel QoS and flow control) +%% +%% Every rabbit_channel_sup is supervised by rabbit_channel_sup_sup. +%% +%% See also rabbit_channel, rabbit_writer, rabbit_limiter. + +-behaviour(supervisor2). + +-export([start_link/1]). + +-export([init/1]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-export_type([start_link_args/0]). + +-type start_link_args() :: + {'tcp', rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), pid(), string(), rabbit_types:protocol(), + rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), + pid()} | + {'direct', rabbit_channel:channel_number(), pid(), string(), + rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(), + rabbit_framing:amqp_table(), pid()}. + +-define(FAIR_WAIT, 70000). + +%%---------------------------------------------------------------------------- + +-spec start_link(start_link_args()) -> {'ok', pid(), {pid(), any()}}. 
%% Start a channel supervision tree.
%%
%% For network ('tcp') connections the static children (limiter +
%% writer, see child_specs/1) are started by init/1; their pids are
%% then looked up and handed to the dynamically-added channel child.
%% For 'direct' (in-VM) connections there is no writer — the client
%% channel pid is passed in the writer's position instead.
%%
%% Returns {ok, SupPid, {ChannelPid, AState}} where AState is the AMQP
%% command assembler state for tcp, or 'none' for direct connections.
start_link({tcp, Sock, Channel, FrameMax, ReaderPid, ConnName, Protocol, User,
            VHost, Capabilities, Collector}) ->
    {ok, SupPid} = supervisor2:start_link(
                     ?MODULE, {tcp, Sock, Channel, FrameMax,
                               ReaderPid, Protocol, {ConnName, Channel}}),
    [LimiterPid] = supervisor2:find_child(SupPid, limiter),
    [WriterPid] = supervisor2:find_child(SupPid, writer),
    {ok, ChannelPid} =
        supervisor2:start_child(
          SupPid,
          {channel, {rabbit_channel, start_link,
                     [Channel, ReaderPid, WriterPid, ReaderPid, ConnName,
                      Protocol, User, VHost, Capabilities, Collector,
                      LimiterPid]},
           intrinsic, ?FAIR_WAIT, worker, [rabbit_channel]}),
    {ok, AState} = rabbit_command_assembler:init(Protocol),
    {ok, SupPid, {ChannelPid, AState}};
start_link({direct, Channel, ClientChannelPid, ConnPid, ConnName, Protocol,
            User, VHost, Capabilities, Collector, AmqpParams}) ->
    {ok, SupPid} = supervisor2:start_link(
                     ?MODULE, {direct, {ConnName, Channel}}),
    [LimiterPid] = supervisor2:find_child(SupPid, limiter),
    {ok, ChannelPid} =
        supervisor2:start_child(
          SupPid,
          {channel, {rabbit_channel, start_link,
                     [Channel, ClientChannelPid, ClientChannelPid, ConnPid,
                      ConnName, Protocol, User, VHost, Capabilities, Collector,
                      LimiterPid, AmqpParams]},
           intrinsic, ?FAIR_WAIT, worker, [rabbit_channel]}),
    {ok, SupPid, {ChannelPid, none}}.

%%----------------------------------------------------------------------------

%% one_for_all with 0 allowed restarts: if any of limiter/writer/channel
%% dies, the whole tree is torn down (the parent decides what next).
init(Type) ->
    ?LG_PROCESS_TYPE(channel_sup),
    {ok, {{one_for_all, 0, 1}, child_specs(Type)}}.

%% Static child specs: a writer for network connections (prepended to
%% the direct specs), and a limiter in both cases.
child_specs({tcp, Sock, Channel, FrameMax, ReaderPid, Protocol, Identity}) ->
    [{writer, {rabbit_writer, start_link,
               [Sock, Channel, FrameMax, Protocol, ReaderPid, Identity, true]},
      intrinsic, ?FAIR_WAIT, worker, [rabbit_writer]}
     | child_specs({direct, Identity})];
child_specs({direct, Identity}) ->
    [{limiter, {rabbit_limiter, start_link, [Identity]},
      transient, ?FAIR_WAIT, worker, [rabbit_limiter]}].
diff --git a/deps/rabbit/src/rabbit_channel_sup_sup.erl b/deps/rabbit/src/rabbit_channel_sup_sup.erl new file mode 100644 index 0000000000..72cf38d6c8 --- /dev/null +++ b/deps/rabbit/src/rabbit_channel_sup_sup.erl @@ -0,0 +1,42 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_channel_sup_sup). + +%% Supervisor for AMQP 0-9-1 channels. Every AMQP 0-9-1 connection has +%% one of these. +%% +%% See also rabbit_channel_sup, rabbit_connection_helper_sup, rabbit_reader. + +-behaviour(supervisor2). + +-export([start_link/0, start_channel/2]). + +-export([init/1]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-spec start_link() -> rabbit_types:ok_pid_or_error(). + +start_link() -> + supervisor2:start_link(?MODULE, []). + +-spec start_channel(pid(), rabbit_channel_sup:start_link_args()) -> + {'ok', pid(), {pid(), any()}}. + +start_channel(Pid, Args) -> + supervisor2:start_child(Pid, [Args]). + +%%---------------------------------------------------------------------------- + +init([]) -> + ?LG_PROCESS_TYPE(channel_sup_sup), + {ok, {{simple_one_for_one, 0, 1}, + [{channel_sup, {rabbit_channel_sup, start_link, []}, + temporary, infinity, supervisor, [rabbit_channel_sup]}]}}. diff --git a/deps/rabbit/src/rabbit_channel_tracking.erl b/deps/rabbit/src/rabbit_channel_tracking.erl new file mode 100644 index 0000000000..42ab664a06 --- /dev/null +++ b/deps/rabbit/src/rabbit_channel_tracking.erl @@ -0,0 +1,291 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2020 VMware, Inc. 
or its affiliates. All rights reserved. +%% + +-module(rabbit_channel_tracking). + +%% Abstracts away how tracked connection records are stored +%% and queried. +%% +%% See also: +%% +%% * rabbit_channel_tracking_handler +%% * rabbit_reader +%% * rabbit_event +-behaviour(rabbit_tracking). + +-export([boot/0, + update_tracked/1, + handle_cast/1, + register_tracked/1, + unregister_tracked/1, + count_tracked_items_in/1, + clear_tracking_tables/0, + shutdown_tracked_items/2]). + +-export([list/0, list_of_user/1, list_on_node/1, + tracked_channel_table_name_for/1, + tracked_channel_per_user_table_name_for/1, + get_all_tracked_channel_table_names_for_node/1, + delete_tracked_channel_user_entry/1]). + +-include_lib("rabbit.hrl"). + +-import(rabbit_misc, [pget/2]). + +%% +%% API +%% + +%% Sets up and resets channel tracking tables for this node. +-spec boot() -> ok. + +boot() -> + ensure_tracked_channels_table_for_this_node(), + rabbit_log:info("Setting up a table for channel tracking on this node: ~p", + [tracked_channel_table_name_for(node())]), + ensure_per_user_tracked_channels_table_for_node(), + rabbit_log:info("Setting up a table for channel tracking on this node: ~p", + [tracked_channel_per_user_table_name_for(node())]), + clear_tracking_tables(), + ok. + +-spec update_tracked(term()) -> ok. + +update_tracked(Event) -> + spawn(?MODULE, handle_cast, [Event]), + ok. + +%% Asynchronously handle update events +-spec handle_cast(term()) -> ok. 
%% Handle one tracking event (dispatched asynchronously from
%% update_tracked/1). Channel-creation events are only acted on when
%% the channel's pid lives on this node; remote events are ignored —
%% every node tracks its own channels.
handle_cast({channel_created, Details}) ->
    ThisNode = node(),
    case node(pget(pid, Details)) of
        ThisNode ->
            TrackedCh = #tracked_channel{id = TrackedChId} =
                tracked_channel_from_channel_created_event(Details),
            try
                register_tracked(TrackedCh)
            catch
                error:{no_exists, _} ->
                    %% the mnesia tracking table may not exist yet
                    %% (e.g. during boot)
                    Msg = "Could not register channel ~p for tracking, "
                          "its table is not ready yet or the channel terminated prematurely",
                    rabbit_log_connection:warning(Msg, [TrackedChId]),
                    ok;
                error:Err ->
                    Msg = "Could not register channel ~p for tracking: ~p",
                    rabbit_log_connection:warning(Msg, [TrackedChId, Err]),
                    ok
            end;
        _OtherNode ->
            %% ignore
            ok
    end;
handle_cast({channel_closed, Details}) ->
    %% channel has terminated, unregister iff local
    case get_tracked_channel_by_pid(pget(pid, Details)) of
        [#tracked_channel{name = Name}] ->
            unregister_tracked(rabbit_tracking:id(node(), Name));
        _Other -> ok
    end;
handle_cast({connection_closed, ConnDetails}) ->
    ThisNode = node(),
    ConnPid = pget(pid, ConnDetails),

    case pget(node, ConnDetails) of
        ThisNode ->
            TrackedChs = get_tracked_channels_by_connection_pid(ConnPid),
            rabbit_log_connection:info(
              "Closing all channels from connection '~p' "
              "because it has been closed", [pget(name, ConnDetails)]),
            %% Shutting down channels will take care of unregistering the
            %% corresponding tracking.
            shutdown_tracked_items(TrackedChs, undefined),
            ok;
        _DifferentNode ->
            ok
    end;
handle_cast({user_deleted, Details}) ->
    Username = pget(name, Details),
    %% Schedule user entry deletion, allowing time for connections to close
    _ = timer:apply_after(?TRACKING_EXECUTION_TIMEOUT, ?MODULE,
                          delete_tracked_channel_user_entry, [Username]),
    ok;
handle_cast({node_deleted, Details}) ->
    Node = pget(node, Details),
    rabbit_log_connection:info(
      "Node '~s' was removed from the cluster, deleting"
      " its channel tracking tables...", [Node]),
    delete_tracked_channels_table_for_node(Node),
    delete_per_user_tracked_channels_table_for_node(Node).

-spec register_tracked(rabbit_types:tracked_channel()) -> ok.
-dialyzer([{nowarn_function, [register_tracked/1]}, race_conditions]).

%% Insert the tracking row for a channel and bump the owning user's
%% channel counter — unless the row already exists, in which case this
%% is a no-op. Uses dirty mnesia operations; the read-then-write upsert
%% is intentionally non-transactional (hence the race_conditions
%% suppression above).
register_tracked(TrackedCh =
                     #tracked_channel{node = Node, name = Name, username = Username}) ->
    ChId = rabbit_tracking:id(Node, Name),
    TableName = tracked_channel_table_name_for(Node),
    PerUserChTableName = tracked_channel_per_user_table_name_for(Node),
    %% upsert
    case mnesia:dirty_read(TableName, ChId) of
        [] ->
            mnesia:dirty_write(TableName, TrackedCh),
            mnesia:dirty_update_counter(PerUserChTableName, Username, 1);
        [#tracked_channel{}] ->
            ok
    end,
    ok.

-spec unregister_tracked(rabbit_types:tracked_channel_id()) -> ok.

%% Remove a channel's tracking row and decrement its user's counter.
%% The guard restricts this to ids tracked on the local node.
unregister_tracked(ChId = {Node, _Name}) when Node =:= node() ->
    TableName = tracked_channel_table_name_for(Node),
    PerUserChannelTableName = tracked_channel_per_user_table_name_for(Node),
    case mnesia:dirty_read(TableName, ChId) of
        [] -> ok;
        [#tracked_channel{username = Username}] ->
            mnesia:dirty_update_counter(PerUserChannelTableName, Username, -1),
            mnesia:dirty_delete(TableName, ChId)
    end.

-spec count_tracked_items_in({atom(), rabbit_types:username()}) -> non_neg_integer().
%% Number of channels currently tracked for Username, read from this
%% node's per-user counter table.
count_tracked_items_in({user, Username}) ->
    rabbit_tracking:count_tracked_items(
      fun tracked_channel_per_user_table_name_for/1,
      #tracked_channel_per_user.channel_count, Username,
      %% Fix: this counter is per *user*; the description previously
      %% read "channels in vhost" (copy-paste from the vhost counter)
      %% which produced a misleading error message.
      "channels of user").

-spec clear_tracking_tables() -> ok.

%% Empty (but keep) the tracking tables owned by this node.
clear_tracking_tables() ->
    clear_tracked_channel_tables_for_this_node(),
    ok.

-spec shutdown_tracked_items(list(), term()) -> ok.

%% rabbit_tracking callback: shutting down tracked channels closes them.
shutdown_tracked_items(TrackedItems, _Args) ->
    close_channels(TrackedItems).

%% helper functions
-spec list() -> [rabbit_types:tracked_channel()].

%% All tracked channels across every running cluster node.
%% (Replaces a foldl that appended with `Acc ++ ...` — an accidental
%% quadratic build-up — with lists:flatmap/2; result order unchanged.)
list() ->
    lists:flatmap(
      fun (Node) ->
              Tab = tracked_channel_table_name_for(Node),
              mnesia:dirty_match_object(Tab, #tracked_channel{_ = '_'})
      end, rabbit_nodes:all_running()).

-spec list_of_user(rabbit_types:username()) -> [rabbit_types:tracked_channel()].

%% All tracked channels belonging to Username, on any node.
list_of_user(Username) ->
    rabbit_tracking:match_tracked_items(
      fun tracked_channel_table_name_for/1,
      #tracked_channel{username = Username, _ = '_'}).

-spec list_on_node(node()) -> [rabbit_types:tracked_channel()].

%% All channels tracked on Node; a missing table (node down or not yet
%% initialised) yields the empty list rather than an error.
list_on_node(Node) ->
    try mnesia:dirty_match_object(
          tracked_channel_table_name_for(Node),
          #tracked_channel{_ = '_'})
    catch exit:{aborted, {no_exists, _}} -> []
    end.

-spec tracked_channel_table_name_for(node()) -> atom().

%% Per-node table name. list_to_atom/1 is acceptable here because the
%% input is a cluster node name — a small, operator-controlled set.
tracked_channel_table_name_for(Node) ->
    list_to_atom(rabbit_misc:format("tracked_channel_on_node_~s", [Node])).

-spec tracked_channel_per_user_table_name_for(node()) -> atom().

%% Per-node name of the per-user channel counter table.
tracked_channel_per_user_table_name_for(Node) ->
    list_to_atom(rabbit_misc:format(
                   "tracked_channel_table_per_user_on_node_~s", [Node])).

%% internal
ensure_tracked_channels_table_for_this_node() ->
    ensure_tracked_channels_table_for_node(node()).

ensure_per_user_tracked_channels_table_for_node() ->
    ensure_per_user_tracked_channels_table_for_node(node()).
%% Create tables
%%
%% Each node owns a pair of mnesia tables: one row per tracked channel,
%% plus a per-user counter table. Creation is idempotent — an existing
%% table counts as success — and any other failure is logged and
%% swallowed so that boot can proceed.
ensure_tracked_channels_table_for_node(Node) ->
    TableName = tracked_channel_table_name_for(Node),
    case mnesia:create_table(TableName, [{record_name, tracked_channel},
                                         {attributes, record_info(fields, tracked_channel)}]) of
        {atomic, ok} -> ok;
        {aborted, {already_exists, _}} -> ok;
        {aborted, Error} ->
            rabbit_log:error("Failed to create a tracked channel table for node ~p: ~p", [Node, Error]),
            ok
    end.

%% Same as above, for the per-user channel counter table.
ensure_per_user_tracked_channels_table_for_node(Node) ->
    TableName = tracked_channel_per_user_table_name_for(Node),
    case mnesia:create_table(TableName, [{record_name, tracked_channel_per_user},
                                         {attributes, record_info(fields, tracked_channel_per_user)}]) of
        {atomic, ok} -> ok;
        {aborted, {already_exists, _}} -> ok;
        {aborted, Error} ->
            rabbit_log:error("Failed to create a per-user tracked channel table for node ~p: ~p", [Node, Error]),
            ok
    end.

%% Empty (but keep) both tracking tables owned by this node.
clear_tracked_channel_tables_for_this_node() ->
    [rabbit_tracking:clear_tracking_table(T)
     || T <- get_all_tracked_channel_table_names_for_node(node())].

%% Drop the tracked-channel table that belonged to Node (used when a
%% node is removed from the cluster).
delete_tracked_channels_table_for_node(Node) ->
    TableName = tracked_channel_table_name_for(Node),
    rabbit_tracking:delete_tracking_table(TableName, Node, "tracked channel").

%% Drop Node's per-user channel counter table.
delete_per_user_tracked_channels_table_for_node(Node) ->
    TableName = tracked_channel_per_user_table_name_for(Node),
    rabbit_tracking:delete_tracking_table(TableName, Node,
                                          "per-user tracked channels").

%% Both table names owned by Node.
get_all_tracked_channel_table_names_for_node(Node) ->
    [tracked_channel_table_name_for(Node),
     tracked_channel_per_user_table_name_for(Node)].

%% All channels tracked as belonging to connection ConnPid.
get_tracked_channels_by_connection_pid(ConnPid) ->
    rabbit_tracking:match_tracked_items(
      fun tracked_channel_table_name_for/1,
      #tracked_channel{connection = ConnPid, _ = '_'}).

%% The tracked-channel row(s) whose channel process is ChPid.
get_tracked_channel_by_pid(ChPid) ->
    rabbit_tracking:match_tracked_items(
      fun tracked_channel_table_name_for/1,
      #tracked_channel{pid = ChPid, _ = '_'}).
+ +delete_tracked_channel_user_entry(Username) -> + rabbit_tracking:delete_tracked_entry( + {rabbit_auth_backend_internal, exists, [Username]}, + fun tracked_channel_per_user_table_name_for/1, + Username). + +tracked_channel_from_channel_created_event(ChannelDetails) -> + Node = node(ChPid = pget(pid, ChannelDetails)), + Name = pget(name, ChannelDetails), + #tracked_channel{ + id = rabbit_tracking:id(Node, Name), + name = Name, + node = Node, + vhost = pget(vhost, ChannelDetails), + pid = ChPid, + connection = pget(connection, ChannelDetails), + username = pget(user, ChannelDetails)}. + +close_channels(TrackedChannels = [#tracked_channel{}|_]) -> + [rabbit_channel:shutdown(ChPid) + || #tracked_channel{pid = ChPid} <- TrackedChannels], + ok; +close_channels(_TrackedChannels = []) -> ok. diff --git a/deps/rabbit/src/rabbit_channel_tracking_handler.erl b/deps/rabbit/src/rabbit_channel_tracking_handler.erl new file mode 100644 index 0000000000..0cbe02f39e --- /dev/null +++ b/deps/rabbit/src/rabbit_channel_tracking_handler.erl @@ -0,0 +1,71 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_channel_tracking_handler). + +%% This module keeps track of channel creation and termination events +%% on its local node. Similar to the rabbit_connection_tracking_handler, +%% the primary goal here is to decouple channel tracking from rabbit_reader +%% and isolate channel tracking to its own process to avoid blocking connection +%% creation events. Additionaly, creation events are also non-blocking in that +%% they spawn a short-live process for updating the tracking tables in realtime. +%% +%% Events from other nodes are ignored. + +-behaviour(gen_event). 
+ +-export([init/1, handle_call/2, handle_event/2, handle_info/2, + terminate/2, code_change/3]). + +-include_lib("rabbit.hrl"). + +-rabbit_boot_step({?MODULE, + [{description, "channel tracking event handler"}, + {mfa, {gen_event, add_handler, + [rabbit_event, ?MODULE, []]}}, + {cleanup, {gen_event, delete_handler, + [rabbit_event, ?MODULE, []]}}, + {requires, [channel_tracking]}, + {enables, recovery}]}). + +%% +%% API +%% + +init([]) -> + {ok, []}. + +handle_event(#event{type = channel_created, props = Details}, State) -> + ok = rabbit_channel_tracking:update_tracked({channel_created, Details}), + {ok, State}; +handle_event(#event{type = channel_closed, props = Details}, State) -> + ok = rabbit_channel_tracking:update_tracked({channel_closed, Details}), + {ok, State}; +handle_event(#event{type = connection_closed, props = Details}, State) -> + ok = rabbit_channel_tracking:update_tracked({connection_closed, Details}), + {ok, State}; +handle_event(#event{type = user_deleted, props = Details}, State) -> + ok = rabbit_channel_tracking:update_tracked({user_deleted, Details}), + {ok, State}; +%% A node had been deleted from the cluster. +handle_event(#event{type = node_deleted, props = Details}, State) -> + ok = rabbit_channel_tracking:update_tracked({node_deleted, Details}), + {ok, State}; +handle_event(_Event, State) -> + {ok, State}. + +handle_call(_Request, State) -> + {ok, not_understood, State}. + +handle_info(_Info, State) -> + {ok, State}. + +terminate(_Arg, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/deps/rabbit/src/rabbit_classic_queue.erl b/deps/rabbit/src/rabbit_classic_queue.erl new file mode 100644 index 0000000000..e53c0aecc2 --- /dev/null +++ b/deps/rabbit/src/rabbit_classic_queue.erl @@ -0,0 +1,527 @@ +-module(rabbit_classic_queue). +-behaviour(rabbit_queue_type). + +-include("amqqueue.hrl"). +-include_lib("rabbit_common/include/rabbit.hrl"). 
+
+-record(msg_status, {pending :: [pid()],
+                     confirmed = [] :: [pid()]}).
+
+-record(?MODULE, {pid :: undefined | pid(), %% the current master pid
+                  qref :: term(), %% TODO
+                  unconfirmed = #{} ::
+                  #{non_neg_integer() => #msg_status{}}}).
+-define(STATE, ?MODULE).
+
+-opaque state() :: #?STATE{}.
+
+-export_type([state/0]).
+
+-export([
+         is_enabled/0,
+         declare/2,
+         delete/4,
+         is_recoverable/1,
+         recover/2,
+         purge/1,
+         policy_changed/1,
+         stat/1,
+         init/1,
+         close/1,
+         update/2,
+         consume/3,
+         cancel/5,
+         handle_event/2,
+         deliver/2,
+         settle/4,
+         credit/4,
+         dequeue/4,
+         info/2,
+         state_info/1,
+         capabilities/0
+         ]).
+
+-export([delete_crashed/1,
+         delete_crashed/2,
+         delete_crashed_internal/2]).
+
+-export([confirm_to_sender/3,
+         send_rejection/3,
+         send_queue_event/3]).
+
+%% rabbit_queue_type callback: classic queues are always available.
+is_enabled() -> true.
+
+%% Declare (create) a classic queue, starting its process on a selected node.
+%% Node selection: {ignore_location, Node} forces Node; otherwise the
+%% queue-master-location machinery is consulted, falling back to the requested
+%% Node. The mirroring logic then gets the final say on the initial node.
+declare(Q, Node) when ?amqqueue_is_classic(Q) ->
+    QName = amqqueue:get_name(Q),
+    VHost = amqqueue:get_vhost(Q),
+    Node1 = case Node of
+                {ignore_location, Node0} ->
+                    Node0;
+                _ ->
+                    case rabbit_queue_master_location_misc:get_location(Q) of
+                        {ok, Node0} -> Node0;
+                        _ -> Node
+                    end
+            end,
+    %% FIX: the original wrote `Node1 = rabbit_mirror_queue_misc:...` here.
+    %% Node1 is already bound, so that line was an equality assertion, not a
+    %% rebind: whenever initial_queue_node/2 selected a node other than Node1
+    %% the declare crashed with badmatch instead of using the selected node.
+    %% Bind a fresh variable and use it for the rest of the function.
+    Node2 = rabbit_mirror_queue_misc:initial_queue_node(Q, Node1),
+    case rabbit_vhost_sup_sup:get_vhost_sup(VHost, Node2) of
+        {ok, _} ->
+            gen_server2:call(
+              rabbit_amqqueue_sup_sup:start_queue_process(Node2, Q, declare),
+              {init, new}, infinity);
+        {error, Error} ->
+            {protocol_error, internal_error, "Cannot declare a queue '~s' on node '~s': ~255p",
+             [rabbit_misc:rs(QName), Node2, Error]}
+    end.
+
+%% Delete a classic queue.
+%% Waits until either a promoted master is available (normal delete via the
+%% queue process) or all replicas are stopped. When stopped, emptiness cannot
+%% be verified, so an IfEmpty delete is refused; otherwise the crashed queue
+%% is force-deleted. Returns {ok, MsgCount} or {error, not_empty}.
+delete(Q, IfUnused, IfEmpty, ActingUser) when ?amqqueue_is_classic(Q) ->
+    case wait_for_promoted_or_stopped(Q) of
+        {promoted, Q1} ->
+            QPid = amqqueue:get_pid(Q1),
+            delegate:invoke(QPid, {gen_server2, call,
+                                   [{delete, IfUnused, IfEmpty, ActingUser},
+                                    infinity]});
+        {stopped, Q1} ->
+            #resource{name = Name, virtual_host = Vhost} = amqqueue:get_name(Q1),
+            case IfEmpty of
+                true ->
+                    rabbit_log:error("Queue ~s in vhost ~s has its master node down and "
+                                     "no mirrors available or eligible for promotion. "
+                                     "The queue may be non-empty. "
+                                     "Refusing to force-delete.",
+                                     [Name, Vhost]),
+                    {error, not_empty};
+                false ->
+                    %% FIX: message previously read "has its master node is
+                    %% down and"; now grammatical and consistent with the
+                    %% error branch above.
+                    rabbit_log:warning("Queue ~s in vhost ~s has its master node down and "
+                                       "no mirrors available or eligible for promotion. "
+                                       "Forcing queue deletion.",
+                                       [Name, Vhost]),
+                    delete_crashed_internal(Q1, ActingUser),
+                    {ok, 0}
+            end;
+        {error, not_found} ->
+            %% Assume the queue was deleted
+            {ok, 0}
+    end.
+
+%% A classic queue is recoverable on this node when this node hosts its
+%% master and either its rabbit_queue record is gone or its process is no
+%% longer alive.
+is_recoverable(Q) when ?is_amqqueue(Q) ->
+    Node = node(),
+    Node =:= node(amqqueue:get_pid(Q)) andalso
+    %% Terminations on node down will not remove the rabbit_queue
+    %% record if it is a mirrored queue (such info is now obtained from
+    %% the policy). Thus, we must check if the local pid is alive
+    %% - if the record is present - in order to restart.
+    (mnesia:read(rabbit_queue, amqqueue:get_name(Q), read) =:= []
+     orelse not rabbit_mnesia:is_process_alive(amqqueue:get_pid(Q))).
+
+%% Recover the durable classic queues of VHost.
+%% Returns {RecoveredQueues, FailedQueues}; throws if the per-vhost queue
+%% supervisor cannot be started.
+recover(VHost, Queues) ->
+    {ok, BQ} = application:get_env(rabbit, backing_queue_module),
+    %% We rely on BQ:start/1 returning the recovery terms in the same
+    %% order as the supplied queue names, so that we can zip them together
+    %% for further processing in recover_durable_queues.
+    {ok, OrderedRecoveryTerms} =
+        BQ:start(VHost, [amqqueue:get_name(Q) || Q <- Queues]),
+    case rabbit_amqqueue_sup_sup:start_for_vhost(VHost) of
+        {ok, _} ->
+            RecoveredQs = recover_durable_queues(lists:zip(Queues,
+                                                           OrderedRecoveryTerms)),
+            RecoveredNames = [amqqueue:get_name(Q) || Q <- RecoveredQs],
+            FailedQueues = [Q || Q <- Queues,
+                                 not lists:member(amqqueue:get_name(Q), RecoveredNames)],
+            {RecoveredQs, FailedQueues};
+        {error, Reason} ->
+            %% FIX: Reason is an arbitrary term; formatting it with ~s raises
+            %% badarg for non-chardata reasons, so use ~p.
+            rabbit_log:error("Failed to start queue supervisor for vhost '~s': ~p", [VHost, Reason]),
+            throw({error, Reason})
+    end.
+
+-spec policy_changed(amqqueue:amqqueue()) -> ok.
+%% Asynchronously notify the queue process that its effective policy changed.
+policy_changed(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    gen_server2:cast(QPid, policy_changed).
+
+%% Synchronously fetch queue statistics from the queue process.
+stat(Q) ->
+    delegate:invoke(amqqueue:get_pid(Q),
+                    {gen_server2, call, [stat, infinity]}).
+
+-spec init(amqqueue:amqqueue()) -> state().
+%% Build the per-channel client-side state for a classic queue.
+init(Q) when ?amqqueue_is_classic(Q) ->
+    QName = amqqueue:get_name(Q),
+    #?STATE{pid = amqqueue:get_pid(Q),
+            qref = QName}.
+
+-spec close(state()) -> ok.
+%% Classic queues keep no channel-side resources to release.
+close(_State) ->
+    ok.
+
+-spec update(amqqueue:amqqueue(), state()) -> state().
+%% Refresh the cached master pid if it changed (e.g. after a promotion).
+update(Q, #?STATE{pid = Pid} = State) when ?amqqueue_is_classic(Q) ->
+    case amqqueue:get_pid(Q) of
+        Pid ->
+            State;
+        NewPid ->
+            %% master pid is different, update
+            State#?STATE{pid = NewPid}
+    end.
+
+%% Register a consumer on a classic queue via its master process.
+%% Spec is a map produced by the channel; on success the returned action list
+%% asks the host process to monitor the queue pid.
+consume(Q, Spec, State) when ?amqqueue_is_classic(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    QRef = amqqueue:get_name(Q),
+    #{no_ack := NoAck,
+      channel_pid := ChPid,
+      limiter_pid := LimiterPid,
+      limiter_active := LimiterActive,
+      prefetch_count := ConsumerPrefetchCount,
+      consumer_tag := ConsumerTag,
+      exclusive_consume := ExclusiveConsume,
+      args := Args,
+      ok_msg := OkMsg,
+      acting_user := ActingUser} = Spec,
+    case delegate:invoke(QPid,
+                         {gen_server2, call,
+                          [{basic_consume, NoAck, ChPid, LimiterPid,
+                            LimiterActive, ConsumerPrefetchCount, ConsumerTag,
+                            ExclusiveConsume, Args, OkMsg, ActingUser},
+                           infinity]}) of
+        ok ->
+            %% ask the host process to monitor this pid
+            %% TODO: track pids as they change
+            {ok, State#?STATE{pid = QPid}, [{monitor, QPid, QRef}]};
+        Err ->
+            Err
+    end.
+
+%% Cancel a consumer by tag; synchronous call to the queue process.
+cancel(Q, ConsumerTag, OkMsg, ActingUser, State) ->
+    QPid = amqqueue:get_pid(Q),
+    case delegate:invoke(QPid, {gen_server2, call,
+                                [{basic_cancel, self(), ConsumerTag,
+                                  OkMsg, ActingUser}, infinity]}) of
+        ok ->
+            {ok, State};
+        Err -> Err
+    end.
+
+-spec settle(rabbit_queue_type:settle_op(), rabbit_types:ctag(),
+             [non_neg_integer()], state()) ->
+    {state(), rabbit_queue_type:actions()}.
+%% Settle delivered messages: `complete' acks them, any other op becomes a
+%% reject (with requeue iff the op is `requeue'). Fire-and-forget casts.
+settle(complete, _CTag, MsgIds, State) ->
+    Pid = State#?STATE.pid,
+    delegate:invoke_no_result(Pid,
+                              {gen_server2, cast, [{ack, MsgIds, self()}]}),
+    {State, []};
+settle(Op, _CTag, MsgIds, State) ->
+    ChPid = self(),
+    ok = delegate:invoke_no_result(State#?STATE.pid,
+                                   {gen_server2, cast,
+                                    [{reject, Op == requeue, MsgIds, ChPid}]}),
+    {State, []}.
+
+%% Grant consumer credit (flow control) to the queue process; asynchronous.
+credit(CTag, Credit, Drain, State) ->
+    ChPid = self(),
+    delegate:invoke_no_result(State#?STATE.pid,
+                              {gen_server2, cast,
+                               [{credit, ChPid, CTag, Credit, Drain}]}),
+    {State, []}.
+ +handle_event({confirm, MsgSeqNos, Pid}, #?STATE{qref = QRef, + unconfirmed = U0} = State) -> + %% confirms should never result in rejections + {Unconfirmed, ConfirmedSeqNos, []} = + settle_seq_nos(MsgSeqNos, Pid, U0, confirm), + Actions = [{settled, QRef, ConfirmedSeqNos}], + %% handle confirm event from queues + %% in this case the classic queue should track each individual publish and + %% the processes involved and only emit a settle action once they have all + %% been received (or DOWN has been received). + %% Hence this part of the confirm logic is queue specific. + {ok, State#?STATE{unconfirmed = Unconfirmed}, Actions}; +handle_event({reject_publish, SeqNo, _QPid}, + #?STATE{qref = QRef, + unconfirmed = U0} = State) -> + %% It does not matter which queue rejected the message, + %% if any queue did, it should not be confirmed. + {U, Rejected} = reject_seq_no(SeqNo, U0), + Actions = [{rejected, QRef, Rejected}], + {ok, State#?STATE{unconfirmed = U}, Actions}; +handle_event({down, Pid, Info}, #?STATE{qref = QRef, + pid = MasterPid, + unconfirmed = U0} = State0) -> + Actions0 = case Pid =:= MasterPid of + true -> + [{queue_down, QRef}]; + false -> + [] + end, + case rabbit_misc:is_abnormal_exit(Info) of + false when Info =:= normal andalso Pid == MasterPid -> + %% queue was deleted and masterpid is down + eol; + false -> + %% this assumes the mirror isn't part of the active set + MsgSeqNos = maps:keys( + maps:filter(fun (_, #msg_status{pending = Pids}) -> + lists:member(Pid, Pids) + end, U0)), + {Unconfirmed, Settled, Rejected} = + settle_seq_nos(MsgSeqNos, Pid, U0, down), + Actions = settlement_action( + settled, QRef, Settled, + settlement_action(rejected, QRef, Rejected, Actions0)), + {ok, State0#?STATE{unconfirmed = Unconfirmed}, Actions}; + true -> + %% any abnormal exit should be considered a full reject of the + %% oustanding message ids - If the message didn't get to all + %% mirrors we have to assume it will never get there + MsgIds = maps:fold( + fun 
(SeqNo, Status, Acc) -> + case lists:member(Pid, Status#msg_status.pending) of + true -> + [SeqNo | Acc]; + false -> + Acc + end + end, [], U0), + U = maps:without(MsgIds, U0), + {ok, State0#?STATE{unconfirmed = U}, + [{rejected, QRef, MsgIds} | Actions0]} + end; +handle_event({send_credit_reply, _} = Action, State) -> + {ok, State, [Action]}. + +settlement_action(_Type, _QRef, [], Acc) -> + Acc; +settlement_action(Type, QRef, MsgSeqs, Acc) -> + [{Type, QRef, MsgSeqs} | Acc]. + +-spec deliver([{amqqueue:amqqueue(), state()}], + Delivery :: term()) -> + {[{amqqueue:amqqueue(), state()}], rabbit_queue_type:actions()}. +deliver(Qs0, #delivery{flow = Flow, + msg_seq_no = MsgNo, + message = #basic_message{exchange_name = _Ex}, + confirm = _Confirm} = Delivery) -> + %% TODO: record master and slaves for confirm processing + {MPids, SPids, Qs, Actions} = qpids(Qs0, MsgNo), + QPids = MPids ++ SPids, + case Flow of + %% Here we are tracking messages sent by the rabbit_channel + %% process. We are accessing the rabbit_channel process + %% dictionary. + flow -> [credit_flow:send(QPid) || QPid <- QPids], + [credit_flow:send(QPid) || QPid <- SPids]; + noflow -> ok + end, + MMsg = {deliver, Delivery, false}, + SMsg = {deliver, Delivery, true}, + delegate:invoke_no_result(MPids, {gen_server2, cast, [MMsg]}), + delegate:invoke_no_result(SPids, {gen_server2, cast, [SMsg]}), + {Qs, Actions}. + + +-spec dequeue(NoAck :: boolean(), LimiterPid :: pid(), + rabbit_types:ctag(), state()) -> + {ok, Count :: non_neg_integer(), rabbit_amqqueue:qmsg(), state()} | + {empty, state()}. +dequeue(NoAck, LimiterPid, _CTag, State) -> + QPid = State#?STATE.pid, + case delegate:invoke(QPid, {gen_server2, call, + [{basic_get, self(), NoAck, LimiterPid}, infinity]}) of + empty -> + {empty, State}; + {ok, Count, Msg} -> + {ok, Count, Msg, State} + end. + +-spec state_info(state()) -> #{atom() := term()}. +state_info(_State) -> + #{}. 
+ +%% general queue info +-spec info(amqqueue:amqqueue(), all_keys | rabbit_types:info_keys()) -> + rabbit_types:infos(). +info(Q, Items) -> + QPid = amqqueue:get_pid(Q), + Req = case Items of + all_keys -> info; + _ -> {info, Items} + end, + case delegate:invoke(QPid, {gen_server2, call, [Req, infinity]}) of + {ok, Result} -> + Result; + {error, _Err} -> + []; + Result when is_list(Result) -> + %% this is a backwards compatibility clause + Result + end. + +-spec purge(amqqueue:amqqueue()) -> + {ok, non_neg_integer()}. +purge(Q) when ?is_amqqueue(Q) -> + QPid = amqqueue:get_pid(Q), + delegate:invoke(QPid, {gen_server2, call, [purge, infinity]}). + +qpids(Qs, MsgNo) -> + lists:foldl( + fun ({Q, S0}, {MPidAcc, SPidAcc, Qs0, Actions0}) -> + QPid = amqqueue:get_pid(Q), + SPids = amqqueue:get_slave_pids(Q), + QRef = amqqueue:get_name(Q), + Actions = [{monitor, QPid, QRef} + | [{monitor, P, QRef} || P <- SPids]] ++ Actions0, + %% confirm record only if MsgNo isn't undefined + S = case S0 of + #?STATE{unconfirmed = U0} -> + Rec = [QPid | SPids], + U = case MsgNo of + undefined -> + U0; + _ -> + U0#{MsgNo => #msg_status{pending = Rec}} + end, + S0#?STATE{pid = QPid, + unconfirmed = U}; + stateless -> + S0 + end, + {[QPid | MPidAcc], SPidAcc ++ SPids, + [{Q, S} | Qs0], Actions} + end, {[], [], [], []}, Qs). + +%% internal-ish +-spec wait_for_promoted_or_stopped(amqqueue:amqqueue()) -> + {promoted, amqqueue:amqqueue()} | + {stopped, amqqueue:amqqueue()} | + {error, not_found}. +wait_for_promoted_or_stopped(Q0) -> + QName = amqqueue:get_name(Q0), + case rabbit_amqqueue:lookup(QName) of + {ok, Q} -> + QPid = amqqueue:get_pid(Q), + SPids = amqqueue:get_slave_pids(Q), + case rabbit_mnesia:is_process_alive(QPid) of + true -> {promoted, Q}; + false -> + case lists:any(fun(Pid) -> + rabbit_mnesia:is_process_alive(Pid) + end, SPids) of + %% There is a live slave. May be promoted + true -> + timer:sleep(100), + wait_for_promoted_or_stopped(Q); + %% All slave pids are stopped. 
+ %% No process left for the queue + false -> {stopped, Q} + end + end; + {error, not_found} -> + {error, not_found} + end. + +-spec delete_crashed(amqqueue:amqqueue()) -> ok. +delete_crashed(Q) -> + delete_crashed(Q, ?INTERNAL_USER). + +delete_crashed(Q, ActingUser) -> + ok = rpc:call(amqqueue:qnode(Q), ?MODULE, delete_crashed_internal, + [Q, ActingUser]). + +delete_crashed_internal(Q, ActingUser) -> + QName = amqqueue:get_name(Q), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), + BQ:delete_crashed(Q), + ok = rabbit_amqqueue:internal_delete(QName, ActingUser). + +recover_durable_queues(QueuesAndRecoveryTerms) -> + {Results, Failures} = + gen_server2:mcall( + [{rabbit_amqqueue_sup_sup:start_queue_process(node(), Q, recovery), + {init, {self(), Terms}}} || {Q, Terms} <- QueuesAndRecoveryTerms]), + [rabbit_log:error("Queue ~p failed to initialise: ~p~n", + [Pid, Error]) || {Pid, Error} <- Failures], + [Q || {_, {new, Q}} <- Results]. + +capabilities() -> + #{policies => [<<"expires">>, <<"message-ttl">>, <<"dead-letter-exchange">>, + <<"dead-letter-routing-key">>, <<"max-length">>, + <<"max-length-bytes">>, <<"max-in-memory-length">>, <<"max-in-memory-bytes">>, + <<"max-priority">>, <<"overflow">>, <<"queue-mode">>, + <<"single-active-consumer">>, <<"delivery-limit">>, + <<"ha-mode">>, <<"ha-params">>, <<"ha-sync-mode">>, + <<"ha-promote-on-shutdown">>, <<"ha-promote-on-failure">>, + <<"queue-master-locator">>], + queue_arguments => [<<"x-expires">>, <<"x-message-ttl">>, <<"x-dead-letter-exchange">>, + <<"x-dead-letter-routing-key">>, <<"x-max-length">>, + <<"x-max-length-bytes">>, <<"x-max-in-memory-length">>, + <<"x-max-in-memory-bytes">>, <<"x-max-priority">>, + <<"x-overflow">>, <<"x-queue-mode">>, <<"x-single-active-consumer">>, + <<"x-queue-type">>, <<"x-queue-master-locator">>], + consumer_arguments => [<<"x-cancel-on-ha-failover">>, + <<"x-priority">>, <<"x-credit">> + ], + server_named => true}. 
+
+%% Drop a single seq no from the unconfirmed map, accumulating it in the
+%% rejected list when it was present.
+reject_seq_no(SeqNo, U0) ->
+    reject_seq_no(SeqNo, U0, []).
+
+reject_seq_no(SeqNo, U0, Acc) ->
+    case maps:take(SeqNo, U0) of
+        {_, U} ->
+            {U, [SeqNo | Acc]};
+        error ->
+            {U0, Acc}
+    end.
+
+%% Fold over MsgSeqNos updating each #msg_status{} for Pid according to
+%% Reason (confirm | down). Returns {Unconfirmed, ConfirmedSeqNos,
+%% RejectedSeqNos}: a seq no is confirmed once no pids are pending and at
+%% least one confirmed, rejected once no pids are pending and none confirmed,
+%% and kept in the map otherwise. Unknown seq nos are ignored.
+settle_seq_nos(MsgSeqNos, Pid, U0, Reason) ->
+    lists:foldl(
+      fun (SeqNo, {U, C0, R0}) ->
+              case U of
+                  #{SeqNo := Status0} ->
+                      case update_msg_status(Reason, Pid, Status0) of
+                          #msg_status{pending = [],
+                                      confirmed = []} ->
+                              %% no pending left and nothing confirmed
+                              %% then we reject it
+                              {maps:remove(SeqNo, U), C0, [SeqNo | R0]};
+                          #msg_status{pending = [],
+                                      confirmed = _} ->
+                              %% this can be confirmed as there are no pending
+                              %% and confirmed isn't empty
+                              {maps:remove(SeqNo, U), [SeqNo | C0], R0};
+                          MsgStatus ->
+                              {U#{SeqNo => MsgStatus}, C0, R0}
+                      end;
+                  _ ->
+                      {U, C0, R0}
+              end
+      end, {U0, [], []}, MsgSeqNos).
+
+%% On confirm: move Pid from pending to confirmed.
+%% On down: just drop Pid from pending (it can no longer confirm).
+update_msg_status(confirm, Pid, #msg_status{pending = P,
+                                            confirmed = C} = S) ->
+    Rem = lists:delete(Pid, P),
+    S#msg_status{pending = Rem, confirmed = [Pid | C]};
+update_msg_status(down, Pid, #msg_status{pending = P} = S) ->
+    S#msg_status{pending = lists:delete(Pid, P)}.
+
+%% part of channel <-> queue api
+%% Send publisher confirms to the channel/sender process Pid.
+confirm_to_sender(Pid, QName, MsgSeqNos) ->
+    %% the stream queue included the queue type refactoring and thus requires
+    %% a different message format
+    Evt = case rabbit_ff_registry:is_enabled(stream_queue) of
+              true ->
+                  {queue_event, QName, {confirm, MsgSeqNos, self()}};
+              false ->
+                  {confirm, MsgSeqNos, self()}
+          end,
+    gen_server2:cast(Pid, Evt).
+
+%% Send a publish rejection to Pid, using the old or new (queue_event)
+%% message format depending on the stream_queue feature flag.
+send_rejection(Pid, QName, MsgSeqNo) ->
+    case rabbit_ff_registry:is_enabled(stream_queue) of
+        true ->
+            gen_server2:cast(Pid, {queue_event, QName,
+                                   {reject_publish, MsgSeqNo, self()}});
+        false ->
+            gen_server2:cast(Pid, {reject_publish, MsgSeqNo, self()})
+    end.
+
+%% Forward an arbitrary queue event to Pid in the new queue_event format.
+send_queue_event(Pid, QName, Evt) ->
+    gen_server2:cast(Pid, {queue_event, QName, Evt}).
diff --git a/deps/rabbit/src/rabbit_client_sup.erl b/deps/rabbit/src/rabbit_client_sup.erl new file mode 100644 index 0000000000..a28e4ce39c --- /dev/null +++ b/deps/rabbit/src/rabbit_client_sup.erl @@ -0,0 +1,43 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_client_sup). + +-behaviour(supervisor2). + +-export([start_link/1, start_link/2, start_link_worker/2]). + +-export([init/1]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-spec start_link(rabbit_types:mfargs()) -> + rabbit_types:ok_pid_or_error(). + +start_link(Callback) -> + supervisor2:start_link(?MODULE, Callback). + +-spec start_link({'local', atom()}, rabbit_types:mfargs()) -> + rabbit_types:ok_pid_or_error(). + +start_link(SupName, Callback) -> + supervisor2:start_link(SupName, ?MODULE, Callback). + +-spec start_link_worker({'local', atom()}, rabbit_types:mfargs()) -> + rabbit_types:ok_pid_or_error(). + +start_link_worker(SupName, Callback) -> + supervisor2:start_link(SupName, ?MODULE, {Callback, worker}). + +init({M,F,A}) -> + {ok, {{simple_one_for_one, 0, 1}, + [{client, {M,F,A}, temporary, infinity, supervisor, [M]}]}}; +init({{M,F,A}, worker}) -> + {ok, {{simple_one_for_one, 0, 1}, + [{client, {M,F,A}, temporary, ?WORKER_WAIT, worker, [M]}]}}. diff --git a/deps/rabbit/src/rabbit_config.erl b/deps/rabbit/src/rabbit_config.erl new file mode 100644 index 0000000000..1198035a7a --- /dev/null +++ b/deps/rabbit/src/rabbit_config.erl @@ -0,0 +1,46 @@ +-module(rabbit_config). + +-export([ + config_files/0, + get_advanced_config/0 + ]). + +-export([schema_dir/0]). +-deprecated([{schema_dir, 0, eventually}]). + +-export_type([config_location/0]). 
+ +-type config_location() :: string(). + +get_confs() -> + case get_prelaunch_config_state() of + #{config_files := Confs} -> Confs; + _ -> [] + end. + +schema_dir() -> + undefined. + +get_advanced_config() -> + case get_prelaunch_config_state() of + %% There can be only one advanced.config + #{config_advanced_file := FileName} when FileName =/= undefined -> + case rabbit_file:is_file(FileName) of + true -> FileName; + false -> none + end; + _ -> none + end. + +-spec config_files() -> [config_location()]. +config_files() -> + ConfFiles = [filename:absname(File) || File <- get_confs(), + filelib:is_regular(File)], + AdvancedFiles = case get_advanced_config() of + none -> []; + FileName -> [filename:absname(FileName)] + end, + AdvancedFiles ++ ConfFiles. + +get_prelaunch_config_state() -> + rabbit_prelaunch_conf:get_config_state(). diff --git a/deps/rabbit/src/rabbit_confirms.erl b/deps/rabbit/src/rabbit_confirms.erl new file mode 100644 index 0000000000..2fe032d1f1 --- /dev/null +++ b/deps/rabbit/src/rabbit_confirms.erl @@ -0,0 +1,152 @@ +-module(rabbit_confirms). + +-compile({no_auto_import, [size/1]}). + +-include_lib("rabbit_common/include/rabbit.hrl"). + +-export([init/0, + insert/4, + confirm/3, + reject/2, + + remove_queue/2, + + smallest/1, + size/1, + is_empty/1]). + +-type seq_no() :: non_neg_integer(). +-type queue_name() :: rabbit_amqqueue:name(). +-type exchange_name() :: rabbit_exchange:name(). + +-record(?MODULE, {smallest :: undefined | seq_no(), + unconfirmed = #{} :: #{seq_no() => + {exchange_name(), + #{queue_name() => ok}}} + }). + +-type mx() :: {seq_no(), exchange_name()}. + +-opaque state() :: #?MODULE{}. + +-export_type([ + state/0 + ]). + +-spec init() -> state(). +init() -> + #?MODULE{}. + +-spec insert(seq_no(), [queue_name()], exchange_name(), state()) -> + state(). 
+%% Track a new published message: SeqNo must not already be tracked; it is
+%% stored with its exchange and the set of queues that still have to confirm.
+%% `smallest' is initialised on the first insert.
+insert(SeqNo, QNames, #resource{kind = exchange} = XName,
+       #?MODULE{smallest = S0,
+                unconfirmed = U0} = State)
+  when is_integer(SeqNo)
+       andalso is_list(QNames)
+       andalso is_map_key(SeqNo, U0) == false ->
+    U = U0#{SeqNo => {XName, maps:from_list([{Q, ok} || Q <- QNames])}},
+    S = case S0 of
+            undefined -> SeqNo;
+            _ -> S0
+        end,
+    State#?MODULE{smallest = S,
+                  unconfirmed = U}.
+
+-spec confirm([seq_no()], queue_name(), state()) ->
+    {[mx()], state()}.
+%% Record confirms from QName for SeqNos. Returns the {SeqNo, ExchangeName}
+%% pairs that became fully confirmed (all queues answered) and the new state,
+%% recomputing `smallest' when it was among the fully-confirmed seq nos.
+confirm(SeqNos, QName, #?MODULE{smallest = Smallest0,
+                                unconfirmed = U0} = State)
+  when is_list(SeqNos) ->
+    {Confirmed, U} = lists:foldr(
+                       fun (SeqNo, Acc) ->
+                               confirm_one(SeqNo, QName, Acc)
+                       end, {[], U0}, SeqNos),
+    %% check if smallest is in Confirmed
+    %% TODO: this can be optimised by checking in the preceding foldr
+    Smallest =
+        case lists:any(fun ({S, _}) -> S == Smallest0 end, Confirmed) of
+            true ->
+                %% work out new smallest
+                next_smallest(Smallest0, U);
+            false ->
+                Smallest0
+        end,
+    {Confirmed, State#?MODULE{smallest = Smallest,
+                              unconfirmed = U}}.
+
+-spec reject(seq_no(), state()) ->
+    {ok, mx(), state()} | {error, not_found}.
+%% Drop SeqNo from tracking entirely (a rejection from any queue means the
+%% message must not be confirmed). Returns the {SeqNo, ExchangeName} pair so
+%% the caller can send a nack, or {error, not_found} if untracked.
+reject(SeqNo, #?MODULE{smallest = Smallest0,
+                       unconfirmed = U0} = State)
+  when is_integer(SeqNo) ->
+    case maps:take(SeqNo, U0) of
+        {{XName, _QS}, U} ->
+            Smallest = case SeqNo of
+                           Smallest0 ->
+                               %% need to scan as the smallest was removed
+                               next_smallest(Smallest0, U);
+                           _ ->
+                               Smallest0
+                       end,
+            {ok, {SeqNo, XName}, State#?MODULE{unconfirmed = U,
+                                               smallest = Smallest}};
+        error ->
+            {error, not_found}
+    end.
+
+%% idempotent
+-spec remove_queue(queue_name(), state()) ->
+    {[mx()], state()}.
+%% Treat QName as having confirmed everything it still owes (e.g. when the
+%% queue is deleted); delegates to confirm/3 with every seq no that still
+%% references the queue.
+remove_queue(QName, #?MODULE{unconfirmed = U} = State) ->
+    SeqNos = maps:fold(
+               fun (SeqNo, {_XName, QS0}, Acc) ->
+                       case maps:is_key(QName, QS0) of
+                           true ->
+                               [SeqNo | Acc];
+                           false ->
+                               Acc
+                       end
+               end, [], U),
+    confirm(lists:sort(SeqNos), QName,State).
+
+-spec smallest(state()) -> seq_no() | undefined.
+%% Smallest still-unconfirmed seq no, or undefined when nothing is tracked.
+smallest(#?MODULE{smallest = Smallest}) ->
+    Smallest.
+
+-spec size(state()) -> non_neg_integer().
+%% Number of seq nos still awaiting confirms.
+size(#?MODULE{unconfirmed = U}) ->
+    maps:size(U).
+
+-spec is_empty(state()) -> boolean().
+is_empty(State) ->
+    size(State) == 0.
+
+%% INTERNAL
+
+%% Record a confirm from QName for one seq no.
+%% If QName was the last queue owing a confirm the seq no is emitted as fully
+%% confirmed and removed; otherwise QName is just removed from the pending
+%% queue set. Unknown seq nos are ignored.
+confirm_one(SeqNo, QName, {Acc, U0}) ->
+    case maps:take(SeqNo, U0) of
+        {{XName, QS}, U1}
+          when is_map_key(QName, QS)
+               andalso map_size(QS) == 1 ->
+            %% last queue confirm
+            {[{SeqNo, XName} | Acc], U1};
+        {{XName, QS}, U1} ->
+            {Acc, U1#{SeqNo => {XName, maps:remove(QName, QS)}}};
+        error ->
+            {Acc, U0}
+    end.
+
+%% Linear scan upwards from S for the next tracked seq no; terminates because
+%% the empty map is handled first and S is assumed to be <= some key.
+next_smallest(_S, U) when map_size(U) == 0 ->
+    undefined;
+next_smallest(S, U) when is_map_key(S, U) ->
+    S;
+next_smallest(S, U) ->
+    %% TODO: this is potentially infinitely recursive if called incorrectly
+    %% (i.e. if every key in a non-empty U were smaller than S)
+    next_smallest(S+1, U).
+
+
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
diff --git a/deps/rabbit/src/rabbit_connection_helper_sup.erl b/deps/rabbit/src/rabbit_connection_helper_sup.erl
new file mode 100644
index 0000000000..d0509029fd
--- /dev/null
+++ b/deps/rabbit/src/rabbit_connection_helper_sup.erl
@@ -0,0 +1,57 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_connection_helper_sup).
+
+%% Supervises auxiliary processes of AMQP 0-9-1 connections:
+%%
+%% * Channel supervisor
+%% * Heartbeat receiver
+%% * Heartbeat sender
+%% * Exclusive queue collector
+%%
+%% See also rabbit_heartbeat, rabbit_channel_sup_sup, rabbit_queue_collector.
+
+-behaviour(supervisor2).
+
+-export([start_link/0]).
+-export([start_channel_sup_sup/1,
+         start_queue_collector/2]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+ +start_link() -> + supervisor2:start_link(?MODULE, []). + +-spec start_channel_sup_sup(pid()) -> rabbit_types:ok_pid_or_error(). + +start_channel_sup_sup(SupPid) -> + supervisor2:start_child( + SupPid, + {channel_sup_sup, {rabbit_channel_sup_sup, start_link, []}, + intrinsic, infinity, supervisor, [rabbit_channel_sup_sup]}). + +-spec start_queue_collector(pid(), rabbit_types:proc_name()) -> + rabbit_types:ok_pid_or_error(). + +start_queue_collector(SupPid, Identity) -> + supervisor2:start_child( + SupPid, + {collector, {rabbit_queue_collector, start_link, [Identity]}, + intrinsic, ?WORKER_WAIT, worker, [rabbit_queue_collector]}). + +%%---------------------------------------------------------------------------- + +init([]) -> + ?LG_PROCESS_TYPE(connection_helper_sup), + {ok, {{one_for_one, 10, 10}, []}}. diff --git a/deps/rabbit/src/rabbit_connection_sup.erl b/deps/rabbit/src/rabbit_connection_sup.erl new file mode 100644 index 0000000000..c1d1bd0d77 --- /dev/null +++ b/deps/rabbit/src/rabbit_connection_sup.erl @@ -0,0 +1,66 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_connection_sup). + +%% Supervisor for a (network) AMQP 0-9-1 client connection. +%% +%% Supervises +%% +%% * rabbit_reader +%% * Auxiliary process supervisor +%% +%% See also rabbit_reader, rabbit_connection_helper_sup. + +-behaviour(supervisor2). +-behaviour(ranch_protocol). + +-export([start_link/4, reader/1]). + +-export([init/1]). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-spec start_link(any(), rabbit_net:socket(), module(), any()) -> + {'ok', pid(), pid()}. 
+ +start_link(Ref, _Sock, _Transport, _Opts) -> + {ok, SupPid} = supervisor2:start_link(?MODULE, []), + %% We need to get channels in the hierarchy here so they get shut + %% down after the reader, so the reader gets a chance to terminate + %% them cleanly. But for 1.0 readers we can't start the real + %% ch_sup_sup (because we don't know if we will be 0-9-1 or 1.0) - + %% so we add another supervisor into the hierarchy. + %% + %% This supervisor also acts as an intermediary for heartbeaters and + %% the queue collector process, since these must not be siblings of the + %% reader due to the potential for deadlock if they are added/restarted + %% whilst the supervision tree is shutting down. + {ok, HelperSup} = + supervisor2:start_child( + SupPid, + {helper_sup, {rabbit_connection_helper_sup, start_link, []}, + intrinsic, infinity, supervisor, [rabbit_connection_helper_sup]}), + {ok, ReaderPid} = + supervisor2:start_child( + SupPid, + {reader, {rabbit_reader, start_link, [HelperSup, Ref]}, + intrinsic, ?WORKER_WAIT, worker, [rabbit_reader]}), + {ok, SupPid, ReaderPid}. + +-spec reader(pid()) -> pid(). + +reader(Pid) -> + hd(supervisor2:find_child(Pid, reader)). + +%%-------------------------------------------------------------------------- + +init([]) -> + ?LG_PROCESS_TYPE(connection_sup), + {ok, {{one_for_all, 0, 1}, []}}. diff --git a/deps/rabbit/src/rabbit_connection_tracking.erl b/deps/rabbit/src/rabbit_connection_tracking.erl new file mode 100644 index 0000000000..c0704e6a7c --- /dev/null +++ b/deps/rabbit/src/rabbit_connection_tracking.erl @@ -0,0 +1,515 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_connection_tracking). 
+ +%% Abstracts away how tracked connection records are stored +%% and queried. +%% +%% See also: +%% +%% * rabbit_connection_tracking_handler +%% * rabbit_reader +%% * rabbit_event +-behaviour(rabbit_tracking). + +-export([boot/0, + update_tracked/1, + handle_cast/1, + register_tracked/1, + unregister_tracked/1, + count_tracked_items_in/1, + clear_tracking_tables/0, + shutdown_tracked_items/2]). + +-export([ensure_tracked_connections_table_for_node/1, + ensure_per_vhost_tracked_connections_table_for_node/1, + ensure_per_user_tracked_connections_table_for_node/1, + + ensure_tracked_connections_table_for_this_node/0, + ensure_per_vhost_tracked_connections_table_for_this_node/0, + ensure_per_user_tracked_connections_table_for_this_node/0, + + tracked_connection_table_name_for/1, + tracked_connection_per_vhost_table_name_for/1, + tracked_connection_per_user_table_name_for/1, + get_all_tracked_connection_table_names_for_node/1, + + delete_tracked_connections_table_for_node/1, + delete_per_vhost_tracked_connections_table_for_node/1, + delete_per_user_tracked_connections_table_for_node/1, + delete_tracked_connection_user_entry/1, + delete_tracked_connection_vhost_entry/1, + + clear_tracked_connection_tables_for_this_node/0, + + list/0, list/1, list_on_node/1, list_on_node/2, list_of_user/1, + tracked_connection_from_connection_created/1, + tracked_connection_from_connection_state/1, + lookup/1, + count/0]). + +-include_lib("rabbit.hrl"). + +-import(rabbit_misc, [pget/2]). + +-export([close_connections/3]). + +%% +%% API +%% + +%% Behaviour callbacks + +-spec boot() -> ok. + +%% Sets up and resets connection tracking tables for this +%% node. 
+boot() -> + ensure_tracked_connections_table_for_this_node(), + rabbit_log:info("Setting up a table for connection tracking on this node: ~p", + [tracked_connection_table_name_for(node())]), + ensure_per_vhost_tracked_connections_table_for_this_node(), + rabbit_log:info("Setting up a table for per-vhost connection counting on this node: ~p", + [tracked_connection_per_vhost_table_name_for(node())]), + ensure_per_user_tracked_connections_table_for_this_node(), + rabbit_log:info("Setting up a table for per-user connection counting on this node: ~p", + [tracked_connection_per_user_table_name_for(node())]), + clear_tracking_tables(), + ok. + +-spec update_tracked(term()) -> ok. + +update_tracked(Event) -> + spawn(?MODULE, handle_cast, [Event]), + ok. + +%% Asynchronously handle update events +-spec handle_cast(term()) -> ok. + +handle_cast({connection_created, Details}) -> + ThisNode = node(), + case pget(node, Details) of + ThisNode -> + TConn = tracked_connection_from_connection_created(Details), + ConnId = TConn#tracked_connection.id, + try + register_tracked(TConn) + catch + error:{no_exists, _} -> + Msg = "Could not register connection ~p for tracking, " + "its table is not ready yet or the connection terminated prematurely", + rabbit_log_connection:warning(Msg, [ConnId]), + ok; + error:Err -> + Msg = "Could not register connection ~p for tracking: ~p", + rabbit_log_connection:warning(Msg, [ConnId, Err]), + ok + end; + _OtherNode -> + %% ignore + ok + end; +handle_cast({connection_closed, Details}) -> + ThisNode = node(), + case pget(node, Details) of + ThisNode -> + %% [{name,<<"127.0.0.1:64078 -> 127.0.0.1:5672">>}, + %% {pid,<0.1774.0>}, + %% {node, rabbit@hostname}] + unregister_tracked( + rabbit_tracking:id(ThisNode, pget(name, Details))); + _OtherNode -> + %% ignore + ok + end; +handle_cast({vhost_deleted, Details}) -> + VHost = pget(name, Details), + %% Schedule vhost entry deletion, allowing time for connections to close + _ = 
timer:apply_after(?TRACKING_EXECUTION_TIMEOUT, ?MODULE, + delete_tracked_connection_vhost_entry, [VHost]), + rabbit_log_connection:info("Closing all connections in vhost '~s' because it's being deleted", [VHost]), + shutdown_tracked_items( + rabbit_connection_tracking:list(VHost), + rabbit_misc:format("vhost '~s' is deleted", [VHost])); +%% Note: under normal circumstances this will be called immediately +%% after the vhost_deleted above. Therefore we should be careful about +%% what we log and be more defensive. +handle_cast({vhost_down, Details}) -> + VHost = pget(name, Details), + Node = pget(node, Details), + rabbit_log_connection:info("Closing all connections in vhost '~s' on node '~s'" + " because the vhost is stopping", + [VHost, Node]), + shutdown_tracked_items( + rabbit_connection_tracking:list_on_node(Node, VHost), + rabbit_misc:format("vhost '~s' is down", [VHost])); +handle_cast({user_deleted, Details}) -> + Username = pget(name, Details), + %% Schedule user entry deletion, allowing time for connections to close + _ = timer:apply_after(?TRACKING_EXECUTION_TIMEOUT, ?MODULE, + delete_tracked_connection_user_entry, [Username]), + rabbit_log_connection:info("Closing all connections from user '~s' because it's being deleted", [Username]), + shutdown_tracked_items( + rabbit_connection_tracking:list_of_user(Username), + rabbit_misc:format("user '~s' is deleted", [Username])); +%% A node had been deleted from the cluster. +handle_cast({node_deleted, Details}) -> + Node = pget(node, Details), + rabbit_log_connection:info("Node '~s' was removed from the cluster, deleting its connection tracking tables...", [Node]), + delete_tracked_connections_table_for_node(Node), + delete_per_vhost_tracked_connections_table_for_node(Node), + delete_per_user_tracked_connections_table_for_node(Node). + +-spec register_tracked(rabbit_types:tracked_connection()) -> ok. +-dialyzer([{nowarn_function, [register_tracked/1]}, race_conditions]). 
%% Inserts a row for the connection and bumps the per-vhost and
%% per-user counters. Guard restricts this to connections that are
%% local to this node; the upsert check makes re-registration of an
%% already-tracked connection a no-op so counters are not doubled.
register_tracked(#tracked_connection{username = Username, vhost = VHost, id = ConnId, node = Node} = Conn) when Node =:= node() ->
    TableName = tracked_connection_table_name_for(Node),
    PerVhostTableName = tracked_connection_per_vhost_table_name_for(Node),
    PerUserConnTableName = tracked_connection_per_user_table_name_for(Node),
    %% upsert
    case mnesia:dirty_read(TableName, ConnId) of
        [] ->
            mnesia:dirty_write(TableName, Conn),
            mnesia:dirty_update_counter(PerVhostTableName, VHost, 1),
            mnesia:dirty_update_counter(PerUserConnTableName, Username, 1);
        [#tracked_connection{}] ->
            ok
    end,
    ok.

-spec unregister_tracked(rabbit_types:tracked_connection_id()) -> ok.

%% Removes the connection row and decrements both counters. A no-op
%% when the connection was never registered (e.g. closed before the
%% created event was processed).
unregister_tracked(ConnId = {Node, _Name}) when Node =:= node() ->
    TableName = tracked_connection_table_name_for(Node),
    PerVhostTableName = tracked_connection_per_vhost_table_name_for(Node),
    PerUserConnTableName = tracked_connection_per_user_table_name_for(Node),
    case mnesia:dirty_read(TableName, ConnId) of
        [] -> ok;
        [#tracked_connection{vhost = VHost, username = Username}] ->
            mnesia:dirty_update_counter(PerUserConnTableName, Username, -1),
            mnesia:dirty_update_counter(PerVhostTableName, VHost, -1),
            mnesia:dirty_delete(TableName, ConnId)
    end.

-spec count_tracked_items_in({atom(), rabbit_types:vhost()}) -> non_neg_integer().

%% Sums the per-vhost (or per-user) connection counters across nodes.
count_tracked_items_in({vhost, VirtualHost}) ->
    rabbit_tracking:count_tracked_items(
      fun tracked_connection_per_vhost_table_name_for/1,
      #tracked_connection_per_vhost.connection_count, VirtualHost,
      "connections in vhost");
count_tracked_items_in({user, Username}) ->
    rabbit_tracking:count_tracked_items(
      fun tracked_connection_per_user_table_name_for/1,
      #tracked_connection_per_user.connection_count, Username,
      "connections for user").

-spec clear_tracking_tables() -> ok.

clear_tracking_tables() ->
    clear_tracked_connection_tables_for_this_node().

-spec shutdown_tracked_items(list(), term()) -> ok.
%% rabbit_tracking behaviour callback: closes every tracked connection
%% in the given list, citing Message as the reason.
shutdown_tracked_items(TrackedItems, Message) ->
    close_connections(TrackedItems, Message).

%% Extended API

-spec ensure_tracked_connections_table_for_this_node() -> ok.

ensure_tracked_connections_table_for_this_node() ->
    ensure_tracked_connections_table_for_node(node()).


-spec ensure_per_vhost_tracked_connections_table_for_this_node() -> ok.

ensure_per_vhost_tracked_connections_table_for_this_node() ->
    ensure_per_vhost_tracked_connections_table_for_node(node()).


-spec ensure_per_user_tracked_connections_table_for_this_node() -> ok.

ensure_per_user_tracked_connections_table_for_this_node() ->
    ensure_per_user_tracked_connections_table_for_node(node()).


%% Create tables
-spec ensure_tracked_connections_table_for_node(node()) -> ok.

%% Creates the node-specific tracked_connection table. already_exists
%% is treated as success; any other failure is logged but not raised,
%% so boot can proceed.
ensure_tracked_connections_table_for_node(Node) ->
    TableName = tracked_connection_table_name_for(Node),
    case mnesia:create_table(TableName, [{record_name, tracked_connection},
                                         {attributes, record_info(fields, tracked_connection)}]) of
        {atomic, ok}                    -> ok;
        {aborted, {already_exists, _}}  -> ok;
        {aborted, Error}                ->
            rabbit_log:error("Failed to create a tracked connection table for node ~p: ~p", [Node, Error]),
            ok
    end.

-spec ensure_per_vhost_tracked_connections_table_for_node(node()) -> ok.

%% Same as above for the per-vhost counter table.
ensure_per_vhost_tracked_connections_table_for_node(Node) ->
    TableName = tracked_connection_per_vhost_table_name_for(Node),
    case mnesia:create_table(TableName, [{record_name, tracked_connection_per_vhost},
                                         {attributes, record_info(fields, tracked_connection_per_vhost)}]) of
        {atomic, ok}                    -> ok;
        {aborted, {already_exists, _}}  -> ok;
        {aborted, Error}                ->
            rabbit_log:error("Failed to create a per-vhost tracked connection table for node ~p: ~p", [Node, Error]),
            ok
    end.

-spec ensure_per_user_tracked_connections_table_for_node(node()) -> ok.

%% Same as above for the per-user counter table.
ensure_per_user_tracked_connections_table_for_node(Node) ->
    TableName = tracked_connection_per_user_table_name_for(Node),
    case mnesia:create_table(TableName, [{record_name, tracked_connection_per_user},
                                         {attributes, record_info(fields, tracked_connection_per_user)}]) of
        {atomic, ok}                    -> ok;
        {aborted, {already_exists, _}}  -> ok;
        {aborted, Error}                ->
            rabbit_log:error("Failed to create a per-user tracked connection table for node ~p: ~p", [Node, Error]),
            ok
    end.

-spec clear_tracked_connection_tables_for_this_node() -> ok.

clear_tracked_connection_tables_for_this_node() ->
    [rabbit_tracking:clear_tracking_table(T)
     || T <- get_all_tracked_connection_table_names_for_node(node())],
    ok.

-spec delete_tracked_connections_table_for_node(node()) -> ok.

delete_tracked_connections_table_for_node(Node) ->
    TableName = tracked_connection_table_name_for(Node),
    rabbit_tracking:delete_tracking_table(TableName, Node, "tracked connection").

-spec delete_per_vhost_tracked_connections_table_for_node(node()) -> ok.

delete_per_vhost_tracked_connections_table_for_node(Node) ->
    TableName = tracked_connection_per_vhost_table_name_for(Node),
    rabbit_tracking:delete_tracking_table(TableName, Node,
                                          "per-vhost tracked connection").

-spec delete_per_user_tracked_connections_table_for_node(node()) -> ok.

delete_per_user_tracked_connections_table_for_node(Node) ->
    TableName = tracked_connection_per_user_table_name_for(Node),
    rabbit_tracking:delete_tracking_table(TableName, Node,
                                          "per-user tracked connection").

%% Table names are node-specific atoms, e.g.
%% 'tracked_connection_on_node_rabbit@host'.
-spec tracked_connection_table_name_for(node()) -> atom().

tracked_connection_table_name_for(Node) ->
    list_to_atom(rabbit_misc:format("tracked_connection_on_node_~s", [Node])).

-spec tracked_connection_per_vhost_table_name_for(node()) -> atom().

tracked_connection_per_vhost_table_name_for(Node) ->
    list_to_atom(rabbit_misc:format("tracked_connection_per_vhost_on_node_~s", [Node])).
-spec tracked_connection_per_user_table_name_for(node()) -> atom().

tracked_connection_per_user_table_name_for(Node) ->
    list_to_atom(rabbit_misc:format(
        "tracked_connection_table_per_user_on_node_~s", [Node])).

-spec get_all_tracked_connection_table_names_for_node(node()) -> [atom()].

get_all_tracked_connection_table_names_for_node(Node) ->
    [tracked_connection_table_name_for(Node),
     tracked_connection_per_vhost_table_name_for(Node),
     tracked_connection_per_user_table_name_for(Node)].

-spec lookup(rabbit_types:connection_name()) -> rabbit_types:tracked_connection() | 'not_found'.

%% Searches every running node's table for a connection with the given
%% name; returns the first match or 'not_found'.
lookup(Name) ->
    Nodes = rabbit_nodes:all_running(),
    lookup(Name, Nodes).

lookup(_, []) ->
    not_found;
lookup(Name, [Node | Nodes]) ->
    TableName = tracked_connection_table_name_for(Node),
    case mnesia:dirty_read(TableName, {Node, Name}) of
        []    -> lookup(Name, Nodes);
        [Row] -> Row
    end.

-spec list() -> [rabbit_types:tracked_connection()].

%% All tracked connections, cluster-wide.
list() ->
    lists:foldl(
      fun (Node, Acc) ->
              Tab = tracked_connection_table_name_for(Node),
              Acc ++ mnesia:dirty_match_object(Tab, #tracked_connection{_ = '_'})
      end, [], rabbit_nodes:all_running()).

-spec count() -> non_neg_integer().

%% Cluster-wide connection count via table sizes (cheaper than list/0).
count() ->
    lists:foldl(
      fun (Node, Acc) ->
              Tab = tracked_connection_table_name_for(Node),
              Acc + mnesia:table_info(Tab, size)
      end, 0, rabbit_nodes:all_running()).

-spec list(rabbit_types:vhost()) -> [rabbit_types:tracked_connection()].

list(VHost) ->
    rabbit_tracking:match_tracked_items(
      fun tracked_connection_table_name_for/1,
      #tracked_connection{vhost = VHost, _ = '_'}).

-spec list_on_node(node()) -> [rabbit_types:tracked_connection()].

%% A missing table (node not booted yet, or already deleted) yields [].
list_on_node(Node) ->
    try mnesia:dirty_match_object(
          tracked_connection_table_name_for(Node),
          #tracked_connection{_ = '_'})
    catch exit:{aborted, {no_exists, _}} -> []
    end.

-spec list_on_node(node(), rabbit_types:vhost()) -> [rabbit_types:tracked_connection()].

list_on_node(Node, VHost) ->
    try mnesia:dirty_match_object(
          tracked_connection_table_name_for(Node),
          #tracked_connection{vhost = VHost, _ = '_'})
    catch exit:{aborted, {no_exists, _}} -> []
    end.


-spec list_of_user(rabbit_types:username()) -> [rabbit_types:tracked_connection()].

list_of_user(Username) ->
    rabbit_tracking:match_tracked_items(
      fun tracked_connection_table_name_for/1,
      #tracked_connection{username = Username, _ = '_'}).

%% Internal, delete tracked entries

%% Drops the per-vhost counter row once the vhost no longer exists
%% (the exists-check MFA is evaluated by rabbit_tracking).
delete_tracked_connection_vhost_entry(Vhost) ->
    rabbit_tracking:delete_tracked_entry(
      {rabbit_vhost, exists, [Vhost]},
      fun tracked_connection_per_vhost_table_name_for/1,
      Vhost).

%% Drops the per-user counter row once the user no longer exists.
delete_tracked_connection_user_entry(Username) ->
    rabbit_tracking:delete_tracked_entry(
      {rabbit_auth_backend_internal, exists, [Username]},
      fun tracked_connection_per_user_table_name_for/1,
      Username).

%% Returns a #tracked_connection from connection_created
%% event details.
%%
%% @see rabbit_connection_tracking_handler.
%% Builds a #tracked_connection record from the proplist carried by a
%% connection_created event; only the fields tracked below are read,
%% the rest of the event is ignored.
tracked_connection_from_connection_created(EventDetails) ->
    %% Example event:
    %%
    %% [{type,network},
    %%  {pid,<0.329.0>},
    %%  {name,<<"127.0.0.1:60998 -> 127.0.0.1:5672">>},
    %%  {port,5672},
    %%  {peer_port,60998},
    %%  {host,{0,0,0,0,0,65535,32512,1}},
    %%  {peer_host,{0,0,0,0,0,65535,32512,1}},
    %%  {ssl,false},
    %%  {peer_cert_subject,''},
    %%  {peer_cert_issuer,''},
    %%  {peer_cert_validity,''},
    %%  {auth_mechanism,<<"PLAIN">>},
    %%  {ssl_protocol,''},
    %%  {ssl_key_exchange,''},
    %%  {ssl_cipher,''},
    %%  {ssl_hash,''},
    %%  {protocol,{0,9,1}},
    %%  {user,<<"guest">>},
    %%  {vhost,<<"/">>},
    %%  {timeout,14},
    %%  {frame_max,131072},
    %%  {channel_max,65535},
    %%  {client_properties,
    %%      [{<<"capabilities">>,table,
    %%        [{<<"publisher_confirms">>,bool,true},
    %%         {<<"consumer_cancel_notify">>,bool,true},
    %%         {<<"exchange_exchange_bindings">>,bool,true},
    %%         {<<"basic.nack">>,bool,true},
    %%         {<<"connection.blocked">>,bool,true},
    %%         {<<"authentication_failure_close">>,bool,true}]},
    %%       {<<"product">>,longstr,<<"Bunny">>},
    %%       {<<"platform">>,longstr,
    %%        <<"ruby 2.3.0p0 (2015-12-25 revision 53290) [x86_64-darwin15]">>},
    %%       {<<"version">>,longstr,<<"2.3.0.pre">>},
    %%       {<<"information">>,longstr,
    %%        <<"http://rubybunny.info">>}]},
    %%  {connected_at,1453214290847}]
    Name = pget(name, EventDetails),
    Node = pget(node, EventDetails),
    #tracked_connection{id           = rabbit_tracking:id(Node, Name),
                        name         = Name,
                        node         = Node,
                        vhost        = pget(vhost, EventDetails),
                        username     = pget(user, EventDetails),
                        connected_at = pget(connected_at, EventDetails),
                        pid          = pget(pid, EventDetails),
                        type         = pget(type, EventDetails),
                        peer_host    = pget(peer_host, EventDetails),
                        peer_port    = pget(peer_port, EventDetails)}.
%% Converts a live #connection state record into a tracked connection
%% by synthesising the equivalent connection_created event proplist.
tracked_connection_from_connection_state(#connection{
                                            vhost = VHost,
                                            connected_at = Ts,
                                            peer_host = PeerHost,
                                            peer_port = PeerPort,
                                            user = Username,
                                            name = Name
                                           }) ->
    tracked_connection_from_connection_created(
      [{name, Name},
       {node, node()},
       {vhost, VHost},
       {user, Username},
       {user_who_performed_action, Username},
       {connected_at, Ts},
       {pid, self()},
       {type, network},
       {peer_port, PeerPort},
       {peer_host, PeerHost}]).

close_connections(Tracked, Message) ->
    close_connections(Tracked, Message, 0).

%% Closes each connection in turn, pausing Delay ms between closures
%% (0 by default) so a large batch does not stampede the node.
close_connections(Tracked, Message, Delay) ->
    [begin
         close_connection(Conn, Message),
         timer:sleep(Delay)
     end || Conn <- Tracked],
    ok.

close_connection(#tracked_connection{pid = Pid, type = network}, Message) ->
    try
        rabbit_networking:close_connection(Pid, Message)
    catch error:{not_a_connection, _} ->
            %% could have been closed concurrently, or the input
            %% is bogus. In any case, we should not terminate
            ok;
          _:Err ->
            %% ignore, don't terminate
            rabbit_log:warning("Could not close connection ~p: ~p", [Pid, Err]),
            ok
    end;
close_connection(#tracked_connection{pid = Pid, type = direct}, Message) ->
    %% Do an RPC call to the node running the direct client.
    Node = node(Pid),
    rpc:call(Node, amqp_direct_connection, server_close, [Pid, 320, Message]).
diff --git a/deps/rabbit/src/rabbit_connection_tracking_handler.erl b/deps/rabbit/src/rabbit_connection_tracking_handler.erl new file mode 100644 index 0000000000..17085d805a --- /dev/null +++ b/deps/rabbit/src/rabbit_connection_tracking_handler.erl @@ -0,0 +1,80 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_connection_tracking_handler).
%% This module keeps track of connection creation and termination events
%% on its local node. The primary goal here is to decouple connection
%% tracking from rabbit_reader in rabbit_common.
%%
%% Events from other nodes are ignored.

-behaviour(gen_event).

-export([init/1, handle_call/2, handle_event/2, handle_info/2,
         terminate/2, code_change/3]).

%% for compatibility with previous versions of CLI tools
-export([close_connections/3]).

-include_lib("rabbit.hrl").

%% Installed on (and removed from) the rabbit_event manager as part of
%% the boot sequence, after connection tracking tables exist.
-rabbit_boot_step({?MODULE,
                   [{description, "connection tracking event handler"},
                    {mfa,         {gen_event, add_handler,
                                   [rabbit_event, ?MODULE, []]}},
                    {cleanup,     {gen_event, delete_handler,
                                   [rabbit_event, ?MODULE, []]}},
                    {requires,    [connection_tracking]},
                    {enables,     recovery}]}).

%%
%% API
%%

init([]) ->
    {ok, []}.

%% Each clause forwards a tracked event type to
%% rabbit_connection_tracking; everything else falls through unchanged.
handle_event(#event{type = connection_created, props = Details}, State) ->
    ok = rabbit_connection_tracking:update_tracked({connection_created, Details}),
    {ok, State};
handle_event(#event{type = connection_closed, props = Details}, State) ->
    ok = rabbit_connection_tracking:update_tracked({connection_closed, Details}),
    {ok, State};
handle_event(#event{type = vhost_deleted, props = Details}, State) ->
    ok = rabbit_connection_tracking:update_tracked({vhost_deleted, Details}),
    {ok, State};
%% Note: under normal circumstances this will be called immediately
%% after the vhost_deleted above. Therefore we should be careful about
%% what we log and be more defensive.
handle_event(#event{type = vhost_down, props = Details}, State) ->
    ok = rabbit_connection_tracking:update_tracked({vhost_down, Details}),
    {ok, State};
handle_event(#event{type = user_deleted, props = Details}, State) ->
    ok = rabbit_connection_tracking:update_tracked({user_deleted, Details}),
    {ok, State};
%% A node had been deleted from the cluster.
handle_event(#event{type = node_deleted, props = Details}, State) ->
    ok = rabbit_connection_tracking:update_tracked({node_deleted, Details}),
    {ok, State};
handle_event(_Event, State) ->
    {ok, State}.

handle_call(_Request, State) ->
    {ok, not_understood, State}.

handle_info(_Info, State) ->
    {ok, State}.

terminate(_Arg, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% Kept for CLI tool compatibility; delegates to the tracking module.
close_connections(Tracked, Message, Delay) ->
    rabbit_connection_tracking:close_connections(Tracked, Message, Delay).
diff --git a/deps/rabbit/src/rabbit_control_pbe.erl b/deps/rabbit/src/rabbit_control_pbe.erl new file mode 100644 index 0000000000..95c4fe41f1 --- /dev/null +++ b/deps/rabbit/src/rabbit_control_pbe.erl @@ -0,0 +1,82 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Password-based encryption helpers behind the `rabbitmqctl
%% encode/decode` CLI commands. All results are iolists formatted for
%% terminal display.
-module(rabbit_control_pbe).

-export([decode/4, encode/4, list_ciphers/0, list_hashes/0]).

% for testing purposes
-export([evaluate_input_as_term/1]).

list_ciphers() ->
    {ok, io_lib:format("~p", [rabbit_pbe:supported_ciphers()])}.

list_hashes() ->
    {ok, io_lib:format("~p", [rabbit_pbe:supported_hashes()])}.

%% Validates cipher/hash/iteration-count/argument combinations shared
%% by encode/4 and decode/4; returns ok or {error, Message}.
validate(_Cipher, _Hash, Iterations, _Args) when Iterations =< 0 ->
    {error, io_lib:format("The requested number of iterations is incorrect", [])};
validate(_Cipher, _Hash, _Iterations, Args) when length(Args) < 2 ->
    {error, io_lib:format("Please provide a value to encode/decode and a passphrase", [])};
validate(_Cipher, _Hash, _Iterations, Args) when length(Args) > 2 ->
    {error, io_lib:format("Too many arguments. Please provide a value to encode/decode and a passphrase", [])};
validate(Cipher, Hash, _Iterations, _Args) ->
    case lists:member(Cipher, rabbit_pbe:supported_ciphers()) of
        false ->
            {error, io_lib:format("The requested cipher is not supported", [])};
        true ->
            case lists:member(Hash, rabbit_pbe:supported_hashes()) of
                false ->
                    {error, io_lib:format("The requested hash is not supported", [])};
                true -> ok
            end
    end.

%% Encrypts an Erlang term (parsed from the first CLI argument) with
%% the given passphrase; returns {ok, FormattedResult} or {error, Msg}.
encode(Cipher, Hash, Iterations, Args) ->
    case validate(Cipher, Hash, Iterations, Args) of
        {error, Err} -> {error, Err};
        ok ->
            [Value, PassPhrase] = Args,
            try begin
                    TermValue = evaluate_input_as_term(Value),
                    Result = {encrypted, _} = rabbit_pbe:encrypt_term(Cipher, Hash, Iterations,
                                                                      list_to_binary(PassPhrase), TermValue),
                    {ok, io_lib:format("~p", [Result])}
                end
            catch
                _:Msg -> {error, io_lib:format("Error during cipher operation: ~p", [Msg])}
            end
    end.

%% Decrypts a previously encoded term. Accepts either the full
%% {encrypted, _} tuple or just its payload.
decode(Cipher, Hash, Iterations, Args) ->
    case validate(Cipher, Hash, Iterations, Args) of
        {error, Err} -> {error, Err};
        ok ->
            [Value, PassPhrase] = Args,
            try begin
                    TermValue = evaluate_input_as_term(Value),
                    TermToDecrypt = case TermValue of
                                        {encrypted, _}=EncryptedTerm ->
                                            EncryptedTerm;
                                        _ ->
                                            {encrypted, TermValue}
                                    end,
                    Result = rabbit_pbe:decrypt_term(Cipher, Hash, Iterations,
                                                     list_to_binary(PassPhrase),
                                                     TermToDecrypt),
                    {ok, io_lib:format("~p", [Result])}
                end
            catch
                _:Msg -> {error, io_lib:format("Error during cipher operation: ~p", [Msg])}
            end
    end.

%% Parses and evaluates a string as a single Erlang term. Intended for
%% trusted CLI input only: erl_eval will happily run arbitrary
%% expressions, so this must never be exposed to untrusted callers.
evaluate_input_as_term(Input) ->
    {ok,Tokens,_EndLine} = erl_scan:string(Input ++ "."),
    {ok,AbsForm} = erl_parse:parse_exprs(Tokens),
    {value,TermValue,_Bs} = erl_eval:exprs(AbsForm, erl_eval:new_bindings()),
    TermValue.
diff --git a/deps/rabbit/src/rabbit_core_ff.erl b/deps/rabbit/src/rabbit_core_ff.erl new file mode 100644 index 0000000000..6d30846775 --- /dev/null +++ b/deps/rabbit/src/rabbit_core_ff.erl @@ -0,0 +1,179 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Core feature flags and their migration functions. Each migration
%% function is called with (FeatureName, FeatureProps, enable |
%% is_enabled) by the feature-flags subsystem.
-module(rabbit_core_ff).

-export([quorum_queue_migration/3,
         stream_queue_migration/3,
         implicit_default_bindings_migration/3,
         virtual_host_metadata_migration/3,
         maintenance_mode_status_migration/3,
         user_limits_migration/3]).

-rabbit_feature_flag(
   {quorum_queue,
    #{desc          => "Support queues of type `quorum`",
      doc_url       => "https://www.rabbitmq.com/quorum-queues.html",
      stability     => stable,
      migration_fun => {?MODULE, quorum_queue_migration}
     }}).

-rabbit_feature_flag(
   {stream_queue,
    #{desc          => "Support queues of type `stream`",
      doc_url       => "https://www.rabbitmq.com/stream-queues.html",
      stability     => stable,
      depends_on    => [quorum_queue],
      migration_fun => {?MODULE, stream_queue_migration}
     }}).

-rabbit_feature_flag(
   {implicit_default_bindings,
    #{desc          => "Default bindings are now implicit, instead of "
                       "being stored in the database",
      stability     => stable,
      migration_fun => {?MODULE, implicit_default_bindings_migration}
     }}).

-rabbit_feature_flag(
   {virtual_host_metadata,
    #{desc          => "Virtual host metadata (description, tags, etc)",
      stability     => stable,
      migration_fun => {?MODULE, virtual_host_metadata_migration}
     }}).

-rabbit_feature_flag(
   {maintenance_mode_status,
    #{desc          => "Maintenance mode status",
      stability     => stable,
      migration_fun => {?MODULE, maintenance_mode_status_migration}
     }}).

-rabbit_feature_flag(
   {user_limits,
    #{desc          => "Configure connection and channel limits for a user",
      stability     => stable,
      migration_fun => {?MODULE, user_limits_migration}
     }}).

%% -------------------------------------------------------------------
%% Quorum queues.
%% -------------------------------------------------------------------

-define(quorum_queue_tables, [rabbit_queue,
                              rabbit_durable_queue]).

%% enable: transforms both queue tables to the amqqueue_v2 record
%% layout. is_enabled: true when both tables already carry the v2
%% attribute list.
quorum_queue_migration(FeatureName, _FeatureProps, enable) ->
    Tables = ?quorum_queue_tables,
    rabbit_table:wait(Tables, _Retry = true),
    Fields = amqqueue:fields(amqqueue_v2),
    migrate_to_amqqueue_with_type(FeatureName, Tables, Fields);
quorum_queue_migration(_FeatureName, _FeatureProps, is_enabled) ->
    Tables = ?quorum_queue_tables,
    rabbit_table:wait(Tables, _Retry = true),
    Fields = amqqueue:fields(amqqueue_v2),
    mnesia:table_info(rabbit_queue, attributes) =:= Fields andalso
    mnesia:table_info(rabbit_durable_queue, attributes) =:= Fields.

%% Stream queues need no schema change of their own.
stream_queue_migration(_FeatureName, _FeatureProps, _Enable) ->
    ok.

%% Transforms each listed Mnesia table in turn, stopping at the first
%% failure.
migrate_to_amqqueue_with_type(FeatureName, [Table | Rest], Fields) ->
    rabbit_log_feature_flags:info(
      "Feature flag `~s`:   migrating Mnesia table ~s...",
      [FeatureName, Table]),
    Fun = fun(Queue) -> amqqueue:upgrade_to(amqqueue_v2, Queue) end,
    case mnesia:transform_table(Table, Fun, Fields) of
        {atomic, ok}      -> migrate_to_amqqueue_with_type(FeatureName,
                                                           Rest,
                                                           Fields);
        {aborted, Reason} -> {error, Reason}
    end;
migrate_to_amqqueue_with_type(FeatureName, [], _) ->
    rabbit_log_feature_flags:info(
      "Feature flag `~s`:   Mnesia tables migration done",
      [FeatureName]),
    ok.

%% -------------------------------------------------------------------
%% Default bindings.
%% -------------------------------------------------------------------

implicit_default_bindings_migration(FeatureName, _FeatureProps,
                                    enable) ->
    %% Default exchange bindings are now implicit (not stored in the
    %% route tables). It should be safe to remove them outside of a
    %% transaction.
    rabbit_table:wait([rabbit_queue]),
    Queues = mnesia:dirty_all_keys(rabbit_queue),
    remove_explicit_default_bindings(FeatureName, Queues);
implicit_default_bindings_migration(_Feature_Name, _FeatureProps,
                                    is_enabled) ->
    %% 'undefined' lets the feature-flags subsystem decide from its own
    %% bookkeeping.
    undefined.

remove_explicit_default_bindings(_FeatureName, []) ->
    ok;
remove_explicit_default_bindings(FeatureName, Queues) ->
    rabbit_log_feature_flags:info(
      "Feature flag `~s`:   deleting explicit default bindings "
      "for ~b queues (it may take some time)...",
      [FeatureName, length(Queues)]),
    [rabbit_binding:remove_default_exchange_binding_rows_of(Q)
     || Q <- Queues],
    ok.

%% -------------------------------------------------------------------
%% Virtual host metadata.
%% -------------------------------------------------------------------

virtual_host_metadata_migration(_FeatureName, _FeatureProps, enable) ->
    Tab = rabbit_vhost,
    rabbit_table:wait([Tab], _Retry = true),
    Fun = fun(Row) -> vhost:upgrade_to(vhost_v2, Row) end,
    case mnesia:transform_table(Tab, Fun, vhost:fields(vhost_v2)) of
        {atomic, ok}      -> ok;
        {aborted, Reason} -> {error, Reason}
    end;
virtual_host_metadata_migration(_FeatureName, _FeatureProps, is_enabled) ->
    mnesia:table_info(rabbit_vhost, attributes) =:= vhost:fields(vhost_v2).

%% -------------------------------------------------------------------
%% Maintenance mode.
%% -------------------------------------------------------------------

maintenance_mode_status_migration(FeatureName, _FeatureProps, enable) ->
    TableName = rabbit_maintenance:status_table_name(),
    rabbit_log:info(
      "Creating table ~s for feature flag `~s`",
      [TableName, FeatureName]),
    try
        %% Table creation errors are thrown by rabbit_table; they are
        %% logged rather than failing the flag enablement.
        _ = rabbit_table:create(
              TableName,
              rabbit_maintenance:status_table_definition()),
        _ = rabbit_table:ensure_table_copy(TableName, node())
    catch throw:Reason  ->
            rabbit_log:error(
              "Failed to create maintenance status table: ~p",
              [Reason])
    end;
maintenance_mode_status_migration(_FeatureName, _FeatureProps, is_enabled) ->
    rabbit_table:exists(rabbit_maintenance:status_table_name()).

%% -------------------------------------------------------------------
%% User limits.
%% -------------------------------------------------------------------

user_limits_migration(_FeatureName, _FeatureProps, enable) ->
    Tab = rabbit_user,
    rabbit_table:wait([Tab], _Retry = true),
    Fun = fun(Row) -> internal_user:upgrade_to(internal_user_v2, Row) end,
    case mnesia:transform_table(Tab, Fun, internal_user:fields(internal_user_v2)) of
        {atomic, ok}      -> ok;
        {aborted, Reason} -> {error, Reason}
    end;
user_limits_migration(_FeatureName, _FeatureProps, is_enabled) ->
    mnesia:table_info(rabbit_user, attributes) =:= internal_user:fields(internal_user_v2).
diff --git a/deps/rabbit/src/rabbit_core_metrics_gc.erl b/deps/rabbit/src/rabbit_core_metrics_gc.erl new file mode 100644 index 0000000000..890c127586 --- /dev/null +++ b/deps/rabbit/src/rabbit_core_metrics_gc.erl @@ -0,0 +1,199 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%
%% Periodically sweeps core metrics ETS tables, deleting rows whose
%% owning process has died or whose entity (queue, exchange, node) no
%% longer exists.
-module(rabbit_core_metrics_gc).

%% timer: reference of the pending send_after timer;
%% interval: sweep period in milliseconds.
-record(state, {timer,
                interval
               }).
-export([start_link/0]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
         code_change/3]).

-spec start_link() -> rabbit_types:ok_pid_or_error().

start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% Sweep interval defaults to 2 minutes; configurable via the
%% core_metrics_gc_interval application env key.
init(_) ->
    Interval = rabbit_misc:get_env(rabbit, core_metrics_gc_interval, 120000),
    {ok, start_timer(#state{interval = Interval})}.

handle_call(test, _From, State) ->
    {reply, ok, State}.

handle_cast(_Request, State) ->
    {noreply, State}.

%% Timer tick: run every sweep, then re-arm the timer.
handle_info(start_gc, State) ->
    gc_connections(),
    gc_channels(),
    gc_queues(),
    gc_exchanges(),
    gc_nodes(),
    gc_gen_server2(),
    gc_auth_attempts(),
    {noreply, start_timer(State)}.

terminate(_Reason, #state{timer = TRef}) ->
    erlang:cancel_timer(TRef),
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

start_timer(#state{interval = Interval} = St) ->
    TRef = erlang:send_after(Interval, self(), start_gc),
    St#state{timer = TRef}.

gc_connections() ->
    gc_process(connection_created),
    gc_process(connection_metrics),
    gc_process(connection_coarse_metrics).

gc_channels() ->
    gc_process(channel_created),
    gc_process(channel_metrics),
    gc_process(channel_process_metrics),
    ok.

gc_queues() ->
    gc_local_queues(),
    gc_global_queues().

%% Sweeps queue tables against the set of queues known locally; also
%% drops coarse metrics for queues this node only follows (not leads).
gc_local_queues() ->
    Queues = rabbit_amqqueue:list_local_names(),
    QueuesDown = rabbit_amqqueue:list_local_names_down(),
    GbSet = gb_sets:from_list(Queues),
    GbSetDown = gb_sets:from_list(QueuesDown),
    gc_queue_metrics(GbSet, GbSetDown),
    gc_entity(queue_coarse_metrics, GbSet),
    Followers = gb_sets:from_list([amqqueue:get_name(Q) || Q <- rabbit_amqqueue:list_local_followers() ]),
    gc_leader_data(Followers).

gc_leader_data(Followers) ->
    ets:foldl(fun({Id, _, _, _, _}, none) ->
                      gc_leader_data(Id, queue_coarse_metrics, Followers)
              end, none, queue_coarse_metrics).
%% Deletes rows for queues that are members of GbSet (here: local
%% followers) — a follower must not report leader data.
gc_leader_data(Id, Table, GbSet) ->
    case gb_sets:is_member(Id, GbSet) of
        true ->
            ets:delete(Table, Id),
            none;
        false ->
            none
    end.

gc_global_queues() ->
    GbSet = gb_sets:from_list(rabbit_amqqueue:list_names()),
    gc_process_and_entity(channel_queue_metrics, GbSet),
    gc_process_and_entity(consumer_created, GbSet),
    ExchangeGbSet = gb_sets:from_list(rabbit_exchange:list_names()),
    gc_process_and_entities(channel_queue_exchange_metrics, GbSet, ExchangeGbSet).

gc_exchanges() ->
    Exchanges = rabbit_exchange:list_names(),
    GbSet = gb_sets:from_list(Exchanges),
    gc_process_and_entity(channel_exchange_metrics, GbSet).

gc_nodes() ->
    Nodes = rabbit_mnesia:cluster_nodes(all),
    GbSet = gb_sets:from_list(Nodes),
    gc_entity(node_node_metrics, GbSet).

gc_gen_server2() ->
    gc_process(gen_server2_metrics).

%% Deletes rows keyed by a pid once that pid is dead. The fun clauses
%% match the different row arities used by the various metrics tables.
gc_process(Table) ->
    ets:foldl(fun({Pid = Key, _}, none) ->
                      gc_process(Pid, Table, Key);
                 ({Pid = Key, _, _, _, _}, none) ->
                      gc_process(Pid, Table, Key);
                 ({Pid = Key, _, _, _}, none) ->
                      gc_process(Pid, Table, Key)
              end, none, Table).

gc_process(Pid, Table, Key) ->
    case rabbit_misc:is_process_alive(Pid) of
        true ->
            none;
        false ->
            ets:delete(Table, Key),
            none
    end.

%% Deletes rows for queues that no longer exist; rows for queues that
%% exist but are down get their 'state' property rewritten to down.
gc_queue_metrics(GbSet, GbSetDown) ->
    Table = queue_metrics,
    ets:foldl(fun({Key, Props, Marker}, none) ->
                      case gb_sets:is_member(Key, GbSet) of
                          true ->
                              case gb_sets:is_member(Key, GbSetDown) of
                                  true ->
                                      ets:insert(Table, {Key, [{state, down} | lists:keydelete(state, 1, Props)], Marker}),
                                      none;
                                  false ->
                                      none
                              end;
                          false ->
                              ets:delete(Table, Key),
                              none
                      end
              end, none, Table).

%% Deletes rows whose entity id is no longer in GbSet. Clauses cover
%% the key/arity shapes of the different metrics tables.
gc_entity(Table, GbSet) ->
    ets:foldl(fun({{_, Id} = Key, _}, none) ->
                      gc_entity(Id, Table, Key, GbSet);
                 ({Id = Key, _}, none) ->
                      gc_entity(Id, Table, Key, GbSet);
                 ({Id = Key, _, _}, none) ->
                      gc_entity(Id, Table, Key, GbSet);
                 ({Id = Key, _, _, _, _}, none) ->
                      gc_entity(Id, Table, Key, GbSet)
              end, none, Table).
gc_entity(Id, Table, Key, GbSet) ->
    case gb_sets:is_member(Id, GbSet) of
        true ->
            none;
        false ->
            ets:delete(Table, Key),
            none
    end.

%% Deletes rows when either the owning pid is dead or the entity is
%% gone. Key shapes are table-specific, hence the per-table guards.
gc_process_and_entity(Table, GbSet) ->
    ets:foldl(fun({{Pid, Id} = Key, _, _, _, _, _, _, _, _}, none)
                    when Table == channel_queue_metrics ->
                      gc_process_and_entity(Id, Pid, Table, Key, GbSet);
                 ({{Pid, Id} = Key, _, _, _, _, _}, none)
                    when Table == channel_exchange_metrics ->
                      gc_process_and_entity(Id, Pid, Table, Key, GbSet);
                 ({{Id, Pid, _} = Key, _, _, _, _, _, _}, none)
                    when Table == consumer_created ->
                      gc_process_and_entity(Id, Pid, Table, Key, GbSet);
                 ({{{Pid, Id}, _} = Key, _, _, _, _}, none) ->
                      gc_process_and_entity(Id, Pid, Table, Key, GbSet)
              end, none, Table).

gc_process_and_entity(Id, Pid, Table, Key, GbSet) ->
    case rabbit_misc:is_process_alive(Pid) andalso gb_sets:is_member(Id, GbSet) of
        true ->
            none;
        false ->
            ets:delete(Table, Key),
            none
    end.

%% channel_queue_exchange_metrics rows reference a pid, a queue and an
%% exchange; any of the three disappearing removes the row.
gc_process_and_entities(Table, QueueGbSet, ExchangeGbSet) ->
    ets:foldl(fun({{Pid, {Q, X}} = Key, _, _}, none) ->
                      gc_process(Pid, Table, Key),
                      gc_entity(Q, Table, Key, QueueGbSet),
                      gc_entity(X, Table, Key, ExchangeGbSet)
              end, none, Table).

%% Auth attempt metrics are not entity-scoped; simply reset them.
gc_auth_attempts() ->
    ets:delete_all_objects(auth_attempt_detailed_metrics).
diff --git a/deps/rabbit/src/rabbit_credential_validation.erl b/deps/rabbit/src/rabbit_credential_validation.erl new file mode 100644 index 0000000000..8712628ade --- /dev/null +++ b/deps/rabbit/src/rabbit_credential_validation.erl @@ -0,0 +1,44 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_credential_validation).

-include("rabbit.hrl").

%% used for backwards compatibility
-define(DEFAULT_BACKEND, rabbit_credential_validator_accept_everything).

%%
%% API
%%

-export([validate/2, backend/0]).

%% Validates a username/password pair by delegating to the effective
%% `rabbit_credential_validator`. Used by `rabbit_auth_backend_internal`.
%% Note that some validators may choose to only validate passwords.
%%
%% Possible return values:
%%
%% * ok: provided credentials passed validation.
%% * {error, Error}: provided password failed validation.

-spec validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}.

%% Dynamic dispatch to the configured validator module; the module is
%% resolved per call so a config change takes effect without restart.
validate(Username, Password) ->
    Backend = backend(),
    Backend:validate(Username, Password).

-spec backend() -> atom().

%% Returns the validator module configured under
%% rabbit.credential_validator.validation_backend, defaulting to the
%% accept-everything validator when the key (or whole section) is absent.
backend() ->
    case application:get_env(rabbit, credential_validator) of
        undefined ->
            ?DEFAULT_BACKEND;
        {ok, Proplist} ->
            proplists:get_value(validation_backend, Proplist, ?DEFAULT_BACKEND)
    end.
diff --git a/deps/rabbit/src/rabbit_credential_validator.erl b/deps/rabbit/src/rabbit_credential_validator.erl
new file mode 100644
index 0000000000..3b5d0752bf
--- /dev/null
+++ b/deps/rabbit/src/rabbit_credential_validator.erl
@@ -0,0 +1,19 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.
%%

-module(rabbit_credential_validator).

-include("rabbit.hrl").

%% Behaviour contract for credential validators.
%% Validates a password. Used by `rabbit_auth_backend_internal`.
%%
%% Possible return values:
%%
%% * ok: provided password passed validation.
%% * {error, Error}: provided password failed validation.

-callback validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}.
diff --git a/deps/rabbit/src/rabbit_credential_validator_accept_everything.erl b/deps/rabbit/src/rabbit_credential_validator_accept_everything.erl new file mode 100644 index 0000000000..fea10fd4b6 --- /dev/null +++ b/deps/rabbit/src/rabbit_credential_validator_accept_everything.erl @@ -0,0 +1,23 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_credential_validator_accept_everything). + +-include("rabbit.hrl"). + +-behaviour(rabbit_credential_validator). + +%% +%% API +%% + +-export([validate/2]). + +-spec validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}. + +validate(_Username, _Password) -> + ok. diff --git a/deps/rabbit/src/rabbit_credential_validator_min_password_length.erl b/deps/rabbit/src/rabbit_credential_validator_min_password_length.erl new file mode 100644 index 0000000000..463090127f --- /dev/null +++ b/deps/rabbit/src/rabbit_credential_validator_min_password_length.erl @@ -0,0 +1,50 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_credential_validator_min_password_length). + +-include("rabbit.hrl"). + +-behaviour(rabbit_credential_validator). + +%% accommodates default (localhost-only) user credentials, +%% guest/guest +-define(DEFAULT_MIN_LENGTH, 5). + +%% +%% API +%% + +-export([validate/2]). +%% for tests +-export([validate/3]). + +-spec validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}. 

%% Validates the password against the configured minimum length
%% (rabbit.credential_validator.min_length), defaulting to
%% ?DEFAULT_MIN_LENGTH when unconfigured.
validate(Username, Password) ->
    MinLength = case application:get_env(rabbit, credential_validator) of
                    undefined ->
                        ?DEFAULT_MIN_LENGTH;
                    {ok, Proplist} ->
                        case proplists:get_value(min_length, Proplist) of
                            undefined -> ?DEFAULT_MIN_LENGTH;
                            Value     -> rabbit_data_coercion:to_integer(Value)
                        end
                end,
    validate(Username, Password, MinLength).


%% Spec fix: the clauses below return {'error', string()}, never the
%% previously declared {'error', string(), [any()]} 3-tuple.
-spec validate(rabbit_types:username(), rabbit_types:password(), integer()) -> 'ok' | {'error', string()}.

%% passwordless users always fail a minimum-length requirement
validate(_Username, undefined, MinLength) ->
    {error, rabbit_misc:format("minimum required password length is ~B", [MinLength])};
validate(_Username, Password, MinLength) ->
    %% byte_size/1 instead of the ambiguous size/1: passwords are
    %% binaries, and byte_size is guard-safe and Dialyzer-precise.
    case byte_size(Password) >= MinLength of
        true  -> ok;
        false -> {error, rabbit_misc:format("minimum required password length is ~B", [MinLength])}
    end.
diff --git a/deps/rabbit/src/rabbit_credential_validator_password_regexp.erl b/deps/rabbit/src/rabbit_credential_validator_password_regexp.erl
new file mode 100644
index 0000000000..dc64cf1d31
--- /dev/null
+++ b/deps/rabbit/src/rabbit_credential_validator_password_regexp.erl
@@ -0,0 +1,42 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.
%%


%% A `rabbit_credential_validator` implementation that matches
%% password against a pre-configured regular expression.
-module(rabbit_credential_validator_password_regexp).

-include("rabbit.hrl").

-behaviour(rabbit_credential_validator).

%%
%% API
%%

-export([validate/2]).
%% for tests
-export([validate/3]).

-spec validate(rabbit_types:username(), rabbit_types:password()) -> 'ok' | {'error', string()}.

%% Validates the password against the configured regular expression
%% (rabbit.credential_validator.regexp). Returns {error, _} when the
%% key is missing from the config.
validate(Username, Password) ->
    {ok, Proplist} = application:get_env(rabbit, credential_validator),
    case proplists:get_value(regexp, Proplist) of
        undefined ->
            %% Bug fix: previously the {error, ...} tuple was bound and
            %% then passed to validate/3 as the pattern, so re:run/2
            %% crashed with badarg instead of reporting the missing key.
            {error, "rabbit.credential_validator.regexp config key is undefined"};
        Value ->
            validate(Username, Password, rabbit_data_coercion:to_list(Value))
    end.


%% Spec fix: the clause below returns {'error', string()}, never the
%% previously declared {'error', string(), [any()]} 3-tuple.
-spec validate(rabbit_types:username(), rabbit_types:password(), string()) -> 'ok' | {'error', string()}.

%% Exported for tests: match Password against an explicit Pattern.
validate(_Username, Password, Pattern) ->
    case re:run(rabbit_data_coercion:to_list(Password), Pattern) of
        {match, _} -> ok;
        nomatch    -> {error, "provided password does not match the validator regular expression"}
    end.
diff --git a/deps/rabbit/src/rabbit_dead_letter.erl b/deps/rabbit/src/rabbit_dead_letter.erl
new file mode 100644
index 0000000000..755de5cf53
--- /dev/null
+++ b/deps/rabbit/src/rabbit_dead_letter.erl
@@ -0,0 +1,253 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.
%%

-module(rabbit_dead_letter).

-export([publish/5]).

-include("rabbit.hrl").
-include("rabbit_framing.hrl").

%%----------------------------------------------------------------------------

%% 'delivery_limit' quoted for consistency with the other alternatives
%% (atoms are identical either way).
-type reason() :: 'expired' | 'rejected' | 'maxlen' | 'delivery_limit'.

%%----------------------------------------------------------------------------

-spec publish(rabbit_types:message(), reason(), rabbit_types:exchange(),
              'undefined' | binary(), rabbit_amqqueue:name()) -> 'ok'.
+publish(Msg, Reason, X, RK, QName) -> + DLMsg = make_msg(Msg, Reason, X#exchange.name, RK, QName), + Delivery = rabbit_basic:delivery(false, false, DLMsg, undefined), + {Queues, Cycles} = detect_cycles(Reason, DLMsg, + rabbit_exchange:route(X, Delivery)), + lists:foreach(fun log_cycle_once/1, Cycles), + _ = rabbit_queue_type:deliver(rabbit_amqqueue:lookup(Queues), + Delivery, stateless), + ok. + +make_msg(Msg = #basic_message{content = Content, + exchange_name = Exchange, + routing_keys = RoutingKeys}, + Reason, DLX, RK, #resource{name = QName}) -> + {DeathRoutingKeys, HeadersFun1} = + case RK of + undefined -> {RoutingKeys, fun (H) -> H end}; + _ -> {[RK], fun (H) -> lists:keydelete(<<"CC">>, 1, H) end} + end, + ReasonBin = list_to_binary(atom_to_list(Reason)), + TimeSec = os:system_time(seconds), + PerMsgTTL = per_msg_ttl_header(Content#content.properties), + HeadersFun2 = + fun (Headers) -> + %% The first routing key is the one specified in the + %% basic.publish; all others are CC or BCC keys. + RKs = [hd(RoutingKeys) | rabbit_basic:header_routes(Headers)], + RKs1 = [{longstr, Key} || Key <- RKs], + Info = [{<<"reason">>, longstr, ReasonBin}, + {<<"queue">>, longstr, QName}, + {<<"time">>, timestamp, TimeSec}, + {<<"exchange">>, longstr, Exchange#resource.name}, + {<<"routing-keys">>, array, RKs1}] ++ PerMsgTTL, + HeadersFun1(update_x_death_header(Info, Headers)) + end, + Content1 = #content{properties = Props} = + rabbit_basic:map_headers(HeadersFun2, Content), + Content2 = Content1#content{properties = + Props#'P_basic'{expiration = undefined}}, + Msg#basic_message{exchange_name = DLX, + id = rabbit_guid:gen(), + routing_keys = DeathRoutingKeys, + content = Content2}. + + +x_death_event_key(Info, Key) -> + case lists:keysearch(Key, 1, Info) of + false -> undefined; + {value, {Key, _KeyType, Val}} -> Val + end. 
+ +maybe_append_to_event_group(Table, _Key, _SeenKeys, []) -> + [Table]; +maybe_append_to_event_group(Table, {_Queue, _Reason} = Key, SeenKeys, Acc) -> + case sets:is_element(Key, SeenKeys) of + true -> Acc; + false -> [Table | Acc] + end. + +group_by_queue_and_reason([]) -> + []; +group_by_queue_and_reason([Table]) -> + [Table]; +group_by_queue_and_reason(Tables) -> + {_, Grouped} = + lists:foldl( + fun ({table, Info}, {SeenKeys, Acc}) -> + Q = x_death_event_key(Info, <<"queue">>), + R = x_death_event_key(Info, <<"reason">>), + Matcher = queue_and_reason_matcher(Q, R), + {Matches, _} = lists:partition(Matcher, Tables), + {Augmented, N} = case Matches of + [X] -> {X, 1}; + [X|_] = Xs -> {X, length(Xs)} + end, + Key = {Q, R}, + Acc1 = maybe_append_to_event_group( + ensure_xdeath_event_count(Augmented, N), + Key, SeenKeys, Acc), + {sets:add_element(Key, SeenKeys), Acc1} + end, {sets:new(), []}, Tables), + Grouped. + +update_x_death_header(Info, undefined) -> + update_x_death_header(Info, []); +update_x_death_header(Info, Headers) -> + X = x_death_event_key(Info, <<"exchange">>), + Q = x_death_event_key(Info, <<"queue">>), + R = x_death_event_key(Info, <<"reason">>), + case rabbit_basic:header(<<"x-death">>, Headers) of + undefined -> + %% First x-death event gets its own top-level headers. + %% See rabbitmq/rabbitmq-server#1332. 
+ Headers2 = rabbit_misc:set_table_value(Headers, <<"x-first-death-reason">>, + longstr, R), + Headers3 = rabbit_misc:set_table_value(Headers2, <<"x-first-death-queue">>, + longstr, Q), + Headers4 = rabbit_misc:set_table_value(Headers3, <<"x-first-death-exchange">>, + longstr, X), + rabbit_basic:prepend_table_header( + <<"x-death">>, + [{<<"count">>, long, 1} | Info], Headers4); + {<<"x-death">>, array, Tables} -> + %% group existing x-death headers in case we have some from + %% before rabbitmq-server#78 + GroupedTables = group_by_queue_and_reason(Tables), + {Matches, Others} = lists:partition( + queue_and_reason_matcher(Q, R), + GroupedTables), + Info1 = case Matches of + [] -> + [{<<"count">>, long, 1} | Info]; + [{table, M}] -> + increment_xdeath_event_count(M) + end, + rabbit_misc:set_table_value( + Headers, <<"x-death">>, array, + [{table, rabbit_misc:sort_field_table(Info1)} | Others]); + {<<"x-death">>, InvalidType, Header} -> + rabbit_log:warning("Message has invalid x-death header (type: ~p)." + " Resetting header ~p~n", + [InvalidType, Header]), + %% if x-death is something other than an array (list) + %% then we reset it: this happens when some clients consume + %% a message and re-publish is, converting header values + %% to strings, intentionally or not. + %% See rabbitmq/rabbitmq-server#767 for details. + rabbit_misc:set_table_value( + Headers, <<"x-death">>, array, + [{table, [{<<"count">>, long, 1} | Info]}]) + end. + +ensure_xdeath_event_count({table, Info}, InitialVal) when InitialVal >= 1 -> + {table, ensure_xdeath_event_count(Info, InitialVal)}; +ensure_xdeath_event_count(Info, InitialVal) when InitialVal >= 1 -> + case x_death_event_key(Info, <<"count">>) of + undefined -> + [{<<"count">>, long, InitialVal} | Info]; + _ -> + Info + end. 
+ +increment_xdeath_event_count(Info) -> + case x_death_event_key(Info, <<"count">>) of + undefined -> + [{<<"count">>, long, 1} | Info]; + N -> + lists:keyreplace( + <<"count">>, 1, Info, + {<<"count">>, long, N + 1}) + end. + +queue_and_reason_matcher(Q, R) -> + F = fun(Info) -> + x_death_event_key(Info, <<"queue">>) =:= Q + andalso x_death_event_key(Info, <<"reason">>) =:= R + end, + fun({table, Info}) -> + F(Info); + (Info) when is_list(Info) -> + F(Info) + end. + +per_msg_ttl_header(#'P_basic'{expiration = undefined}) -> + []; +per_msg_ttl_header(#'P_basic'{expiration = Expiration}) -> + [{<<"original-expiration">>, longstr, Expiration}]; +per_msg_ttl_header(_) -> + []. + +detect_cycles(rejected, _Msg, Queues) -> + {Queues, []}; + +detect_cycles(_Reason, #basic_message{content = Content}, Queues) -> + #content{properties = #'P_basic'{headers = Headers}} = + rabbit_binary_parser:ensure_content_decoded(Content), + NoCycles = {Queues, []}, + case Headers of + undefined -> + NoCycles; + _ -> + case rabbit_misc:table_lookup(Headers, <<"x-death">>) of + {array, Deaths} -> + {Cycling, NotCycling} = + lists:partition(fun (#resource{name = Queue}) -> + is_cycle(Queue, Deaths) + end, Queues), + OldQueues = [rabbit_misc:table_lookup(D, <<"queue">>) || + {table, D} <- Deaths], + OldQueues1 = [QName || {longstr, QName} <- OldQueues], + {NotCycling, [[QName | OldQueues1] || + #resource{name = QName} <- Cycling]}; + _ -> + NoCycles + end + end. + +is_cycle(Queue, Deaths) -> + {Cycle, Rest} = + lists:splitwith( + fun ({table, D}) -> + {longstr, Queue} =/= rabbit_misc:table_lookup(D, <<"queue">>); + (_) -> + true + end, Deaths), + %% Is there a cycle, and if so, is it "fully automatic", i.e. with + %% no reject in it? 
+ case Rest of + [] -> false; + [H|_] -> lists:all( + fun ({table, D}) -> + {longstr, <<"rejected">>} =/= + rabbit_misc:table_lookup(D, <<"reason">>); + (_) -> + %% There was something we didn't expect, therefore + %% a client must have put it there, therefore the + %% cycle was not "fully automatic". + false + end, Cycle ++ [H]) + end. + +log_cycle_once(Queues) -> + Key = {queue_cycle, Queues}, + case get(Key) of + true -> ok; + undefined -> rabbit_log:warning( + "Message dropped. Dead-letter queues cycle detected" ++ + ": ~p~nThis cycle will NOT be reported again.~n", + [Queues]), + put(Key, true) + end. diff --git a/deps/rabbit/src/rabbit_definitions.erl b/deps/rabbit/src/rabbit_definitions.erl new file mode 100644 index 0000000000..0d0212dbae --- /dev/null +++ b/deps/rabbit/src/rabbit_definitions.erl @@ -0,0 +1,767 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_definitions). +-include_lib("rabbit_common/include/rabbit.hrl"). + +-export([boot/0]). +%% automatic import on boot +-export([maybe_load_definitions/0, maybe_load_definitions/2, maybe_load_definitions_from/2, + has_configured_definitions_to_load/0]). +%% import +-export([import_raw/1, import_raw/2, import_parsed/1, import_parsed/2, + apply_defs/2, apply_defs/3, apply_defs/4, apply_defs/5]). + +-export([all_definitions/0]). +-export([ + list_users/0, list_vhosts/0, list_permissions/0, list_topic_permissions/0, + list_runtime_parameters/0, list_global_runtime_parameters/0, list_policies/0, + list_exchanges/0, list_queues/0, list_bindings/0, + is_internal_parameter/1 +]). +-export([decode/1, decode/2, args/1]). + +-import(rabbit_misc, [pget/2]). 
+ +%% +%% API +%% + +-type definition_category() :: 'users' | + 'vhosts' | + 'permissions' | + 'topic_permissions' | + 'parameters' | + 'global_parameters' | + 'policies' | + 'queues' | + 'bindings' | + 'exchanges'. + +-type definition_object() :: #{binary() => any()}. +-type definition_list() :: [definition_object()]. + +-type definitions() :: #{ + definition_category() => definition_list() +}. + +-export_type([definition_object/0, definition_list/0, definition_category/0, definitions/0]). + +-define(IMPORT_WORK_POOL, definition_import_pool). + +boot() -> + PoolSize = application:get_env(rabbit, definition_import_work_pool_size, rabbit_runtime:guess_number_of_cpu_cores()), + rabbit_sup:start_supervisor_child(definition_import_pool_sup, worker_pool_sup, [PoolSize, ?IMPORT_WORK_POOL]). + +maybe_load_definitions() -> + %% Note that management.load_definitions is handled in the plugin for backwards compatibility. + %% This executes the "core" version of load_definitions. + maybe_load_definitions(rabbit, load_definitions). + +-spec import_raw(Body :: binary() | iolist()) -> ok | {error, term()}. +import_raw(Body) -> + rabbit_log:info("Asked to import definitions. Acting user: ~s", [?INTERNAL_USER]), + case decode([], Body) of + {error, E} -> {error, E}; + {ok, _, Map} -> apply_defs(Map, ?INTERNAL_USER) + end. + +-spec import_raw(Body :: binary() | iolist(), VHost :: vhost:name()) -> ok | {error, term()}. +import_raw(Body, VHost) -> + rabbit_log:info("Asked to import definitions. Acting user: ~s", [?INTERNAL_USER]), + case decode([], Body) of + {error, E} -> {error, E}; + {ok, _, Map} -> apply_defs(Map, ?INTERNAL_USER, fun() -> ok end, VHost) + end. + +-spec import_parsed(Defs :: #{any() => any()} | list()) -> ok | {error, term()}. +import_parsed(Body0) when is_list(Body0) -> + import_parsed(maps:from_list(Body0)); +import_parsed(Body0) when is_map(Body0) -> + rabbit_log:info("Asked to import definitions. 
Acting user: ~s", [?INTERNAL_USER]), + Body = atomise_map_keys(Body0), + apply_defs(Body, ?INTERNAL_USER). + +-spec import_parsed(Defs :: #{any() => any() | list()}, VHost :: vhost:name()) -> ok | {error, term()}. +import_parsed(Body0, VHost) when is_list(Body0) -> + import_parsed(maps:from_list(Body0), VHost); +import_parsed(Body0, VHost) -> + rabbit_log:info("Asked to import definitions. Acting user: ~s", [?INTERNAL_USER]), + Body = atomise_map_keys(Body0), + apply_defs(Body, ?INTERNAL_USER, fun() -> ok end, VHost). + +-spec all_definitions() -> map(). +all_definitions() -> + Xs = list_exchanges(), + Qs = list_queues(), + Bs = list_bindings(), + + Users = list_users(), + VHosts = list_vhosts(), + Params = list_runtime_parameters(), + GParams = list_global_runtime_parameters(), + Pols = list_policies(), + + Perms = list_permissions(), + TPerms = list_topic_permissions(), + + {ok, Vsn} = application:get_key(rabbit, vsn), + #{ + rabbit_version => rabbit_data_coercion:to_binary(Vsn), + rabbitmq_version => rabbit_data_coercion:to_binary(Vsn), + users => Users, + vhosts => VHosts, + permissions => Perms, + topic_permissions => TPerms, + parameters => Params, + global_parameters => GParams, + policies => Pols, + queues => Qs, + bindings => Bs, + exchanges => Xs + }. + +%% +%% Implementation +%% + +-spec has_configured_definitions_to_load() -> boolean(). +has_configured_definitions_to_load() -> + case application:get_env(rabbit, load_definitions) of + undefined -> false; + {ok, none} -> false; + {ok, _Path} -> true + end. 
+ +maybe_load_definitions(App, Key) -> + case application:get_env(App, Key) of + undefined -> + rabbit_log:debug("No definition file configured to import via load_definitions"), + ok; + {ok, none} -> + rabbit_log:debug("No definition file configured to import via load_definitions"), + ok; + {ok, FileOrDir} -> + rabbit_log:debug("Will import definitions file from load_definitions"), + IsDir = filelib:is_dir(FileOrDir), + maybe_load_definitions_from(IsDir, FileOrDir) + end. + +maybe_load_definitions_from(true, Dir) -> + rabbit_log:info("Applying definitions from directory ~s", [Dir]), + load_definitions_from_files(file:list_dir(Dir), Dir); +maybe_load_definitions_from(false, File) -> + load_definitions_from_file(File). + +load_definitions_from_files({ok, Filenames0}, Dir) -> + Filenames1 = lists:sort(Filenames0), + Filenames2 = [filename:join(Dir, F) || F <- Filenames1], + load_definitions_from_filenames(Filenames2); +load_definitions_from_files({error, E}, Dir) -> + rabbit_log:error("Could not read definitions from directory ~s, Error: ~p", [Dir, E]), + {error, {could_not_read_defs, E}}. + +load_definitions_from_filenames([]) -> + ok; +load_definitions_from_filenames([File|Rest]) -> + case load_definitions_from_file(File) of + ok -> load_definitions_from_filenames(Rest); + {error, E} -> {error, {failed_to_import_definitions, File, E}} + end. + +load_definitions_from_file(File) -> + case file:read_file(File) of + {ok, Body} -> + rabbit_log:info("Applying definitions from file at '~s'", [File]), + import_raw(Body); + {error, E} -> + rabbit_log:error("Could not read definitions from file at '~s', error: ~p", [File, E]), + {error, {could_not_read_defs, {File, E}}} + end. 
+ +decode(Keys, Body) -> + case decode(Body) of + {ok, J0} -> + J = maps:fold(fun(K, V, Acc) -> + Acc#{rabbit_data_coercion:to_atom(K, utf8) => V} + end, J0, J0), + Results = [get_or_missing(K, J) || K <- Keys], + case [E || E = {key_missing, _} <- Results] of + [] -> {ok, Results, J}; + Errors -> {error, Errors} + end; + Else -> Else + end. + +decode(<<"">>) -> + {ok, #{}}; +decode(Body) -> + try + Decoded = rabbit_json:decode(Body), + Normalised = atomise_map_keys(Decoded), + {ok, Normalised} + catch error:_ -> {error, not_json} + end. + +atomise_map_keys(Decoded) -> + maps:fold(fun(K, V, Acc) -> + Acc#{rabbit_data_coercion:to_atom(K, utf8) => V} + end, Decoded, Decoded). + +-spec apply_defs(Map :: #{atom() => any()}, ActingUser :: rabbit_types:username()) -> 'ok' | {error, term()}. + +apply_defs(Map, ActingUser) -> + apply_defs(Map, ActingUser, fun () -> ok end). + +-spec apply_defs(Map :: #{atom() => any()}, ActingUser :: rabbit_types:username(), + SuccessFun :: fun(() -> 'ok')) -> 'ok' | {error, term()}; + (Map :: #{atom() => any()}, ActingUser :: rabbit_types:username(), + VHost :: vhost:name()) -> 'ok' | {error, term()}. 
+ +apply_defs(Map, ActingUser, VHost) when is_binary(VHost) -> + apply_defs(Map, ActingUser, fun () -> ok end, VHost); + +apply_defs(Map, ActingUser, SuccessFun) when is_function(SuccessFun) -> + Version = maps:get(rabbitmq_version, Map, maps:get(rabbit_version, Map, undefined)), + try + concurrent_for_all(users, ActingUser, Map, + fun(User, _Username) -> + rabbit_auth_backend_internal:put_user(User, Version, ActingUser) + end), + concurrent_for_all(vhosts, ActingUser, Map, fun add_vhost/2), + validate_limits(Map), + concurrent_for_all(permissions, ActingUser, Map, fun add_permission/2), + concurrent_for_all(topic_permissions, ActingUser, Map, fun add_topic_permission/2), + sequential_for_all(parameters, ActingUser, Map, fun add_parameter/2), + sequential_for_all(global_parameters, ActingUser, Map, fun add_global_parameter/2), + %% importing policies concurrently can be unsafe as queues will be getting + %% potentially out of order notifications of applicable policy changes + sequential_for_all(policies, ActingUser, Map, fun add_policy/2), + concurrent_for_all(queues, ActingUser, Map, fun add_queue/2), + concurrent_for_all(exchanges, ActingUser, Map, fun add_exchange/2), + concurrent_for_all(bindings, ActingUser, Map, fun add_binding/2), + SuccessFun(), + ok + catch {error, E} -> {error, E}; + exit:E -> {error, E} + end. + +-spec apply_defs(Map :: #{atom() => any()}, + ActingUser :: rabbit_types:username(), + SuccessFun :: fun(() -> 'ok'), + VHost :: vhost:name()) -> 'ok' | {error, term()}. + +apply_defs(Map, ActingUser, SuccessFun, VHost) when is_binary(VHost) -> + rabbit_log:info("Asked to import definitions for a virtual host. 
Virtual host: ~p, acting user: ~p", + [VHost, ActingUser]), + try + validate_limits(Map, VHost), + sequential_for_all(parameters, ActingUser, Map, VHost, fun add_parameter/3), + %% importing policies concurrently can be unsafe as queues will be getting + %% potentially out of order notifications of applicable policy changes + sequential_for_all(policies, ActingUser, Map, VHost, fun add_policy/3), + concurrent_for_all(queues, ActingUser, Map, VHost, fun add_queue/3), + concurrent_for_all(exchanges, ActingUser, Map, VHost, fun add_exchange/3), + concurrent_for_all(bindings, ActingUser, Map, VHost, fun add_binding/3), + SuccessFun() + catch {error, E} -> {error, format(E)}; + exit:E -> {error, format(E)} + end. + +-spec apply_defs(Map :: #{atom() => any()}, + ActingUser :: rabbit_types:username(), + SuccessFun :: fun(() -> 'ok'), + ErrorFun :: fun((any()) -> 'ok'), + VHost :: vhost:name()) -> 'ok' | {error, term()}. + +apply_defs(Map, ActingUser, SuccessFun, ErrorFun, VHost) -> + rabbit_log:info("Asked to import definitions for a virtual host. Virtual host: ~p, acting user: ~p", + [VHost, ActingUser]), + try + validate_limits(Map, VHost), + sequential_for_all(parameters, ActingUser, Map, VHost, fun add_parameter/3), + %% importing policies concurrently can be unsafe as queues will be getting + %% potentially out of order notifications of applicable policy changes + sequential_for_all(policies, ActingUser, Map, VHost, fun add_policy/3), + concurrent_for_all(queues, ActingUser, Map, VHost, fun add_queue/3), + concurrent_for_all(exchanges, ActingUser, Map, VHost, fun add_exchange/3), + concurrent_for_all(bindings, ActingUser, Map, VHost, fun add_binding/3), + SuccessFun() + catch {error, E} -> ErrorFun(format(E)); + exit:E -> ErrorFun(format(E)) + end. 
+ +sequential_for_all(Category, ActingUser, Definitions, Fun) -> + case maps:get(rabbit_data_coercion:to_atom(Category), Definitions, undefined) of + undefined -> ok; + List -> + case length(List) of + 0 -> ok; + N -> rabbit_log:info("Importing sequentially ~p ~s...", [N, human_readable_category_name(Category)]) + end, + [begin + %% keys are expected to be atoms + Fun(atomize_keys(M), ActingUser) + end || M <- List, is_map(M)] + end. + +sequential_for_all(Name, ActingUser, Definitions, VHost, Fun) -> + case maps:get(rabbit_data_coercion:to_atom(Name), Definitions, undefined) of + undefined -> ok; + List -> [Fun(VHost, atomize_keys(M), ActingUser) || M <- List, is_map(M)] + end. + +concurrent_for_all(Category, ActingUser, Definitions, Fun) -> + case maps:get(rabbit_data_coercion:to_atom(Category), Definitions, undefined) of + undefined -> ok; + List -> + case length(List) of + 0 -> ok; + N -> rabbit_log:info("Importing concurrently ~p ~s...", [N, human_readable_category_name(Category)]) + end, + WorkPoolFun = fun(M) -> + Fun(atomize_keys(M), ActingUser) + end, + do_concurrent_for_all(List, WorkPoolFun) + end. + +concurrent_for_all(Name, ActingUser, Definitions, VHost, Fun) -> + case maps:get(rabbit_data_coercion:to_atom(Name), Definitions, undefined) of + undefined -> ok; + List -> + WorkPoolFun = fun(M) -> + Fun(VHost, atomize_keys(M), ActingUser) + end, + do_concurrent_for_all(List, WorkPoolFun) + end. 
+ +do_concurrent_for_all(List, WorkPoolFun) -> + {ok, Gatherer} = gatherer:start_link(), + [begin + %% keys are expected to be atoms + ok = gatherer:fork(Gatherer), + worker_pool:submit_async( + ?IMPORT_WORK_POOL, + fun() -> + try + WorkPoolFun(M) + catch {error, E} -> gatherer:in(Gatherer, {error, E}); + _:E -> gatherer:in(Gatherer, {error, E}) + end, + gatherer:finish(Gatherer) + end) + end || M <- List, is_map(M)], + case gatherer:out(Gatherer) of + empty -> + ok = gatherer:stop(Gatherer); + {value, {error, E}} -> + ok = gatherer:stop(Gatherer), + throw({error, E}) + end. + +-spec atomize_keys(#{any() => any()}) -> #{atom() => any()}. + +atomize_keys(M) -> + maps:fold(fun(K, V, Acc) -> + maps:put(rabbit_data_coercion:to_atom(K), V, Acc) + end, #{}, M). + +-spec human_readable_category_name(definition_category()) -> string(). + +human_readable_category_name(topic_permissions) -> "topic permissions"; +human_readable_category_name(parameters) -> "runtime parameters"; +human_readable_category_name(global_parameters) -> "global runtime parameters"; +human_readable_category_name(Other) -> rabbit_data_coercion:to_list(Other). + + +format(#amqp_error{name = Name, explanation = Explanation}) -> + rabbit_data_coercion:to_binary(rabbit_misc:format("~s: ~s", [Name, Explanation])); +format({no_such_vhost, undefined}) -> + rabbit_data_coercion:to_binary( + "Virtual host does not exist and is not specified in definitions file."); +format({no_such_vhost, VHost}) -> + rabbit_data_coercion:to_binary( + rabbit_misc:format("Please create virtual host \"~s\" prior to importing definitions.", + [VHost])); +format({vhost_limit_exceeded, ErrMsg}) -> + rabbit_data_coercion:to_binary(ErrMsg); +format(E) -> + rabbit_data_coercion:to_binary(rabbit_misc:format("~p", [E])). + +add_parameter(Param, Username) -> + VHost = maps:get(vhost, Param, undefined), + add_parameter(VHost, Param, Username). 
+ +add_parameter(VHost, Param, Username) -> + Comp = maps:get(component, Param, undefined), + Key = maps:get(name, Param, undefined), + Term = maps:get(value, Param, undefined), + Result = case is_map(Term) of + true -> + %% coerce maps to proplists for backwards compatibility. + %% See rabbitmq-management#528. + TermProplist = rabbit_data_coercion:to_proplist(Term), + rabbit_runtime_parameters:set(VHost, Comp, Key, TermProplist, Username); + _ -> + rabbit_runtime_parameters:set(VHost, Comp, Key, Term, Username) + end, + case Result of + ok -> ok; + {error_string, E} -> + S = rabbit_misc:format(" (~s/~s/~s)", [VHost, Comp, Key]), + exit(rabbit_data_coercion:to_binary(rabbit_misc:escape_html_tags(E ++ S))) + end. + +add_global_parameter(Param, Username) -> + Key = maps:get(name, Param, undefined), + Term = maps:get(value, Param, undefined), + case is_map(Term) of + true -> + %% coerce maps to proplists for backwards compatibility. + %% See rabbitmq-management#528. + TermProplist = rabbit_data_coercion:to_proplist(Term), + rabbit_runtime_parameters:set_global(Key, TermProplist, Username); + _ -> + rabbit_runtime_parameters:set_global(Key, Term, Username) + end. + +add_policy(Param, Username) -> + VHost = maps:get(vhost, Param, undefined), + add_policy(VHost, Param, Username). + +add_policy(VHost, Param, Username) -> + Key = maps:get(name, Param, undefined), + case rabbit_policy:set( + VHost, Key, maps:get(pattern, Param, undefined), + case maps:get(definition, Param, undefined) of + undefined -> undefined; + Def -> rabbit_data_coercion:to_proplist(Def) + end, + maps:get(priority, Param, undefined), + maps:get('apply-to', Param, <<"all">>), + Username) of + ok -> ok; + {error_string, E} -> S = rabbit_misc:format(" (~s/~s)", [VHost, Key]), + exit(rabbit_data_coercion:to_binary(rabbit_misc:escape_html_tags(E ++ S))) + end. + +-spec add_vhost(map(), rabbit_types:username()) -> ok. 
+ +add_vhost(VHost, ActingUser) -> + VHostName = maps:get(name, VHost, undefined), + VHostTrace = maps:get(tracing, VHost, undefined), + VHostDefinition = maps:get(definition, VHost, undefined), + VHostTags = maps:get(tags, VHost, undefined), + rabbit_vhost:put_vhost(VHostName, VHostDefinition, VHostTags, VHostTrace, ActingUser). + +add_permission(Permission, ActingUser) -> + rabbit_auth_backend_internal:set_permissions(maps:get(user, Permission, undefined), + maps:get(vhost, Permission, undefined), + maps:get(configure, Permission, undefined), + maps:get(write, Permission, undefined), + maps:get(read, Permission, undefined), + ActingUser). + +add_topic_permission(TopicPermission, ActingUser) -> + rabbit_auth_backend_internal:set_topic_permissions( + maps:get(user, TopicPermission, undefined), + maps:get(vhost, TopicPermission, undefined), + maps:get(exchange, TopicPermission, undefined), + maps:get(write, TopicPermission, undefined), + maps:get(read, TopicPermission, undefined), + ActingUser). + +add_queue(Queue, ActingUser) -> + add_queue_int(Queue, r(queue, Queue), ActingUser). + +add_queue(VHost, Queue, ActingUser) -> + add_queue_int(Queue, rv(VHost, queue, Queue), ActingUser). + +add_queue_int(_Queue, R = #resource{kind = queue, + name = <<"amq.", _/binary>>}, ActingUser) -> + Name = R#resource.name, + rabbit_log:warning("Skipping import of a queue whose name begins with 'amq.', " + "name: ~s, acting user: ~s", [Name, ActingUser]); +add_queue_int(Queue, Name, ActingUser) -> + rabbit_amqqueue:declare(Name, + maps:get(durable, Queue, undefined), + maps:get(auto_delete, Queue, undefined), + args(maps:get(arguments, Queue, undefined)), + none, + ActingUser). + +add_exchange(Exchange, ActingUser) -> + add_exchange_int(Exchange, r(exchange, Exchange), ActingUser). + +add_exchange(VHost, Exchange, ActingUser) -> + add_exchange_int(Exchange, rv(VHost, exchange, Exchange), ActingUser). 
+ +add_exchange_int(_Exchange, #resource{kind = exchange, name = <<"">>}, ActingUser) -> + rabbit_log:warning("Not importing the default exchange, acting user: ~s", [ActingUser]); +add_exchange_int(_Exchange, R = #resource{kind = exchange, + name = <<"amq.", _/binary>>}, ActingUser) -> + Name = R#resource.name, + rabbit_log:warning("Skipping import of an exchange whose name begins with 'amq.', " + "name: ~s, acting user: ~s", [Name, ActingUser]); +add_exchange_int(Exchange, Name, ActingUser) -> + Internal = case maps:get(internal, Exchange, undefined) of + undefined -> false; %% =< 2.2.0 + I -> I + end, + rabbit_exchange:declare(Name, + rabbit_exchange:check_type(maps:get(type, Exchange, undefined)), + maps:get(durable, Exchange, undefined), + maps:get(auto_delete, Exchange, undefined), + Internal, + args(maps:get(arguments, Exchange, undefined)), + ActingUser). + +add_binding(Binding, ActingUser) -> + DestType = dest_type(Binding), + add_binding_int(Binding, r(exchange, source, Binding), + r(DestType, destination, Binding), ActingUser). + +add_binding(VHost, Binding, ActingUser) -> + DestType = dest_type(Binding), + add_binding_int(Binding, rv(VHost, exchange, source, Binding), + rv(VHost, DestType, destination, Binding), ActingUser). + +add_binding_int(Binding, Source, Destination, ActingUser) -> + rabbit_binding:add( + #binding{source = Source, + destination = Destination, + key = maps:get(routing_key, Binding, undefined), + args = args(maps:get(arguments, Binding, undefined))}, + ActingUser). + +dest_type(Binding) -> + rabbit_data_coercion:to_atom(maps:get(destination_type, Binding, undefined)). + +r(Type, Props) -> r(Type, name, Props). + +r(Type, Name, Props) -> + rabbit_misc:r(maps:get(vhost, Props, undefined), Type, maps:get(Name, Props, undefined)). + +rv(VHost, Type, Props) -> rv(VHost, Type, name, Props). + +rv(VHost, Type, Name, Props) -> + rabbit_misc:r(VHost, Type, maps:get(Name, Props, undefined)). 
+ +%%-------------------------------------------------------------------- + +validate_limits(All) -> + case maps:get(queues, All, undefined) of + undefined -> ok; + Queues0 -> + {ok, VHostMap} = filter_out_existing_queues(Queues0), + maps:fold(fun validate_vhost_limit/3, ok, VHostMap) + end. + +validate_limits(All, VHost) -> + case maps:get(queues, All, undefined) of + undefined -> ok; + Queues0 -> + Queues1 = filter_out_existing_queues(VHost, Queues0), + AddCount = length(Queues1), + validate_vhost_limit(VHost, AddCount, ok) + end. + +filter_out_existing_queues(Queues) -> + build_filtered_map(Queues, maps:new()). + +filter_out_existing_queues(VHost, Queues) -> + Pred = fun(Queue) -> + Rec = rv(VHost, queue, <<"name">>, Queue), + case rabbit_amqqueue:lookup(Rec) of + {ok, _} -> false; + {error, not_found} -> true + end + end, + lists:filter(Pred, Queues). + +build_queue_data(Queue) -> + VHost = maps:get(<<"vhost">>, Queue, undefined), + Rec = rv(VHost, queue, <<"name">>, Queue), + {Rec, VHost}. + +build_filtered_map([], AccMap) -> + {ok, AccMap}; +build_filtered_map([Queue|Rest], AccMap0) -> + {Rec, VHost} = build_queue_data(Queue), + case rabbit_amqqueue:lookup(Rec) of + {error, not_found} -> + AccMap1 = maps:update_with(VHost, fun(V) -> V + 1 end, 1, AccMap0), + build_filtered_map(Rest, AccMap1); + {ok, _} -> + build_filtered_map(Rest, AccMap0) + end. + +validate_vhost_limit(VHost, AddCount, ok) -> + WouldExceed = rabbit_vhost_limit:would_exceed_queue_limit(AddCount, VHost), + validate_vhost_queue_limit(VHost, AddCount, WouldExceed). 
+ +validate_vhost_queue_limit(_VHost, 0, _) -> + % Note: not adding any new queues so the upload + % must be update-only + ok; +validate_vhost_queue_limit(_VHost, _AddCount, false) -> + % Note: would not exceed queue limit + ok; +validate_vhost_queue_limit(VHost, AddCount, {true, Limit, QueueCount}) -> + ErrFmt = "Adding ~B queue(s) to virtual host \"~s\" would exceed the limit of ~B queue(s).~n~nThis virtual host currently has ~B queue(s) defined.~n~nImport aborted!", + ErrInfo = [AddCount, VHost, Limit, QueueCount], + ErrMsg = rabbit_misc:format(ErrFmt, ErrInfo), + exit({vhost_limit_exceeded, ErrMsg}). + +get_or_missing(K, L) -> + case maps:get(K, L, undefined) of + undefined -> {key_missing, K}; + V -> V + end. + +args([]) -> args(#{}); +args(L) -> rabbit_misc:to_amqp_table(L). + +%% +%% Export +%% + +list_exchanges() -> + %% exclude internal exchanges, they are not meant to be declared or used by + %% applications + [exchange_definition(X) || X <- lists:filter(fun(#exchange{internal = true}) -> false; + (#exchange{name = #resource{name = <<>>}}) -> false; + (X) -> not rabbit_exchange:is_amq_prefixed(X) + end, + rabbit_exchange:list())]. + +exchange_definition(#exchange{name = #resource{virtual_host = VHost, name = Name}, + type = Type, + durable = Durable, auto_delete = AD, arguments = Args}) -> + #{<<"vhost">> => VHost, + <<"name">> => Name, + <<"type">> => Type, + <<"durable">> => Durable, + <<"auto_delete">> => AD, + <<"arguments">> => rabbit_misc:amqp_table(Args)}. + +list_queues() -> + %% exclude exclusive queues, they cannot be restored + [queue_definition(Q) || Q <- lists:filter(fun(Q0) -> + amqqueue:get_exclusive_owner(Q0) =:= none + end, + rabbit_amqqueue:list())]. 
+ +queue_definition(Q) -> + #resource{virtual_host = VHost, name = Name} = amqqueue:get_name(Q), + Type = case amqqueue:get_type(Q) of + rabbit_classic_queue -> classic; + rabbit_quorum_queue -> quorum; + rabbit_stream_queue -> stream; + T -> T + end, + #{ + <<"vhost">> => VHost, + <<"name">> => Name, + <<"type">> => Type, + <<"durable">> => amqqueue:is_durable(Q), + <<"auto_delete">> => amqqueue:is_auto_delete(Q), + <<"arguments">> => rabbit_misc:amqp_table(amqqueue:get_arguments(Q)) + }. + +list_bindings() -> + [binding_definition(B) || B <- rabbit_binding:list_explicit()]. + +binding_definition(#binding{source = S, + key = RoutingKey, + destination = D, + args = Args}) -> + #{ + <<"source">> => S#resource.name, + <<"vhost">> => S#resource.virtual_host, + <<"destination">> => D#resource.name, + <<"destination_type">> => D#resource.kind, + <<"routing_key">> => RoutingKey, + <<"arguments">> => rabbit_misc:amqp_table(Args) + }. + +list_vhosts() -> + [vhost_definition(V) || V <- rabbit_vhost:all()]. + +vhost_definition(VHost) -> + #{ + <<"name">> => vhost:get_name(VHost), + <<"limits">> => vhost:get_limits(VHost), + <<"metadata">> => vhost:get_metadata(VHost) + }. + +list_users() -> + [user_definition(U) || U <- rabbit_auth_backend_internal:all_users()]. + +user_definition(User) -> + #{<<"name">> => internal_user:get_username(User), + <<"password_hash">> => base64:encode(internal_user:get_password_hash(User)), + <<"hashing_algorithm">> => rabbit_auth_backend_internal:hashing_module_for_user(User), + <<"tags">> => tags_as_binaries(internal_user:get_tags(User)), + <<"limits">> => internal_user:get_limits(User) + }. + +list_runtime_parameters() -> + [runtime_parameter_definition(P) || P <- rabbit_runtime_parameters:list(), is_list(P)]. + +runtime_parameter_definition(Param) -> + #{ + <<"vhost">> => pget(vhost, Param), + <<"component">> => pget(component, Param), + <<"name">> => pget(name, Param), + <<"value">> => maps:from_list(pget(value, Param)) + }. 
+ +list_global_runtime_parameters() -> + [global_runtime_parameter_definition(P) || P <- rabbit_runtime_parameters:list_global(), not is_internal_parameter(P)]. + +global_runtime_parameter_definition(P0) -> + P = [{rabbit_data_coercion:to_binary(K), V} || {K, V} <- P0], + maps:from_list(P). + +-define(INTERNAL_GLOBAL_PARAM_PREFIX, "internal"). + +is_internal_parameter(Param) -> + Name = rabbit_data_coercion:to_list(pget(name, Param)), + %% if global parameter name starts with an "internal", consider it to be internal + %% and exclude it from definition export + string:left(Name, length(?INTERNAL_GLOBAL_PARAM_PREFIX)) =:= ?INTERNAL_GLOBAL_PARAM_PREFIX. + +list_policies() -> + [policy_definition(P) || P <- rabbit_policy:list()]. + +policy_definition(Policy) -> + #{ + <<"vhost">> => pget(vhost, Policy), + <<"name">> => pget(name, Policy), + <<"pattern">> => pget(pattern, Policy), + <<"apply-to">> => pget('apply-to', Policy), + <<"priority">> => pget(priority, Policy), + <<"definition">> => maps:from_list(pget(definition, Policy)) + }. + +list_permissions() -> + [permission_definition(P) || P <- rabbit_auth_backend_internal:list_permissions()]. + +permission_definition(P0) -> + P = [{rabbit_data_coercion:to_binary(K), V} || {K, V} <- P0], + maps:from_list(P). + +list_topic_permissions() -> + [topic_permission_definition(P) || P <- rabbit_auth_backend_internal:list_topic_permissions()]. + +topic_permission_definition(P0) -> + P = [{rabbit_data_coercion:to_binary(K), V} || {K, V} <- P0], + maps:from_list(P). + +tags_as_binaries(Tags) -> + list_to_binary(string:join([atom_to_list(T) || T <- Tags], ",")). diff --git a/deps/rabbit/src/rabbit_diagnostics.erl b/deps/rabbit/src/rabbit_diagnostics.erl new file mode 100644 index 0000000000..999596cdc9 --- /dev/null +++ b/deps/rabbit/src/rabbit_diagnostics.erl @@ -0,0 +1,119 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. 
If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_diagnostics). + +-define(PROCESS_INFO, + [registered_name, current_stacktrace, initial_call, message_queue_len, + links, monitors, monitored_by, heap_size]). + +-export([maybe_stuck/0, maybe_stuck/1, top_memory_use/0, top_memory_use/1, + top_binary_refs/0, top_binary_refs/1]). + +maybe_stuck() -> maybe_stuck(5000). + +maybe_stuck(Timeout) -> + Pids = processes(), + io:format("~s There are ~p processes.~n", [get_time(), length(Pids)]), + maybe_stuck(Pids, Timeout). + +maybe_stuck(Pids, Timeout) when Timeout =< 0 -> + io:format("~s Found ~p suspicious processes.~n", [get_time(), length(Pids)]), + [io:format("~s ~p~n", [get_time(), info(Pid)]) || Pid <- Pids], + ok; +maybe_stuck(Pids, Timeout) -> + Pids2 = [P || P <- Pids, looks_stuck(P)], + io:format("~s Investigated ~p processes this round, ~pms to go.~n", + [get_time(), length(Pids2), Timeout]), + timer:sleep(500), + maybe_stuck(Pids2, Timeout - 500). + +looks_stuck(Pid) -> + case info(Pid, status, gone) of + {status, waiting} -> + %% It's tempting to just check for message_queue_len > 0 + %% here rather than mess around with stack traces and + %% heuristics. But really, sometimes freshly stuck + %% processes can have 0 messages... + case info(Pid, current_stacktrace, gone) of + {current_stacktrace, [H|_]} -> + maybe_stuck_stacktrace(H); + _ -> + false + end; + _ -> + false + end. 
+
+%% Heuristic: decide from the *top* stack frame whether a waiting
+%% process is genuinely stuck. Frames belonging to well-known idle
+%% receive loops (gen_server2/gen_event dispatch, socket accept/recv,
+%% heartbeaters, shell/io group leaders) are whitelisted as "not stuck".
+maybe_stuck_stacktrace({gen_server2, process_next_msg, _}) -> false;
+maybe_stuck_stacktrace({gen_event, fetch_msg, _}) -> false;
+maybe_stuck_stacktrace({prim_inet, accept0, _}) -> false;
+maybe_stuck_stacktrace({prim_inet, recv0, _}) -> false;
+maybe_stuck_stacktrace({rabbit_heartbeat, heartbeater, _}) -> false;
+maybe_stuck_stacktrace({rabbit_net, recv, _}) -> false;
+maybe_stuck_stacktrace({group, _, _}) -> false;
+maybe_stuck_stacktrace({shell, _, _}) -> false;
+maybe_stuck_stacktrace({io, _, _}) -> false;
+%% Stacktrace entries may carry location info as a 4th element; strip it.
+maybe_stuck_stacktrace({M, F, A, _}) ->
+    maybe_stuck_stacktrace({M, F, A});
+maybe_stuck_stacktrace({_M, F, _A}) ->
+    %% Functions with "loop" in the name are assumed to be ordinary
+    %% receive loops. string:str/2 (old, deprecated string API) is
+    %% replaced by the Unicode-aware string:find/2; 'nomatch' here is
+    %% exactly the old "position 0" (substring absent) case.
+    case string:find(atom_to_list(F), "loop") of
+        nomatch -> true;
+        _       -> false
+    end.
+
+top_memory_use() -> top_memory_use(30).
+
+%% Print the Count processes with the largest 'memory' process_info
+%% value, most memory-hungry first.
+top_memory_use(Count) ->
+    Pids = processes(),
+    io:format("~s Memory use: top ~p of ~p processes.~n", [get_time(), Count, length(Pids)]),
+    Procs = [{info(Pid, memory, 0), info(Pid)} || Pid <- Pids],
+    Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count),
+    io:format("~s ~p~n", [get_time(), Sorted]).
+
+top_binary_refs() -> top_binary_refs(30).
+
+%% Print the Count processes referencing the most off-heap binary data.
+top_binary_refs(Count) ->
+    Pids = processes(),
+    io:format("~s Binary refs: top ~p of ~p processes.~n", [get_time(), Count, length(Pids)]),
+    Procs = [{{binary_refs, binary_refs(Pid)}, info(Pid)} || Pid <- Pids],
+    Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count),
+    io:format("~s ~p~n", [get_time(), Sorted]).
+
+%% Sum of distinct referenced binary sizes for one process. usort on
+%% {Ptr, Sz} de-duplicates multiple references to the same binary.
+%% A dead process yields 'undefined' from process_info and falls
+%% through to 0.
+binary_refs(Pid) ->
+    case info(Pid, binary, []) of
+        {binary, Refs} ->
+            lists:sum([Sz || {_Ptr, Sz} <- lists:usort([{Ptr, Sz} ||
+                                                           {Ptr, Sz, _Cnt} <- Refs])]);
+        _ -> 0
+    end.
+
+info(Pid) ->
+    [{pid, Pid} | info(Pid, ?PROCESS_INFO, [])].
+
+%% process_info/2 wrapper that never throws: on badarg (e.g. a remote
+%% pid) it returns {Infos, Default} for a single item, or Default for
+%% an item list, mirroring process_info's return shapes.
+info(Pid, Infos, Default) ->
+    try
+        process_info(Pid, Infos)
+    catch
+        _:_ -> case is_atom(Infos) of
+                   true  -> {Infos, Default};
+                   false -> Default
+               end
+    end.
+
+%% Local wall-clock timestamp as an iolist: "YYYY-MM-DD HH:MM:SS".
+%% Used only to prefix the io:format diagnostics above.
+get_time() ->
+    {{Y,M,D}, {H,Min,Sec}} = calendar:local_time(),
+    [ integer_to_list(Y), "-",
+      prefix_zero(integer_to_list(M)), "-",
+      prefix_zero(integer_to_list(D)), " ",
+      prefix_zero(integer_to_list(H)), ":",
+      prefix_zero(integer_to_list(Min)), ":",
+      prefix_zero(integer_to_list(Sec))
+    ].
+
+%% Left-pad a 1- or 2-digit numeric string to exactly two digits.
+%% Inputs are calendar components, so longer strings cannot occur.
+prefix_zero([C]) -> [$0, C];
+prefix_zero([_,_] = Full) -> Full.
diff --git a/deps/rabbit/src/rabbit_direct.erl b/deps/rabbit/src/rabbit_direct.erl
new file mode 100644
index 0000000000..3fc2d75908
--- /dev/null
+++ b/deps/rabbit/src/rabbit_direct.erl
@@ -0,0 +1,235 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_direct).
+
+-export([boot/0, force_event_refresh/1, list/0, connect/5,
+         start_channel/10, disconnect/2]).
+
+-deprecated([{force_event_refresh, 1, eventually}]).
+
+%% Internal
+-export([list_local/0]).
+
+%% For testing only
+-export([extract_extra_auth_props/4]).
+
+-include("rabbit.hrl").
+-include("rabbit_misc.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec boot() -> 'ok'.
+
+%% Start the supervisor that owns all direct-connection channel
+%% processes, registered locally as rabbit_direct_client_sup.
+boot() -> rabbit_sup:start_supervisor_child(
+            rabbit_direct_client_sup, rabbit_client_sup,
+            [{local, rabbit_direct_client_sup},
+             {rabbit_channel_sup, start_link, []}]).
+
+-spec force_event_refresh(reference()) -> 'ok'.
+
+%% Ask every direct connection (cluster-wide, via list/0) to re-emit
+%% its connection_created event. Deprecated per the attribute above.
+force_event_refresh(Ref) ->
+    [Pid ! {force_event_refresh, Ref} || Pid <- list()],
+    ok.
+
+-spec list_local() -> [pid()].
+
+%% Direct connections registered on this node only.
+list_local() ->
+    pg_local:get_members(rabbit_direct).
+
+-spec list() -> [pid()].
+
+%% Direct connections across all running cluster nodes, gathered by
+%% calling list_local/0 over RPC on each node.
+list() ->
+    Nodes = rabbit_nodes:all_running(),
+    rabbit_misc:append_rpc_all_nodes(Nodes, rabbit_direct, list_local, [], ?RPC_TIMEOUT).
+ +%%---------------------------------------------------------------------------- + +auth_fun({none, _}, _VHost, _ExtraAuthProps) -> + fun () -> {ok, rabbit_auth_backend_dummy:user()} end; + +auth_fun({Username, none}, _VHost, _ExtraAuthProps) -> + fun () -> rabbit_access_control:check_user_login(Username, []) end; + +auth_fun({Username, Password}, VHost, ExtraAuthProps) -> + fun () -> + rabbit_access_control:check_user_login( + Username, + [{password, Password}, {vhost, VHost}] ++ ExtraAuthProps) + end. + +-spec connect + (({'none', 'none'} | {rabbit_types:username(), 'none'} | + {rabbit_types:username(), rabbit_types:password()}), + rabbit_types:vhost(), rabbit_types:protocol(), pid(), + rabbit_event:event_props()) -> + rabbit_types:ok_or_error2( + {rabbit_types:user(), rabbit_framing:amqp_table()}, + 'broker_not_found_on_node' | + {'auth_failure', string()} | 'access_refused'). + +connect(Creds, VHost, Protocol, Pid, Infos) -> + ExtraAuthProps = extract_extra_auth_props(Creds, VHost, Pid, Infos), + AuthFun = auth_fun(Creds, VHost, ExtraAuthProps), + case rabbit:is_running() of + true -> + case whereis(rabbit_direct_client_sup) of + undefined -> + {error, broker_is_booting}; + _ -> + case is_over_vhost_connection_limit(VHost, Creds, Pid) of + true -> + {error, not_allowed}; + false -> + case is_vhost_alive(VHost, Creds, Pid) of + false -> + {error, {internal_error, vhost_is_down}}; + true -> + case AuthFun() of + {ok, User = #user{username = Username}} -> + notify_auth_result(Username, + user_authentication_success, []), + connect1(User, VHost, Protocol, Pid, Infos); + {refused, Username, Msg, Args} -> + notify_auth_result(Username, + user_authentication_failure, + [{error, rabbit_misc:format(Msg, Args)}]), + {error, {auth_failure, "Refused"}} + end %% AuthFun() + end %% is_vhost_alive + end %% is_over_vhost_connection_limit + end; + false -> {error, broker_not_found_on_node} + end. 
+ +extract_extra_auth_props(Creds, VHost, Pid, Infos) -> + case extract_protocol(Infos) of + undefined -> + []; + Protocol -> + maybe_call_connection_info_module(Protocol, Creds, VHost, Pid, Infos) + end. + +extract_protocol(Infos) -> + case proplists:get_value(protocol, Infos, undefined) of + {Protocol, _Version} -> + Protocol; + _ -> + undefined + end. + +maybe_call_connection_info_module(Protocol, Creds, VHost, Pid, Infos) -> + Module = rabbit_data_coercion:to_atom(string:to_lower( + "rabbit_" ++ + lists:flatten(string:replace(rabbit_data_coercion:to_list(Protocol), " ", "_", all)) ++ + "_connection_info") + ), + Args = [Creds, VHost, Pid, Infos], + code_server_cache:maybe_call_mfa(Module, additional_authn_params, Args, []). + +is_vhost_alive(VHost, {Username, _Password}, Pid) -> + PrintedUsername = case Username of + none -> ""; + _ -> Username + end, + case rabbit_vhost_sup_sup:is_vhost_alive(VHost) of + true -> true; + false -> + rabbit_log_connection:error( + "Error on Direct connection ~p~n" + "access to vhost '~s' refused for user '~s': " + "vhost '~s' is down", + [Pid, VHost, PrintedUsername, VHost]), + false + end. + +is_over_vhost_connection_limit(VHost, {Username, _Password}, Pid) -> + PrintedUsername = case Username of + none -> ""; + _ -> Username + end, + try rabbit_vhost_limit:is_over_connection_limit(VHost) of + false -> false; + {true, Limit} -> + rabbit_log_connection:error( + "Error on Direct connection ~p~n" + "access to vhost '~s' refused for user '~s': " + "vhost connection limit (~p) is reached", + [Pid, VHost, PrintedUsername, Limit]), + true + catch + throw:{error, {no_such_vhost, VHost}} -> + rabbit_log_connection:error( + "Error on Direct connection ~p~n" + "vhost ~s not found", [Pid, VHost]), + true + end. 
+
+%% Emit a user_authentication_success/failure event. A 'none' username
+%% (anonymous/dummy auth) is mapped to '' and then filtered out of the
+%% event props by the comprehension below.
+notify_auth_result(Username, AuthResult, ExtraProps) ->
+    EventProps = [{connection_type, direct},
+                  {name, case Username of none -> ''; _ -> Username end}] ++
+                 ExtraProps,
+    rabbit_event:notify(AuthResult, [P || {_, V} = P <- EventProps, V =/= '']).
+
+%% Second stage of connect/5, reached only after authentication
+%% succeeded: enforce the per-user connection limit, authorize vhost
+%% access, then register the connection in the rabbit_direct pg_local
+%% group and emit metrics + connection_created event.
+connect1(User = #user{username = Username}, VHost, Protocol, Pid, Infos) ->
+    case rabbit_auth_backend_internal:is_over_connection_limit(Username) of
+        false ->
+            % Note: peer_host can be either a tuple or
+            % a binary if reverse_dns_lookups is enabled
+            PeerHost = proplists:get_value(peer_host, Infos),
+            AuthzContext = proplists:get_value(variable_map, Infos, #{}),
+            %% check_vhost_access signals refusal by exiting with an
+            %% #amqp_error{}; only not_allowed is translated here, any
+            %% other exit propagates.
+            try rabbit_access_control:check_vhost_access(User, VHost,
+                                                         {ip, PeerHost}, AuthzContext) of
+                ok -> ok = pg_local:join(rabbit_direct, Pid),
+                      rabbit_core_metrics:connection_created(Pid, Infos),
+                      rabbit_event:notify(connection_created, Infos),
+                      {ok, {User, rabbit_reader:server_properties(Protocol)}}
+            catch
+                exit:#amqp_error{name = Reason = not_allowed} ->
+                    {error, Reason}
+            end;
+        {true, Limit} ->
+            rabbit_log_connection:error(
+              "Error on Direct connection ~p~n"
+              "access refused for user '~s': "
+              "user connection limit (~p) is reached",
+              [Pid, Username, Limit]),
+            {error, not_allowed}
+    end.
+
+-spec start_channel
+        (rabbit_channel:channel_number(), pid(), pid(), string(),
+         rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(),
+         rabbit_framing:amqp_table(), pid(), any()) ->
+            {'ok', pid()}.
+ +start_channel(Number, ClientChannelPid, ConnPid, ConnName, Protocol, + User = #user{username = Username}, VHost, Capabilities, + Collector, AmqpParams) -> + case rabbit_auth_backend_internal:is_over_channel_limit(Username) of + false -> + {ok, _, {ChannelPid, _}} = + supervisor2:start_child( + rabbit_direct_client_sup, + [{direct, Number, ClientChannelPid, ConnPid, ConnName, Protocol, + User, VHost, Capabilities, Collector, AmqpParams}]), + {ok, ChannelPid}; + {true, Limit} -> + rabbit_log_connection:error( + "Error on direct connection ~p~n" + "number of channels opened for user '~s' has reached the " + "maximum allowed limit of (~w)", + [ConnPid, Username, Limit]), + {error, not_allowed} + end. + +-spec disconnect(pid(), rabbit_event:event_props()) -> 'ok'. + +disconnect(Pid, Infos) -> + pg_local:leave(rabbit_direct, Pid), + rabbit_core_metrics:connection_closed(Pid), + rabbit_event:notify(connection_closed, Infos). diff --git a/deps/rabbit/src/rabbit_disk_monitor.erl b/deps/rabbit/src/rabbit_disk_monitor.erl new file mode 100644 index 0000000000..8277794098 --- /dev/null +++ b/deps/rabbit/src/rabbit_disk_monitor.erl @@ -0,0 +1,317 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_disk_monitor). + +%% Disk monitoring server. Monitors free disk space +%% periodically and sets alarms when it is below a certain +%% watermark (configurable either as an absolute value or +%% relative to the memory limit). +%% +%% Disk monitoring is done by shelling out to /usr/bin/df +%% instead of related built-in OTP functions because currently +%% this is the most reliable way of determining free disk space +%% for the partition our internal database is on. 
+%% +%% Update interval is dynamically calculated assuming disk +%% space is being filled at FAST_RATE. + +-behaviour(gen_server). + +-export([start_link/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-export([get_disk_free_limit/0, set_disk_free_limit/1, + get_min_check_interval/0, set_min_check_interval/1, + get_max_check_interval/0, set_max_check_interval/1, + get_disk_free/0, set_enabled/1]). + +-define(SERVER, ?MODULE). +-define(DEFAULT_MIN_DISK_CHECK_INTERVAL, 100). +-define(DEFAULT_MAX_DISK_CHECK_INTERVAL, 10000). +-define(DEFAULT_DISK_FREE_LIMIT, 50000000). +%% 250MB/s i.e. 250kB/ms +-define(FAST_RATE, (250 * 1000)). + +-record(state, { + %% monitor partition on which this directory resides + dir, + %% configured limit in bytes + limit, + %% last known free disk space amount in bytes + actual, + %% minimum check interval + min_interval, + %% maximum check interval + max_interval, + %% timer that drives periodic checks + timer, + %% is free disk space alarm currently in effect? + alarmed, + %% is monitoring enabled? false on unsupported + %% platforms + enabled, + %% number of retries to enable monitoring if it fails + %% on start-up + retries, + %% Interval between retries + interval +}). + +%%---------------------------------------------------------------------------- + +-type disk_free_limit() :: (integer() | string() | {'mem_relative', float() | integer()}). + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +-spec get_disk_free_limit() -> integer(). + +get_disk_free_limit() -> + gen_server:call(?MODULE, get_disk_free_limit, infinity). + +-spec set_disk_free_limit(disk_free_limit()) -> 'ok'. + +set_disk_free_limit(Limit) -> + gen_server:call(?MODULE, {set_disk_free_limit, Limit}, infinity). + +-spec get_min_check_interval() -> integer(). 
+
+get_min_check_interval() ->
+    gen_server:call(?MODULE, get_min_check_interval, infinity).
+
+-spec set_min_check_interval(integer()) -> 'ok'.
+
+set_min_check_interval(Interval) ->
+    gen_server:call(?MODULE, {set_min_check_interval, Interval}, infinity).
+
+-spec get_max_check_interval() -> integer().
+
+get_max_check_interval() ->
+    gen_server:call(?MODULE, get_max_check_interval, infinity).
+
+-spec set_max_check_interval(integer()) -> 'ok'.
+
+set_max_check_interval(Interval) ->
+    gen_server:call(?MODULE, {set_max_check_interval, Interval}, infinity).
+
+-spec get_disk_free() -> (integer() | 'unknown').
+%% FIX: set_enabled/1 is only ever handled for the atoms 'true' and
+%% 'false' (see the {set_enabled, _Enabled = true | false} handle_call
+%% clauses); the previous spec said string(), which was wrong and made
+%% Dialyzer flag every correct caller. Any non-boolean argument falls
+%% through to the no-reply catch-all and hangs the caller.
+-spec set_enabled(boolean()) -> 'ok'.
+
+get_disk_free() ->
+    gen_server:call(?MODULE, get_disk_free, infinity).
+
+set_enabled(Enabled) ->
+    gen_server:call(?MODULE, {set_enabled, Enabled}, infinity).
+
+%%----------------------------------------------------------------------------
+%% gen_server callbacks
+%%----------------------------------------------------------------------------
+
+-spec start_link(disk_free_limit()) -> rabbit_types:ok_pid_or_error().
+
+start_link(Args) ->
+    gen_server:start_link({local, ?SERVER}, ?MODULE, [Args], []).
+
+%% Read the retry policy from the 'rabbit' application environment
+%% (both keys are required) and attempt the first free-space check via
+%% enable/1, which may schedule retries on unsupported platforms.
+init([Limit]) ->
+    Dir = dir(),
+    {ok, Retries} = application:get_env(rabbit, disk_monitor_failure_retries),
+    {ok, Interval} = application:get_env(rabbit, disk_monitor_failure_retry_interval),
+    State = #state{dir = Dir,
+                   min_interval = ?DEFAULT_MIN_DISK_CHECK_INTERVAL,
+                   max_interval = ?DEFAULT_MAX_DISK_CHECK_INTERVAL,
+                   alarmed = false,
+                   enabled = true,
+                   limit = Limit,
+                   retries = Retries,
+                   interval = Interval},
+    {ok, enable(State)}.
+
+handle_call(get_disk_free_limit, _From, State = #state{limit = Limit}) ->
+    {reply, Limit, State};
+
+handle_call({set_disk_free_limit, _}, _From, #state{enabled = false} = State) ->
+    rabbit_log:info("Cannot set disk free limit: "
+                    "disabled disk free space monitoring", []),
+    {reply, ok, State};
+
+handle_call({set_disk_free_limit, Limit}, _From, State) ->
+    {reply, ok, set_disk_limits(State, Limit)};
+
+handle_call(get_min_check_interval, _From, State) ->
+    {reply, State#state.min_interval, State};
+
+handle_call(get_max_check_interval, _From, State) ->
+    {reply, State#state.max_interval, State};
+
+handle_call({set_min_check_interval, MinInterval}, _From, State) ->
+    {reply, ok, State#state{min_interval = MinInterval}};
+
+handle_call({set_max_check_interval, MaxInterval}, _From, State) ->
+    {reply, ok, State#state{max_interval = MaxInterval}};
+
+handle_call(get_disk_free, _From, State = #state { actual = Actual }) ->
+    {reply, Actual, State};
+
+handle_call({set_enabled, _Enabled = true}, _From, State) ->
+    %% BUG FIX: keep the state returned by start_timer/1. It carries
+    %% the fresh timer reference and the re-interpreted limit computed
+    %% by set_disk_limits/2; the previous code discarded it, so
+    %% #state.timer stayed stale (or undefined) and a later disable
+    %% would cancel the wrong timer while the real one kept firing.
+    State1 = start_timer(set_disk_limits(State, State#state.limit)),
+    rabbit_log:info("Free disk space monitor was enabled"),
+    {reply, ok, State1#state{enabled = true}};
+handle_call({set_enabled, _Enabled = false}, _From, State) ->
+    %% Made idempotent: a second disable used to crash the server with
+    %% badarg from erlang:cancel_timer(undefined).
+    case State#state.timer of
+        undefined -> ok;
+        TRef      -> erlang:cancel_timer(TRef)
+    end,
+    rabbit_log:info("Free disk space monitor was manually disabled"),
+    {reply, ok, State#state{enabled = false, timer = undefined}};
+
+handle_call(_Request, _From, State) ->
+    %% NOTE(review): unknown calls get no reply, so callers block until
+    %% their gen_server:call timeout expires. Preserved as-is.
+    {noreply, State}.
+
+handle_cast(_Request, State) ->
+    {noreply, State}.
+
+%% Retry enabling after a start-up failure; retries counter decremented
+%% here, enable/1 gives up at 0.
+handle_info(try_enable, #state{retries = Retries} = State) ->
+    {noreply, enable(State#state{retries = Retries - 1})};
+handle_info(update, State) ->
+    {noreply, start_timer(internal_update(State))};
+
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+terminate(_Reason, _State) ->
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+ +%%---------------------------------------------------------------------------- +%% Server Internals +%%---------------------------------------------------------------------------- + +% the partition / drive containing this directory will be monitored +dir() -> rabbit_mnesia:dir(). + +set_disk_limits(State, Limit0) -> + Limit = interpret_limit(Limit0), + State1 = State#state { limit = Limit }, + rabbit_log:info("Disk free limit set to ~pMB~n", + [trunc(Limit / 1000000)]), + internal_update(State1). + +internal_update(State = #state { limit = Limit, + dir = Dir, + alarmed = Alarmed}) -> + CurrentFree = get_disk_free(Dir), + NewAlarmed = CurrentFree < Limit, + case {Alarmed, NewAlarmed} of + {false, true} -> + emit_update_info("insufficient", CurrentFree, Limit), + rabbit_alarm:set_alarm({{resource_limit, disk, node()}, []}); + {true, false} -> + emit_update_info("sufficient", CurrentFree, Limit), + rabbit_alarm:clear_alarm({resource_limit, disk, node()}); + _ -> + ok + end, + State #state {alarmed = NewAlarmed, actual = CurrentFree}. + +get_disk_free(Dir) -> + get_disk_free(Dir, os:type()). + +get_disk_free(Dir, {unix, Sun}) + when Sun =:= sunos; Sun =:= sunos4; Sun =:= solaris -> + Df = os:find_executable("df"), + parse_free_unix(rabbit_misc:os_cmd(Df ++ " -k " ++ Dir)); +get_disk_free(Dir, {unix, _}) -> + Df = os:find_executable("df"), + parse_free_unix(rabbit_misc:os_cmd(Df ++ " -kP " ++ Dir)); +get_disk_free(Dir, {win32, _}) -> + %% On Windows, the Win32 API enforces a limit of 260 characters + %% (MAX_PATH). If we call `dir` with a path longer than that, it + %% fails with "File not found". Starting with Windows 10 version + %% 1607, this limit was removed, but the administrator has to + %% configure that. + %% + %% NTFS supports paths up to 32767 characters. Therefore, paths + %% longer than 260 characters exist but they are "inaccessible" to + %% `dir`. 
+ %% + %% A workaround is to tell the Win32 API to not parse a path and + %% just pass it raw to the underlying filesystem. To do this, the + %% path must be prepended with "\\?\". That's what we do here. + %% + %% However, the underlying filesystem may not support forward + %% slashes transparently, as the Win32 API does. Therefore, we + %% convert all forward slashes to backslashes. + %% + %% See the following page to learn more about this: + %% https://ss64.com/nt/syntax-filenames.html + RawDir = "\\\\?\\" ++ string:replace(Dir, "/", "\\", all), + parse_free_win32(rabbit_misc:os_cmd("dir /-C /W \"" ++ RawDir ++ "\"")). + +parse_free_unix(Str) -> + case string:tokens(Str, "\n") of + [_, S | _] -> case string:tokens(S, " \t") of + [_, _, _, Free | _] -> list_to_integer(Free) * 1024; + _ -> exit({unparseable, Str}) + end; + _ -> exit({unparseable, Str}) + end. + +parse_free_win32(CommandResult) -> + LastLine = lists:last(string:tokens(CommandResult, "\r\n")), + {match, [Free]} = re:run(lists:reverse(LastLine), "(\\d+)", + [{capture, all_but_first, list}]), + list_to_integer(lists:reverse(Free)). + +interpret_limit({mem_relative, Relative}) + when is_number(Relative) -> + round(Relative * vm_memory_monitor:get_total_memory()); +interpret_limit(Absolute) -> + case rabbit_resource_monitor_misc:parse_information_unit(Absolute) of + {ok, ParsedAbsolute} -> ParsedAbsolute; + {error, parse_error} -> + rabbit_log:error("Unable to parse disk_free_limit value ~p", + [Absolute]), + ?DEFAULT_DISK_FREE_LIMIT + end. + +emit_update_info(StateStr, CurrentFree, Limit) -> + rabbit_log:info( + "Free disk space is ~s. Free bytes: ~p. Limit: ~p~n", + [StateStr, CurrentFree, Limit]). + +start_timer(State) -> + State#state{timer = erlang:send_after(interval(State), self(), update)}. 
+ +interval(#state{alarmed = true, + max_interval = MaxInterval}) -> + MaxInterval; +interval(#state{limit = Limit, + actual = Actual, + min_interval = MinInterval, + max_interval = MaxInterval}) -> + IdealInterval = 2 * (Actual - Limit) / ?FAST_RATE, + trunc(erlang:max(MinInterval, erlang:min(MaxInterval, IdealInterval))). + +enable(#state{retries = 0} = State) -> + State; +enable(#state{dir = Dir, interval = Interval, limit = Limit, retries = Retries} + = State) -> + case {catch get_disk_free(Dir), + vm_memory_monitor:get_total_memory()} of + {N1, N2} when is_integer(N1), is_integer(N2) -> + rabbit_log:info("Enabling free disk space monitoring~n", []), + start_timer(set_disk_limits(State, Limit)); + Err -> + rabbit_log:info("Free disk space monitor encountered an error " + "(e.g. failed to parse output from OS tools): ~p, retries left: ~b~n", + [Err, Retries]), + erlang:send_after(Interval, self(), try_enable), + State#state{enabled = false} + end. diff --git a/deps/rabbit/src/rabbit_epmd_monitor.erl b/deps/rabbit/src/rabbit_epmd_monitor.erl new file mode 100644 index 0000000000..938826dba6 --- /dev/null +++ b/deps/rabbit/src/rabbit_epmd_monitor.erl @@ -0,0 +1,104 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_epmd_monitor). + +-behaviour(gen_server). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-record(state, {timer, mod, me, host, port}). + +-define(SERVER, ?MODULE). +-define(CHECK_FREQUENCY, 60000). + +%%---------------------------------------------------------------------------- +%% It's possible for epmd to be killed out from underneath us. 
%% If that happens, then obviously clustering and rabbitmqctl stop
%% working. This process checks up on epmd and restarts it /
%% re-registers us with it if it has gone away.
%%
%% How could epmd be killed?
%%
%% 1) The most popular way for this to happen is when running as a
%% Windows service. The user starts rabbitmqctl first, and this starts
%% epmd under the user's account. When they log out epmd is killed.
%%
%% 2) Some packagings of (non-RabbitMQ?) Erlang apps might do "killall
%% epmd" as a shutdown or uninstall step.
%% ----------------------------------------------------------------------------

-spec start_link() -> rabbit_types:ok_pid_or_error().

start_link() ->
    gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).

%% Look up our distribution port once at startup, then re-check
%% periodically (see ensure_timer/1) that epmd still knows about us.
init([]) ->
    {Me, Host} = rabbit_nodes:parts(node()),
    Mod = net_kernel:epmd_module(),
    {ok, Port} = handle_port_please(init, Mod:port_please(Me, Host), Me, undefined),
    {ok, ensure_timer(#state{mod = Mod, me = Me, host = Host, port = Port})}.

%% No synchronous API is exposed; callers are not replied to.
handle_call(_Request, _From, State) ->
    {noreply, State}.

handle_cast(check, State) ->
    {noreply, do_check(State)};
handle_cast(_Msg, State) ->
    {noreply, State}.

handle_info(check, State) ->
    {noreply, do_check(State)};
handle_info(_Info, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%----------------------------------------------------------------------------

%% Run one epmd check and re-arm the periodic timer. Shared by the
%% 'check' cast and the 'check' timer message.
do_check(State0) ->
    {ok, State1} = check_epmd(State0),
    ensure_timer(State1#state{timer = undefined}).

ensure_timer(State) ->
    rabbit_misc:ensure_timer(State, #state.timer, ?CHECK_FREQUENCY, check).
%% (Re-)register our distribution port with epmd, restarting epmd via
%% rabbit_nodes:ensure_epmd/0 if it has gone away.
check_epmd(State = #state{mod  = Mod,
                          me   = Me,
                          host = Host,
                          port = Port0}) ->
    rabbit_log:debug("Asked to [re-]register this node (~s@~s) with epmd...", [Me, Host]),
    {ok, Port1} = handle_port_please(check, Mod:port_please(Me, Host), Me, Port0),
    rabbit_nodes:ensure_epmd(),
    Mod:register_node(Me, Port1),
    rabbit_log:debug("[Re-]registered this node (~s@~s) with epmd at port ~p", [Me, Host, Port1]),
    {ok, State#state{port = Port1}}.

%% Interpret the result of Mod:port_please/2, falling back to the
%% previously known port when epmd cannot tell us anything new.
handle_port_please(init, noport, Me, Port) ->
    rabbit_log:info("epmd does not know us, re-registering as ~s~n", [Me]),
    {ok, Port};
handle_port_please(check, noport, Me, Port) ->
    %% If epmd did not know us at init time either, Port is the atom
    %% 'undefined'. The previous "~b" format directive here required an
    %% integer and would make this log call crash with badarg in that
    %% case; "~p" handles both integers and 'undefined'.
    rabbit_log:warning("epmd does not know us, re-registering ~s at port ~p~n", [Me, Port]),
    {ok, Port};
handle_port_please(_, closed, _Me, Port) ->
    rabbit_log:error("epmd monitor failed to retrieve our port from epmd: closed"),
    {ok, Port};
handle_port_please(init, {port, NewPort, _Version}, _Me, _Port) ->
    rabbit_log:info("epmd monitor knows us, inter-node communication (distribution) port: ~p", [NewPort]),
    {ok, NewPort};
handle_port_please(check, {port, NewPort, _Version}, _Me, _Port) ->
    {ok, NewPort};
handle_port_please(_, {error, Error}, _Me, Port) ->
    rabbit_log:error("epmd monitor failed to retrieve our port from epmd: ~p", [Error]),
    {ok, Port}.

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_event_consumer).

-include_lib("rabbit_common/include/rabbit.hrl").

-export([register/4]).
-export([init/1, handle_call/2, handle_event/2, handle_info/2,
         terminate/2, code_change/3]).

-record(state, {pid, ref, monitor, pattern}).

%%----------------------------------------------------------------------------

%% Attach this module as a rabbit_event handler forwarding matching
%% events to Pid, tagged with Ref, for at most Duration seconds
%% ('infinity' for no time limit).
register(Pid, Ref, Duration, Pattern) ->
    case gen_event:add_handler(rabbit_event, ?MODULE, [Pid, Ref, Duration, Pattern]) of
        ok    -> {ok, Ref};
        Error -> Error
    end.

%%----------------------------------------------------------------------------

init([Pid, Ref, Duration, Pattern]) ->
    MRef = erlang:monitor(process, Pid),
    schedule_timeout(Duration),
    {ok, #state{pid = Pid, ref = Ref, monitor = MRef, pattern = Pattern}}.

handle_call(_Request, State) ->
    {ok, not_understood, State}.

%% Forward events whose key matches the subscriber's pattern; swallow
%% everything else.
handle_event(#event{type      = Type,
                    props     = Props,
                    timestamp = TS,
                    reference = none},
             #state{pid = Pid, ref = Ref, pattern = Pattern} = State) ->
    case key(Type) of
        ignore ->
            ok;
        Key ->
            case re:run(Key, Pattern, [{capture, none}]) of
                match ->
                    Data = [{'event', Key} |
                            fmt_proplist([{'timestamp_in_ms', TS} | Props])],
                    %% NOTE(review): 'confinue' looks like a typo for
                    %% 'continue', but the receiving side matches on this
                    %% exact atom, so it must be kept as-is.
                    Pid ! {Ref, Data, confinue};
                _ ->
                    ok
            end
    end,
    {ok, State};
handle_event(_Event, State) ->
    {ok, State}.

%% The subscriber died: detach ourselves.
handle_info({'DOWN', MRef, _, _, _}, #state{monitor = MRef}) ->
    remove_handler;
%% Subscription duration elapsed: tell the subscriber and detach.
handle_info(rabbit_event_consumer_timeout, #state{pid = Pid, ref = Ref}) ->
    Pid ! {Ref, <<>>, finished},
    remove_handler;
handle_info(_Info, State) ->
    {ok, State}.

terminate(_Arg, #state{monitor = MRef}) ->
    erlang:demonitor(MRef),
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%----------------------------------------------------------------------------

%% Schedule the self-destruct message unless the subscription is
%% open-ended.
schedule_timeout(infinity) ->
    ok;
schedule_timeout(Duration) ->
    erlang:send_after(Duration * 1000, self(), rabbit_event_consumer_timeout).

%% pattern matching is way more efficient than the string operations,
%% let's use all the keys we're aware of to speed up the handler.
%% Any unknown or new one will be processed as before (see last function clause).
+key(queue_deleted) -> + <<"queue.deleted">>; +key(queue_created) -> + <<"queue.created">>; +key(exchange_created) -> + <<"exchange.created">>; +key(exchange_deleted) -> + <<"exchange.deleted">>; +key(binding_created) -> + <<"binding.created">>; +key(connection_created) -> + <<"connection.created">>; +key(connection_closed) -> + <<"connection.closed">>; +key(channel_created) -> + <<"channel.created">>; +key(channel_closed) -> + <<"channel.closed">>; +key(consumer_created) -> + <<"consumer.created">>; +key(consumer_deleted) -> + <<"consumer.deleted">>; +key(queue_stats) -> + ignore; +key(connection_stats) -> + ignore; +key(policy_set) -> + <<"policy.set">>; +key(policy_cleared) -> + <<"policy.cleared">>; +key(parameter_set) -> + <<"parameter.set">>; +key(parameter_cleared) -> + <<"parameter.cleared">>; +key(vhost_created) -> + <<"vhost.created">>; +key(vhost_deleted) -> + <<"vhost.deleted">>; +key(vhost_limits_set) -> + <<"vhost.limits.set">>; +key(vhost_limits_cleared) -> + <<"vhost.limits.cleared">>; +key(user_authentication_success) -> + <<"user.authentication.success">>; +key(user_authentication_failure) -> + <<"user.authentication.failure">>; +key(user_created) -> + <<"user.created">>; +key(user_deleted) -> + <<"user.deleted">>; +key(user_password_changed) -> + <<"user.password.changed">>; +key(user_password_cleared) -> + <<"user.password.cleared">>; +key(user_tags_set) -> + <<"user.tags.set">>; +key(permission_created) -> + <<"permission.created">>; +key(permission_deleted) -> + <<"permission.deleted">>; +key(topic_permission_created) -> + <<"topic.permission.created">>; +key(topic_permission_deleted) -> + <<"topic.permission.deleted">>; +key(alarm_set) -> + <<"alarm.set">>; +key(alarm_cleared) -> + <<"alarm.cleared">>; +key(shovel_worker_status) -> + <<"shovel.worker.status">>; +key(shovel_worker_removed) -> + <<"shovel.worker.removed">>; +key(federation_link_status) -> + <<"federation.link.status">>; +key(federation_link_removed) -> + 
<<"federation.link.removed">>; +key(S) -> + case string:tokens(atom_to_list(S), "_") of + [_, "stats"] -> ignore; + Tokens -> list_to_binary(string:join(Tokens, ".")) + end. + +fmt_proplist(Props) -> + lists:foldl(fun({K, V}, Acc) -> + case fmt(K, V) of + L when is_list(L) -> lists:append(L, Acc); + T -> [T | Acc] + end + end, [], Props). + +fmt(K, #resource{virtual_host = VHost, + name = Name}) -> [{K, Name}, + {'vhost', VHost}]; +fmt(K, true) -> {K, true}; +fmt(K, false) -> {K, false}; +fmt(K, V) when is_atom(V) -> {K, atom_to_binary(V, utf8)}; +fmt(K, V) when is_integer(V) -> {K, V}; +fmt(K, V) when is_number(V) -> {K, V}; +fmt(K, V) when is_binary(V) -> {K, V}; +fmt(K, [{_, _}|_] = Vs) -> {K, fmt_proplist(Vs)}; +fmt(K, Vs) when is_list(Vs) -> {K, [fmt(V) || V <- Vs]}; +fmt(K, V) when is_pid(V) -> {K, list_to_binary(rabbit_misc:pid_to_string(V))}; +fmt(K, V) -> {K, + list_to_binary( + rabbit_misc:format("~1000000000p", [V]))}. + +%% Exactly the same as fmt/2, duplicated only for performance issues +fmt(true) -> true; +fmt(false) -> false; +fmt(V) when is_atom(V) -> atom_to_binary(V, utf8); +fmt(V) when is_integer(V) -> V; +fmt(V) when is_number(V) -> V; +fmt(V) when is_binary(V) -> V; +fmt([{_, _}|_] = Vs) -> fmt_proplist(Vs); +fmt(Vs) when is_list(Vs) -> [fmt(V) || V <- Vs]; +fmt(V) when is_pid(V) -> list_to_binary(rabbit_misc:pid_to_string(V)); +fmt(V) -> list_to_binary( + rabbit_misc:format("~1000000000p", [V])). diff --git a/deps/rabbit/src/rabbit_exchange.erl b/deps/rabbit/src/rabbit_exchange.erl new file mode 100644 index 0000000000..129b2b868b --- /dev/null +++ b/deps/rabbit/src/rabbit_exchange.erl @@ -0,0 +1,592 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_exchange). 
+-include("rabbit.hrl"). +-include("rabbit_framing.hrl"). + +-export([recover/1, policy_changed/2, callback/4, declare/7, + assert_equivalence/6, assert_args_equivalence/2, check_type/1, + lookup/1, lookup_many/1, lookup_or_die/1, list/0, list/1, lookup_scratch/2, + update_scratch/3, update_decorators/1, immutable/1, + info_keys/0, info/1, info/2, info_all/1, info_all/2, info_all/4, + route/2, delete/3, validate_binding/2, count/0]). +-export([list_names/0, is_amq_prefixed/1]). +%% these must be run inside a mnesia tx +-export([maybe_auto_delete/2, serial/1, peek_serial/1, update/2]). + +%%---------------------------------------------------------------------------- + +-export_type([name/0, type/0]). + +-type name() :: rabbit_types:r('exchange'). +-type type() :: atom(). +-type fun_name() :: atom(). + +%%---------------------------------------------------------------------------- + +-define(INFO_KEYS, [name, type, durable, auto_delete, internal, arguments, + policy, user_who_performed_action]). + +-spec recover(rabbit_types:vhost()) -> [name()]. + +recover(VHost) -> + Xs = rabbit_misc:table_filter( + fun (#exchange{name = XName}) -> + XName#resource.virtual_host =:= VHost andalso + mnesia:read({rabbit_exchange, XName}) =:= [] + end, + fun (X, Tx) -> + X1 = case Tx of + true -> store_ram(X); + false -> rabbit_exchange_decorator:set(X) + end, + callback(X1, create, map_create_tx(Tx), [X1]) + end, + rabbit_durable_exchange), + [XName || #exchange{name = XName} <- Xs]. + +-spec callback + (rabbit_types:exchange(), fun_name(), + fun((boolean()) -> non_neg_integer()) | atom(), [any()]) -> 'ok'. 
%% Invoke callback Fun first on every active decorator of X and then on
%% X's exchange type module. Serial0 is either a fun computing the
%% serial from "does this module serialise events?" or a constant atom
%% passed through unchanged.
callback(X = #exchange{type       = XType,
                       decorators = Decorators}, Fun, Serial0, Args) ->
    Serial = if is_function(Serial0) -> Serial0;
                is_atom(Serial0)     -> fun (_Bool) -> Serial0 end
             end,
    [ok = apply(M, Fun, [Serial(M:serialise_events(X)) | Args]) ||
        M <- rabbit_exchange_decorator:select(all, Decorators)],
    Module = type_to_module(XType),
    apply(Module, Fun, [Serial(Module:serialise_events()) | Args]).

-spec policy_changed
        (rabbit_types:exchange(), rabbit_types:exchange()) -> 'ok'.

%% Notify the type module and the union of old and new decorators that
%% the policy attached to this exchange changed.
policy_changed(X = #exchange{type       = XType,
                             decorators = Decorators},
               X1 = #exchange{decorators = Decorators1}) ->
    D    = rabbit_exchange_decorator:select(all, Decorators),
    D1   = rabbit_exchange_decorator:select(all, Decorators1),
    DAll = lists:usort(D ++ D1),
    [ok = M:policy_changed(X, X1) || M <- [type_to_module(XType) | DAll]],
    ok.

%% True when any decorator, or the exchange type itself, wants binding
%% events serialised for this exchange.
serialise_events(X = #exchange{type = Type, decorators = Decorators}) ->
    lists:any(fun (M) -> M:serialise_events(X) end,
              rabbit_exchange_decorator:select(all, Decorators))
        orelse (type_to_module(Type)):serialise_events().

-spec serial(rabbit_types:exchange()) ->
          fun((boolean()) -> 'none' | pos_integer()).

serial(#exchange{name = XName} = X) ->
    Serial = case serialise_events(X) of
                 true  -> next_serial(XName);
                 false -> none
             end,
    fun (true)  -> Serial;
        (false) -> none
    end.

-spec is_amq_prefixed(rabbit_types:exchange() | binary()) -> boolean().

%% True when the given name (or exchange name) starts with the reserved
%% "amq." prefix.
%%
%% Bug fix: the previous implementation used re:run(Name, <<"^amq\.">>).
%% Inside an Erlang literal, "\." is just ".", so the regex matched ANY
%% character after "amq" (e.g. <<"amqX">> was reported as prefixed).
%% An exact binary prefix match is both correct and cheaper.
is_amq_prefixed(<<"amq.", _/binary>>) ->
    true;
is_amq_prefixed(Name) when is_binary(Name) ->
    false;
is_amq_prefixed(#exchange{name = #resource{name = <<>>}}) ->
    false;
is_amq_prefixed(#exchange{name = #resource{name = Name}}) ->
    is_amq_prefixed(Name).

-spec declare
        (name(), type(), boolean(), boolean(), boolean(),
         rabbit_framing:amqp_table(), rabbit_types:username())
        -> rabbit_types:exchange().
+ +declare(XName, Type, Durable, AutoDelete, Internal, Args, Username) -> + X = rabbit_exchange_decorator:set( + rabbit_policy:set(#exchange{name = XName, + type = Type, + durable = Durable, + auto_delete = AutoDelete, + internal = Internal, + arguments = Args, + options = #{user => Username}})), + XT = type_to_module(Type), + %% We want to upset things if it isn't ok + ok = XT:validate(X), + %% Avoid a channel exception if there's a race condition + %% with an exchange.delete operation. + %% + %% See rabbitmq/rabbitmq-federation#7. + case rabbit_runtime_parameters:lookup(XName#resource.virtual_host, + ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, + XName#resource.name) of + not_found -> + rabbit_misc:execute_mnesia_transaction( + fun () -> + case mnesia:wread({rabbit_exchange, XName}) of + [] -> + {new, store(X)}; + [ExistingX] -> + {existing, ExistingX} + end + end, + fun ({new, Exchange}, Tx) -> + ok = callback(X, create, map_create_tx(Tx), [Exchange]), + rabbit_event:notify_if(not Tx, exchange_created, info(Exchange)), + Exchange; + ({existing, Exchange}, _Tx) -> + Exchange; + (Err, _Tx) -> + Err + end); + _ -> + rabbit_log:warning("ignoring exchange.declare for exchange ~p, + exchange.delete in progress~n.", [XName]), + X + end. + +map_create_tx(true) -> transaction; +map_create_tx(false) -> none. + + +store(X = #exchange{durable = true}) -> + mnesia:write(rabbit_durable_exchange, X#exchange{decorators = undefined}, + write), + store_ram(X); +store(X = #exchange{durable = false}) -> + store_ram(X). + +store_ram(X) -> + X1 = rabbit_exchange_decorator:set(X), + ok = mnesia:write(rabbit_exchange, rabbit_exchange_decorator:set(X1), + write), + X1. + +%% Used with binaries sent over the wire; the type may not exist. + +-spec check_type + (binary()) -> atom() | rabbit_types:connection_exit(). 
+ +check_type(TypeBin) -> + case rabbit_registry:binary_to_type(rabbit_data_coercion:to_binary(TypeBin)) of + {error, not_found} -> + rabbit_misc:protocol_error( + command_invalid, "unknown exchange type '~s'", [TypeBin]); + T -> + case rabbit_registry:lookup_module(exchange, T) of + {error, not_found} -> rabbit_misc:protocol_error( + command_invalid, + "invalid exchange type '~s'", [T]); + {ok, _Module} -> T + end + end. + +-spec assert_equivalence + (rabbit_types:exchange(), atom(), boolean(), boolean(), boolean(), + rabbit_framing:amqp_table()) + -> 'ok' | rabbit_types:connection_exit(). + +assert_equivalence(X = #exchange{ name = XName, + durable = Durable, + auto_delete = AutoDelete, + internal = Internal, + type = Type}, + ReqType, ReqDurable, ReqAutoDelete, ReqInternal, ReqArgs) -> + AFE = fun rabbit_misc:assert_field_equivalence/4, + AFE(Type, ReqType, XName, type), + AFE(Durable, ReqDurable, XName, durable), + AFE(AutoDelete, ReqAutoDelete, XName, auto_delete), + AFE(Internal, ReqInternal, XName, internal), + (type_to_module(Type)):assert_args_equivalence(X, ReqArgs). + +-spec assert_args_equivalence + (rabbit_types:exchange(), rabbit_framing:amqp_table()) + -> 'ok' | rabbit_types:connection_exit(). + +assert_args_equivalence(#exchange{ name = Name, arguments = Args }, + RequiredArgs) -> + %% The spec says "Arguments are compared for semantic + %% equivalence". The only arg we care about is + %% "alternate-exchange". + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, Name, + [<<"alternate-exchange">>]). + +-spec lookup + (name()) -> rabbit_types:ok(rabbit_types:exchange()) | + rabbit_types:error('not_found'). + +lookup(Name) -> + rabbit_misc:dirty_read({rabbit_exchange, Name}). + + +-spec lookup_many([name()]) -> [rabbit_types:exchange()]. 
lookup_many([])     -> [];
lookup_many([Name]) -> ets:lookup(rabbit_exchange, Name);
lookup_many(Names) when is_list(Names) ->
    %% Normally we'd call mnesia:dirty_read/1 here, but that is quite
    %% expensive for reasons explained in rabbit_misc:dirty_read/1.
    lists:flatmap(fun (Name) -> ets:lookup(rabbit_exchange, Name) end, Names).


-spec lookup_or_die
        (name()) -> rabbit_types:exchange() |
                    rabbit_types:channel_exit().

%% Like lookup/1, but raises a channel exit when the exchange is absent.
lookup_or_die(Name) ->
    case lookup(Name) of
        {ok, X}            -> X;
        {error, not_found} -> rabbit_amqqueue:not_found(Name)
    end.

-spec list() -> [rabbit_types:exchange()].

%% All exchanges across every vhost.
list() ->
    mnesia:dirty_match_object(rabbit_exchange, #exchange{_ = '_'}).

-spec count() -> non_neg_integer().

count() ->
    mnesia:table_info(rabbit_exchange, size).

-spec list_names() -> [rabbit_exchange:name()].

list_names() ->
    mnesia:dirty_all_keys(rabbit_exchange).

%% Not dirty_match_object since that would not be transactional when used in a
%% tx context

-spec list(rabbit_types:vhost()) -> [rabbit_types:exchange()].

list(VHostPath) ->
    Pattern = #exchange{name = rabbit_misc:r(VHostPath, exchange), _ = '_'},
    mnesia:async_dirty(
      fun () -> mnesia:match_object(rabbit_exchange, Pattern, read) end).

-spec lookup_scratch(name(), atom()) ->
          rabbit_types:ok(term()) |
          rabbit_types:error('not_found').

%% Fetch the per-application scratch value stored on an exchange.
lookup_scratch(Name, App) ->
    case lookup(Name) of
        {ok, #exchange{scratches = undefined}} ->
            {error, not_found};
        {ok, #exchange{scratches = Scratches}} ->
            case orddict:find(App, Scratches) of
                {ok, _Value} = Ok -> Ok;
                error             -> {error, not_found}
            end;
        {error, not_found} = E ->
            E
    end.

-spec update_scratch(name(), atom(), fun((any()) -> any())) -> 'ok'.
+ +update_scratch(Name, App, Fun) -> + rabbit_misc:execute_mnesia_transaction( + fun() -> + update(Name, + fun(X = #exchange{scratches = Scratches0}) -> + Scratches1 = case Scratches0 of + undefined -> orddict:new(); + _ -> Scratches0 + end, + Scratch = case orddict:find(App, Scratches1) of + {ok, S} -> S; + error -> undefined + end, + Scratches2 = orddict:store( + App, Fun(Scratch), Scratches1), + X#exchange{scratches = Scratches2} + end), + ok + end). + +-spec update_decorators(name()) -> 'ok'. + +update_decorators(Name) -> + rabbit_misc:execute_mnesia_transaction( + fun() -> + case mnesia:wread({rabbit_exchange, Name}) of + [X] -> store_ram(X), + ok; + [] -> ok + end + end). + +-spec update + (name(), + fun((rabbit_types:exchange()) -> rabbit_types:exchange())) + -> not_found | rabbit_types:exchange(). + +update(Name, Fun) -> + case mnesia:wread({rabbit_exchange, Name}) of + [X] -> X1 = Fun(X), + store(X1); + [] -> not_found + end. + +-spec immutable(rabbit_types:exchange()) -> rabbit_types:exchange(). + +immutable(X) -> X#exchange{scratches = none, + policy = none, + decorators = none}. + +-spec info_keys() -> rabbit_types:info_keys(). + +info_keys() -> ?INFO_KEYS. + +map(VHostPath, F) -> + %% TODO: there is scope for optimisation here, e.g. using a + %% cursor, parallelising the function invocation + lists:map(F, list(VHostPath)). + +infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items]. 
+ +i(name, #exchange{name = Name}) -> Name; +i(type, #exchange{type = Type}) -> Type; +i(durable, #exchange{durable = Durable}) -> Durable; +i(auto_delete, #exchange{auto_delete = AutoDelete}) -> AutoDelete; +i(internal, #exchange{internal = Internal}) -> Internal; +i(arguments, #exchange{arguments = Arguments}) -> Arguments; +i(policy, X) -> case rabbit_policy:name(X) of + none -> ''; + Policy -> Policy + end; +i(user_who_performed_action, #exchange{options = Opts}) -> + maps:get(user, Opts, ?UNKNOWN_USER); +i(Item, #exchange{type = Type} = X) -> + case (type_to_module(Type)):info(X, [Item]) of + [{Item, I}] -> I; + [] -> throw({bad_argument, Item}) + end. + +-spec info(rabbit_types:exchange()) -> rabbit_types:infos(). + +info(X = #exchange{type = Type}) -> + infos(?INFO_KEYS, X) ++ (type_to_module(Type)):info(X). + +-spec info + (rabbit_types:exchange(), rabbit_types:info_keys()) + -> rabbit_types:infos(). + +info(X = #exchange{type = _Type}, Items) -> + infos(Items, X). + +-spec info_all(rabbit_types:vhost()) -> [rabbit_types:infos()]. + +info_all(VHostPath) -> map(VHostPath, fun (X) -> info(X) end). + +-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys()) + -> [rabbit_types:infos()]. + +info_all(VHostPath, Items) -> map(VHostPath, fun (X) -> info(X, Items) end). + +-spec info_all(rabbit_types:vhost(), rabbit_types:info_keys(), + reference(), pid()) + -> 'ok'. + +info_all(VHostPath, Items, Ref, AggregatorPid) -> + rabbit_control_misc:emitting_map( + AggregatorPid, Ref, fun(X) -> info(X, Items) end, list(VHostPath)). + +-spec route(rabbit_types:exchange(), rabbit_types:delivery()) + -> [rabbit_amqqueue:name()]. 
+ +route(#exchange{name = #resource{virtual_host = VHost, name = RName} = XName, + decorators = Decorators} = X, + #delivery{message = #basic_message{routing_keys = RKs}} = Delivery) -> + case RName of + <<>> -> + RKsSorted = lists:usort(RKs), + [rabbit_channel:deliver_reply(RK, Delivery) || + RK <- RKsSorted, virtual_reply_queue(RK)], + [rabbit_misc:r(VHost, queue, RK) || RK <- RKsSorted, + not virtual_reply_queue(RK)]; + _ -> + Decs = rabbit_exchange_decorator:select(route, Decorators), + lists:usort(route1(Delivery, Decs, {[X], XName, []})) + end. + +virtual_reply_queue(<<"amq.rabbitmq.reply-to.", _/binary>>) -> true; +virtual_reply_queue(_) -> false. + +route1(_, _, {[], _, QNames}) -> + QNames; +route1(Delivery, Decorators, + {[X = #exchange{type = Type} | WorkList], SeenXs, QNames}) -> + ExchangeDests = (type_to_module(Type)):route(X, Delivery), + DecorateDests = process_decorators(X, Decorators, Delivery), + AlternateDests = process_alternate(X, ExchangeDests), + route1(Delivery, Decorators, + lists:foldl(fun process_route/2, {WorkList, SeenXs, QNames}, + AlternateDests ++ DecorateDests ++ ExchangeDests)). + +process_alternate(X = #exchange{name = XName}, []) -> + case rabbit_policy:get_arg( + <<"alternate-exchange">>, <<"alternate-exchange">>, X) of + undefined -> []; + AName -> [rabbit_misc:r(XName, exchange, AName)] + end; +process_alternate(_X, _Results) -> + []. + +process_decorators(_, [], _) -> %% optimisation + []; +process_decorators(X, Decorators, Delivery) -> + lists:append([Decorator:route(X, Delivery) || Decorator <- Decorators]). 
+ +process_route(#resource{kind = exchange} = XName, + {_WorkList, XName, _QNames} = Acc) -> + Acc; +process_route(#resource{kind = exchange} = XName, + {WorkList, #resource{kind = exchange} = SeenX, QNames}) -> + {cons_if_present(XName, WorkList), + gb_sets:from_list([SeenX, XName]), QNames}; +process_route(#resource{kind = exchange} = XName, + {WorkList, SeenXs, QNames} = Acc) -> + case gb_sets:is_element(XName, SeenXs) of + true -> Acc; + false -> {cons_if_present(XName, WorkList), + gb_sets:add_element(XName, SeenXs), QNames} + end; +process_route(#resource{kind = queue} = QName, + {WorkList, SeenXs, QNames}) -> + {WorkList, SeenXs, [QName | QNames]}. + +cons_if_present(XName, L) -> + case lookup(XName) of + {ok, X} -> [X | L]; + {error, not_found} -> L + end. + +call_with_exchange(XName, Fun) -> + rabbit_misc:execute_mnesia_tx_with_tail( + fun () -> case mnesia:read({rabbit_exchange, XName}) of + [] -> rabbit_misc:const({error, not_found}); + [X] -> Fun(X) + end + end). + +-spec delete + (name(), 'true', rabbit_types:username()) -> + 'ok'| rabbit_types:error('not_found' | 'in_use'); + (name(), 'false', rabbit_types:username()) -> + 'ok' | rabbit_types:error('not_found'). + +delete(XName, IfUnused, Username) -> + Fun = case IfUnused of + true -> fun conditional_delete/2; + false -> fun unconditional_delete/2 + end, + try + %% guard exchange.declare operations from failing when there's + %% a race condition between it and an exchange.delete. 
+ %% + %% see rabbitmq/rabbitmq-federation#7 + rabbit_runtime_parameters:set(XName#resource.virtual_host, + ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, + XName#resource.name, true, Username), + call_with_exchange( + XName, + fun (X) -> + case Fun(X, false) of + {deleted, X, Bs, Deletions} -> + rabbit_binding:process_deletions( + rabbit_binding:add_deletion( + XName, {X, deleted, Bs}, Deletions), Username); + {error, _InUseOrNotFound} = E -> + rabbit_misc:const(E) + end + end) + after + rabbit_runtime_parameters:clear(XName#resource.virtual_host, + ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, + XName#resource.name, Username) + end. + +-spec validate_binding + (rabbit_types:exchange(), rabbit_types:binding()) + -> rabbit_types:ok_or_error({'binding_invalid', string(), [any()]}). + +validate_binding(X = #exchange{type = XType}, Binding) -> + Module = type_to_module(XType), + Module:validate_binding(X, Binding). + +-spec maybe_auto_delete + (rabbit_types:exchange(), boolean()) + -> 'not_deleted' | {'deleted', rabbit_binding:deletions()}. + +maybe_auto_delete(#exchange{auto_delete = false}, _OnlyDurable) -> + not_deleted; +maybe_auto_delete(#exchange{auto_delete = true} = X, OnlyDurable) -> + case conditional_delete(X, OnlyDurable) of + {error, in_use} -> not_deleted; + {deleted, X, [], Deletions} -> {deleted, Deletions} + end. + +conditional_delete(X = #exchange{name = XName}, OnlyDurable) -> + case rabbit_binding:has_for_source(XName) of + false -> internal_delete(X, OnlyDurable, false); + true -> {error, in_use} + end. + +unconditional_delete(X, OnlyDurable) -> + internal_delete(X, OnlyDurable, true). 
+ +internal_delete(X = #exchange{name = XName}, OnlyDurable, RemoveBindingsForSource) -> + ok = mnesia:delete({rabbit_exchange, XName}), + ok = mnesia:delete({rabbit_exchange_serial, XName}), + mnesia:delete({rabbit_durable_exchange, XName}), + Bindings = case RemoveBindingsForSource of + true -> rabbit_binding:remove_for_source(XName); + false -> [] + end, + {deleted, X, Bindings, rabbit_binding:remove_for_destination( + XName, OnlyDurable)}. + +next_serial(XName) -> + Serial = peek_serial(XName, write), + ok = mnesia:write(rabbit_exchange_serial, + #exchange_serial{name = XName, next = Serial + 1}, write), + Serial. + +-spec peek_serial(name()) -> pos_integer() | 'undefined'. + +peek_serial(XName) -> peek_serial(XName, read). + +peek_serial(XName, LockType) -> + case mnesia:read(rabbit_exchange_serial, XName, LockType) of + [#exchange_serial{next = Serial}] -> Serial; + _ -> 1 + end. + +invalid_module(T) -> + rabbit_log:warning("Could not find exchange type ~s.~n", [T]), + put({xtype_to_module, T}, rabbit_exchange_type_invalid), + rabbit_exchange_type_invalid. + +%% Used with atoms from records; e.g., the type is expected to exist. +type_to_module(T) -> + case get({xtype_to_module, T}) of + undefined -> + case rabbit_registry:lookup_module(exchange, T) of + {ok, Module} -> put({xtype_to_module, T}, Module), + Module; + {error, not_found} -> invalid_module(T) + end; + Module -> + Module + end. diff --git a/deps/rabbit/src/rabbit_exchange_decorator.erl b/deps/rabbit/src/rabbit_exchange_decorator.erl new file mode 100644 index 0000000000..02d0258d3c --- /dev/null +++ b/deps/rabbit/src/rabbit_exchange_decorator.erl @@ -0,0 +1,105 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_exchange_decorator). 
+ +-include("rabbit.hrl"). + +-export([select/2, set/1]). + +-behaviour(rabbit_registry_class). + +-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]). + +%% This is like an exchange type except that: +%% +%% 1) It applies to all exchanges as soon as it is installed, therefore +%% 2) It is not allowed to affect validation, so no validate/1 or +%% assert_args_equivalence/2 +%% +%% It's possible in the future we might make decorators +%% able to manipulate messages as they are published. + +-type(tx() :: 'transaction' | 'none'). +-type(serial() :: pos_integer() | tx()). + +-callback description() -> [proplists:property()]. + +%% Should Rabbit ensure that all binding events that are +%% delivered to an individual exchange can be serialised? (they +%% might still be delivered out of order, but there'll be a +%% serial number). +-callback serialise_events(rabbit_types:exchange()) -> boolean(). + +%% called after declaration and recovery +-callback create(tx(), rabbit_types:exchange()) -> 'ok'. + +%% called after exchange (auto)deletion. +-callback delete(tx(), rabbit_types:exchange(), [rabbit_types:binding()]) -> + 'ok'. + +%% called when the policy attached to this exchange changes. +-callback policy_changed(rabbit_types:exchange(), rabbit_types:exchange()) -> + 'ok'. + +%% called after a binding has been added or recovered +-callback add_binding(serial(), rabbit_types:exchange(), + rabbit_types:binding()) -> 'ok'. + +%% called after bindings have been deleted. +-callback remove_bindings(serial(), rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'. + +%% Allows additional destinations to be added to the routing decision. +-callback route(rabbit_types:exchange(), rabbit_types:delivery()) -> + [rabbit_amqqueue:name() | rabbit_exchange:name()]. 
+ +%% Whether the decorator wishes to receive callbacks for the exchange +%% none:no callbacks, noroute:all callbacks except route, all:all callbacks +-callback active_for(rabbit_types:exchange()) -> 'none' | 'noroute' | 'all'. + +%%---------------------------------------------------------------------------- + +added_to_rabbit_registry(_Type, _ModuleName) -> + [maybe_recover(X) || X <- rabbit_exchange:list()], + ok. +removed_from_rabbit_registry(_Type) -> + [maybe_recover(X) || X <- rabbit_exchange:list()], + ok. + +%% select a subset of active decorators +select(all, {Route, NoRoute}) -> filter(Route ++ NoRoute); +select(route, {Route, _NoRoute}) -> filter(Route); +select(raw, {Route, NoRoute}) -> Route ++ NoRoute. + +filter(Modules) -> + [M || M <- Modules, code:which(M) =/= non_existing]. + +set(X) -> + Decs = lists:foldl(fun (D, {Route, NoRoute}) -> + ActiveFor = D:active_for(X), + {cons_if_eq(all, ActiveFor, D, Route), + cons_if_eq(noroute, ActiveFor, D, NoRoute)} + end, {[], []}, list()), + X#exchange{decorators = Decs}. + +list() -> [M || {_, M} <- rabbit_registry:lookup_all(exchange_decorator)]. + +cons_if_eq(Select, Select, Item, List) -> [Item | List]; +cons_if_eq(_Select, _Other, _Item, List) -> List. + +maybe_recover(X = #exchange{name = Name, + decorators = Decs}) -> + #exchange{decorators = Decs1} = set(X), + Old = lists:sort(select(all, Decs)), + New = lists:sort(select(all, Decs1)), + case New of + Old -> ok; + _ -> %% TODO create a tx here for non-federation decorators + [M:create(none, X) || M <- New -- Old], + rabbit_exchange:update_decorators(Name) + end. diff --git a/deps/rabbit/src/rabbit_exchange_parameters.erl b/deps/rabbit/src/rabbit_exchange_parameters.erl new file mode 100644 index 0000000000..f9de648cfa --- /dev/null +++ b/deps/rabbit/src/rabbit_exchange_parameters.erl @@ -0,0 +1,39 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. 
If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_exchange_parameters). + +-behaviour(rabbit_runtime_parameter). + +-include("rabbit.hrl"). + +-export([register/0]). +-export([validate/5, notify/5, notify_clear/4]). + +-rabbit_boot_step({?MODULE, + [{description, "exchange parameters"}, + {mfa, {rabbit_exchange_parameters, register, []}}, + {requires, rabbit_registry}, + {enables, recovery}]}). + +register() -> + rabbit_registry:register(runtime_parameter, + ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, ?MODULE), + %% ensure there are no leftovers from before node restart/crash + rabbit_runtime_parameters:clear_component( + ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, + ?INTERNAL_USER), + ok. + +validate(_VHost, ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, _Name, _Term, _User) -> + ok. + +notify(_VHost, ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, _Name, _Term, _Username) -> + ok. + +notify_clear(_VHost, ?EXCHANGE_DELETE_IN_PROGRESS_COMPONENT, _Name, _Username) -> + ok. diff --git a/deps/rabbit/src/rabbit_exchange_type_direct.erl b/deps/rabbit/src/rabbit_exchange_type_direct.erl new file mode 100644 index 0000000000..3f4350e7b0 --- /dev/null +++ b/deps/rabbit/src/rabbit_exchange_type_direct.erl @@ -0,0 +1,46 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_exchange_type_direct). +-include("rabbit.hrl"). + +-behaviour(rabbit_exchange_type). + +-export([description/0, serialise_events/0, route/2]). +-export([validate/1, validate_binding/2, + create/2, delete/3, policy_changed/2, add_binding/3, + remove_bindings/3, assert_args_equivalence/2]). 
-export([info/1, info/2]).

%% Boot step: register the <<"direct">> type with the exchange registry.
-rabbit_boot_step({?MODULE,
                   [{description, "exchange type direct"},
                    {mfa, {rabbit_registry, register,
                           [exchange, <<"direct">>, ?MODULE]}},
                    {requires, rabbit_registry},
                    {enables, kernel_ready}]}).

%% No type-specific information is exposed.
info(_X) -> [].
info(_X, _) -> [].

description() ->
    [{description, <<"AMQP direct exchange, as per the AMQP specification">>}].

serialise_events() -> false.

%% Deliver to bindings whose key equals one of the message's routing keys.
route(#exchange{name = Name},
      #delivery{message = #basic_message{routing_keys = Routes}}) ->
    rabbit_router:match_routing_key(Name, Routes).

%% The direct type needs no validation or lifecycle handling of its own.
validate(_X) -> ok.
validate_binding(_X, _B) -> ok.
create(_Tx, _X) -> ok.
delete(_Tx, _X, _Bs) -> ok.
policy_changed(_X1, _X2) -> ok.
add_binding(_Tx, _X, _B) -> ok.
remove_bindings(_Tx, _X, _Bs) -> ok.
assert_args_equivalence(X, Args) ->
    rabbit_exchange:assert_args_equivalence(X, Args).

%% ===== scrape artifact: original diff header =====
%% diff --git a/deps/rabbit/src/rabbit_exchange_type_fanout.erl
%% new file mode 100644, index 0000000000..a8778cf0c7, @@ -0,0 +1,45 @@

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_exchange_type_fanout).
-include("rabbit.hrl").

-behaviour(rabbit_exchange_type).

-export([description/0, serialise_events/0, route/2]).
-export([validate/1, validate_binding/2,
         create/2, delete/3, policy_changed/2, add_binding/3,
         remove_bindings/3, assert_args_equivalence/2]).
-export([info/1, info/2]).

%% Boot step: register the <<"fanout">> type with the exchange registry.
-rabbit_boot_step({?MODULE,
                   [{description, "exchange type fanout"},
                    {mfa, {rabbit_registry, register,
                           [exchange, <<"fanout">>, ?MODULE]}},
                    {requires, rabbit_registry},
                    {enables, kernel_ready}]}).

info(_X) -> [].
info(_X, _) -> [].

description() ->
    [{description, <<"AMQP fanout exchange, as per the AMQP specification">>}].

serialise_events() -> false.

%% Deliver to every binding regardless of routing key, by matching on the
%% wildcard key '_'.
route(#exchange{name = Name}, _Delivery) ->
    rabbit_router:match_routing_key(Name, ['_']).

validate(_X) -> ok.
validate_binding(_X, _B) -> ok.
create(_Tx, _X) -> ok.
delete(_Tx, _X, _Bs) -> ok.
policy_changed(_X1, _X2) -> ok.
add_binding(_Tx, _X, _B) -> ok.
remove_bindings(_Tx, _X, _Bs) -> ok.
assert_args_equivalence(X, Args) ->
    rabbit_exchange:assert_args_equivalence(X, Args).

%% ===== scrape artifact: original diff header =====
%% diff --git a/deps/rabbit/src/rabbit_exchange_type_headers.erl
%% new file mode 100644, index 0000000000..e40195de7a, @@ -0,0 +1,136 @@

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_exchange_type_headers).
-include("rabbit.hrl").
-include("rabbit_framing.hrl").

-behaviour(rabbit_exchange_type).

-export([description/0, serialise_events/0, route/2]).
-export([validate/1, validate_binding/2,
         create/2, delete/3, policy_changed/2, add_binding/3,
         remove_bindings/3, assert_args_equivalence/2]).
-export([info/1, info/2]).

%% Boot step: register the <<"headers">> type with the exchange registry.
-rabbit_boot_step({?MODULE,
                   [{description, "exchange type headers"},
                    {mfa, {rabbit_registry, register,
                           [exchange, <<"headers">>, ?MODULE]}},
                    {requires, rabbit_registry},
                    {enables, kernel_ready}]}).

info(_X) -> [].
info(_X, _) -> [].

description() ->
    [{description, <<"AMQP headers exchange, as per the AMQP specification">>}].

serialise_events() -> false.
%% Route by matching each binding's argument table against the message's
%% (sorted) header table.
route(#exchange{name = Name},
      #delivery{message = #basic_message{content = Content}}) ->
    Headers = case (Content#content.properties)#'P_basic'.headers of
                  undefined -> [];
                  Unsorted  -> rabbit_misc:sort_field_table(Unsorted)
              end,
    rabbit_router:match_bindings(
      Name, fun (#binding{args = Spec}) -> headers_match(Spec, Headers) end).

%% An x-match binding argument, when present, must be the longstr "all"
%% or "any".
validate_binding(_X, #binding{args = Args}) ->
    case rabbit_misc:table_lookup(Args, <<"x-match">>) of
        {longstr, <<"all">>} -> ok;
        {longstr, <<"any">>} -> ok;
        {longstr, Other}     -> {error,
                                 {binding_invalid,
                                  "Invalid x-match field value ~p; "
                                  "expected all or any", [Other]}};
        {Type, Other}        -> {error,
                                 {binding_invalid,
                                  "Invalid x-match field type ~p (value ~p); "
                                  "expected longstr", [Type, Other]}};
        undefined            -> ok %% [0]
    end.
%% [0] spec is vague on whether it can be omitted but in practice it's
%% useful to allow people to do this

%% Missing or unrecognised x-match defaults to 'all' (legacy: we didn't
%% validate).
parse_x_match({longstr, <<"all">>}) -> all;
parse_x_match({longstr, <<"any">>}) -> any;
parse_x_match(_)                    -> all.

%% Horrendous matching algorithm. Depends for its merge-like
%% (linear-time) behaviour on the lists:keysort
%% (rabbit_misc:sort_field_table) that route/1 and
%% rabbit_binding:{add,remove}/2 do.
%%
%% !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
%% In other words: REQUIRES BOTH PATTERN AND DATA TO BE SORTED ASCENDING
%% BY KEY.
%% !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

-spec headers_match
        (rabbit_framing:amqp_table(), rabbit_framing:amqp_table()) ->
            boolean().

%% Entry point: derive the match kind from x-match, then run the
%% merge-walk with fresh accumulators.
headers_match(Args, Data) ->
    MatchKind = parse_x_match(rabbit_misc:table_lookup(Args, <<"x-match">>)),
    headers_match(Args, Data, true, false, MatchKind).
%% Merge-walk of the two key-sorted tables, carrying two accumulators:
%%   AllSoFar - every pattern entry seen so far has matched ('all' verdict)
%%   AnySoFar - at least one pattern entry has matched      ('any' verdict)
%% Clause order is significant: the two short-circuit clauses come first.

% A bit less horrendous algorithm :)
headers_match(_, _, false, _, all) -> false;
headers_match(_, _, _, true, any) -> true;

% No more pattern entries: the accumulated verdict is final
headers_match([], _Data, AllSoFar, _AnySoFar, all) -> AllSoFar;
headers_match([], _Data, _AllSoFar, AnySoFar, any) -> AnySoFar;

% Pattern keys starting with x- are ignored entirely
headers_match([{<<"x-", _/binary>>, _PT, _PV} | PTail], Data,
              AllSoFar, AnySoFar, Kind) ->
    headers_match(PTail, Data, AllSoFar, AnySoFar, Kind);

% Data exhausted while pattern entries remain: the 'all' verdict fails
headers_match(_Pattern, [], _AllSoFar, AnySoFar, Kind) ->
    headers_match([], [], false, AnySoFar, Kind);

% Data key not mentioned in the pattern: skip the data entry
headers_match(Pattern = [{PK, _PT, _PV} | _], [{DK, _DT, _DV} | DTail],
              AllSoFar, AnySoFar, Kind) when PK > DK ->
    headers_match(Pattern, DTail, AllSoFar, AnySoFar, Kind);

% Pattern key absent from the data: fail 'all', move to the next pattern
headers_match([{PK, _PT, _PV} | PTail], Data = [{DK, _DT, _DV} | _],
              _AllSoFar, AnySoFar, Kind) when PK < DK ->
    headers_match(PTail, Data, false, AnySoFar, Kind);

%% It's not properly specified, but a "no value" in a pattern field is
%% supposed to mean simple presence of the corresponding data field.
%% That is interpreted as a pattern field of type 'void'.
headers_match([{PK, void, _PV} | PTail], [{DK, _DT, _DV} | DTail],
              AllSoFar, _AnySoFar, Kind) when PK == DK ->
    headers_match(PTail, DTail, AllSoFar, true, Kind);

% Key and value both match: succeed 'any', move on
headers_match([{PK, _PT, PV} | PTail], [{DK, _DT, DV} | DTail],
              AllSoFar, _AnySoFar, Kind) when PK == DK andalso PV == DV ->
    headers_match(PTail, DTail, AllSoFar, true, Kind);

% Key matches but value differs: fail 'all', move on
headers_match([{PK, _PT, _PV} | PTail], [{DK, _DT, _DV} | DTail],
              _AllSoFar, AnySoFar, Kind) when PK == DK ->
    headers_match(PTail, DTail, false, AnySoFar, Kind).

%% Remaining rabbit_exchange_type callbacks: nothing to do for headers
%% exchanges.
validate(_X) -> ok.
create(_Tx, _X) -> ok.
delete(_Tx, _X, _Bs) -> ok.
policy_changed(_X1, _X2) -> ok.
add_binding(_Tx, _X, _B) -> ok.
remove_bindings(_Tx, _X, _Bs) -> ok.
assert_args_equivalence(X, Args) ->
    rabbit_exchange:assert_args_equivalence(X, Args).

%% ===== scrape artifact: original diff header =====
%% diff --git a/deps/rabbit/src/rabbit_exchange_type_invalid.erl
%% new file mode 100644, index 0000000000..3fa27d28e9, @@ -0,0 +1,45 @@
%%
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%
%% Module header of the new file, reproduced as comments so this scraped
%% composite stays parseable:
%% -module(rabbit_exchange_type_invalid).
%% -include("rabbit.hrl").
%% -behaviour(rabbit_exchange_type).
%% -export([description/0, serialise_events/0, route/2]).
%% -export([validate/1, validate_binding/2, create/2, delete/3,
%%          policy_changed/2, add_binding/3, remove_bindings/3,
%%          assert_args_equivalence/2]).
%% -export([info/1, info/2]).

%% rabbit_exchange_type_invalid: placeholder type used when the intended
%% exchange type module cannot be found.
info(_X) -> [].
info(_X, _) -> [].

description() ->
    [{description,
      <<"Dummy exchange type, to be used when the intended one is not found.">>
     }].

serialise_events() -> false.
-spec route(rabbit_types:exchange(), rabbit_types:delivery()) -> no_return().

%% Routing through an exchange whose type module is missing always raises
%% a precondition_failed protocol error.
route(#exchange{name = Name, type = Type}, _) ->
    rabbit_misc:protocol_error(
      precondition_failed,
      "Cannot route message through ~s: exchange type ~s not found",
      [rabbit_misc:rs(Name), Type]).

%% No-op implementations of the remaining callbacks.
validate(_X) -> ok.
validate_binding(_X, _B) -> ok.
create(_Tx, _X) -> ok.
delete(_Tx, _X, _Bs) -> ok.
policy_changed(_X1, _X2) -> ok.
add_binding(_Tx, _X, _B) -> ok.
remove_bindings(_Tx, _X, _Bs) -> ok.
assert_args_equivalence(X, Args) ->
    rabbit_exchange:assert_args_equivalence(X, Args).

%% ===== scrape artifact: original diff header =====
%% diff --git a/deps/rabbit/src/rabbit_exchange_type_topic.erl
%% new file mode 100644, index 0000000000..38b05895f2, @@ -0,0 +1,266 @@

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_exchange_type_topic).

-include("rabbit.hrl").

-behaviour(rabbit_exchange_type).

-export([description/0, serialise_events/0, route/2]).
-export([validate/1, validate_binding/2,
         create/2, delete/3, policy_changed/2, add_binding/3,
         remove_bindings/3, assert_args_equivalence/2]).
-export([info/1, info/2]).

%% Boot step: register the <<"topic">> type with the exchange registry.
-rabbit_boot_step({?MODULE,
                   [{description, "exchange type topic"},
                    {mfa, {rabbit_registry, register,
                           [exchange, <<"topic">>, ?MODULE]}},
                    {requires, rabbit_registry},
                    {enables, kernel_ready}]}).

%%----------------------------------------------------------------------------

info(_X) -> [].
info(_X, _) -> [].

description() ->
    [{description, <<"AMQP topic exchange, as per the AMQP specification">>}].

serialise_events() -> false.
%% Match every routing key of the delivery against the topic trie of this
%% exchange, inside a dirty mnesia activity, and concatenate the results.
%% NB: This may return duplicate results in some situations (that's ok)
route(#exchange{name = X},
      #delivery{message = #basic_message{routing_keys = Routes}}) ->
    lists:append([begin
                      Words = split_topic_key(RKey),
                      mnesia:async_dirty(fun trie_match/2, [X, Words])
                  end || RKey <- Routes]).

validate(_X) -> ok.
validate_binding(_X, _B) -> ok.
create(_Tx, _X) -> ok.

%% In the transaction phase, tear down the exchange's whole trie:
%% nodes, edges and bindings. Nothing to do in the 'none' phase.
delete(transaction, #exchange{name = X}, _Bs) ->
    trie_remove_all_nodes(X),
    trie_remove_all_edges(X),
    trie_remove_all_bindings(X),
    ok;
delete(none, _Exchange, _Bs) ->
    ok.

policy_changed(_X1, _X2) -> ok.

%% Bindings are only materialised in the trie during the transaction phase.
add_binding(transaction, _Exchange, Binding) ->
    internal_add_binding(Binding);
add_binding(none, _Exchange, _Binding) ->
    ok.

%% Remove each binding's destination from its trie node, then prune any
%% path segments left without children or bindings.
remove_bindings(transaction, _X, Bs) ->
    %% See rabbit_binding:lock_route_tables for the rationale for
    %% taking table locks.
    case Bs of
        [_] -> ok;
        _   -> [mnesia:lock({table, T}, write) ||
                   T <- [rabbit_topic_trie_node,
                         rabbit_topic_trie_edge,
                         rabbit_topic_trie_binding]]
    end,
    [case follow_down_get_path(X, split_topic_key(K)) of
         {ok, Path = [{FinalNode, _} | _]} ->
             trie_remove_binding(X, FinalNode, D, Args),
             remove_path_if_empty(X, Path);
         {error, _Node, _RestW} ->
             %% We're trying to remove a binding that no longer exists.
             %% That's unexpected, but shouldn't be a problem.
             ok
     end || #binding{source = X, key = K, destination = D, args = Args} <- Bs],
    ok;
remove_bindings(none, _X, _Bs) ->
    ok.

assert_args_equivalence(X, Args) ->
    rabbit_exchange:assert_args_equivalence(X, Args).

%%----------------------------------------------------------------------------

%% Create (or reuse) the trie path for the binding key and attach the
%% destination at its final node.
internal_add_binding(#binding{source = X, key = K, destination = D,
                              args = Args}) ->
    FinalNode = follow_down_create(X, split_topic_key(K)),
    trie_add_binding(X, FinalNode, D, Args),
    ok.

%% Entry point for matching a routing key (already split into words)
%% against the trie, starting from the root node.
trie_match(X, Words) ->
    trie_match(X, root, Words, []).
%% Key exhausted: collect the bindings attached to the current node, and
%% also follow a trailing "#" child (which may match zero words).
trie_match(X, Node, [], ResAcc) ->
    trie_match_part(X, Node, "#", fun trie_match_skip_any/4, [],
                    trie_bindings(X, Node) ++ ResAcc);
%% For each remaining word, try three edges out of the current node: the
%% literal word, "*" (consumes exactly one word) and "#" (handled by
%% trie_match_skip_any/4, consuming zero or more words).
trie_match(X, Node, [W | RestW] = Words, ResAcc) ->
    lists:foldl(fun ({WArg, MatchFun, RestWArg}, Acc) ->
                        trie_match_part(X, Node, WArg, MatchFun, RestWArg, Acc)
                end, ResAcc, [{W, fun trie_match/4, RestW},
                              {"*", fun trie_match/4, RestW},
                              {"#", fun trie_match_skip_any/4, Words}]).

%% Follow one edge if it exists and continue matching with MatchFun;
%% otherwise return the accumulator unchanged.
trie_match_part(X, Node, Search, MatchFun, RestW, ResAcc) ->
    case trie_child(X, Node, Search) of
        {ok, NextNode} -> MatchFun(X, NextNode, RestW, ResAcc);
        error          -> ResAcc
    end.

%% After a "#" edge: try matching the remaining words from here while
%% also letting "#" swallow one more word at each step, accumulating all
%% resulting matches.
trie_match_skip_any(X, Node, [], ResAcc) ->
    trie_match(X, Node, [], ResAcc);
trie_match_skip_any(X, Node, [_ | RestW] = Words, ResAcc) ->
    trie_match_skip_any(X, Node, RestW,
                        trie_match(X, Node, Words, ResAcc)).

%% Walk down the trie for Words, creating any missing nodes/edges, and
%% return the id of the final node.
follow_down_create(X, Words) ->
    case follow_down_last_node(X, Words) of
        {ok, FinalNode}      -> FinalNode;
        {error, Node, RestW} -> lists:foldl(
                                  fun (W, CurNode) ->
                                          NewNode = new_node_id(),
                                          trie_add_edge(X, CurNode, NewNode, W),
                                          NewNode
                                  end, Node, RestW)
    end.

%% Resolve Words to the last node of the path, without creating anything.
follow_down_last_node(X, Words) ->
    follow_down(X, fun (_, Node, _) -> Node end, root, Words).

%% Resolve Words to the whole path, as {Node, Word} pairs accumulated in
%% reverse (deepest node first), seeded with {root, none}.
follow_down_get_path(X, Words) ->
    follow_down(X, fun (W, Node, PathAcc) -> [{Node, W} | PathAcc] end,
                [{root, none}], Words).

follow_down(X, AccFun, Acc0, Words) ->
    follow_down(X, root, AccFun, Acc0, Words).

%% Generic descent: apply AccFun at each node reached; stop with
%% {error, AccSoFar, RemainingWords} at the first missing edge.
follow_down(_X, _CurNode, _AccFun, Acc, []) ->
    {ok, Acc};
follow_down(X, CurNode, AccFun, Acc, Words = [W | RestW]) ->
    case trie_child(X, CurNode, W) of
        {ok, NextNode} -> follow_down(X, NextNode, AccFun,
                                      AccFun(W, NextNode, Acc), RestW);
        error          -> {error, Acc, Words}
    end.
%% Walk back up a path (as produced by follow_down_get_path/2, deepest
%% node first) and delete edges to nodes whose counter record has
%% disappeared (i.e. no edges and no bindings left). Stops at the first
%% still-populated node; the root is never removed.
remove_path_if_empty(_, [{root, none}]) ->
    ok;
remove_path_if_empty(X, [{Node, W} | [{Parent, _} | _] = RestPath]) ->
    case mnesia:read(rabbit_topic_trie_node,
                     #trie_node{exchange_name = X, node_id = Node}, write) of
        [] -> trie_remove_edge(X, Parent, Node, W),
              remove_path_if_empty(X, RestPath);
        _  -> ok
    end.

%% Look up the child reached from Node via the edge labelled Word.
trie_child(X, Node, Word) ->
    case mnesia:read({rabbit_topic_trie_edge,
                      #trie_edge{exchange_name = X,
                                 node_id = Node,
                                 word = Word}}) of
        [#topic_trie_edge{node_id = NextNode}] -> {ok, NextNode};
        [] -> error
    end.

%% All destinations bound at the given trie node.
trie_bindings(X, Node) ->
    MatchHead = #topic_trie_binding{
                   trie_binding = #trie_binding{exchange_name = X,
                                                node_id = Node,
                                                destination = '$1',
                                                arguments = '_'}},
    mnesia:select(rabbit_topic_trie_binding, [{MatchHead, [], ['$1']}]).

%% Adjust one counter on the node's counter record; Field is a record
%% field index (e.g. #topic_trie_node.edge_count) used with
%% element/setelement. The record is created on first use and deleted
%% once both counters reach zero.
trie_update_node_counts(X, Node, Field, Delta) ->
    E = case mnesia:read(rabbit_topic_trie_node,
                         #trie_node{exchange_name = X,
                                    node_id = Node}, write) of
            []   -> #topic_trie_node{trie_node = #trie_node{
                                                    exchange_name = X,
                                                    node_id = Node},
                                     edge_count = 0,
                                     binding_count = 0};
            [E0] -> E0
        end,
    case setelement(Field, E, element(Field, E) + Delta) of
        #topic_trie_node{edge_count = 0, binding_count = 0} ->
            ok = mnesia:delete_object(rabbit_topic_trie_node, E, write);
        EN ->
            ok = mnesia:write(rabbit_topic_trie_node, EN, write)
    end.

%% Add/remove an edge, keeping the parent node's edge_count in sync.
trie_add_edge(X, FromNode, ToNode, W) ->
    trie_update_node_counts(X, FromNode, #topic_trie_node.edge_count, +1),
    trie_edge_op(X, FromNode, ToNode, W, fun mnesia:write/3).

trie_remove_edge(X, FromNode, ToNode, W) ->
    trie_update_node_counts(X, FromNode, #topic_trie_node.edge_count, -1),
    trie_edge_op(X, FromNode, ToNode, W, fun mnesia:delete_object/3).

%% Shared write/delete operation on rabbit_topic_trie_edge.
trie_edge_op(X, FromNode, ToNode, W, Op) ->
    ok = Op(rabbit_topic_trie_edge,
            #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X,
                                                    node_id = FromNode,
                                                    word = W},
                             node_id = ToNode},
            write).
%% Add/remove a binding at a node, keeping the node's binding_count in
%% sync with trie_update_node_counts/4.
trie_add_binding(X, Node, D, Args) ->
    trie_update_node_counts(X, Node, #topic_trie_node.binding_count, +1),
    trie_binding_op(X, Node, D, Args, fun mnesia:write/3).

trie_remove_binding(X, Node, D, Args) ->
    trie_update_node_counts(X, Node, #topic_trie_node.binding_count, -1),
    trie_binding_op(X, Node, D, Args, fun mnesia:delete_object/3).

%% Shared write/delete operation on rabbit_topic_trie_binding.
trie_binding_op(X, Node, D, Args, Op) ->
    ok = Op(rabbit_topic_trie_binding,
            #topic_trie_binding{
               trie_binding = #trie_binding{exchange_name = X,
                                            node_id = Node,
                                            destination = D,
                                            arguments = Args}},
            write).

%% Bulk removal of every node/edge/binding row belonging to exchange X
%% (used by delete/3 when the exchange itself goes away).
trie_remove_all_nodes(X) ->
    remove_all(rabbit_topic_trie_node,
               #topic_trie_node{trie_node = #trie_node{exchange_name = X,
                                                       _ = '_'},
                                _ = '_'}).

trie_remove_all_edges(X) ->
    remove_all(rabbit_topic_trie_edge,
               #topic_trie_edge{trie_edge = #trie_edge{exchange_name = X,
                                                       _ = '_'},
                                _ = '_'}).

trie_remove_all_bindings(X) ->
    remove_all(rabbit_topic_trie_binding,
               #topic_trie_binding{
                  trie_binding = #trie_binding{exchange_name = X, _ = '_'},
                  _ = '_'}).

%% Delete every row of Table matching Pattern.
remove_all(Table, Pattern) ->
    lists:foreach(fun (R) -> mnesia:delete_object(Table, R, write) end,
                  mnesia:match_object(Table, Pattern, write)).

%% Fresh, globally unique trie node id.
new_node_id() ->
    rabbit_guid:gen().

%% Split a routing key binary on "." into a list of words (each word a
%% list of character codes); the empty key yields no words.
split_topic_key(Key) ->
    split_topic_key(Key, [], []).

%% Accumulators hold the current word and the word list, both reversed.
split_topic_key(<<>>, [], []) ->
    [];
split_topic_key(<<>>, RevWordAcc, RevResAcc) ->
    lists:reverse([lists:reverse(RevWordAcc) | RevResAcc]);
split_topic_key(<<$., Rest/binary>>, RevWordAcc, RevResAcc) ->
    split_topic_key(Rest, [], [lists:reverse(RevWordAcc) | RevResAcc]);
split_topic_key(<<C:8, Rest/binary>>, RevWordAcc, RevResAcc) ->
    split_topic_key(Rest, [C | RevWordAcc], RevResAcc).
diff --git a/deps/rabbit/src/rabbit_feature_flags.erl b/deps/rabbit/src/rabbit_feature_flags.erl new file mode 100644 index 0000000000..921ec9ab53 --- /dev/null +++ b/deps/rabbit/src/rabbit_feature_flags.erl @@ -0,0 +1,2470 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +%% @author The RabbitMQ team +%% @copyright 2018-2020 VMware, Inc. or its affiliates. +%% +%% @doc +%% This module offers a framework to declare capabilities a RabbitMQ node +%% supports and therefore a way to determine if multiple RabbitMQ nodes in +%% a cluster are compatible and can work together. +%% +%% == What a feature flag is == +%% +%% A <strong>feature flag</strong> is a name and several properties given +%% to a change in RabbitMQ which impacts its communication with other +%% RabbitMQ nodes. This kind of change can be: +%% <ul> +%% <li>an update to an Erlang record</li> +%% <li>a modification to a replicated Mnesia table schema</li> +%% <li>a modification to Erlang messages exchanged between Erlang processes +%% which might run on remote nodes</li> +%% </ul> +%% +%% A feature flag is qualified by: +%% <ul> +%% <li>a <strong>name</strong></li> +%% <li>a <strong>description</strong> (optional)</li> +%% <li>a list of other <strong>feature flags this feature flag depends on +%% </strong> (optional). This can be useful when the change builds up on +%% top of a previous change. For instance, it expands a record which was +%% already modified by a previous feature flag.</li> +%% <li>a <strong>migration function</strong> (optional). If provided, this +%% function is called when the feature flag is enabled. 
It is responsible +%% for doing all the data conversion, if any, and confirming the feature +%% flag can be enabled.</li> +%% <li>a level of stability (stable or experimental). For now, this is only +%% informational. But it might be used for specific purposes in the +%% future.</li> +%% </ul> +%% +%% == How to declare a feature flag == +%% +%% To define a new feature flag, you need to use the +%% `rabbit_feature_flag()' module attribute: +%% +%% ``` +%% -rabbit_feature_flag(FeatureFlag). +%% ''' +%% +%% `FeatureFlag' is a {@type feature_flag_modattr()}. +%% +%% == How to enable a feature flag == +%% +%% To enable a supported feature flag, you have the following solutions: +%% +%% <ul> +%% <li>Using this module API: +%% ``` +%% rabbit_feature_flags:enable(FeatureFlagName). +%% ''' +%% </li> +%% <li>Using the `rabbitmqctl' CLI: +%% ``` +%% rabbitmqctl enable_feature_flag "$feature_flag_name" +%% ''' +%% </li> +%% </ul> +%% +%% == How to disable a feature flag == +%% +%% Once enabled, there is <strong>currently no way to disable</strong> a +%% feature flag. + +-module(rabbit_feature_flags). + +-export([list/0, + list/1, + list/2, + enable/1, + enable_all/0, + disable/1, + disable_all/0, + is_supported/1, + is_supported/2, + is_supported_locally/1, + is_supported_remotely/1, + is_supported_remotely/2, + is_supported_remotely/3, + is_enabled/1, + is_enabled/2, + is_disabled/1, + is_disabled/2, + info/0, + info/1, + init/0, + get_state/1, + get_stability/1, + check_node_compatibility/1, + check_node_compatibility/2, + is_node_compatible/1, + is_node_compatible/2, + sync_feature_flags_with_cluster/2, + sync_feature_flags_with_cluster/3, + refresh_feature_flags_after_app_load/1, + enabled_feature_flags_list_file/0 + ]). + +%% RabbitMQ internal use only. 
%% Exports for RabbitMQ internal use only.
-export([initialize_registry/0,
         initialize_registry/1,
         mark_as_enabled_locally/2,
         remote_nodes/0,
         running_remote_nodes/0,
         does_node_support/3,
         merge_feature_flags_from_unknown_apps/1,
         do_sync_feature_flags_with_node/1]).

%% Extra entry points exposed to the test suites only.
-ifdef(TEST).
-export([inject_test_feature_flags/1,
         initialize_registry/3,
         query_supported_feature_flags/0,
         mark_as_enabled_remotely/2,
         mark_as_enabled_remotely/4,
         registry_loading_lock/0]).
-endif.

%% Default timeout for operations on remote nodes.
-define(TIMEOUT, 60000).

%% Global lock names used while (re)loading the registry or changing the
%% state of a feature flag.
-define(FF_REGISTRY_LOADING_LOCK, {feature_flags_registry_loading, self()}).
-define(FF_STATE_CHANGE_LOCK, {feature_flags_state_change, self()}).

%% Value of a `-rabbit_feature_flag()' module attribute declaring a new
%% feature flag.
-type feature_flag_modattr() :: {feature_name(),
                                 feature_props()}.

%% Name identifying a feature flag; this is also the only bit persisted
%% so a node remembers which flags are enabled.
-type feature_name() :: atom().

%% Feature flag properties as declared; all optional. The migration
%% function is given as a {Module, Function} name (not a fun) so it can
%% be embedded as an Erlang term when the registry module source is
%% regenerated.
-type feature_props() :: #{desc => string(),
                           doc_url => string(),
                           stability => stability(),
                           depends_on => [feature_name()],
                           migration_fun => migration_fun_name()}.

%% Map of feature flags as returned or accepted by several functions in
%% this module, notably list/0.
-type feature_flags() :: #{feature_name() => feature_props_extended()}.

%% Properties once expanded during discovery; `provided_by' is the name
%% of the application which declared the feature flag.
-type feature_props_extended() :: #{desc => string(),
                                    doc_url => string(),
                                    stability => stability(),
                                    migration_fun => migration_fun_name(),
                                    depends_on => [feature_name()],
                                    provided_by => atom()}.

%% State of a feature flag: enabled (true), disabled (false) or
%% transitioning.
-type feature_state() :: boolean() | state_changing.

-type feature_states() :: #{feature_name() => feature_state()}.

%% Level of stability of a feature flag; currently informational only.
-type stability() :: stable | experimental.

%% {Module, Function} pair naming a migration function.
-type migration_fun_name() :: {Module :: atom(), Function :: atom()}.

%% Signature of a migration function. With context `enable' it performs
%% flag-specific verification/conversion and returns ok | {error, _}
%% (it must be idempotent); with context `is_enabled' it probes the
%% actual state and returns boolean() | undefined.
-type migration_fun() :: fun((feature_name(),
                              feature_props_extended(),
                              migration_fun_context())
                             -> ok | {error, any()} |       % context = enable
                                boolean() | undefined).     % context = is_enabled

-type migration_fun_context() :: enable | is_enabled.

-type registry_vsn() :: term().

-export_type([feature_flag_modattr/0,
              feature_props/0,
              feature_name/0,
              feature_flags/0,
              feature_props_extended/0,
              feature_state/0,
              feature_states/0,
              stability/0,
              migration_fun_name/0,
              migration_fun/0,
              migration_fun_context/0]).

-on_load(on_load/0).

-spec list() -> feature_flags().
%% @doc Lists all supported feature flags.

list() -> list(all).

-spec list(Which :: all | enabled | disabled) -> feature_flags().
%% @doc Lists all, enabled or disabled feature flags, depending on the
%% argument.

list(all)      -> rabbit_ff_registry:list(all);
list(enabled)  -> rabbit_ff_registry:list(enabled);
list(disabled) -> maps:filter(
                    fun(FeatureName, _) -> is_disabled(FeatureName) end,
                    list(all)).

-spec list(all | enabled | disabled, stability()) -> feature_flags().
%% @doc Same as list/1, additionally filtered by stability level.

list(Which, Stability)
  when Stability =:= stable orelse Stability =:= experimental ->
    maps:filter(fun(_, FeatureProps) ->
                        Stability =:= get_stability(FeatureProps)
                end, list(Which)).

-spec enable(feature_name() | [feature_name()]) -> ok |
                                                   {error, Reason :: any()}.
%% @doc Enables one feature flag, or a list of them in order, stopping at
%% the first flag which cannot be enabled (later flags are left
%% unchanged).

enable(FeatureName) when is_atom(FeatureName) ->
    rabbit_log_feature_flags:debug(
      "Feature flag `~s`: REQUEST TO ENABLE",
      [FeatureName]),
    case is_enabled(FeatureName) of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: already enabled",
              [FeatureName]),
            ok;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: not enabled, check if supported by cluster",
              [FeatureName]),
            %% The feature flag must be supported locally and remotely
            %% (i.e. by all members of the cluster).
            case is_supported(FeatureName) of
                true ->
                    rabbit_log_feature_flags:info(
                      "Feature flag `~s`: supported, attempt to enable...",
                      [FeatureName]),
                    do_enable(FeatureName);
                false ->
                    rabbit_log_feature_flags:error(
                      "Feature flag `~s`: not supported",
                      [FeatureName]),
                    {error, unsupported}
            end
    end;
enable(FeatureNames) when is_list(FeatureNames) ->
    with_feature_flags(FeatureNames, fun enable/1).

-spec enable_all() -> ok | {error, any()}.
%% @doc Enables all supported feature flags; stops at the first failure.

enable_all() ->
    with_feature_flags(maps:keys(list(all)), fun enable/1).

-spec disable(feature_name() | [feature_name()]) -> ok | {error, any()}.
%% @doc Disabling feature flags is currently unsupported: a single flag
%% always yields {error, unsupported}.

disable(FeatureName) when is_atom(FeatureName) ->
    {error, unsupported};
disable(FeatureNames) when is_list(FeatureNames) ->
    with_feature_flags(FeatureNames, fun disable/1).

-spec disable_all() -> ok | {error, any()}.
%% @doc Tries to disable all supported feature flags (see disable/1).

disable_all() ->
    with_feature_flags(maps:keys(list(all)), fun disable/1).

-spec with_feature_flags([feature_name()],
                         fun((feature_name()) -> ok | {error, any()})) ->
          ok | {error, any()}.
%% @private Applies Fun to each name in order, short-circuiting on the
%% first non-ok result.

with_feature_flags([FeatureName | Rest], Fun) ->
    case Fun(FeatureName) of
        ok    -> with_feature_flags(Rest, Fun);
        Error -> Error
    end;
with_feature_flags([], _) ->
    ok.

-spec is_supported(feature_name() | [feature_name()]) -> boolean().
%% @doc True when the flag(s) are supported both locally and by every
%% remote cluster member (logical AND of is_supported_locally/1 and
%% is_supported_remotely/1); false otherwise or on RPC timeout.

is_supported(FeatureNames) ->
    is_supported_locally(FeatureNames) andalso
    is_supported_remotely(FeatureNames).

-spec is_supported(feature_name() | [feature_name()], timeout()) ->
          boolean().
%% @doc Same as is_supported/1 with an explicit RPC timeout in
%% milliseconds.

is_supported(FeatureNames, Timeout) ->
    is_supported_locally(FeatureNames) andalso
    is_supported_remotely(FeatureNames, Timeout).

-spec is_supported_locally(feature_name() | [feature_name()]) -> boolean().
%% @doc True when the local node supports the given flag(s).

is_supported_locally(FeatureName) when is_atom(FeatureName) ->
    rabbit_ff_registry:is_supported(FeatureName);
is_supported_locally(FeatureNames) when is_list(FeatureNames) ->
    lists:all(fun(F) -> rabbit_ff_registry:is_supported(F) end, FeatureNames).

-spec is_supported_remotely(feature_name() | [feature_name()]) -> boolean().
%% @doc True when all remote nodes support the given flag(s); uses the
%% default timeout.

is_supported_remotely(FeatureNames) ->
    is_supported_remotely(FeatureNames, ?TIMEOUT).

-spec is_supported_remotely(feature_name() | [feature_name()], timeout()) -> boolean().
%% @doc True when all running remote nodes support the given flag(s);
%% an empty flag list or an isolated node trivially succeeds.

is_supported_remotely(FeatureName, Timeout) when is_atom(FeatureName) ->
    is_supported_remotely([FeatureName], Timeout);
is_supported_remotely([], _) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: skipping query for feature flags support as the "
      "given list is empty"),
    true;
is_supported_remotely(FeatureNames, Timeout) when is_list(FeatureNames) ->
    case running_remote_nodes() of
        [] ->
            rabbit_log_feature_flags:debug(
              "Feature flags: isolated node; skipping remote node query "
              "=> consider `~p` supported",
              [FeatureNames]),
            true;
        RemoteNodes ->
            rabbit_log_feature_flags:debug(
              "Feature flags: about to query these remote nodes about "
              "support for `~p`: ~p",
              [FeatureNames, RemoteNodes]),
            is_supported_remotely(RemoteNodes, FeatureNames, Timeout)
    end.

-spec is_supported_remotely([node()],
                            feature_name() | [feature_name()],
                            timeout()) -> boolean().
%% @doc True when each of the specified remote nodes supports the given
%% flag(s); stops querying at the first unsupportive node.

is_supported_remotely(_, [], _) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: skipping query for feature flags support as the "
      "given list is empty"),
    true;
is_supported_remotely([Node | Rest], FeatureNames, Timeout) ->
    case does_node_support(Node, FeatureNames, Timeout) of
        true ->
            is_supported_remotely(Rest, FeatureNames, Timeout);
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: stopping query for support for `~p` here",
              [FeatureNames]),
            false
    end;
is_supported_remotely([], FeatureNames, _) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: all running remote nodes support `~p`",
      [FeatureNames]),
    true.

-spec is_enabled(feature_name() | [feature_name()]) -> boolean().
%% @doc True when the given flag(s) are enabled; same as is_enabled/2
%% with the `blocking' mode.

is_enabled(FeatureNames) ->
    is_enabled(FeatureNames, blocking).

-spec is_enabled
(feature_name() | [feature_name()], blocking) ->
    boolean();
(feature_name() | [feature_name()], non_blocking) ->
    feature_state().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% enabled.
%%
%% When `blocking' is passed, the function waits (blocks) for the
%% state of a feature flag being disabled or enabled stabilizes before
%% returning its final state.
+%%
+%% When `non_blocking' is passed, the function returns immediately with
+%% the state of the feature flag (`true' if enabled, `false' otherwise)
+%% or `state_changing' if the state is being changed at the time of the
+%% call.
+%%
+%% @param FeatureNames The name or a list of names of the feature flag(s)
+%%   to be checked.
+%% @returns `true' if the set of feature flags is enabled,
+%% `false' if one of them is not, or `state_changing' if one of them
+%% is being worked on. Note that `state_changing' has precedence over
+%% `false', so if one is `false' and another one is `state_changing',
+%% `state_changing' is returned.
+
+is_enabled(FeatureNames, non_blocking) ->
+    is_enabled_nb(FeatureNames);
+is_enabled(FeatureNames, blocking) ->
+    case is_enabled_nb(FeatureNames) of
+        state_changing ->
+            %% Acquiring then immediately releasing the state-change
+            %% lock blocks this caller until the in-flight state change
+            %% (which holds the lock) completes; then we re-check.
+            global:set_lock(?FF_STATE_CHANGE_LOCK),
+            global:del_lock(?FF_STATE_CHANGE_LOCK),
+            is_enabled(FeatureNames, blocking);
+        IsEnabled ->
+            IsEnabled
+    end.
+
+%% @private Non-blocking check of one flag or a list of flags.
+%% For a list, folds over all flags: `state_changing' is sticky and
+%% wins over `false', which in turn wins over `true'.
+is_enabled_nb(FeatureName) when is_atom(FeatureName) ->
+    rabbit_ff_registry:is_enabled(FeatureName);
+is_enabled_nb(FeatureNames) when is_list(FeatureNames) ->
+    lists:foldl(
+      fun
+          (_F, state_changing = Acc) ->
+              Acc;
+          (F, false = Acc) ->
+              case rabbit_ff_registry:is_enabled(F) of
+                  state_changing -> state_changing;
+                  _              -> Acc
+              end;
+          (F, _) ->
+              rabbit_ff_registry:is_enabled(F)
+      end,
+      true, FeatureNames).
+
+-spec is_disabled(feature_name() | [feature_name()]) -> boolean().
+%% @doc
+%% Returns if a single feature flag or one feature flag in a set of
+%% feature flags is disabled.
+%%
+%% This is the same as negating the result of {@link is_enabled/1}.
+%%
+%% @param FeatureNames The name or a list of names of the feature flag(s)
+%%   to be checked.
+%% @returns `true' if one of the feature flags is disabled, or
+%% `false' if they are all enabled.
+
+is_disabled(FeatureNames) ->
+    is_disabled(FeatureNames, blocking).
+ +-spec is_disabled +(feature_name() | [feature_name()], blocking) -> + boolean(); +(feature_name() | [feature_name()], non_blocking) -> + feature_state(). +%% @doc +%% Returns if a single feature flag or one feature flag in a set of +%% feature flags is disabled. +%% +%% This is the same as negating the result of {@link is_enabled/2}, +%% except that `state_changing' is returned as is. +%% +%% See {@link is_enabled/2} for a description of the `blocking' and +%% `non_blocking' modes. +%% +%% @param FeatureNames The name or a list of names of the feature flag(s) +%% to be checked. +%% @returns `true' if one feature flag in the set of feature flags is +%% disabled, `false' if they are all enabled, or `state_changing' if +%% one of them is being worked on. Note that `state_changing' has +%% precedence over `true', so if one is `true' (i.e. disabled) and +%% another one is `state_changing', `state_changing' is returned. +%% +%% @see is_enabled/2 + +is_disabled(FeatureName, Blocking) -> + case is_enabled(FeatureName, Blocking) of + state_changing -> state_changing; + IsEnabled -> not IsEnabled + end. + +-spec info() -> ok. +%% @doc +%% Displays a table on stdout summing up the supported feature flags, +%% their state and various informations about them. + +info() -> + info(#{}). + +-spec info(#{color => boolean(), + lines => boolean(), + verbose => non_neg_integer()}) -> ok. +%% @doc +%% Displays a table on stdout summing up the supported feature flags, +%% their state and various informations about them. +%% +%% Supported options are: +%% <ul> +%% <li>`color': a boolean to indicate if colors should be used to +%% highlight some elements.</li> +%% <li>`lines': a boolean to indicate if table borders should be drawn +%% using ASCII lines instead of regular characters.</li> +%% <li>`verbose': a non-negative integer to specify the level of +%% verbosity.</li> +%% </ul> +%% +%% @param Options A map of various options to tune the displayed table. 
+
+info(Options) when is_map(Options) ->
+    rabbit_ff_extra:info(Options).
+
+-spec get_state(feature_name()) -> enabled | disabled | unavailable.
+%% @doc
+%% Returns the state of a feature flag.
+%%
+%% The possible states are:
+%% <ul>
+%% <li>`enabled': the feature flag is enabled.</li>
+%% <li>`disabled': the feature flag is supported by all nodes in the
+%% cluster but currently disabled.</li>
+%% <li>`unavailable': the feature flag is unsupported by at least one
+%% node in the cluster and can not be enabled for now.</li>
+%% </ul>
+%%
+%% @param FeatureName The name of the feature flag to check.
+%% @returns `enabled', `disabled' or `unavailable'.
+
+get_state(FeatureName) when is_atom(FeatureName) ->
+    IsEnabled = is_enabled(FeatureName),
+    IsSupported = is_supported(FeatureName),
+    case IsEnabled of
+        true  -> enabled;
+        false -> case IsSupported of
+                     true  -> disabled;
+                     false -> unavailable
+                 end
+    end.
+
+-spec get_stability(feature_name() | feature_props_extended()) -> stability().
+%% @doc
+%% Returns the stability of a feature flag.
+%%
+%% The possible stability levels are:
+%% <ul>
+%% <li>`stable': the feature flag is stable and will not change in future
+%% releases: it can be enabled in production.</li>
+%% <li>`experimental': the feature flag is experimental and may change in
+%% the future (without a guaranteed upgrade path): enabling it in
+%% production is not recommended.</li>
+%% </ul>
+%%
+%% @param FeatureName The name of the feature flag to check.
+%% @returns `stable' or `experimental'; `undefined' if the given
+%% feature flag name is unknown to the registry.
+%%
+%% NOTE(review): the atom clause below returns `undefined' for an
+%% unknown flag, which the -spec above only covers if stability()
+%% includes `undefined' -- verify against the type definition.
+
+get_stability(FeatureName) when is_atom(FeatureName) ->
+    case rabbit_ff_registry:get(FeatureName) of
+        undefined    -> undefined;
+        FeatureProps -> get_stability(FeatureProps)
+    end;
+get_stability(FeatureProps) when is_map(FeatureProps) ->
+    %% Stability defaults to `stable' when the property is absent.
+    maps:get(stability, FeatureProps, stable).
+
+%% -------------------------------------------------------------------
+%% Feature flags registry.
+%% -------------------------------------------------------------------
+
+-spec init() -> ok | no_return().
+%% @private
+
+init() ->
+    %% We want to make sure the `feature_flags` file exists once
+    %% RabbitMQ was started at least once. This is not required by
+    %% this module (it works fine if the file is missing) but it helps
+    %% external tools.
+    _ = ensure_enabled_feature_flags_list_file_exists(),
+
+    %% We also "list" supported feature flags. We are not interested in
+    %% that list, however, it triggers the first initialization of the
+    %% registry.
+    _ = list(all),
+    ok.
+
+-spec initialize_registry() -> ok | {error, any()} | no_return().
+%% @private
+%% @doc
+%% Initializes or reinitializes the registry.
+%%
+%% The registry is an Erlang module recompiled at runtime to hold the
+%% state of all supported feature flags.
+%%
+%% That Erlang module is called {@link rabbit_ff_registry}. The initial
+%% source code of this module simply calls this function so it is
+%% replaced by a proper registry.
+%%
+%% Once replaced, the registry contains the map of all supported feature
+%% flags and their state. This makes it very efficient to query a
+%% feature flag state or property.
+%%
+%% The registry is local to all RabbitMQ nodes.
+
+initialize_registry() ->
+    initialize_registry(#{}).
+
+-spec initialize_registry(feature_flags()) ->
+    ok | {error, any()} | no_return().
+%% @private
+%% @doc
+%% Initializes or reinitializes the registry.
+%%
+%% See {@link initialize_registry/0} for a description of the registry.
+%%
+%% This function takes a map of new supported feature flags (so their
+%% name and extended properties) to add to the existing known feature
+%% flags.
+ +initialize_registry(NewSupportedFeatureFlags) -> + %% The first step is to get the feature flag states: if this is the + %% first time we initialize it, we read the list from disk (the + %% `feature_flags` file). Otherwise we query the existing registry + %% before it is replaced. + RegistryInitialized = rabbit_ff_registry:is_registry_initialized(), + FeatureStates = case RegistryInitialized of + true -> + rabbit_ff_registry:states(); + false -> + EnabledFeatureNames = + read_enabled_feature_flags_list(), + list_of_enabled_feature_flags_to_feature_states( + EnabledFeatureNames) + end, + + %% We also record if the feature flags state was correctly written + %% to disk. Currently we don't use this information, but in the + %% future, we might want to retry the write if it failed so far. + %% + %% TODO: Retry to write the feature flags state if the first try + %% failed. + WrittenToDisk = case RegistryInitialized of + true -> + rabbit_ff_registry:is_registry_written_to_disk(); + false -> + true + end, + initialize_registry(NewSupportedFeatureFlags, + FeatureStates, + WrittenToDisk). + +-spec list_of_enabled_feature_flags_to_feature_states([feature_name()]) -> + feature_states(). + +list_of_enabled_feature_flags_to_feature_states(FeatureNames) -> + maps:from_list([{FeatureName, true} || FeatureName <- FeatureNames]). + +-spec initialize_registry(feature_flags(), + feature_states(), + boolean()) -> + ok | {error, any()} | no_return(). +%% @private +%% @doc +%% Initializes or reinitializes the registry. +%% +%% See {@link initialize_registry/0} for a description of the registry. +%% +%% This function takes a map of new supported feature flags (so their +%% name and extended properties) to add to the existing known feature +%% flags, a map of the new feature flag states (whether they are +%% enabled, disabled or `state_changing'), and a flag to indicate if the +%% feature flag states was recorded to disk. 
+%%
+%% The latter is used to block callers asking if a feature flag is
+%% enabled or disabled while its state is changing.
+
+initialize_registry(NewSupportedFeatureFlags,
+                    NewFeatureStates,
+                    WrittenToDisk) ->
+    Ret = maybe_initialize_registry(NewSupportedFeatureFlags,
+                                    NewFeatureStates,
+                                    WrittenToDisk),
+    case Ret of
+        ok      -> ok;
+        %% `restart' means another registry reload raced with ours;
+        %% recompute from scratch with the same inputs.
+        restart -> initialize_registry(NewSupportedFeatureFlags,
+                                       NewFeatureStates,
+                                       WrittenToDisk);
+        Error   -> Error
+    end.
+
+-spec maybe_initialize_registry(feature_flags(),
+                                feature_states(),
+                                boolean()) ->
+    ok | restart | {error, any()} | no_return().
+
+maybe_initialize_registry(NewSupportedFeatureFlags,
+                          NewFeatureStates,
+                          WrittenToDisk) ->
+    %% We save the version of the current registry before computing
+    %% the new one. This is used when we do the actual reload: if the
+    %% current registry was reloaded in the meantime, we need to restart
+    %% the computation to make sure we don't lose data.
+    RegistryVsn = registry_vsn(),
+
+    %% We take the feature flags already registered.
+    RegistryInitialized = rabbit_ff_registry:is_registry_initialized(),
+    KnownFeatureFlags1 = case RegistryInitialized of
+                             true  -> rabbit_ff_registry:list(all);
+                             false -> #{}
+                         end,
+
+    %% Query the list (it's a map to be exact) of known
+    %% supported feature flags. That list comes from the
+    %% `-rabbitmq_feature_flag().` module attributes exposed by all
+    %% currently loaded Erlang modules.
+    KnownFeatureFlags2 = query_supported_feature_flags(),
+
+    %% We merge the feature flags we already knew about
+    %% (KnownFeatureFlags1), those found in the loaded applications
+    %% (KnownFeatureFlags2) and those specified in arguments
+    %% (NewSupportedFeatureFlags). The latter come from remote nodes
+    %% usually: for example, they can come from plugins loaded on remote
+    %% node but the plugins are missing locally. In this case, we
+    %% consider those feature flags supported because there is no code
+    %% locally which would cause issues.
+ %% + %% It means that the list of feature flags only grows. we don't try + %% to clean it at some point because we want to remember about the + %% feature flags we saw (and their state). It should be fine because + %% that list should remain small. + KnownFeatureFlags = maps:merge(KnownFeatureFlags1, + KnownFeatureFlags2), + AllFeatureFlags = maps:merge(KnownFeatureFlags, + NewSupportedFeatureFlags), + + %% Next we want to update the feature states, based on the new + %% states passed as arguments. + FeatureStates0 = case RegistryInitialized of + true -> + maps:merge(rabbit_ff_registry:states(), + NewFeatureStates); + false -> + NewFeatureStates + end, + FeatureStates = maps:filter( + fun(_, true) -> true; + (_, state_changing) -> true; + (_, false) -> false + end, FeatureStates0), + + Proceed = does_registry_need_refresh(AllFeatureFlags, + FeatureStates, + WrittenToDisk), + + case Proceed of + true -> + rabbit_log_feature_flags:debug( + "Feature flags: (re)initialize registry (~p)", + [self()]), + T0 = erlang:timestamp(), + Ret = do_initialize_registry(RegistryVsn, + AllFeatureFlags, + FeatureStates, + WrittenToDisk), + T1 = erlang:timestamp(), + rabbit_log_feature_flags:debug( + "Feature flags: time to regen registry: ~p µs", + [timer:now_diff(T1, T0)]), + Ret; + false -> + rabbit_log_feature_flags:debug( + "Feature flags: registry already up-to-date, skipping init"), + ok + end. + +-spec does_registry_need_refresh(feature_flags(), + feature_states(), + boolean()) -> + boolean(). + +does_registry_need_refresh(AllFeatureFlags, + FeatureStates, + WrittenToDisk) -> + case rabbit_ff_registry:is_registry_initialized() of + true -> + %% Before proceeding with the actual + %% (re)initialization, let's see if there are any + %% changes. 
+ CurrentAllFeatureFlags = rabbit_ff_registry:list(all), + CurrentFeatureStates = rabbit_ff_registry:states(), + CurrentWrittenToDisk = + rabbit_ff_registry:is_registry_written_to_disk(), + + if + AllFeatureFlags =/= CurrentAllFeatureFlags -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: " + "yes, list of feature flags differs"), + true; + FeatureStates =/= CurrentFeatureStates -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: " + "yes, feature flag states differ"), + true; + WrittenToDisk =/= CurrentWrittenToDisk -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: " + "yes, \"written to disk\" state changed"), + true; + true -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: no"), + false + end; + false -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: " + "yes, first-time initialization"), + true + end. + +-spec do_initialize_registry(registry_vsn(), + feature_flags(), + feature_states(), + boolean()) -> + ok | restart | {error, any()} | no_return(). +%% @private + +do_initialize_registry(RegistryVsn, + AllFeatureFlags, + FeatureStates, + WrittenToDisk) -> + %% We log the state of those feature flags. + rabbit_log_feature_flags:info( + "Feature flags: list of feature flags found:"), + lists:foreach( + fun(FeatureName) -> + rabbit_log_feature_flags:info( + "Feature flags: [~s] ~s", + [case maps:is_key(FeatureName, FeatureStates) of + true -> + case maps:get(FeatureName, FeatureStates) of + true -> "x"; + state_changing -> "~" + end; + false -> + " " + end, + FeatureName]) + end, lists:sort(maps:keys(AllFeatureFlags))), + rabbit_log_feature_flags:info( + "Feature flags: feature flag states written to disk: ~s", + [case WrittenToDisk of + true -> "yes"; + false -> "no" + end]), + + %% We request the registry to be regenerated and reloaded with the + %% new state. 
+ regen_registry_mod(RegistryVsn, + AllFeatureFlags, + FeatureStates, + WrittenToDisk). + +-spec query_supported_feature_flags() -> feature_flags(). +%% @private + +-ifdef(TEST). +-define(PT_TESTSUITE_ATTRS, {?MODULE, testsuite_feature_flags_attrs}). + +inject_test_feature_flags(AttributesFromTestsuite) -> + rabbit_log_feature_flags:debug( + "Feature flags: injecting feature flags from testsuite: ~p", + [AttributesFromTestsuite]), + ok = persistent_term:put(?PT_TESTSUITE_ATTRS, AttributesFromTestsuite), + initialize_registry(). + +module_attributes_from_testsuite() -> + persistent_term:get(?PT_TESTSUITE_ATTRS, []). + +query_supported_feature_flags() -> + rabbit_log_feature_flags:debug( + "Feature flags: query feature flags in loaded applications " + "+ testsuite"), + T0 = erlang:timestamp(), + AttributesPerApp = rabbit_misc:rabbitmq_related_module_attributes( + rabbit_feature_flag), + AttributesFromTestsuite = module_attributes_from_testsuite(), + T1 = erlang:timestamp(), + rabbit_log_feature_flags:debug( + "Feature flags: time to find supported feature flags: ~p µs", + [timer:now_diff(T1, T0)]), + AllAttributes = AttributesPerApp ++ AttributesFromTestsuite, + prepare_queried_feature_flags(AllAttributes, #{}). +-else. +query_supported_feature_flags() -> + rabbit_log_feature_flags:debug( + "Feature flags: query feature flags in loaded applications"), + T0 = erlang:timestamp(), + AttributesPerApp = rabbit_misc:rabbitmq_related_module_attributes( + rabbit_feature_flag), + T1 = erlang:timestamp(), + rabbit_log_feature_flags:debug( + "Feature flags: time to find supported feature flags: ~p µs", + [timer:now_diff(T1, T0)]), + prepare_queried_feature_flags(AttributesPerApp, #{}). +-endif. 
+ +prepare_queried_feature_flags([{App, _Module, Attributes} | Rest], + AllFeatureFlags) -> + rabbit_log_feature_flags:debug( + "Feature flags: application `~s` has ~b feature flags", + [App, length(Attributes)]), + AllFeatureFlags1 = lists:foldl( + fun({FeatureName, FeatureProps}, AllFF) -> + merge_new_feature_flags(AllFF, + App, + FeatureName, + FeatureProps) + end, AllFeatureFlags, Attributes), + prepare_queried_feature_flags(Rest, AllFeatureFlags1); +prepare_queried_feature_flags([], AllFeatureFlags) -> + AllFeatureFlags. + +-spec merge_new_feature_flags(feature_flags(), + atom(), + feature_name(), + feature_props()) -> feature_flags(). +%% @private + +merge_new_feature_flags(AllFeatureFlags, App, FeatureName, FeatureProps) + when is_atom(FeatureName) andalso is_map(FeatureProps) -> + %% We expand the feature flag properties map with: + %% - the name of the application providing it: only informational + %% for now, but can be handy to understand that a feature flag + %% comes from a plugin. + FeatureProps1 = maps:put(provided_by, App, FeatureProps), + maps:merge(AllFeatureFlags, + #{FeatureName => FeatureProps1}). + +-spec regen_registry_mod(registry_vsn(), + feature_flags(), + feature_states(), + boolean()) -> + ok | restart | {error, any()} | no_return(). +%% @private + +regen_registry_mod(RegistryVsn, + AllFeatureFlags, + FeatureStates, + WrittenToDisk) -> + %% Here, we recreate the source code of the `rabbit_ff_registry` + %% module from scratch. + %% + %% IMPORTANT: We want both modules to have the exact same public + %% API in order to simplify the life of developers and their tools + %% (Dialyzer, completion, and so on). + + %% -module(rabbit_ff_registry). + ModuleAttr = erl_syntax:attribute( + erl_syntax:atom(module), + [erl_syntax:atom(rabbit_ff_registry)]), + ModuleForm = erl_syntax:revert(ModuleAttr), + %% -export([...]). 
+ ExportAttr = erl_syntax:attribute( + erl_syntax:atom(export), + [erl_syntax:list( + [erl_syntax:arity_qualifier( + erl_syntax:atom(F), + erl_syntax:integer(A)) + || {F, A} <- [{get, 1}, + {list, 1}, + {states, 0}, + {is_supported, 1}, + {is_enabled, 1}, + {is_registry_initialized, 0}, + {is_registry_written_to_disk, 0}]] + ) + ] + ), + ExportForm = erl_syntax:revert(ExportAttr), + %% get(_) -> ... + GetClauses = [erl_syntax:clause( + [erl_syntax:atom(FeatureName)], + [], + [erl_syntax:abstract(maps:get(FeatureName, + AllFeatureFlags))]) + || FeatureName <- maps:keys(AllFeatureFlags) + ], + GetUnknownClause = erl_syntax:clause( + [erl_syntax:variable("_")], + [], + [erl_syntax:atom(undefined)]), + GetFun = erl_syntax:function( + erl_syntax:atom(get), + GetClauses ++ [GetUnknownClause]), + GetFunForm = erl_syntax:revert(GetFun), + %% list(_) -> ... + ListAllBody = erl_syntax:abstract(AllFeatureFlags), + ListAllClause = erl_syntax:clause([erl_syntax:atom(all)], + [], + [ListAllBody]), + EnabledFeatureFlags = maps:filter( + fun(FeatureName, _) -> + maps:is_key(FeatureName, + FeatureStates) + andalso + maps:get(FeatureName, FeatureStates) + =:= + true + end, AllFeatureFlags), + ListEnabledBody = erl_syntax:abstract(EnabledFeatureFlags), + ListEnabledClause = erl_syntax:clause( + [erl_syntax:atom(enabled)], + [], + [ListEnabledBody]), + DisabledFeatureFlags = maps:filter( + fun(FeatureName, _) -> + not maps:is_key(FeatureName, + FeatureStates) + end, AllFeatureFlags), + ListDisabledBody = erl_syntax:abstract(DisabledFeatureFlags), + ListDisabledClause = erl_syntax:clause( + [erl_syntax:atom(disabled)], + [], + [ListDisabledBody]), + StateChangingFeatureFlags = maps:filter( + fun(FeatureName, _) -> + maps:is_key(FeatureName, + FeatureStates) + andalso + maps:get(FeatureName, FeatureStates) + =:= + state_changing + end, AllFeatureFlags), + ListStateChangingBody = erl_syntax:abstract(StateChangingFeatureFlags), + ListStateChangingClause = erl_syntax:clause( + 
[erl_syntax:atom(state_changing)], + [], + [ListStateChangingBody]), + ListFun = erl_syntax:function( + erl_syntax:atom(list), + [ListAllClause, + ListEnabledClause, + ListDisabledClause, + ListStateChangingClause]), + ListFunForm = erl_syntax:revert(ListFun), + %% states() -> ... + StatesBody = erl_syntax:abstract(FeatureStates), + StatesClause = erl_syntax:clause([], [], [StatesBody]), + StatesFun = erl_syntax:function( + erl_syntax:atom(states), + [StatesClause]), + StatesFunForm = erl_syntax:revert(StatesFun), + %% is_supported(_) -> ... + IsSupportedClauses = [erl_syntax:clause( + [erl_syntax:atom(FeatureName)], + [], + [erl_syntax:atom(true)]) + || FeatureName <- maps:keys(AllFeatureFlags) + ], + NotSupportedClause = erl_syntax:clause( + [erl_syntax:variable("_")], + [], + [erl_syntax:atom(false)]), + IsSupportedFun = erl_syntax:function( + erl_syntax:atom(is_supported), + IsSupportedClauses ++ [NotSupportedClause]), + IsSupportedFunForm = erl_syntax:revert(IsSupportedFun), + %% is_enabled(_) -> ... + IsEnabledClauses = [erl_syntax:clause( + [erl_syntax:atom(FeatureName)], + [], + [case maps:is_key(FeatureName, FeatureStates) of + true -> + erl_syntax:atom( + maps:get(FeatureName, FeatureStates)); + false -> + erl_syntax:atom(false) + end]) + || FeatureName <- maps:keys(AllFeatureFlags) + ], + NotEnabledClause = erl_syntax:clause( + [erl_syntax:variable("_")], + [], + [erl_syntax:atom(false)]), + IsEnabledFun = erl_syntax:function( + erl_syntax:atom(is_enabled), + IsEnabledClauses ++ [NotEnabledClause]), + IsEnabledFunForm = erl_syntax:revert(IsEnabledFun), + %% is_registry_initialized() -> ... + IsInitializedClauses = [erl_syntax:clause( + [], + [], + [erl_syntax:atom(true)]) + ], + IsInitializedFun = erl_syntax:function( + erl_syntax:atom(is_registry_initialized), + IsInitializedClauses), + IsInitializedFunForm = erl_syntax:revert(IsInitializedFun), + %% is_registry_written_to_disk() -> ... 
+ IsWrittenToDiskClauses = [erl_syntax:clause( + [], + [], + [erl_syntax:atom(WrittenToDisk)]) + ], + IsWrittenToDiskFun = erl_syntax:function( + erl_syntax:atom(is_registry_written_to_disk), + IsWrittenToDiskClauses), + IsWrittenToDiskFunForm = erl_syntax:revert(IsWrittenToDiskFun), + %% Compilation! + Forms = [ModuleForm, + ExportForm, + GetFunForm, + ListFunForm, + StatesFunForm, + IsSupportedFunForm, + IsEnabledFunForm, + IsInitializedFunForm, + IsWrittenToDiskFunForm], + maybe_log_registry_source_code(Forms), + CompileOpts = [return_errors, + return_warnings], + case compile:forms(Forms, CompileOpts) of + {ok, Mod, Bin, _} -> + load_registry_mod(RegistryVsn, Mod, Bin); + {error, Errors, Warnings} -> + rabbit_log_feature_flags:error( + "Feature flags: registry compilation:~n" + "Errors: ~p~n" + "Warnings: ~p", + [Errors, Warnings]), + {error, {compilation_failure, Errors, Warnings}} + end. + +maybe_log_registry_source_code(Forms) -> + case rabbit_prelaunch:get_context() of + #{log_feature_flags_registry := true} -> + rabbit_log_feature_flags:debug( + "== FEATURE FLAGS REGISTRY ==~n" + "~s~n" + "== END ==~n", + [erl_prettypr:format(erl_syntax:form_list(Forms))]); + _ -> + ok + end. + +-ifdef(TEST). +registry_loading_lock() -> ?FF_REGISTRY_LOADING_LOCK. +-endif. + +-spec load_registry_mod(registry_vsn(), atom(), binary()) -> + ok | restart | no_return(). +%% @private + +load_registry_mod(RegistryVsn, Mod, Bin) -> + rabbit_log_feature_flags:debug( + "Feature flags: registry module ready, loading it (~p)...", + [self()]), + FakeFilename = "Compiled and loaded by " ?MODULE_STRING, + %% Time to load the new registry, replacing the old one. We use a + %% lock here to synchronize concurrent reloads. 
+ global:set_lock(?FF_REGISTRY_LOADING_LOCK, [node()]), + rabbit_log_feature_flags:debug( + "Feature flags: acquired lock before reloading registry module (~p)", + [self()]), + %% We want to make sure that the old registry (not the one being + %% currently in use) is purged by the code server. It means no + %% process lingers on that old code. + %% + %% We use code:soft_purge() for that (meaning no process is killed) + %% and we wait in an infinite loop for that to succeed. + ok = purge_old_registry(Mod), + %% Now we can replace the currently loaded registry by the new one. + %% The code server takes care of marking the current registry as old + %% and load the new module in an atomic operation. + %% + %% Therefore there is no chance of a window where there is no + %% registry module available, causing the one on disk to be + %% reloaded. + Ret = case registry_vsn() of + RegistryVsn -> code:load_binary(Mod, FakeFilename, Bin); + OtherVsn -> {error, {restart, RegistryVsn, OtherVsn}} + end, + rabbit_log_feature_flags:debug( + "Feature flags: releasing lock after reloading registry module (~p)", + [self()]), + global:del_lock(?FF_REGISTRY_LOADING_LOCK, [node()]), + case Ret of + {module, _} -> + rabbit_log_feature_flags:debug( + "Feature flags: registry module loaded (vsn: ~p -> ~p)", + [RegistryVsn, registry_vsn()]), + ok; + {error, {restart, Expected, Current}} -> + rabbit_log_feature_flags:error( + "Feature flags: another registry module was loaded in the " + "meantime (expected old vsn: ~p, current vsn: ~p); " + "restarting the regen", + [Expected, Current]), + restart; + {error, Reason} -> + rabbit_log_feature_flags:error( + "Feature flags: failed to load registry module: ~p", + [Reason]), + throw({feature_flag_registry_reload_failure, Reason}) + end. + +-spec registry_vsn() -> registry_vsn(). +%% @private + +registry_vsn() -> + Attrs = rabbit_ff_registry:module_info(attributes), + proplists:get_value(vsn, Attrs, undefined). 
+ +purge_old_registry(Mod) -> + case code:is_loaded(Mod) of + {file, _} -> do_purge_old_registry(Mod); + false -> ok + end. + +do_purge_old_registry(Mod) -> + case code:soft_purge(Mod) of + true -> ok; + false -> do_purge_old_registry(Mod) + end. + +%% ------------------------------------------------------------------- +%% Feature flags state storage. +%% ------------------------------------------------------------------- + +-spec ensure_enabled_feature_flags_list_file_exists() -> ok | {error, any()}. +%% @private + +ensure_enabled_feature_flags_list_file_exists() -> + File = enabled_feature_flags_list_file(), + case filelib:is_regular(File) of + true -> ok; + false -> write_enabled_feature_flags_list([]) + end. + +-spec read_enabled_feature_flags_list() -> + [feature_name()] | no_return(). +%% @private + +read_enabled_feature_flags_list() -> + case try_to_read_enabled_feature_flags_list() of + {error, Reason} -> + File = enabled_feature_flags_list_file(), + throw({feature_flags_file_read_error, File, Reason}); + Ret -> + Ret + end. + +-spec try_to_read_enabled_feature_flags_list() -> + [feature_name()] | {error, any()}. +%% @private + +try_to_read_enabled_feature_flags_list() -> + File = enabled_feature_flags_list_file(), + case file:consult(File) of + {ok, [List]} -> + List; + {error, enoent} -> + %% If the file is missing, we consider the list of enabled + %% feature flags to be empty. + []; + {error, Reason} = Error -> + rabbit_log_feature_flags:error( + "Feature flags: failed to read the `feature_flags` " + "file at `~s`: ~s", + [File, file:format_error(Reason)]), + Error + end. + +-spec write_enabled_feature_flags_list([feature_name()]) -> + ok | no_return(). +%% @private + +write_enabled_feature_flags_list(FeatureNames) -> + case try_to_write_enabled_feature_flags_list(FeatureNames) of + {error, Reason} -> + File = enabled_feature_flags_list_file(), + throw({feature_flags_file_write_error, File, Reason}); + Ret -> + Ret + end. 
-spec try_to_write_enabled_feature_flags_list([feature_name()]) ->
    ok | {error, any()}.
%% @private
%% @doc Serializes `FeatureNames' (merged with unknown names already on
%% disk) to the `feature_flags' state file. Returns `ok' or
%% `{error, Reason}' from `file:write_file/2'.

try_to_write_enabled_feature_flags_list(FeatureNames) ->
    %% Before writing the new file, we read the existing one. If there
    %% are unknown feature flags in that file, we want to keep their
    %% state, even though they are unsupported at this time. It could be
    %% that a plugin was disabled in the meantime.
    %%
    %% FIXME: Lock this code to fix concurrent read/modify/write.
    PreviouslyEnabled = case try_to_read_enabled_feature_flags_list() of
                            {error, _} -> [],
                            List       -> List
                        end,
    %% Keep every previously-enabled name which is NOT supported
    %% locally; names supported locally are fully represented by the
    %% `FeatureNames' argument.
    FeatureNames1 = lists:foldl(
                      fun(Name, Acc) ->
                              case is_supported_locally(Name) of
                                  true  -> Acc;
                                  false -> [Name | Acc]
                              end
                      end, FeatureNames, PreviouslyEnabled),
    FeatureNames2 = lists:sort(FeatureNames1),

    File = enabled_feature_flags_list_file(),
    %% The file holds a single Erlang term (the sorted list), readable
    %% back with file:consult/1.
    Content = io_lib:format("~p.~n", [FeatureNames2]),
    %% TODO: If we fail to write the file, we should spawn a process
    %% to retry the operation.
    case file:write_file(File, Content) of
        ok ->
            ok;
        {error, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to write the `feature_flags` "
              "file at `~s`: ~s",
              [File, file:format_error(Reason)]),
            Error
    end.

-spec enabled_feature_flags_list_file() -> file:filename().
%% @doc
%% Returns the path to the file where the state of feature flags is stored.
%%
%% The path comes from the `feature_flags_file' application environment
%% variable of the `rabbit' application; a missing value is a setup
%% error and throws `feature_flags_file_not_set'.
%%
%% @returns the path to the file.

enabled_feature_flags_list_file() ->
    case application:get_env(rabbit, feature_flags_file) of
        {ok, Val} -> Val;
        undefined -> throw(feature_flags_file_not_set)
    end.

%% -------------------------------------------------------------------
%% Feature flags management: enabling.
%% -------------------------------------------------------------------

-spec do_enable(feature_name()) -> ok | {error, any()} | no_return().
%% @private
%% @doc Enables `FeatureName' cluster-wide: dependencies first, then the
%% migration function, then the enabled state on every node. On any
%% failure the flag is marked back as disabled.

do_enable(FeatureName) ->
    %% We mark this feature flag as "state changing" before doing the
    %% actual state change. We also take a global lock: this permits
    %% to block callers asking about a feature flag changing state.
    global:set_lock(?FF_STATE_CHANGE_LOCK),
    Ret = case mark_as_enabled(FeatureName, state_changing) of
              ok ->
                  case enable_dependencies(FeatureName, true) of
                      ok ->
                          case run_migration_fun(FeatureName, enable) of
                              ok ->
                                  mark_as_enabled(FeatureName, true);
                              {error, no_migration_fun} ->
                                  %% Flags without a migration function
                                  %% are enabled unconditionally.
                                  mark_as_enabled(FeatureName, true);
                              Error ->
                                  Error
                          end;
                      Error ->
                          Error
                  end;
              Error ->
                  Error
          end,
    %% Roll the state back to `false' on any failure so the flag never
    %% stays stuck in `state_changing'.
    case Ret of
        ok -> ok;
        _  -> mark_as_enabled(FeatureName, false)
    end,
    global:del_lock(?FF_STATE_CHANGE_LOCK),
    Ret.

-spec enable_locally(feature_name()) -> ok | {error, any()} | no_return().
%% @private
%% @doc Enables `FeatureName' on this node only (used during feature
%% flag states synchronization); no-op if it is already enabled.

enable_locally(FeatureName) when is_atom(FeatureName) ->
    case is_enabled(FeatureName) of
        true ->
            ok;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: enable locally (as part of feature "
              "flag states synchronization)",
              [FeatureName]),
            do_enable_locally(FeatureName)
    end.

-spec do_enable_locally(feature_name()) -> ok | {error, any()} | no_return().
%% @private
%% @doc Local-only counterpart of do_enable/1: dependencies, migration
%% function, then local enabled state. No global lock is taken here;
%% presumably the caller coordinates that — TODO confirm.

do_enable_locally(FeatureName) ->
    case enable_dependencies(FeatureName, false) of
        ok ->
            case run_migration_fun(FeatureName, enable) of
                ok ->
                    mark_as_enabled_locally(FeatureName, true);
                {error, no_migration_fun} ->
                    mark_as_enabled_locally(FeatureName, true);
                Error ->
                    Error
            end;
        Error ->
            Error
    end.

-spec enable_dependencies(feature_name(), boolean()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc Enables every feature flag listed in the `depends_on' property
%% of `FeatureName', either cluster-wide (`Everywhere' = true) or on
%% this node only.

enable_dependencies(FeatureName, Everywhere) ->
    Props = rabbit_ff_registry:get(FeatureName),
    Deps = maps:get(depends_on, Props, []),
    rabbit_log_feature_flags:debug(
      "Feature flag `~s`: enable dependencies: ~p",
      [FeatureName, Deps]),
    enable_dependencies(FeatureName, Deps, Everywhere).

-spec enable_dependencies(feature_name(), [feature_name()], boolean()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc Walks the dependency list, stopping at the first error.

enable_dependencies(_TopLevelFeatureName, [], _Everywhere) ->
    ok;
enable_dependencies(TopLevelFeatureName, [Dep | Deps], Everywhere) ->
    Result = case Everywhere of
                 true  -> enable(Dep);
                 false -> enable_locally(Dep)
             end,
    case Result of
        ok    -> enable_dependencies(TopLevelFeatureName, Deps, Everywhere);
        Error -> Error
    end.

-spec run_migration_fun(feature_name(), any()) ->
    any() | {error, any()}.
%% @private
%% @doc Looks up `FeatureName' in the registry and runs its migration
%% function (if any) with `Arg'.

run_migration_fun(FeatureName, Arg) ->
    run_migration_fun(FeatureName, rabbit_ff_registry:get(FeatureName), Arg).

%% @private
%% @doc Runs the `{Mod, Fun}' migration function declared in
%% `FeatureProps' as Mod:Fun(FeatureName, FeatureProps, Arg). Crashes in
%% the migration function are caught and reported as
%% `{error, {migration_fun_crash, Reason, Stacktrace}}'.

run_migration_fun(FeatureName, FeatureProps, Arg) ->
    case maps:get(migration_fun, FeatureProps, none) of
        none ->
            {error, no_migration_fun};
        {Mod, Fun}
          when is_atom(Mod) andalso is_atom(Fun) ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: run migration function ~p with arg: ~p",
              [FeatureName, Fun, Arg]),
            try
                Mod:Fun(FeatureName, FeatureProps, Arg)
            catch
                _:Reason:Stacktrace ->
                    rabbit_log_feature_flags:error(
                      "Feature flag `~s`: migration function crashed: ~p~n~p",
                      [FeatureName, Reason, Stacktrace]),
                    {error, {migration_fun_crash, Reason, Stacktrace}}
            end;
        Invalid ->
            rabbit_log_feature_flags:error(
              "Feature flag `~s`: invalid migration function: ~p",
              [FeatureName, Invalid]),
            {error, {invalid_migration_fun, Invalid}}
    end.
-spec mark_as_enabled(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private
%% @doc Records the state of `FeatureName' (true | false |
%% state_changing) locally first, then on all running remote nodes.

mark_as_enabled(FeatureName, IsEnabled) ->
    case mark_as_enabled_locally(FeatureName, IsEnabled) of
        ok ->
            mark_as_enabled_remotely(FeatureName, IsEnabled);
        Error ->
            Error
    end.

-spec mark_as_enabled_locally(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private
%% @doc Persists the new state of `FeatureName' to the `feature_flags'
%% file (when the enabled list actually changed) and refreshes the
%% registry with the new state.

mark_as_enabled_locally(FeatureName, IsEnabled) ->
    rabbit_log_feature_flags:info(
      "Feature flag `~s`: mark as enabled=~p",
      [FeatureName, IsEnabled]),
    EnabledFeatureNames = maps:keys(list(enabled)),
    NewEnabledFeatureNames = case IsEnabled of
                                 true ->
                                     [FeatureName | EnabledFeatureNames];
                                 false ->
                                     EnabledFeatureNames -- [FeatureName];
                                 state_changing ->
                                     %% Transient state: the on-disk
                                     %% list is left untouched.
                                     EnabledFeatureNames
                             end,
    %% Note: `EnabledFeatureNames' is already bound here, so this first
    %% clause matches only when the list is unchanged; in that case we
    %% keep the registry's current written-to-disk status instead of
    %% rewriting the file.
    WrittenToDisk = case NewEnabledFeatureNames of
                        EnabledFeatureNames ->
                            rabbit_ff_registry:is_registry_written_to_disk();
                        _ ->
                            ok =:= try_to_write_enabled_feature_flags_list(
                                     NewEnabledFeatureNames)
                    end,
    initialize_registry(#{},
                        #{FeatureName => IsEnabled},
                        WrittenToDisk).

-spec mark_as_enabled_remotely(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private
%% @doc Propagates the new state of `FeatureName' to all running remote
%% nodes, with the default timeout.

mark_as_enabled_remotely(FeatureName, IsEnabled) ->
    Nodes = running_remote_nodes(),
    mark_as_enabled_remotely(Nodes, FeatureName, IsEnabled, ?TIMEOUT).

-spec mark_as_enabled_remotely([node()],
                               feature_name(),
                               feature_state(),
                               timeout()) ->
    any() | {error, any()} | no_return().
%% @private
%% @doc Calls mark_as_enabled_locally/2 on every node in `Nodes' via
%% RPC, retrying failed nodes (after a 1s sleep) until the overall
%% `Timeout' budget is exhausted, at which point it throws
%% `{failed_to_mark_feature_flag_as_enabled_on_remote_nodes, ...}'.

mark_as_enabled_remotely([], _FeatureName, _IsEnabled, _Timeout) ->
    ok;
mark_as_enabled_remotely(Nodes, FeatureName, IsEnabled, Timeout) ->
    %% Use the monotonic clock to measure elapsed time: the wall clock
    %% (erlang:timestamp/0) can jump backward or forward (NTP, manual
    %% changes) and would corrupt the remaining-timeout computation.
    T0 = erlang:monotonic_time(microsecond),
    Rets = [{Node, rpc:call(Node,
                            ?MODULE,
                            mark_as_enabled_locally,
                            [FeatureName, IsEnabled],
                            Timeout)}
            || Node <- Nodes],
    FailedNodes = [Node || {Node, Ret} <- Rets, Ret =/= ok],
    case FailedNodes of
        [] ->
            rabbit_log_feature_flags:debug(
              "Feature flags: `~s` successfully marked as enabled=~p on all "
              "nodes", [FeatureName, IsEnabled]),
            ok;
        _ ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to mark feature flag `~s` as enabled=~p "
              "on the following nodes:", [FeatureName, IsEnabled]),
            [rabbit_log_feature_flags:error(
               "Feature flags: - ~s: ~p",
               [Node, Ret])
             || {Node, Ret} <- Rets,
                Ret =/= ok],
            Sleep = 1000,
            T1 = erlang:monotonic_time(microsecond),
            Duration = T1 - T0,
            %% Remaining budget in milliseconds, minus the upcoming
            %% sleep. An `infinity' timeout (allowed by the timeout()
            %% spec) stays infinite: the arithmetic below would crash
            %% on the atom.
            NewTimeout = case Timeout of
                             infinity ->
                                 infinity;
                             _ ->
                                 (Timeout * 1000 - Duration) div 1000 - Sleep
                         end,
            if
                NewTimeout =:= infinity orelse NewTimeout > 0 ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: retrying with a timeout of ~p "
                      "ms after sleeping for ~b ms",
                      [NewTimeout, Sleep]),
                    timer:sleep(Sleep),
                    mark_as_enabled_remotely(FailedNodes,
                                             FeatureName,
                                             IsEnabled,
                                             NewTimeout);
                true ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: not retrying; RPC went over the "
                      "~b milliseconds timeout", [Timeout]),
                    %% FIXME: Is crashing the process the best solution here?
                    throw(
                      {failed_to_mark_feature_flag_as_enabled_on_remote_nodes,
                       FeatureName, IsEnabled, FailedNodes})
            end
    end.

%% -------------------------------------------------------------------
%% Coordination with remote nodes.
%% -------------------------------------------------------------------

-spec remote_nodes() -> [node()].
%% @private
%% @doc All Mnesia database nodes except the local one.

remote_nodes() ->
    mnesia:system_info(db_nodes) -- [node()].

-spec running_remote_nodes() -> [node()].
%% @private
%% @doc All currently running Mnesia database nodes except the local one.

running_remote_nodes() ->
    mnesia:system_info(running_db_nodes) -- [node()].
%% @private
%% Returns the list of running Mnesia nodes as seen by `Node' (minus
%% the local node), or the `{badrpc, _}' tuple on RPC failure.

query_running_remote_nodes(Node, Timeout) ->
    case rpc:call(Node, mnesia, system_info, [running_db_nodes], Timeout) of
        {badrpc, _} = Error -> Error;
        Nodes -> Nodes -- [node()]
    end.

-spec does_node_support(node(), [feature_name()], timeout()) -> boolean().
%% @private
%% @doc Returns true if `Node' supports all of `FeatureNames'. Any
%% query error (including talking to a pre-feature-flags node) is
%% conservatively reported as "not supported".

does_node_support(Node, FeatureNames, Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: querying `~p` support on node ~s...",
      [FeatureNames, Node]),
    %% `Node' is bound, so the first clause matches only when the
    %% queried node IS the local node; then we skip the RPC entirely.
    Ret = case node() of
              Node ->
                  is_supported_locally(FeatureNames);
              _ ->
                  run_feature_flags_mod_on_remote_node(
                    Node, is_supported_locally, [FeatureNames], Timeout)
          end,
    case Ret of
        {error, pre_feature_flags_rabbitmq} ->
            %% See run_feature_flags_mod_on_remote_node/4 for
            %% an explanation why we consider this node a 3.7.x
            %% pre-feature-flags node.
            rabbit_log_feature_flags:debug(
              "Feature flags: no feature flags support on node `~s`, "
              "consider the feature flags unsupported: ~p",
              [Node, FeatureNames]),
            false;
        {error, Reason} ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while querying `~p` support on "
              "node ~s: ~p",
              [FeatureNames, Node, Reason]),
            false;
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` supports `~p`",
              [Node, FeatureNames]),
            true;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` does not support `~p`; "
              "stopping query here",
              [Node, FeatureNames]),
            false
    end.

-spec check_node_compatibility(node()) -> ok | {error, any()}.
%% @doc
%% Checks if a node is compatible with the local node.
%%
%% To be compatible, the following two conditions must be met:
%% <ol>
%% <li>feature flags enabled on the local node must be supported by the
%%   remote node</li>
%% <li>feature flags enabled on the remote node must be supported by the
%%   local node</li>
%% </ol>
%%
%% @param Node the name of the remote node to test.
%% @returns `ok' if they are compatible, `{error, Reason}' if they are not.
check_node_compatibility(Node) ->
    check_node_compatibility(Node, ?TIMEOUT).

-spec check_node_compatibility(node(), timeout()) -> ok | {error, any()}.
%% @doc
%% Checks if a node is compatible with the local node.
%%
%% See {@link check_node_compatibility/1} for the conditions required to
%% consider two nodes compatible.
%%
%% @param Node the name of the remote node to test.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `ok' if they are compatible, `{error, Reason}' if they are not.
%%
%% @see check_node_compatibility/1

check_node_compatibility(Node, Timeout) ->
    %% Before checking compatibility, we exchange feature flags from
    %% unknown Erlang applications. So we fetch remote feature flags
    %% from applications which are not loaded locally, and the opposite.
    %%
    %% The goal is that such feature flags are not blocking the
    %% communication between nodes because the code (which would
    %% break) is missing on those nodes. Therefore they should not be
    %% considered when determining compatibility.
    exchange_feature_flags_from_unknown_apps(Node, Timeout),

    %% FIXME:
    %% When we try to cluster two nodes, we get:
    %%   Feature flags: starting an unclustered node: all feature flags
    %%   will be enabled by default
    %% It should probably not be the case...

    %% We can now proceed with the actual compatibility check.
    rabbit_log_feature_flags:debug(
      "Feature flags: node `~s` compatibility check, part 1/2",
      [Node]),
    Part1 = local_enabled_feature_flags_is_supported_remotely(Node, Timeout),
    rabbit_log_feature_flags:debug(
      "Feature flags: node `~s` compatibility check, part 2/2",
      [Node]),
    Part2 = remote_enabled_feature_flags_is_supported_locally(Node, Timeout),
    case {Part1, Part2} of
        {true, true} ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` is compatible",
              [Node]),
            ok;
        {false, _} ->
            rabbit_log_feature_flags:error(
              "Feature flags: node `~s` is INCOMPATIBLE: "
              "feature flags enabled locally are not supported remotely",
              [Node]),
            {error, incompatible_feature_flags};
        {_, false} ->
            rabbit_log_feature_flags:error(
              "Feature flags: node `~s` is INCOMPATIBLE: "
              "feature flags enabled remotely are not supported locally",
              [Node]),
            {error, incompatible_feature_flags}
    end.

-spec is_node_compatible(node()) -> boolean().
%% @doc
%% Returns if a node is compatible with the local node.
%%
%% This function calls {@link check_node_compatibility/2} and returns
%% `true' the latter returns `ok'. Therefore this is the same code,
%% except that this function returns a boolean, but not the reason of
%% the incompatibility if any.
%%
%% @param Node the name of the remote node to test.
%% @returns `true' if they are compatible, `false' otherwise.

is_node_compatible(Node) ->
    is_node_compatible(Node, ?TIMEOUT).

-spec is_node_compatible(node(), timeout()) -> boolean().
%% @doc
%% Returns if a node is compatible with the local node.
%%
%% This function calls {@link check_node_compatibility/2} and returns
%% `true' the latter returns `ok'. Therefore this is the same code,
%% except that this function returns a boolean, but not the reason
%% of the incompatibility if any. If the RPC times out, nodes are
%% considered incompatible.
%%
%% @param Node the name of the remote node to test.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if they are compatible, `false' otherwise.

is_node_compatible(Node, Timeout) ->
    check_node_compatibility(Node, Timeout) =:= ok.

-spec local_enabled_feature_flags_is_supported_remotely(node(),
                                                        timeout()) ->
    boolean().
%% @private
%% @doc True if every feature flag enabled locally is supported by
%% `Node'.

local_enabled_feature_flags_is_supported_remotely(Node, Timeout) ->
    LocalEnabledFeatureNames = maps:keys(list(enabled)),
    is_supported_remotely([Node], LocalEnabledFeatureNames, Timeout).

-spec remote_enabled_feature_flags_is_supported_locally(node(),
                                                        timeout()) ->
    boolean().
%% @private
%% @doc True if every feature flag enabled on `Node' is supported
%% locally. A failed remote query counts as incompatible.

remote_enabled_feature_flags_is_supported_locally(Node, Timeout) ->
    case query_remote_feature_flags(Node, enabled, Timeout) of
        {error, _} ->
            false;
        RemoteEnabledFeatureFlags when is_map(RemoteEnabledFeatureFlags) ->
            RemoteEnabledFeatureNames = maps:keys(RemoteEnabledFeatureFlags),
            is_supported_locally(RemoteEnabledFeatureNames)
    end.

-spec run_feature_flags_mod_on_remote_node(node(),
                                           atom(),
                                           [term()],
                                           timeout()) ->
    term() | {error, term()}.
%% @private
%% @doc Runs ?MODULE:Function(Args...) on `Node' via RPC. An `undef'
%% crash on exactly this call is translated to
%% `{error, pre_feature_flags_rabbitmq}'.

run_feature_flags_mod_on_remote_node(Node, Function, Args, Timeout) ->
    case rpc:call(Node, ?MODULE, Function, Args, Timeout) of
        {badrpc, {'EXIT',
                  {undef,
                   [{?MODULE, Function, Args, []}
                    | _]}}} ->
            %% If rabbit_feature_flags:Function() is undefined
            %% on the remote node, we consider it to be a 3.7.x
            %% pre-feature-flags node.
            %%
            %% Theoretically, it could be an older version (3.6.x and
            %% older). But the RabbitMQ version consistency check
            %% (rabbit_misc:version_minor_equivalent/2) called from
            %% rabbit_mnesia:check_rabbit_consistency/2 already blocked
            %% this situation from happening before we reach this point.
            rabbit_log_feature_flags:debug(
              "Feature flags: ~s:~s~p unavailable on node `~s`: "
              "assuming it is a RabbitMQ 3.7.x pre-feature-flags node",
              [?MODULE, Function, Args, Node]),
            {error, pre_feature_flags_rabbitmq};
        {badrpc, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while running ~s:~s~p "
              "on node `~s`: ~p",
              [?MODULE, Function, Args, Node, Reason]),
            {error, Error};
        Ret ->
            Ret
    end.

-spec query_remote_feature_flags(node(),
                                 Which :: all | enabled | disabled,
                                 timeout()) ->
    feature_flags() | {error, any()}.
%% @private
%% @doc Fetches the `Which' subset of feature flags known to `Node'.
%% A pre-feature-flags node yields an empty map, not an error.

query_remote_feature_flags(Node, Which, Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: querying ~s feature flags on node `~s`...",
      [Which, Node]),
    case run_feature_flags_mod_on_remote_node(Node, list, [Which], Timeout) of
        {error, pre_feature_flags_rabbitmq} ->
            %% See run_feature_flags_mod_on_remote_node/4 for
            %% an explanation why we consider this node a 3.7.x
            %% pre-feature-flags node.
            rabbit_log_feature_flags:debug(
              "Feature flags: no feature flags support on node `~s`, "
              "consider the list of feature flags empty", [Node]),
            #{};
        {error, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while querying ~s feature flags "
              "on node `~s`: ~p",
              [Which, Node, Reason]),
            Error;
        RemoteFeatureFlags when is_map(RemoteFeatureFlags) ->
            RemoteFeatureNames = maps:keys(RemoteFeatureFlags),
            rabbit_log_feature_flags:debug(
              "Feature flags: querying ~s feature flags on node `~s` "
              "done; ~s features: ~p",
              [Which, Node, Which, RemoteFeatureNames]),
            RemoteFeatureFlags
    end.

-spec merge_feature_flags_from_unknown_apps(feature_flags()) ->
    ok | {error, any()}.
%% @private
%% @doc Registers the subset of `FeatureFlags' whose providing Erlang
%% application is not loaded locally (and which are not already
%% supported locally).

merge_feature_flags_from_unknown_apps(FeatureFlags)
  when is_map(FeatureFlags) ->
    LoadedApps = [App || {App, _, _} <- application:loaded_applications()],
    %% A feature flag is "from an unknown app" when it is unsupported
    %% here AND its provider application is not loaded here either.
    FromUnknownApp =
        fun(FeatureName, FeatureProps) ->
                not is_supported_locally(FeatureName)
                    andalso
                    not lists:member(maps:get(provided_by, FeatureProps),
                                     LoadedApps)
        end,
    FeatureFlagsFromUnknownApps = maps:filter(FromUnknownApp, FeatureFlags),
    case maps:size(FeatureFlagsFromUnknownApps) of
        0 ->
            ok;
        _ ->
            rabbit_log_feature_flags:debug(
              "Feature flags: register feature flags provided by applications "
              "unknown locally: ~p",
              [maps:keys(FeatureFlagsFromUnknownApps)]),
            initialize_registry(FeatureFlagsFromUnknownApps)
    end.

%% Two-way exchange of feature flags coming from applications unknown
%% on one side of the `Node' <-> local-node pair.
exchange_feature_flags_from_unknown_apps(Node, Timeout) ->
    %% The first step is to fetch feature flags from Erlang applications
    %% we don't know locally (they are loaded remotely, but not
    %% locally).
    fetch_remote_feature_flags_from_apps_unknown_locally(Node, Timeout),

    %% The next step is to do the opposite: push feature flags to remote
    %% nodes so they can register those from applications they don't
    %% know.
    push_local_feature_flags_from_apps_unknown_remotely(Node, Timeout).

%% Pull every feature flag `Node' knows about and register the ones we
%% don't.
fetch_remote_feature_flags_from_apps_unknown_locally(Node, Timeout) ->
    merge_feature_flags_from_unknown_apps(
      query_remote_feature_flags(Node, all, Timeout)).

%% Push every locally-known feature flag towards the cluster seen by
%% `Node'.
push_local_feature_flags_from_apps_unknown_remotely(Node, Timeout) ->
    push_local_feature_flags_from_apps_unknown_remotely(
      Node, list(all), Timeout).
%% Broadcasts `FeatureFlags' to every running remote node seen by
%% `Node', asking each to merge the ones it does not know. A node
%% without feature flags support is silently skipped (best effort).
push_local_feature_flags_from_apps_unknown_remotely(
  Node, FeatureFlags, Timeout)
  when map_size(FeatureFlags) > 0 ->
    case query_running_remote_nodes(Node, Timeout) of
        {badrpc, Reason} ->
            {error, Reason};
        Nodes ->
            lists:foreach(
              fun(N) ->
                      run_feature_flags_mod_on_remote_node(
                        N,
                        merge_feature_flags_from_unknown_apps,
                        [FeatureFlags],
                        Timeout)
              end, Nodes)
    end;
push_local_feature_flags_from_apps_unknown_remotely(_, _, _) ->
    ok.

-spec sync_feature_flags_with_cluster([node()], boolean()) ->
    ok | {error, any()} | no_return().
%% @private

sync_feature_flags_with_cluster(Nodes, NodeIsVirgin) ->
    sync_feature_flags_with_cluster(Nodes, NodeIsVirgin, ?TIMEOUT).

-spec sync_feature_flags_with_cluster([node()], boolean(), timeout()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc With an empty `Nodes' list (unclustered node): a virgin node
%% enables all (or the forced subset of) feature flags; an initialized
%% node keeps its current state. With a non-empty list: synchronize
%% states with one randomly-picked remote node.

sync_feature_flags_with_cluster([], NodeIsVirgin, _) ->
    verify_which_feature_flags_are_actually_enabled(),
    case NodeIsVirgin of
        true ->
            FeatureNames = get_forced_feature_flag_names(),
            case remote_nodes() of
                [] when FeatureNames =:= undefined ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: starting an unclustered node "
                      "for the first time: all feature flags will be "
                      "enabled by default"),
                    enable_all();
                [] ->
                    case FeatureNames of
                        [] ->
                            rabbit_log_feature_flags:debug(
                              "Feature flags: starting an unclustered "
                              "node for the first time: all feature "
                              "flags are forcibly left disabled from "
                              "the $RABBITMQ_FEATURE_FLAGS environment "
                              "variable"),
                            ok;
                        _ ->
                            rabbit_log_feature_flags:debug(
                              "Feature flags: starting an unclustered "
                              "node for the first time: only the "
                              "following feature flags specified in "
                              "the $RABBITMQ_FEATURE_FLAGS environment "
                              "variable will be enabled: ~p",
                              [FeatureNames]),
                            enable(FeatureNames)
                    end;
                _ ->
                    ok
            end;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: starting an unclustered node which is "
              "already initialized: all feature flags left in their "
              "current state"),
            ok
    end;
sync_feature_flags_with_cluster(Nodes, _, Timeout) ->
    verify_which_feature_flags_are_actually_enabled(),
    RemoteNodes = Nodes -- [node()],
    sync_feature_flags_with_cluster1(RemoteNodes, Timeout).

%% Step 1: enable locally whatever is already enabled on a randomly
%% picked remote node.
sync_feature_flags_with_cluster1([], _) ->
    ok;
sync_feature_flags_with_cluster1(RemoteNodes, Timeout) ->
    RandomRemoteNode = pick_one_node(RemoteNodes),
    rabbit_log_feature_flags:debug(
      "Feature flags: SYNCING FEATURE FLAGS with node `~s`...",
      [RandomRemoteNode]),
    case query_remote_feature_flags(RandomRemoteNode, enabled, Timeout) of
        {error, _} = Error ->
            Error;
        RemoteFeatureFlags ->
            RemoteFeatureNames = maps:keys(RemoteFeatureFlags),
            rabbit_log_feature_flags:debug(
              "Feature flags: enabling locally feature flags already "
              "enabled on node `~s`...",
              [RandomRemoteNode]),
            case do_sync_feature_flags_with_node(RemoteFeatureNames) of
                ok ->
                    sync_feature_flags_with_cluster2(
                      RandomRemoteNode, Timeout);
                Error ->
                    Error
            end
    end.

%% Step 2: the mirror operation — ask the remote node to enable what is
%% enabled locally. A pre-feature-flags remote node is not an error.
sync_feature_flags_with_cluster2(RandomRemoteNode, Timeout) ->
    LocalFeatureNames = maps:keys(list(enabled)),
    rabbit_log_feature_flags:debug(
      "Feature flags: enabling on node `~s` feature flags already "
      "enabled locally...",
      [RandomRemoteNode]),
    Ret = run_feature_flags_mod_on_remote_node(
            RandomRemoteNode,
            do_sync_feature_flags_with_node,
            [LocalFeatureNames],
            Timeout),
    case Ret of
        {error, pre_feature_flags_rabbitmq} -> ok;
        _ -> Ret
    end.

%% Picks a node uniformly at random.
pick_one_node(Nodes) ->
    RandomIndex = rand:uniform(length(Nodes)),
    lists:nth(RandomIndex, Nodes).

%% Enables each named feature flag locally, stopping at the first error.
do_sync_feature_flags_with_node([FeatureFlag | Rest]) ->
    case enable_locally(FeatureFlag) of
        ok -> do_sync_feature_flags_with_node(Rest);
        Error -> Error
    end;
do_sync_feature_flags_with_node([]) ->
    ok.

-spec get_forced_feature_flag_names() -> [feature_name()] | undefined.
%% @private
%% @doc
%% Returns the (possibly empty) list of feature flags the user want
%% to enable out-of-the-box when starting a node for the first time.
%%
%% Without this, the default is to enable all the supported feature
%% flags.
%%
%% There are two ways to specify that list:
%% <ol>
%% <li>Using the `$RABBITMQ_FEATURE_FLAGS' environment variable; for
%%   instance `RABBITMQ_FEATURE_FLAGS=quorum_queue,mnevis'.</li>
%% <li>Using the `forced_feature_flags_on_init' configuration parameter;
%%   for instance
%%   `{rabbit, [{forced_feature_flags_on_init, [quorum_queue, mnevis]}]}'.</li>
%% </ol>
%%
%% The environment variable has precedence over the configuration
%% parameter.

get_forced_feature_flag_names() ->
    ForcedFFs = case get_forced_feature_flag_names_from_env() of
                    undefined -> get_forced_feature_flag_names_from_config();
                    FromEnv   -> FromEnv
                end,
    case ForcedFFs of
        undefined ->
            %% No forced list anywhere: the caller falls back to
            %% enabling everything.
            ok;
        [] ->
            rabbit_log_feature_flags:info(
              "Feature flags: automatic enablement of feature "
              "flags disabled (i.e. none will be enabled "
              "automatically)");
        _ ->
            rabbit_log_feature_flags:info(
              "Feature flags: automatic enablement of feature "
              "flags limited to the following list: ~p", [ForcedFFs])
    end,
    ForcedFFs.

-spec get_forced_feature_flag_names_from_env() -> [feature_name()] | undefined.
%% @private
%% @doc Forced feature flags list from the prelaunch context (i.e. the
%% $RABBITMQ_FEATURE_FLAGS environment variable), if any.

get_forced_feature_flag_names_from_env() ->
    Context = rabbit_prelaunch:get_context(),
    case Context of
        #{forced_feature_flags_on_init := ForcedFFs}
          when is_list(ForcedFFs) ->
            ForcedFFs;
        _ ->
            undefined
    end.

-spec get_forced_feature_flag_names_from_config() -> [feature_name()] | undefined.
%% @private
%% @doc Forced feature flags list from the `forced_feature_flags_on_init'
%% application environment variable; anything but a list of atoms is
%% treated as unset.

get_forced_feature_flag_names_from_config() ->
    case application:get_env(rabbit, forced_feature_flags_on_init) of
        {ok, Value} when is_list(Value) ->
            case lists:all(fun erlang:is_atom/1, Value) of
                true  -> Value;
                false -> undefined
            end;
        {ok, _} ->
            undefined;
        undefined ->
            undefined
    end.
-spec verify_which_feature_flags_are_actually_enabled() ->
    ok | {error, any()} | no_return().
%% @private
%% @doc Cross-checks the on-disk enabled list against what each feature
%% flag's migration function reports, then repairs the file and the
%% registry if they diverge.

verify_which_feature_flags_are_actually_enabled() ->
    AllFeatureFlags = list(all),
    EnabledFeatureNames = read_enabled_feature_flags_list(),
    rabbit_log_feature_flags:debug(
      "Feature flags: double-checking feature flag states..."),
    %% In case the previous instance of the node failed to write the
    %% feature flags list file, we want to double-check the list of
    %% enabled feature flags read from disk. For each feature flag,
    %% we call the migration function to query if the feature flag is
    %% actually enabled.
    %%
    %% If a feature flag doesn't provide a migration function (or if the
    %% function fails), we keep the current state of the feature flag.
    List1 = maps:fold(
              fun(Name, Props, Acc) ->
                      Ret = run_migration_fun(Name, Props, is_enabled),
                      case Ret of
                          true ->
                              [Name | Acc];
                          false ->
                              Acc;
                          _ ->
                              %% No migration function, or it failed:
                              %% fall back to the registry's view.
                              MarkedAsEnabled = is_enabled(Name),
                              case MarkedAsEnabled of
                                  true -> [Name | Acc];
                                  false -> Acc
                              end
                      end
              end,
              [], AllFeatureFlags),
    RepairedEnabledFeatureNames = lists:sort(List1),
    %% We log the list of feature flags for which the state changes
    %% after the check above.
    WereEnabled = RepairedEnabledFeatureNames -- EnabledFeatureNames,
    WereDisabled = EnabledFeatureNames -- RepairedEnabledFeatureNames,
    case {WereEnabled, WereDisabled} of
        {[], []} -> ok;
        _ -> rabbit_log_feature_flags:warning(
               "Feature flags: the previous instance of this node "
               "must have failed to write the `feature_flags` "
               "file at `~s`:",
               [enabled_feature_flags_list_file()])
    end,
    case WereEnabled of
        [] -> ok;
        _ -> rabbit_log_feature_flags:warning(
               "Feature flags: - list of previously enabled "
               "feature flags now marked as such: ~p", [WereEnabled])
    end,
    case WereDisabled of
        [] -> ok;
        _ -> rabbit_log_feature_flags:warning(
               "Feature flags: - list of previously disabled "
               "feature flags now marked as such: ~p", [WereDisabled])
    end,
    %% Finally, if the new list of enabled feature flags is different
    %% than the one on disk, we write the new list and re-initialize the
    %% registry.
    case RepairedEnabledFeatureNames of
        EnabledFeatureNames ->
            %% `EnabledFeatureNames' is bound: this clause matches only
            %% when nothing changed.
            ok;
        _ ->
            rabbit_log_feature_flags:debug(
              "Feature flags: write the repaired list of enabled feature "
              "flags"),
            WrittenToDisk = ok =:= try_to_write_enabled_feature_flags_list(
                                     RepairedEnabledFeatureNames),
            initialize_registry(
              #{},
              list_of_enabled_feature_flags_to_feature_states(
                RepairedEnabledFeatureNames),
              WrittenToDisk)
    end.

-spec refresh_feature_flags_after_app_load([atom()]) ->
    ok | {error, any()} | no_return().
%% @doc Re-scans feature flags after the Erlang applications in `Apps'
%% were loaded: runs migration functions for flags those apps provide
%% that were previously known only via remote nodes, and shares
%% never-seen-before flags with the rest of the cluster.

refresh_feature_flags_after_app_load([]) ->
    ok;
refresh_feature_flags_after_app_load(Apps) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: new apps loaded: ~p -> refreshing feature flags",
      [Apps]),

    FeatureFlags0 = list(all),
    FeatureFlags1 = query_supported_feature_flags(),

    %% The following list contains all the feature flags this node
    %% learned about only because remote nodes have them. Now, the
    %% applications providing them are loaded locally as well.
    %% Therefore, we may run their migration function in case the state
    %% of this node needs it.
    AlreadySupportedFeatureNames = maps:keys(
                                     maps:filter(
                                       fun(_, #{provided_by := App}) ->
                                               lists:member(App, Apps)
                                       end, FeatureFlags0)),
    case AlreadySupportedFeatureNames of
        [] ->
            ok;
        _ ->
            rabbit_log_feature_flags:debug(
              "Feature flags: new apps loaded: feature flags already "
              "supported: ~p",
              [lists:sort(AlreadySupportedFeatureNames)])
    end,

    %% The following list contains all the feature flags no nodes in the
    %% cluster knew about before: this is the first time we see them in
    %% this instance of the cluster. We need to register them on all
    %% nodes.
    NewSupportedFeatureFlags = maps:filter(
                                 fun(FeatureName, _) ->
                                         not maps:is_key(FeatureName,
                                                         FeatureFlags0)
                                 end, FeatureFlags1),
    case maps:keys(NewSupportedFeatureFlags) of
        [] ->
            ok;
        NewSupportedFeatureNames ->
            rabbit_log_feature_flags:debug(
              "Feature flags: new apps loaded: new feature flags (unseen so "
              "far): ~p ",
              [lists:sort(NewSupportedFeatureNames)])
    end,

    case initialize_registry() of
        ok ->
            Ret = maybe_enable_locally_after_app_load(
                    AlreadySupportedFeatureNames),
            case Ret of
                ok ->
                    share_new_feature_flags_after_app_load(
                      NewSupportedFeatureFlags, ?TIMEOUT);
                Error ->
                    Error
            end;
        Error ->
            Error
    end.

%% For each flag which is marked enabled in the registry, run the
%% freshly-loaded local enable code path; stop at the first error.
maybe_enable_locally_after_app_load([]) ->
    ok;
maybe_enable_locally_after_app_load([FeatureName | Rest]) ->
    case is_enabled(FeatureName) of
        true ->
            case do_enable_locally(FeatureName) of
                ok -> maybe_enable_locally_after_app_load(Rest);
                Error -> Error
            end;
        false ->
            maybe_enable_locally_after_app_load(Rest)
    end.

%% Pushes the brand-new feature flags to the other cluster members.
share_new_feature_flags_after_app_load(FeatureFlags, Timeout) ->
    push_local_feature_flags_from_apps_unknown_remotely(
      node(), FeatureFlags, Timeout).
%% @doc Code-server `on_load' hook: returning anything but `ok' makes
%% the code server refuse to load this module.
on_load() ->
    %% The goal of this `on_load()` code server hook is to prevent this
    %% module from being loaded in an already running RabbitMQ node if
    %% the running version does not have the feature flags subsystem.
    %%
    %% This situation happens when an upgrade overwrites RabbitMQ files
    %% with the node still running. This is the case with many packages:
    %% files are updated on disk, then a post-install step takes care of
    %% restarting the service.
    %%
    %% The problem is that if many nodes in a cluster are updated at the
    %% same time, one node running the newer version might query feature
    %% flags on an old node where this module is already available
    %% (because files were already overwritten). This causes the query
    %% to report an unexpected answer and the newer node to refuse to
    %% start.
    %%
    %% However, when the module is executed outside of RabbitMQ (for
    %% debugging purpose or in the context of EUnit for instance), we
    %% want to allow the load. That's why we first check if RabbitMQ is
    %% actually running.
    case rabbit:is_running() of
        true ->
            %% RabbitMQ is running.
            %%
            %% Now we want to differentiate a pre-feature-flags node
            %% from one having the subsystem.
            %%
            %% To do that, we verify if the `feature_flags_file`
            %% application environment variable is defined. With a
            %% feature-flags-enabled node, this application environment
            %% variable is defined by rabbitmq-server(8).
            case application:get_env(rabbit, feature_flags_file) of
                {ok, _} ->
                    %% This is a feature-flags-enabled version. Loading
                    %% the module is permitted.
                    ok;
                _ ->
                    %% This is a pre-feature-flags version. We deny the
                    %% load and report why, possibly specifying the
                    %% version of RabbitMQ.
                    Vsn = case application:get_key(rabbit, vsn) of
                              {ok, V} -> V;
                              undefined -> "unknown version"
                          end,
                    %% Returning this string (a non-`ok' value) is what
                    %% makes the code server refuse the load; the string
                    %% becomes the reported reason.
                    "Refusing to load '" ?MODULE_STRING "' on this "
                    "node. It appears to be running a pre-feature-flags "
                    "version of RabbitMQ (" ++ Vsn ++ "). This is fine: "
                    "a newer version of RabbitMQ was deployed on this "
                    "node, but it was not restarted yet. This warning "
                    "is probably caused by a remote node querying this "
                    "node for its feature flags."
            end;
        false ->
            %% RabbitMQ is not running. Loading the module is permitted
            %% because this Erlang node will never be queried for its
            %% feature flags.
            ok
    end.
%% NOTE(review): the lines below are diff metadata from the original
%% patch dump, kept here as comments; a new source file
%% (deps/rabbit/src/rabbit_ff_extra.erl) starts after them.
%% diff --git a/deps/rabbit/src/rabbit_ff_extra.erl b/deps/rabbit/src/rabbit_ff_extra.erl
%% new file mode 100644
%% index 0000000000..f0728d491e
%% --- /dev/null
%% +++ b/deps/rabbit/src/rabbit_ff_extra.erl
%% @@ -0,0 +1,244 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% @copyright 2018-2020 VMware, Inc. or its affiliates.
%%
%% @doc
%% This module provides extra functions unused by the feature flags
%% subsystem core functionality.

-module(rabbit_ff_extra).

-include_lib("stdout_formatter/include/stdout_formatter.hrl").

-export([cli_info/0,
         info/1,
         info/2,
         format_error/1]).

-type cli_info() :: [cli_info_entry()].
%% A list of feature flags properties, formatted for the RabbitMQ CLI.

-type cli_info_entry() :: [{name, rabbit_feature_flags:feature_name()} |
                           {state, enabled | disabled | unavailable} |
                           {stability, rabbit_feature_flags:stability()} |
                           {provided_by, atom()} |
                           {desc, string()} |
                           {doc_url, string()}].
%% A list of properties for a single feature flag, formatted for the
%% RabbitMQ CLI.

-type info_options() :: #{colors => boolean(),
                          lines => boolean(),
                          verbose => non_neg_integer()}.
%% Options accepted by {@link info/1} and {@link info/2}.

-export_type([info_options/0]).

-spec cli_info() -> cli_info().
%% @doc
%% Returns a list of all feature flags properties.
+%%
+%% @returns the list of all feature flags properties.
+
+cli_info() ->
+    cli_info(rabbit_feature_flags:list(all)).
+
+-spec cli_info(rabbit_feature_flags:feature_flags()) -> cli_info().
+%% @doc
+%% Formats a map of feature flags and their properties into a list of
+%% feature flags properties as expected by the RabbitMQ CLI.
+%%
+%% @param FeatureFlags A map of feature flags.
+%% @returns the list of feature flags properties, created from the map
+%% specified in arguments.
+
+cli_info(FeatureFlags) ->
+    %% foldr over the sorted key list keeps the resulting proplists
+    %% ordered by feature flag name without a final reverse.
+    lists:foldr(
+      fun(FeatureName, Acc) ->
+              FeatureProps = maps:get(FeatureName, FeatureFlags),
+              State = rabbit_feature_flags:get_state(FeatureName),
+              Stability = rabbit_feature_flags:get_stability(FeatureProps),
+              App = maps:get(provided_by, FeatureProps),
+              Desc = maps:get(desc, FeatureProps, ""),
+              DocUrl = maps:get(doc_url, FeatureProps, ""),
+              FFInfo = [{name, FeatureName},
+                        {desc, unicode:characters_to_binary(Desc)},
+                        {doc_url, unicode:characters_to_binary(DocUrl)},
+                        {state, State},
+                        {stability, Stability},
+                        {provided_by, App}],
+              [FFInfo | Acc]
+      end, [], lists:sort(maps:keys(FeatureFlags))).
+
+-spec info(info_options()) -> ok.
+%% @doc
+%% Displays an array of all supported feature flags and their properties
+%% on `stdout'.
+%%
+%% @param Options Options to tune what is displayed and how.
+
+info(Options) ->
+    %% Two tables: one for stable feature flags, one for experimental ones.
+    StableFF = rabbit_feature_flags:list(all, stable),
+    case maps:size(StableFF) of
+        0 ->
+            ok;
+        _ ->
+            stdout_formatter:display(
+              #paragraph{content = "\n## Stable feature flags:",
+                         props = #{bold => true}}),
+            info(StableFF, Options)
+    end,
+    ExpFF = rabbit_feature_flags:list(all, experimental),
+    case maps:size(ExpFF) of
+        0 ->
+            ok;
+        _ ->
+            stdout_formatter:display(
+              #paragraph{content = "\n## Experimental feature flags:",
+                         props = #{bold => true}}),
+            info(ExpFF, Options)
+    end,
+    %% Only print the state legend when at least one table was shown.
+    case maps:size(StableFF) + maps:size(ExpFF) of
+        0 -> ok;
+        _ -> state_legend(Options)
+    end.
+
+-spec info(rabbit_feature_flags:feature_flags(), info_options()) -> ok.
+%% @doc
+%% Displays an array of feature flags and their properties on `stdout',
+%% based on the specified feature flags map.
+%%
+%% @param FeatureFlags Map of the feature flags to display.
+%% @param Options Options to tune what is displayed and how.
+
+info(FeatureFlags, Options) ->
+    Verbose = maps:get(verbose, Options, 0),
+    UseColors = use_colors(Options),
+    UseLines = use_lines(Options),
+    Title = case UseColors of
+                true  -> #{title => true};
+                false -> #{}
+            end,
+    Bold = case UseColors of
+               true  -> #{bold => true};
+               false -> #{}
+           end,
+    {Green, Yellow, Red} = case UseColors of
+                               true ->
+                                   {#{fg => green},
+                                    #{fg => yellow},
+                                    #{bold => true,
+                                      bg => red}};
+                               false ->
+                                   {#{}, #{}, #{}}
+                           end,
+    Border = case UseLines of
+                 true  -> #{border_drawing => ansi};
+                 false -> #{border_drawing => ascii}
+             end,
+    %% Table columns:
+    %% | Name | State | Provided by | Description
+    %%
+    %% where:
+    %% State = Enabled | Disabled | Unavailable (if a node doesn't
+    %% support it).
+    TableHeader = #row{cells = ["Name",
+                                "State",
+                                "Provided",
+                                "Description"],
+                       props = Title},
+    Nodes = lists:sort([node() | rabbit_feature_flags:remote_nodes()]),
+    Rows = lists:map(
+             fun(FeatureName) ->
+                     FeatureProps = maps:get(FeatureName, FeatureFlags),
+                     State0 = rabbit_feature_flags:get_state(FeatureName),
+                     {State, Color} = case State0 of
+                                          enabled ->
+                                              {"Enabled", Green};
+                                          disabled ->
+                                              {"Disabled", Yellow};
+                                          unavailable ->
+                                              {"Unavailable", Red}
+                                      end,
+                     App = maps:get(provided_by, FeatureProps),
+                     Desc = maps:get(desc, FeatureProps, ""),
+                     %% NOTE(review): per-node support is probed with a
+                     %% hard-coded 60-second timeout for each node.
+                     VFun = fun(Node) ->
+                                    Supported =
+                                        rabbit_feature_flags:does_node_support(
+                                          Node, [FeatureName], 60000),
+                                    {Label, LabelColor} =
+                                        case Supported of
+                                            true  -> {"supported", #{}};
+                                            false -> {"unsupported", Red}
+                                        end,
+                                    #paragraph{content =
+                                               [rabbit_misc:format(" ~s: ",
+                                                                   [Node]),
+                                                #paragraph{content = Label,
+                                                           props = LabelColor}]}
+                            end,
+                     %% Verbose mode appends a per-node support level
+                     %% listing to the description cell.
+                     ExtraLines = if
+                                      Verbose > 0 ->
+                                          NodesList = lists:join(
+                                                        "\n",
+                                                        lists:map(
+                                                          VFun, Nodes)),
+                                          ["\n\n",
+                                           "Per-node support level:\n"
+                                           | NodesList];
+                                      true ->
+                                          []
+                                  end,
+                     [#paragraph{content = FeatureName,
+                                 props = Bold},
+                      #paragraph{content = State,
+                                 props = Color},
+                      #paragraph{content = App},
+                      #paragraph{content = [Desc | ExtraLines]}]
+             end, lists:sort(maps:keys(FeatureFlags))),
+    io:format("~n", []),
+    stdout_formatter:display(#table{rows = [TableHeader | Rows],
+                                    props = Border#{cell_padding => {0, 1}}}).
+
+%% @private Whether ANSI colors should be used (default: true).
+use_colors(Options) ->
+    maps:get(colors, Options, true).
+
+%% @private Whether ANSI line-drawing should be used for table borders
+%% (default: true).
+use_lines(Options) ->
+    maps:get(lines, Options, true).
+
+%% @private
+%% Prints the meaning of the three feature flag states, using the same
+%% color scheme as the info tables.
+state_legend(Options) ->
+    UseColors = use_colors(Options),
+    {Green, Yellow, Red} = case UseColors of
+                               true ->
+                                   {#{fg => green},
+                                    #{fg => yellow},
+                                    #{bold => true,
+                                      bg => red}};
+                               false ->
+                                   {#{}, #{}, #{}}
+                           end,
+    Enabled = #paragraph{content = "Enabled", props = Green},
+    Disabled = #paragraph{content = "Disabled", props = Yellow},
+    Unavailable = #paragraph{content = "Unavailable", props = Red},
+    stdout_formatter:display(
+      #paragraph{
+         content =
+             ["\n",
+              "Possible states:\n",
+              " ", Enabled, ": The feature flag is enabled on all nodes\n",
+              " ", Disabled, ": The feature flag is disabled on all nodes\n",
+              " ", Unavailable, ": The feature flag cannot be enabled because"
+              " one or more nodes do not support it\n"]}).
+
+-spec format_error(any()) -> string().
+%% @doc
+%% Formats the error reason term so it can be presented to human beings.
+%%
+%% @param Reason The term in the `{error, Reason}' tuple.
+%% @returns the formatted error reason.
+
+format_error(Reason) ->
+    rabbit_misc:format("~p", [Reason]).
diff --git a/deps/rabbit/src/rabbit_ff_registry.erl b/deps/rabbit/src/rabbit_ff_registry.erl
new file mode 100644
index 0000000000..372971f949
--- /dev/null
+++ b/deps/rabbit/src/rabbit_ff_registry.erl
@@ -0,0 +1,189 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% @author The RabbitMQ team
+%% @copyright 2018-2020 VMware, Inc. or its affiliates.
+%%
+%% @doc
+%% This module exposes the API of the {@link rabbit_feature_flags}
+%% registry. The feature flags registry is an Erlang module, compiled at
+%% runtime, storing all the information about feature flags: which are
+%% supported, which are enabled, etc.
+%%
+%% Because it is compiled at runtime, the initial source code is mostly
+%% an API reference. What the initial module does is merely ask {@link
+%% rabbit_feature_flags} to generate the real registry.
+
+-module(rabbit_ff_registry).
+
+-export([get/1,
+         list/1,
+         states/0,
+         is_supported/1,
+         is_enabled/1,
+         is_registry_initialized/0,
+         is_registry_written_to_disk/0]).
+
+-ifdef(TEST).
+-on_load(on_load/0).
+-endif.
+
+-spec get(rabbit_feature_flags:feature_name()) ->
+    rabbit_feature_flags:feature_props() | undefined.
+%% @doc
+%% Returns the properties of a feature flag.
+%%
+%% Only the information stored in the local registry is used to answer
+%% this call.
+%%
+%% @param FeatureName The name of the feature flag.
+%% @returns the properties of the specified feature flag.
+
+get(FeatureName) ->
+    rabbit_feature_flags:initialize_registry(),
+    %% Initially, is_registry_initialized/0 always returns `false`
+    %% and this ?MODULE:get(FeatureName) is always called. The case
+    %% statement is here to please Dialyzer.
+    %%
+    %% NOTE(review): the fully-qualified ?MODULE: call is what makes the
+    %% recursion land in the regenerated registry module once
+    %% initialize_registry/0 has hot-swapped this stub.
+    case is_registry_initialized() of
+        false -> ?MODULE:get(FeatureName);
+        true  -> undefined
+    end.
+
+-spec list(all | enabled | disabled) -> rabbit_feature_flags:feature_flags().
+%% @doc
+%% Lists all, enabled or disabled feature flags, depending on the argument.
+%%
+%% Only the information stored in the local registry is used to answer
+%% this call.
+%%
+%% @param Which The group of feature flags to return: `all', `enabled' or
+%% `disabled'.
+%% @returns A map of selected feature flags.
+
+list(Which) ->
+    rabbit_feature_flags:initialize_registry(),
+    %% See get/1 for an explanation of the case statement below.
+    case is_registry_initialized() of
+        false -> ?MODULE:list(Which);
+        true  -> #{}
+    end.
+
+-spec states() -> rabbit_feature_flags:feature_states().
+%% @doc
+%% Returns the states of supported feature flags.
+%%
+%% Only the information stored in the local registry is used to answer
+%% this call.
+%%
+%% @returns A map of feature flag states.
+
+states() ->
+    rabbit_feature_flags:initialize_registry(),
+    %% See get/1 for an explanation of the case statement below.
+    case is_registry_initialized() of
+        false -> ?MODULE:states();
+        true  -> #{}
+    end.
+
+-spec is_supported(rabbit_feature_flags:feature_name()) -> boolean().
+%% @doc
+%% Returns if a feature flag is supported.
+%%
+%% Only the information stored in the local registry is used to answer
+%% this call.
+%%
+%% @param FeatureName The name of the feature flag to be checked.
+%% @returns `true' if the feature flag is supported, or `false'
+%% otherwise.
+
+is_supported(FeatureName) ->
+    rabbit_feature_flags:initialize_registry(),
+    %% See get/1 for an explanation of the case statement below.
+    case is_registry_initialized() of
+        false -> ?MODULE:is_supported(FeatureName);
+        true  -> false
+    end.
+
+-spec is_enabled(rabbit_feature_flags:feature_name()) -> boolean() | state_changing.
+%% @doc
+%% Returns if a feature flag is enabled or if its state is changing.
+%%
+%% Only the information stored in the local registry is used to answer
+%% this call.
+%%
+%% @param FeatureName The name of the feature flag to be checked.
+%% @returns `true' if the feature flag is enabled, `state_changing' if
+%% its state is transient, or `false' otherwise.
+
+is_enabled(FeatureName) ->
+    rabbit_feature_flags:initialize_registry(),
+    %% See get/1 for an explanation of the case statement below.
+    case is_registry_initialized() of
+        false -> ?MODULE:is_enabled(FeatureName);
+        true  -> false
+    end.
+
+-spec is_registry_initialized() -> boolean().
+%% @doc
+%% Indicates if the registry is initialized.
+%%
+%% The registry is considered initialized once the initial Erlang module
+%% was replaced by the copy compiled at runtime.
+%%
+%% @returns `true' when the module is the one compiled at runtime,
+%% `false' when the module is the initial one compiled from RabbitMQ
+%% source code.
+
+is_registry_initialized() ->
+    always_return_false().
+
+-spec is_registry_written_to_disk() -> boolean().
+%% @doc
+%% Indicates if the feature flags state was successfully persisted to disk.
+%%
+%% Note that on startup, {@link rabbit_feature_flags} tries to determine
+%% the state of each supported feature flag, regardless of the
+%% information on disk, to ensure maximum consistency. However, this can
+%% be done for feature flags supporting it only.
+%%
+%% @returns `true' if the state was successfully written to disk and
+%% the registry can be initialized from that during the next RabbitMQ
+%% startup, `false' if the write failed and the node might lose feature
+%% flags state on restart.
+
+is_registry_written_to_disk() ->
+    always_return_true().
+
+%% @private
+always_return_true() ->
+    %% This function is here to trick Dialyzer. We want some functions
+    %% in this initial on-disk registry to always return `true` or
+    %% `false`. However the generated registry will return actual
+    %% booleans. The `-spec()` correctly advertises a return type of
+    %% `boolean()`. But in the meantime, Dialyzer only knows about this
+    %% copy which, without the trick below, would always return either
+    %% `true` (e.g. in is_registry_written_to_disk/0) or `false` (e.g.
+    %% is_registry_initialized/0). This obviously causes some warnings
+    %% where the registry functions are used: Dialyzer believes that
+    %% e.g. matching the return value of is_registry_initialized/0
+    %% against `true` will never succeed.
+    %%
+    %% That's why this function makes a call which we know the result,
+    %% but not Dialyzer, to "create" that hard-coded `true` return
+    %% value.
+    erlang:get({?MODULE, always_undefined}) =:= undefined.
+
+%% @private
+always_return_false() ->
+    not always_return_true().
+
+-ifdef(TEST).
+%% @private
+%% Test-only load hook: logs when the initial (uninitialized) registry
+%% stub is (re)loaded, which helps debug registry regeneration in tests.
+%% The catch makes the hook harmless if the logger is unavailable.
+on_load() ->
+    _ = (catch rabbit_log_feature_flags:debug(
+                 "Feature flags: Loading initial (uninitialized) registry "
+                 "module (~p)",
+                 [self()])),
+    ok.
+-endif.
diff --git a/deps/rabbit/src/rabbit_fhc_helpers.erl b/deps/rabbit/src/rabbit_fhc_helpers.erl
new file mode 100644
index 0000000000..d310e84008
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fhc_helpers.erl
@@ -0,0 +1,45 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_fhc_helpers).
+
+-export([clear_read_cache/0]).
+
+-include("amqqueue.hrl").
+
+%% Clears the file_handle_cache read cache on this node: both the cache
+%% held by the file_handle_cache itself and the per-queue-process read
+%% caches. A no-op unless the `fhc_read_buffering' application
+%% environment key is set to `true'.
+clear_read_cache() ->
+    case application:get_env(rabbit, fhc_read_buffering) of
+        {ok, true} ->
+            file_handle_cache:clear_read_cache(),
+            clear_vhost_read_cache(rabbit_vhost:list_names());
+        _ -> %% undefined or {ok, false}
+            ok
+    end.
+
+%% Walks every vhost and clears the read cache of each of its queues.
+clear_vhost_read_cache([]) ->
+    ok;
+clear_vhost_read_cache([VHost | Rest]) ->
+    clear_queue_read_cache(rabbit_amqqueue:list(VHost)),
+    clear_vhost_read_cache(Rest).
+
+%% Asks each queue's master and mirror processes running on the current
+%% node to drop their process-local read cache.
+clear_queue_read_cache([]) ->
+    ok;
+clear_queue_read_cache([Q | Rest]) when ?is_amqqueue(Q) ->
+    MPid = amqqueue:get_pid(Q),
+    SPids = amqqueue:get_slave_pids(Q),
+    %% Limit the action to the current node.
+    Pids = [P || P <- [MPid | SPids], node(P) =:= node()],
+    %% This function is executed in the context of the backing queue
+    %% process because the read buffer is stored in the process
+    %% dictionary.
+    Fun = fun(_, State) ->
+                  _ = file_handle_cache:clear_process_read_cache(),
+                  State
+          end,
+    [rabbit_amqqueue:run_backing_queue(Pid, rabbit_variable_queue, Fun)
+     || Pid <- Pids],
+    clear_queue_read_cache(Rest).
diff --git a/deps/rabbit/src/rabbit_fifo.erl b/deps/rabbit/src/rabbit_fifo.erl
new file mode 100644
index 0000000000..51acfffd0d
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fifo.erl
@@ -0,0 +1,2124 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0.
If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_fifo). + +-behaviour(ra_machine). + +-compile(inline_list_funcs). +-compile(inline). +-compile({no_auto_import, [apply/3]}). + +-include("rabbit_fifo.hrl"). +-include_lib("rabbit_common/include/rabbit.hrl"). + +-export([ + init/1, + apply/3, + state_enter/2, + tick/2, + overview/1, + get_checked_out/4, + %% versioning + version/0, + which_module/1, + %% aux + init_aux/1, + handle_aux/6, + % queries + query_messages_ready/1, + query_messages_checked_out/1, + query_messages_total/1, + query_processes/1, + query_ra_indexes/1, + query_consumer_count/1, + query_consumers/1, + query_stat/1, + query_single_active_consumer/1, + query_in_memory_usage/1, + query_peek/2, + usage/1, + + zero/1, + + %% misc + dehydrate_state/1, + normalize/1, + + %% protocol helpers + make_enqueue/3, + make_register_enqueuer/1, + make_checkout/3, + make_settle/2, + make_return/2, + make_discard/2, + make_credit/4, + make_purge/0, + make_purge_nodes/1, + make_update_config/1, + make_garbage_collection/0 + ]). + +%% command records representing all the protocol actions that are supported +-record(enqueue, {pid :: option(pid()), + seq :: option(msg_seqno()), + msg :: raw_msg()}). +-record(register_enqueuer, {pid :: pid()}). +-record(checkout, {consumer_id :: consumer_id(), + spec :: checkout_spec(), + meta :: consumer_meta()}). +-record(settle, {consumer_id :: consumer_id(), + msg_ids :: [msg_id()]}). +-record(return, {consumer_id :: consumer_id(), + msg_ids :: [msg_id()]}). +-record(discard, {consumer_id :: consumer_id(), + msg_ids :: [msg_id()]}). +-record(credit, {consumer_id :: consumer_id(), + credit :: non_neg_integer(), + delivery_count :: non_neg_integer(), + drain :: boolean()}). +-record(purge, {}). +-record(purge_nodes, {nodes :: [node()]}). 
+-record(update_config, {config :: config()}). +-record(garbage_collection, {}). + +-opaque protocol() :: + #enqueue{} | + #register_enqueuer{} | + #checkout{} | + #settle{} | + #return{} | + #discard{} | + #credit{} | + #purge{} | + #purge_nodes{} | + #update_config{} | + #garbage_collection{}. + +-type command() :: protocol() | ra_machine:builtin_command(). +%% all the command types supported by ra fifo + +-type client_msg() :: delivery(). +%% the messages `rabbit_fifo' can send to consumers. + +-opaque state() :: #?MODULE{}. + +-export_type([protocol/0, + delivery/0, + command/0, + credit_mode/0, + consumer_tag/0, + consumer_meta/0, + consumer_id/0, + client_msg/0, + msg/0, + msg_id/0, + msg_seqno/0, + delivery_msg/0, + state/0, + config/0]). + +-spec init(config()) -> state(). +init(#{name := Name, + queue_resource := Resource} = Conf) -> + update_config(Conf, #?MODULE{cfg = #cfg{name = Name, + resource = Resource}}). + +update_config(Conf, State) -> + DLH = maps:get(dead_letter_handler, Conf, undefined), + BLH = maps:get(become_leader_handler, Conf, undefined), + RCI = maps:get(release_cursor_interval, Conf, ?RELEASE_CURSOR_EVERY), + Overflow = maps:get(overflow_strategy, Conf, drop_head), + MaxLength = maps:get(max_length, Conf, undefined), + MaxBytes = maps:get(max_bytes, Conf, undefined), + MaxMemoryLength = maps:get(max_in_memory_length, Conf, undefined), + MaxMemoryBytes = maps:get(max_in_memory_bytes, Conf, undefined), + DeliveryLimit = maps:get(delivery_limit, Conf, undefined), + Expires = maps:get(expires, Conf, undefined), + ConsumerStrategy = case maps:get(single_active_consumer_on, Conf, false) of + true -> + single_active; + false -> + competing + end, + Cfg = State#?MODULE.cfg, + RCISpec = {RCI, RCI}, + + LastActive = maps:get(created, Conf, undefined), + State#?MODULE{cfg = Cfg#cfg{release_cursor_interval = RCISpec, + dead_letter_handler = DLH, + become_leader_handler = BLH, + overflow_strategy = Overflow, + max_length = MaxLength, + max_bytes = 
MaxBytes, + max_in_memory_length = MaxMemoryLength, + max_in_memory_bytes = MaxMemoryBytes, + consumer_strategy = ConsumerStrategy, + delivery_limit = DeliveryLimit, + expires = Expires}, + last_active = LastActive}. + +zero(_) -> + 0. + +% msg_ids are scoped per consumer +% ra_indexes holds all raft indexes for enqueues currently on queue +-spec apply(ra_machine:command_meta_data(), command(), state()) -> + {state(), Reply :: term(), ra_machine:effects()} | + {state(), Reply :: term()}. +apply(Meta, #enqueue{pid = From, seq = Seq, + msg = RawMsg}, State00) -> + apply_enqueue(Meta, From, Seq, RawMsg, State00); +apply(_Meta, #register_enqueuer{pid = Pid}, + #?MODULE{enqueuers = Enqueuers0, + cfg = #cfg{overflow_strategy = Overflow}} = State0) -> + + State = case maps:is_key(Pid, Enqueuers0) of + true -> + %% if the enqueuer exits just echo the overflow state + State0; + false -> + State0#?MODULE{enqueuers = Enqueuers0#{Pid => #enqueuer{}}} + end, + Res = case is_over_limit(State) of + true when Overflow == reject_publish -> + reject_publish; + _ -> + ok + end, + {State, Res, [{monitor, process, Pid}]}; +apply(Meta, + #settle{msg_ids = MsgIds, consumer_id = ConsumerId}, + #?MODULE{consumers = Cons0} = State) -> + case Cons0 of + #{ConsumerId := Con0} -> + % need to increment metrics before completing as any snapshot + % states taken need to include them + complete_and_checkout(Meta, MsgIds, ConsumerId, + Con0, [], State); + _ -> + {State, ok} + + end; +apply(Meta, #discard{msg_ids = MsgIds, consumer_id = ConsumerId}, + #?MODULE{consumers = Cons0} = State0) -> + case Cons0 of + #{ConsumerId := Con0} -> + Discarded = maps:with(MsgIds, Con0#consumer.checked_out), + Effects = dead_letter_effects(rejected, Discarded, State0, []), + complete_and_checkout(Meta, MsgIds, ConsumerId, Con0, + Effects, State0); + _ -> + {State0, ok} + end; +apply(Meta, #return{msg_ids = MsgIds, consumer_id = ConsumerId}, + #?MODULE{consumers = Cons0} = State) -> + case Cons0 of + #{ConsumerId := 
#consumer{checked_out = Checked0}} -> + Returned = maps:with(MsgIds, Checked0), + return(Meta, ConsumerId, Returned, [], State); + _ -> + {State, ok} + end; +apply(Meta, #credit{credit = NewCredit, delivery_count = RemoteDelCnt, + drain = Drain, consumer_id = ConsumerId}, + #?MODULE{consumers = Cons0, + service_queue = ServiceQueue0, + waiting_consumers = Waiting0} = State0) -> + case Cons0 of + #{ConsumerId := #consumer{delivery_count = DelCnt} = Con0} -> + %% this can go below 0 when credit is reduced + C = max(0, RemoteDelCnt + NewCredit - DelCnt), + %% grant the credit + Con1 = Con0#consumer{credit = C}, + ServiceQueue = maybe_queue_consumer(ConsumerId, Con1, + ServiceQueue0), + Cons = maps:put(ConsumerId, Con1, Cons0), + {State1, ok, Effects} = + checkout(Meta, State0, + State0#?MODULE{service_queue = ServiceQueue, + consumers = Cons}, []), + Response = {send_credit_reply, messages_ready(State1)}, + %% by this point all checkouts for the updated credit value + %% should be processed so we can evaluate the drain + case Drain of + false -> + %% just return the result of the checkout + {State1, Response, Effects}; + true -> + Con = #consumer{credit = PostCred} = + maps:get(ConsumerId, State1#?MODULE.consumers), + %% add the outstanding credit to the delivery count + DeliveryCount = Con#consumer.delivery_count + PostCred, + Consumers = maps:put(ConsumerId, + Con#consumer{delivery_count = DeliveryCount, + credit = 0}, + State1#?MODULE.consumers), + Drained = Con#consumer.credit, + {CTag, _} = ConsumerId, + {State1#?MODULE{consumers = Consumers}, + %% returning a multi response with two client actions + %% for the channel to execute + {multi, [Response, {send_drained, {CTag, Drained}}]}, + Effects} + end; + _ when Waiting0 /= [] -> + %% there are waiting consuemrs + case lists:keytake(ConsumerId, 1, Waiting0) of + {value, {_, Con0 = #consumer{delivery_count = DelCnt}}, Waiting} -> + %% the consumer is a waiting one + %% grant the credit + C = max(0, RemoteDelCnt + 
NewCredit - DelCnt), + Con = Con0#consumer{credit = C}, + State = State0#?MODULE{waiting_consumers = + [{ConsumerId, Con} | Waiting]}, + {State, {send_credit_reply, messages_ready(State)}}; + false -> + {State0, ok} + end; + _ -> + %% credit for unknown consumer - just ignore + {State0, ok} + end; +apply(_, #checkout{spec = {dequeue, _}}, + #?MODULE{cfg = #cfg{consumer_strategy = single_active}} = State0) -> + {State0, {error, {unsupported, single_active_consumer}}}; +apply(#{index := Index, + system_time := Ts, + from := From} = Meta, #checkout{spec = {dequeue, Settlement}, + meta = ConsumerMeta, + consumer_id = ConsumerId}, + #?MODULE{consumers = Consumers} = State00) -> + %% dequeue always updates last_active + State0 = State00#?MODULE{last_active = Ts}, + %% all dequeue operations result in keeping the queue from expiring + Exists = maps:is_key(ConsumerId, Consumers), + case messages_ready(State0) of + 0 -> + {State0, {dequeue, empty}}; + _ when Exists -> + %% a dequeue using the same consumer_id isn't possible at this point + {State0, {dequeue, empty}}; + Ready -> + State1 = update_consumer(ConsumerId, ConsumerMeta, + {once, 1, simple_prefetch}, 0, + State0), + {success, _, MsgId, Msg, State2} = checkout_one(Meta, State1), + {State4, Effects1} = case Settlement of + unsettled -> + {_, Pid} = ConsumerId, + {State2, [{monitor, process, Pid}]}; + settled -> + %% immediately settle the checkout + {State3, _, Effects0} = + apply(Meta, make_settle(ConsumerId, [MsgId]), + State2), + {State3, Effects0} + end, + {Reply, Effects2} = + case Msg of + {RaftIdx, {Header, empty}} -> + %% TODO add here new log effect with reply + {'$ra_no_reply', + [reply_log_effect(RaftIdx, MsgId, Header, Ready - 1, From) | + Effects1]}; + _ -> + {{dequeue, {MsgId, Msg}, Ready-1}, Effects1} + + end, + + case evaluate_limit(Index, false, State0, State4, Effects2) of + {State, true, Effects} -> + update_smallest_raft_index(Index, Reply, State, Effects); + {State, false, Effects} -> + {State, 
Reply, Effects} + end + end; +apply(Meta, #checkout{spec = cancel, consumer_id = ConsumerId}, State0) -> + {State, Effects} = cancel_consumer(Meta, ConsumerId, State0, [], + consumer_cancel), + checkout(Meta, State0, State, Effects); +apply(Meta, #checkout{spec = Spec, meta = ConsumerMeta, + consumer_id = {_, Pid} = ConsumerId}, + State0) -> + Priority = get_priority_from_args(ConsumerMeta), + State1 = update_consumer(ConsumerId, ConsumerMeta, Spec, Priority, State0), + checkout(Meta, State0, State1, [{monitor, process, Pid}]); +apply(#{index := Index}, #purge{}, + #?MODULE{ra_indexes = Indexes0, + returns = Returns, + messages = Messages} = State0) -> + Total = messages_ready(State0), + Indexes1 = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes0, + [I || {_, {I, _}} <- lqueue:to_list(Messages)]), + Indexes = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes1, + [I || {_, {I, _}} <- lqueue:to_list(Returns)]), + + State1 = State0#?MODULE{ra_indexes = Indexes, + messages = lqueue:new(), + returns = lqueue:new(), + msg_bytes_enqueue = 0, + prefix_msgs = {0, [], 0, []}, + msg_bytes_in_memory = 0, + msgs_ready_in_memory = 0}, + Effects0 = [garbage_collection], + Reply = {purge, Total}, + {State, _, Effects} = evaluate_limit(Index, false, State0, + State1, Effects0), + update_smallest_raft_index(Index, Reply, State, Effects); +apply(_Meta, #garbage_collection{}, State) -> + {State, ok, [{aux, garbage_collection}]}; +apply(#{system_time := Ts} = Meta, {down, Pid, noconnection}, + #?MODULE{consumers = Cons0, + cfg = #cfg{consumer_strategy = single_active}, + waiting_consumers = Waiting0, + enqueuers = Enqs0} = State0) -> + Node = node(Pid), + %% if the pid refers to an active or cancelled consumer, + %% mark it as suspected and return it to the waiting queue + {State1, Effects0} = + maps:fold(fun({_, P} = Cid, C0, {S0, E0}) + when node(P) =:= Node -> + %% the consumer should be returned to waiting + %% and checked out messages should be returned + Effs = 
consumer_update_active_effects( + S0, Cid, C0, false, suspected_down, E0), + Checked = C0#consumer.checked_out, + Credit = increase_credit(C0, maps:size(Checked)), + {St, Effs1} = return_all(Meta, S0, Effs, + Cid, C0#consumer{credit = Credit}), + %% if the consumer was cancelled there is a chance it got + %% removed when returning hence we need to be defensive here + Waiting = case St#?MODULE.consumers of + #{Cid := C} -> + Waiting0 ++ [{Cid, C}]; + _ -> + Waiting0 + end, + {St#?MODULE{consumers = maps:remove(Cid, St#?MODULE.consumers), + waiting_consumers = Waiting, + last_active = Ts}, + Effs1}; + (_, _, S) -> + S + end, {State0, []}, Cons0), + WaitingConsumers = update_waiting_consumer_status(Node, State1, + suspected_down), + + %% select a new consumer from the waiting queue and run a checkout + State2 = State1#?MODULE{waiting_consumers = WaitingConsumers}, + {State, Effects1} = activate_next_consumer(State2, Effects0), + + %% mark any enquers as suspected + Enqs = maps:map(fun(P, E) when node(P) =:= Node -> + E#enqueuer{status = suspected_down}; + (_, E) -> E + end, Enqs0), + Effects = [{monitor, node, Node} | Effects1], + checkout(Meta, State0, State#?MODULE{enqueuers = Enqs}, Effects); +apply(#{system_time := Ts} = Meta, {down, Pid, noconnection}, + #?MODULE{consumers = Cons0, + enqueuers = Enqs0} = State0) -> + %% A node has been disconnected. 
This doesn't necessarily mean that + %% any processes on this node are down, they _may_ come back so here + %% we just mark them as suspected (effectively deactivated) + %% and return all checked out messages to the main queue for delivery to any + %% live consumers + %% + %% all pids for the disconnected node will be marked as suspected not just + %% the one we got the `down' command for + Node = node(Pid), + + {State, Effects1} = + maps:fold( + fun({_, P} = Cid, #consumer{checked_out = Checked0, + status = up} = C0, + {St0, Eff}) when node(P) =:= Node -> + Credit = increase_credit(C0, map_size(Checked0)), + C = C0#consumer{status = suspected_down, + credit = Credit}, + {St, Eff0} = return_all(Meta, St0, Eff, Cid, C), + Eff1 = consumer_update_active_effects(St, Cid, C, false, + suspected_down, Eff0), + {St, Eff1}; + (_, _, {St, Eff}) -> + {St, Eff} + end, {State0, []}, Cons0), + Enqs = maps:map(fun(P, E) when node(P) =:= Node -> + E#enqueuer{status = suspected_down}; + (_, E) -> E + end, Enqs0), + + % Monitor the node so that we can "unsuspect" these processes when the node + % comes back, then re-issue all monitors and discover the final fate of + % these processes + Effects = case maps:size(State#?MODULE.consumers) of + 0 -> + [{aux, inactive}, {monitor, node, Node}]; + _ -> + [{monitor, node, Node}] + end ++ Effects1, + checkout(Meta, State0, State#?MODULE{enqueuers = Enqs, + last_active = Ts}, Effects); +apply(Meta, {down, Pid, _Info}, State0) -> + {State, Effects} = handle_down(Meta, Pid, State0), + checkout(Meta, State0, State, Effects); +apply(Meta, {nodeup, Node}, #?MODULE{consumers = Cons0, + enqueuers = Enqs0, + service_queue = _SQ0} = State0) -> + %% A node we are monitoring has come back. 
+ %% If we have suspected any processes of being + %% down we should now re-issue the monitors for them to detect if they're + %% actually down or not + Monitors = [{monitor, process, P} + || P <- suspected_pids_for(Node, State0)], + + Enqs1 = maps:map(fun(P, E) when node(P) =:= Node -> + E#enqueuer{status = up}; + (_, E) -> E + end, Enqs0), + ConsumerUpdateActiveFun = consumer_active_flag_update_function(State0), + %% mark all consumers as up + {State1, Effects1} = + maps:fold(fun({_, P} = ConsumerId, C, {SAcc, EAcc}) + when (node(P) =:= Node) and + (C#consumer.status =/= cancelled) -> + EAcc1 = ConsumerUpdateActiveFun(SAcc, ConsumerId, + C, true, up, EAcc), + {update_or_remove_sub(Meta, ConsumerId, + C#consumer{status = up}, + SAcc), EAcc1}; + (_, _, Acc) -> + Acc + end, {State0, Monitors}, Cons0), + Waiting = update_waiting_consumer_status(Node, State1, up), + State2 = State1#?MODULE{ + enqueuers = Enqs1, + waiting_consumers = Waiting}, + {State, Effects} = activate_next_consumer(State2, Effects1), + checkout(Meta, State0, State, Effects); +apply(_, {nodedown, _Node}, State) -> + {State, ok}; +apply(Meta, #purge_nodes{nodes = Nodes}, State0) -> + {State, Effects} = lists:foldl(fun(Node, {S, E}) -> + purge_node(Meta, Node, S, E) + end, {State0, []}, Nodes), + {State, ok, Effects}; +apply(Meta, #update_config{config = Conf}, State) -> + checkout(Meta, State, update_config(Conf, State), []); +apply(_Meta, {machine_version, 0, 1}, V0State) -> + State = convert_v0_to_v1(V0State), + {State, ok, []}. 
+
+%% Convert a rabbit_fifo_v0 machine state into the v1 (#?MODULE{}) state
+%% record. Invoked by the {machine_version, 0, 1} command when the Ra
+%% machine is upgraded. All v0 field access goes through the
+%% rabbit_fifo_v0 accessors as the v0 record layout is opaque here.
+convert_v0_to_v1(V0State0) ->
+    V0State = rabbit_fifo_v0:normalize_for_v1(V0State0),
+    %% v0 kept ready messages in a map keyed by sequence number; v1 uses
+    %% an lqueue ordered by that sequence number
+    V0Msgs = rabbit_fifo_v0:get_field(messages, V0State),
+    V1Msgs = lqueue:from_list(lists:sort(maps:to_list(V0Msgs))),
+    V0Enqs = rabbit_fifo_v0:get_field(enqueuers, V0State),
+    V1Enqs = maps:map(
+               fun (_EPid, E) ->
+                       %% rebuild the enqueuer record positionally from the
+                       %% v0 tuple (element 1 is the record tag)
+                       #enqueuer{next_seqno = element(2, E),
+                                 pending = element(3, E),
+                                 status = element(4, E)}
+               end, V0Enqs),
+    V0Cons = rabbit_fifo_v0:get_field(consumers, V0State),
+    V1Cons = maps:map(
+               fun (_CId, C0) ->
+                       %% add the priority field (defaults to 0) appended to
+                       %% the v0 consumer tuple
+                       list_to_tuple(tuple_to_list(C0) ++ [0])
+               end, V0Cons),
+    %% v0 used a plain FIFO queue for the service queue; v1 uses a
+    %% priority queue
+    V0SQ = rabbit_fifo_v0:get_field(service_queue, V0State),
+    V1SQ = priority_queue:from_list(queue:to_list(V0SQ)),
+    Cfg = #cfg{name = rabbit_fifo_v0:get_cfg_field(name, V0State),
+               resource = rabbit_fifo_v0:get_cfg_field(resource, V0State),
+               release_cursor_interval = rabbit_fifo_v0:get_cfg_field(release_cursor_interval, V0State),
+               dead_letter_handler = rabbit_fifo_v0:get_cfg_field(dead_letter_handler, V0State),
+               become_leader_handler = rabbit_fifo_v0:get_cfg_field(become_leader_handler, V0State),
+               %% TODO: what if policy enabling reject_publish was applied before conversion?
+               overflow_strategy = drop_head,
+               max_length = rabbit_fifo_v0:get_cfg_field(max_length, V0State),
+               max_bytes = rabbit_fifo_v0:get_cfg_field(max_bytes, V0State),
+               consumer_strategy = rabbit_fifo_v0:get_cfg_field(consumer_strategy, V0State),
+               delivery_limit = rabbit_fifo_v0:get_cfg_field(delivery_limit, V0State),
+               max_in_memory_length = rabbit_fifo_v0:get_cfg_field(max_in_memory_length, V0State),
+               max_in_memory_bytes = rabbit_fifo_v0:get_cfg_field(max_in_memory_bytes, V0State)
+              },
+
+    #?MODULE{cfg = Cfg,
+             messages = V1Msgs,
+             next_msg_num = rabbit_fifo_v0:get_field(next_msg_num, V0State),
+             returns = rabbit_fifo_v0:get_field(returns, V0State),
+             enqueue_count = rabbit_fifo_v0:get_field(enqueue_count, V0State),
+             enqueuers = V1Enqs,
+             ra_indexes = rabbit_fifo_v0:get_field(ra_indexes, V0State),
+             release_cursors = rabbit_fifo_v0:get_field(release_cursors, V0State),
+             consumers = V1Cons,
+             service_queue = V1SQ,
+             prefix_msgs = rabbit_fifo_v0:get_field(prefix_msgs, V0State),
+             msg_bytes_enqueue = rabbit_fifo_v0:get_field(msg_bytes_enqueue, V0State),
+             msg_bytes_checkout = rabbit_fifo_v0:get_field(msg_bytes_checkout, V0State),
+             waiting_consumers = rabbit_fifo_v0:get_field(waiting_consumers, V0State),
+             msg_bytes_in_memory = rabbit_fifo_v0:get_field(msg_bytes_in_memory, V0State),
+             msgs_ready_in_memory = rabbit_fifo_v0:get_field(msgs_ready_in_memory, V0State)
+            }.
+
+%% Treat every process on Node as permanently down: fold handle_down/3
+%% over all enqueuer and consumer pids on that node, accumulating state
+%% and effects.
+purge_node(Meta, Node, State, Effects) ->
+    lists:foldl(fun(Pid, {S0, E0}) ->
+                        {S, E} = handle_down(Meta, Pid, S0),
+                        {S, E0 ++ E}
+                end, {State, Effects}, all_pids_for(Node, State)).
+
+%% any downs that are not noconnection
+%% The process is gone for good: drop its enqueuer entry (flushing any
+%% pending out-of-order enqueues into the queue first, since no further
+%% enqueues can arrive from this pid) and cancel all of its consumers,
+%% which returns their checked-out messages to the main queue.
+handle_down(Meta, Pid, #?MODULE{consumers = Cons0,
+                                enqueuers = Enqs0} = State0) ->
+    % Remove any enqueuer for the same pid and enqueue any pending messages
+    % This should be ok as we won't see any more enqueues from this pid
+    State1 = case maps:take(Pid, Enqs0) of
+                 {#enqueuer{pending = Pend}, Enqs} ->
+                     lists:foldl(fun ({_, RIdx, RawMsg}, S) ->
+                                         enqueue(RIdx, RawMsg, S)
+                                 end, State0#?MODULE{enqueuers = Enqs}, Pend);
+                 error ->
+                     State0
+             end,
+    {Effects1, State2} = handle_waiting_consumer_down(Pid, State1),
+    % return checked out messages to main queue
+    % Find the consumers for the down pid
+    DownConsumers = maps:keys(
+                      maps:filter(fun({_, P}, _) -> P =:= Pid end, Cons0)),
+    lists:foldl(fun(ConsumerId, {S, E}) ->
+                        cancel_consumer(Meta, ConsumerId, S, E, down)
+                end, {State2, Effects1}, DownConsumers).
+
+%% Return the fun used to emit consumer activity-status update effects.
+%% With competing consumers every consumer's active flag change is
+%% propagated; with single-active the returned fun is a no-op that leaves
+%% the effects list untouched (activation is signalled elsewhere).
+consumer_active_flag_update_function(#?MODULE{cfg = #cfg{consumer_strategy = competing}}) ->
+    fun(State, ConsumerId, Consumer, Active, ActivityStatus, Effects) ->
+            consumer_update_active_effects(State, ConsumerId, Consumer, Active,
+                                           ActivityStatus, Effects)
+    end;
+consumer_active_flag_update_function(#?MODULE{cfg = #cfg{consumer_strategy = single_active}}) ->
+    fun(_, _, _, _, _, Effects) ->
+            Effects
+    end.
+ +handle_waiting_consumer_down(_Pid, + #?MODULE{cfg = #cfg{consumer_strategy = competing}} = State) -> + {[], State}; +handle_waiting_consumer_down(_Pid, + #?MODULE{cfg = #cfg{consumer_strategy = single_active}, + waiting_consumers = []} = State) -> + {[], State}; +handle_waiting_consumer_down(Pid, + #?MODULE{cfg = #cfg{consumer_strategy = single_active}, + waiting_consumers = WaitingConsumers0} = State0) -> + % get cancel effects for down waiting consumers + Down = lists:filter(fun({{_, P}, _}) -> P =:= Pid end, + WaitingConsumers0), + Effects = lists:foldl(fun ({ConsumerId, _}, Effects) -> + cancel_consumer_effects(ConsumerId, State0, + Effects) + end, [], Down), + % update state to have only up waiting consumers + StillUp = lists:filter(fun({{_, P}, _}) -> P =/= Pid end, + WaitingConsumers0), + State = State0#?MODULE{waiting_consumers = StillUp}, + {Effects, State}. + +update_waiting_consumer_status(Node, + #?MODULE{waiting_consumers = WaitingConsumers}, + Status) -> + [begin + case node(Pid) of + Node -> + {ConsumerId, Consumer#consumer{status = Status}}; + _ -> + {ConsumerId, Consumer} + end + end || {{_, Pid} = ConsumerId, Consumer} <- WaitingConsumers, + Consumer#consumer.status =/= cancelled]. + +-spec state_enter(ra_server:ra_state(), state()) -> ra_machine:effects(). 
+state_enter(leader, #?MODULE{consumers = Cons, + enqueuers = Enqs, + waiting_consumers = WaitingConsumers, + cfg = #cfg{name = Name, + resource = Resource, + become_leader_handler = BLH}, + prefix_msgs = {0, [], 0, []} + }) -> + % return effects to monitor all current consumers and enqueuers + Pids = lists:usort(maps:keys(Enqs) + ++ [P || {_, P} <- maps:keys(Cons)] + ++ [P || {{_, P}, _} <- WaitingConsumers]), + Mons = [{monitor, process, P} || P <- Pids], + Nots = [{send_msg, P, leader_change, ra_event} || P <- Pids], + NodeMons = lists:usort([{monitor, node, node(P)} || P <- Pids]), + FHReservation = [{mod_call, rabbit_quorum_queue, file_handle_leader_reservation, [Resource]}], + Effects = Mons ++ Nots ++ NodeMons ++ FHReservation, + case BLH of + undefined -> + Effects; + {Mod, Fun, Args} -> + [{mod_call, Mod, Fun, Args ++ [Name]} | Effects] + end; +state_enter(eol, #?MODULE{enqueuers = Enqs, + consumers = Custs0, + waiting_consumers = WaitingConsumers0}) -> + Custs = maps:fold(fun({_, P}, V, S) -> S#{P => V} end, #{}, Custs0), + WaitingConsumers1 = lists:foldl(fun({{_, P}, V}, Acc) -> Acc#{P => V} end, + #{}, WaitingConsumers0), + AllConsumers = maps:merge(Custs, WaitingConsumers1), + [{send_msg, P, eol, ra_event} + || P <- maps:keys(maps:merge(Enqs, AllConsumers))] ++ + [{mod_call, rabbit_quorum_queue, file_handle_release_reservation, []}]; +state_enter(State, #?MODULE{cfg = #cfg{resource = _Resource}}) when State =/= leader -> + FHReservation = {mod_call, rabbit_quorum_queue, file_handle_other_reservation, []}, + [FHReservation]; + state_enter(_, _) -> + %% catch all as not handling all states + []. + + +-spec tick(non_neg_integer(), state()) -> ra_machine:effects(). 
+tick(Ts, #?MODULE{cfg = #cfg{name = Name, + resource = QName}, + msg_bytes_enqueue = EnqueueBytes, + msg_bytes_checkout = CheckoutBytes} = State) -> + case is_expired(Ts, State) of + true -> + [{mod_call, rabbit_quorum_queue, spawn_deleter, [QName]}]; + false -> + Metrics = {Name, + messages_ready(State), + num_checked_out(State), % checked out + messages_total(State), + query_consumer_count(State), % Consumers + EnqueueBytes, + CheckoutBytes}, + [{mod_call, rabbit_quorum_queue, + handle_tick, [QName, Metrics, all_nodes(State)]}] + end. + +-spec overview(state()) -> map(). +overview(#?MODULE{consumers = Cons, + enqueuers = Enqs, + release_cursors = Cursors, + enqueue_count = EnqCount, + msg_bytes_enqueue = EnqueueBytes, + msg_bytes_checkout = CheckoutBytes, + cfg = Cfg} = State) -> + Conf = #{name => Cfg#cfg.name, + resource => Cfg#cfg.resource, + release_cursor_interval => Cfg#cfg.release_cursor_interval, + dead_lettering_enabled => undefined =/= Cfg#cfg.dead_letter_handler, + max_length => Cfg#cfg.max_length, + max_bytes => Cfg#cfg.max_bytes, + consumer_strategy => Cfg#cfg.consumer_strategy, + max_in_memory_length => Cfg#cfg.max_in_memory_length, + max_in_memory_bytes => Cfg#cfg.max_in_memory_bytes, + expires => Cfg#cfg.expires, + delivery_limit => Cfg#cfg.delivery_limit + }, + #{type => ?MODULE, + config => Conf, + num_consumers => maps:size(Cons), + num_checked_out => num_checked_out(State), + num_enqueuers => maps:size(Enqs), + num_ready_messages => messages_ready(State), + num_messages => messages_total(State), + num_release_cursors => lqueue:len(Cursors), + release_cursors => [I || {_, I, _} <- lqueue:to_list(Cursors)], + release_cursor_enqueue_counter => EnqCount, + enqueue_message_bytes => EnqueueBytes, + checkout_message_bytes => CheckoutBytes}. + +-spec get_checked_out(consumer_id(), msg_id(), msg_id(), state()) -> + [delivery_msg()]. 
+get_checked_out(Cid, From, To, #?MODULE{consumers = Consumers}) -> + case Consumers of + #{Cid := #consumer{checked_out = Checked}} -> + [{K, snd(snd(maps:get(K, Checked)))} + || K <- lists:seq(From, To), + maps:is_key(K, Checked)]; + _ -> + [] + end. + +-spec version() -> pos_integer(). +version() -> 1. + +which_module(0) -> rabbit_fifo_v0; +which_module(1) -> ?MODULE. + +-record(aux_gc, {last_raft_idx = 0 :: ra:index()}). +-record(aux, {name :: atom(), + utilisation :: term(), + gc = #aux_gc{} :: #aux_gc{}}). + +init_aux(Name) when is_atom(Name) -> + %% TODO: catch specific exception throw if table already exists + ok = ra_machine_ets:create_table(rabbit_fifo_usage, + [named_table, set, public, + {write_concurrency, true}]), + Now = erlang:monotonic_time(micro_seconds), + #aux{name = Name, + utilisation = {inactive, Now, 1, 1.0}}. + +handle_aux(leader, _, garbage_collection, State, Log, _MacState) -> + ra_log_wal:force_roll_over(ra_log_wal), + {no_reply, State, Log}; +handle_aux(follower, _, garbage_collection, State, Log, MacState) -> + ra_log_wal:force_roll_over(ra_log_wal), + {no_reply, force_eval_gc(Log, MacState, State), Log}; +handle_aux(_RaState, cast, eval, Aux0, Log, _MacState) -> + {no_reply, Aux0, Log}; +handle_aux(_RaState, cast, Cmd, #aux{utilisation = Use0} = Aux0, + Log, _MacState) + when Cmd == active orelse Cmd == inactive -> + {no_reply, Aux0#aux{utilisation = update_use(Use0, Cmd)}, Log}; +handle_aux(_RaState, cast, tick, #aux{name = Name, + utilisation = Use0} = State0, + Log, MacState) -> + true = ets:insert(rabbit_fifo_usage, + {Name, utilisation(Use0)}), + Aux = eval_gc(Log, MacState, State0), + {no_reply, Aux, Log}; +handle_aux(_RaState, {call, _From}, {peek, Pos}, Aux0, + Log0, MacState) -> + case rabbit_fifo:query_peek(Pos, MacState) of + {ok, {Idx, {Header, empty}}} -> + %% need to re-hydrate from the log + {{_, _, {_, _, Cmd, _}}, Log} = ra_log:fetch(Idx, Log0), + #enqueue{msg = Msg} = Cmd, + {reply, {ok, {Header, Msg}}, Aux0, Log}; + 
{ok, {_Idx, {Header, Msg}}} -> + {reply, {ok, {Header, Msg}}, Aux0, Log0}; + Err -> + {reply, Err, Aux0, Log0} + end. + + +eval_gc(Log, #?MODULE{cfg = #cfg{resource = QR}} = MacState, + #aux{gc = #aux_gc{last_raft_idx = LastGcIdx} = Gc} = AuxState) -> + {Idx, _} = ra_log:last_index_term(Log), + {memory, Mem} = erlang:process_info(self(), memory), + case messages_total(MacState) of + 0 when Idx > LastGcIdx andalso + Mem > ?GC_MEM_LIMIT_B -> + garbage_collect(), + {memory, MemAfter} = erlang:process_info(self(), memory), + rabbit_log:debug("~s: full GC sweep complete. " + "Process memory changed from ~.2fMB to ~.2fMB.", + [rabbit_misc:rs(QR), Mem/?MB, MemAfter/?MB]), + AuxState#aux{gc = Gc#aux_gc{last_raft_idx = Idx}}; + _ -> + AuxState + end. + +force_eval_gc(Log, #?MODULE{cfg = #cfg{resource = QR}}, + #aux{gc = #aux_gc{last_raft_idx = LastGcIdx} = Gc} = AuxState) -> + {Idx, _} = ra_log:last_index_term(Log), + {memory, Mem} = erlang:process_info(self(), memory), + case Idx > LastGcIdx of + true -> + garbage_collect(), + {memory, MemAfter} = erlang:process_info(self(), memory), + rabbit_log:debug("~s: full GC sweep complete. " + "Process memory changed from ~.2fMB to ~.2fMB.", + [rabbit_misc:rs(QR), Mem/?MB, MemAfter/?MB]), + AuxState#aux{gc = Gc#aux_gc{last_raft_idx = Idx}}; + false -> + AuxState + end. + +%%% Queries + +query_messages_ready(State) -> + messages_ready(State). + +query_messages_checked_out(#?MODULE{consumers = Consumers}) -> + maps:fold(fun (_, #consumer{checked_out = C}, S) -> + maps:size(C) + S + end, 0, Consumers). + +query_messages_total(State) -> + messages_total(State). + +query_processes(#?MODULE{enqueuers = Enqs, consumers = Cons0}) -> + Cons = maps:fold(fun({_, P}, V, S) -> S#{P => V} end, #{}, Cons0), + maps:keys(maps:merge(Enqs, Cons)). + + +query_ra_indexes(#?MODULE{ra_indexes = RaIndexes}) -> + RaIndexes. 
+ +query_consumer_count(#?MODULE{consumers = Consumers, + waiting_consumers = WaitingConsumers}) -> + Up = maps:filter(fun(_ConsumerId, #consumer{status = Status}) -> + Status =/= suspected_down + end, Consumers), + maps:size(Up) + length(WaitingConsumers). + +query_consumers(#?MODULE{consumers = Consumers, + waiting_consumers = WaitingConsumers, + cfg = #cfg{consumer_strategy = ConsumerStrategy}} = State) -> + ActiveActivityStatusFun = + case ConsumerStrategy of + competing -> + fun(_ConsumerId, + #consumer{status = Status}) -> + case Status of + suspected_down -> + {false, Status}; + _ -> + {true, Status} + end + end; + single_active -> + SingleActiveConsumer = query_single_active_consumer(State), + fun({Tag, Pid} = _Consumer, _) -> + case SingleActiveConsumer of + {value, {Tag, Pid}} -> + {true, single_active}; + _ -> + {false, waiting} + end + end + end, + FromConsumers = + maps:fold(fun (_, #consumer{status = cancelled}, Acc) -> + Acc; + ({Tag, Pid}, #consumer{meta = Meta} = Consumer, Acc) -> + {Active, ActivityStatus} = + ActiveActivityStatusFun({Tag, Pid}, Consumer), + maps:put({Tag, Pid}, + {Pid, Tag, + maps:get(ack, Meta, undefined), + maps:get(prefetch, Meta, undefined), + Active, + ActivityStatus, + maps:get(args, Meta, []), + maps:get(username, Meta, undefined)}, + Acc) + end, #{}, Consumers), + FromWaitingConsumers = + lists:foldl(fun ({_, #consumer{status = cancelled}}, Acc) -> + Acc; + ({{Tag, Pid}, #consumer{meta = Meta} = Consumer}, Acc) -> + {Active, ActivityStatus} = + ActiveActivityStatusFun({Tag, Pid}, Consumer), + maps:put({Tag, Pid}, + {Pid, Tag, + maps:get(ack, Meta, undefined), + maps:get(prefetch, Meta, undefined), + Active, + ActivityStatus, + maps:get(args, Meta, []), + maps:get(username, Meta, undefined)}, + Acc) + end, #{}, WaitingConsumers), + maps:merge(FromConsumers, FromWaitingConsumers). 
+
+
+%% Return the id of the single active consumer when this queue runs the
+%% single-active-consumer strategy; `disabled' otherwise.
+query_single_active_consumer(#?MODULE{cfg = #cfg{consumer_strategy = single_active},
+                                      consumers = Consumers}) ->
+    case maps:keys(Consumers) of
+        [] ->
+            {error, no_value};
+        [ActiveCid] ->
+            {value, ActiveCid};
+        _Many ->
+            {error, illegal_size}
+    end;
+query_single_active_consumer(_) ->
+    disabled.
+
+%% {ready message count, consumer count}
+query_stat(#?MODULE{consumers = Consumers} = State) ->
+    {messages_ready(State), map_size(Consumers)}.
+
+%% {in-memory ready message count, in-memory message bytes}
+query_in_memory_usage(#?MODULE{msg_bytes_in_memory = Bytes,
+                               msgs_ready_in_memory = Length}) ->
+    {Length, Bytes}.
+
+%% Peek at the message at 1-based position Pos without consuming it.
+query_peek(1, State) ->
+    case take_next_msg(State) of
+        empty ->
+            {error, no_message_at_pos};
+        {{_Seq, IdxMsg}, _State} ->
+            {ok, IdxMsg}
+    end;
+query_peek(Pos, State0) when Pos > 1 ->
+    case take_next_msg(State0) of
+        empty ->
+            {error, no_message_at_pos};
+        {_Msg, State} ->
+            query_peek(Pos - 1, State)
+    end.
+
+
+-spec usage(atom()) -> float().
+usage(Name) when is_atom(Name) ->
+    %% the usage table is written by handle_aux/6 on each tick
+    case ets:lookup(rabbit_fifo_usage, Name) of
+        [{_, Utilisation}] -> Utilisation;
+        [] -> 0.0
+    end.
+
+%%% Internal
+
+%% Number of messages ready for delivery, including prefix messages
+%% carried over from a dehydrated (snapshot) state.
+messages_ready(#?MODULE{messages = M,
+                        prefix_msgs = {RCnt, _R, PCnt, _P},
+                        returns = R}) ->
+    %% prefix messages will rarely have anything in them during normal
+    %% operations so length/1 is fine here
+    RCnt + PCnt + lqueue:len(M) + lqueue:len(R).
+
+%% Total message count (ready plus checked out).
+messages_total(#?MODULE{ra_indexes = I,
+                        prefix_msgs = {RCnt, _R, PCnt, _P}}) ->
+    RCnt + PCnt + rabbit_fifo_index:size(I).
+
+%% Consumer utilisation tracker transitions. The tracker is either
+%% {active, Since, Avg} or {inactive, Since, ActiveTime, Avg}; same-state
+%% inputs are no-ops, transitions fold elapsed time into the average.
+update_use({inactive, _, _, _} = Use, inactive) ->
+    Use;
+update_use({active, _, _} = Use, active) ->
+    Use;
+update_use({active, ActiveSince, Avg}, inactive) ->
+    Now = erlang:monotonic_time(micro_seconds),
+    {inactive, Now, Now - ActiveSince, Avg};
+update_use({inactive, InactiveSince, ActiveTime, Avg}, active) ->
+    Now = erlang:monotonic_time(micro_seconds),
+    {active, Now, use_avg(ActiveTime, Now - InactiveSince, Avg)}.
+
+%% Fold the time spent in the current tracker state into the moving
+%% average and return the utilisation figure.
+utilisation({active, Since, Avg}) ->
+    Active = erlang:monotonic_time(micro_seconds) - Since,
+    use_avg(Active, 0, Avg);
+utilisation({inactive, Since, ActiveTime, Avg}) ->
+    Inactive = erlang:monotonic_time(micro_seconds) - Since,
+    use_avg(ActiveTime, Inactive, Avg).
+
+%% Combine an active/inactive sample into the exponential moving average.
+%% A zero-length sample leaves the average unchanged (also avoids a
+%% division by zero below).
+use_avg(0, 0, Avg) ->
+    Avg;
+use_avg(Active, Inactive, Avg) ->
+    Time = Inactive + Active,
+    moving_average(Time, ?USE_AVG_HALF_LIFE, Active / Time, Avg).
+
+%% Exponentially-weighted moving average with the configured half-life;
+%% the first sample (Current =:= undefined) is taken verbatim.
+moving_average(_Time, _, Next, undefined) ->
+    Next;
+moving_average(Time, HalfLife, Next, Current) ->
+    Weight = math:exp(Time * math:log(0.5) / HalfLife),
+    Next * (1 - Weight) + Current * Weight.
+
+%% Total number of checked-out (unacknowledged) messages across all
+%% consumers.
+num_checked_out(#?MODULE{consumers = Cons}) ->
+    maps:fold(fun (_, #consumer{checked_out = C}, Acc) ->
+                      maps:size(C) + Acc
+              end, 0, Cons).
+
+%% Cancel a consumer for Reason (consumer_cancel | down). With competing
+%% consumers, or single-active with nobody waiting, this is a plain
+%% cancellation; otherwise the next waiting consumer may be activated.
+cancel_consumer(Meta, ConsumerId,
+                #?MODULE{cfg = #cfg{consumer_strategy = competing}} = State,
+                Effects, Reason) ->
+    cancel_consumer0(Meta, ConsumerId, State, Effects, Reason);
+cancel_consumer(Meta, ConsumerId,
+                #?MODULE{cfg = #cfg{consumer_strategy = single_active},
+                         waiting_consumers = []} = State,
+                Effects, Reason) ->
+    %% single active consumer on, no consumers are waiting
+    cancel_consumer0(Meta, ConsumerId, State, Effects, Reason);
+cancel_consumer(Meta, ConsumerId,
+                #?MODULE{consumers = Cons0,
+                         cfg = #cfg{consumer_strategy = single_active},
+                         waiting_consumers = Waiting0} = State0,
+                Effects0, Reason) ->
+    %% single active consumer on, consumers are waiting
+    case maps:is_key(ConsumerId, Cons0) of
+        true ->
+            % The active consumer is to be removed; promote the next
+            % waiting consumer, if any
+            {State1, Effects1} = cancel_consumer0(Meta, ConsumerId, State0,
+                                                  Effects0, Reason),
+            activate_next_consumer(State1, Effects1);
+        false ->
+            % The cancelled consumer is not the active one:
+            % Just remove it from the waiting list
+            Waiting = lists:keydelete(ConsumerId, 1, Waiting0),
+            Effects = cancel_consumer_effects(ConsumerId, State0, Effects0),
+            % A waiting consumer isn't supposed to have any checked out messages,
+            % so nothing special to do here
+            {State0#?MODULE{waiting_consumers = Waiting}, Effects}
+    end.
+ +consumer_update_active_effects(#?MODULE{cfg = #cfg{resource = QName}}, + ConsumerId, #consumer{meta = Meta}, + Active, ActivityStatus, + Effects) -> + Ack = maps:get(ack, Meta, undefined), + Prefetch = maps:get(prefetch, Meta, undefined), + Args = maps:get(args, Meta, []), + [{mod_call, rabbit_quorum_queue, update_consumer_handler, + [QName, ConsumerId, false, Ack, Prefetch, Active, ActivityStatus, Args]} + | Effects]. + +cancel_consumer0(Meta, ConsumerId, + #?MODULE{consumers = C0} = S0, Effects0, Reason) -> + case C0 of + #{ConsumerId := Consumer} -> + {S, Effects2} = maybe_return_all(Meta, ConsumerId, Consumer, + S0, Effects0, Reason), + %% The effects are emitted before the consumer is actually removed + %% if the consumer has unacked messages. This is a bit weird but + %% in line with what classic queues do (from an external point of + %% view) + Effects = cancel_consumer_effects(ConsumerId, S, Effects2), + case maps:size(S#?MODULE.consumers) of + 0 -> + {S, [{aux, inactive} | Effects]}; + _ -> + {S, Effects} + end; + _ -> + %% already removed: do nothing + {S0, Effects0} + end. 
+ +activate_next_consumer(#?MODULE{consumers = Cons, + waiting_consumers = Waiting0} = State0, + Effects0) -> + case maps:filter(fun (_, #consumer{status = S}) -> S == up end, Cons) of + Up when map_size(Up) == 0 -> + %% there are no active consumer in the consumer map + case lists:filter(fun ({_, #consumer{status = Status}}) -> + Status == up + end, Waiting0) of + [{NextConsumerId, NextConsumer} | _] -> + %% there is a potential next active consumer + Remaining = lists:keydelete(NextConsumerId, 1, Waiting0), + #?MODULE{service_queue = ServiceQueue} = State0, + ServiceQueue1 = maybe_queue_consumer(NextConsumerId, + NextConsumer, + ServiceQueue), + State = State0#?MODULE{consumers = Cons#{NextConsumerId => NextConsumer}, + service_queue = ServiceQueue1, + waiting_consumers = Remaining}, + Effects = consumer_update_active_effects(State, NextConsumerId, + NextConsumer, true, + single_active, Effects0), + {State, Effects}; + [] -> + {State0, [{aux, inactive} | Effects0]} + end; + _ -> + {State0, Effects0} + end. + + + +maybe_return_all(#{system_time := Ts} = Meta, ConsumerId, Consumer, S0, Effects0, Reason) -> + case Reason of + consumer_cancel -> + {update_or_remove_sub(Meta, ConsumerId, + Consumer#consumer{lifetime = once, + credit = 0, + status = cancelled}, + S0), Effects0}; + down -> + {S1, Effects1} = return_all(Meta, S0, Effects0, ConsumerId, Consumer), + {S1#?MODULE{consumers = maps:remove(ConsumerId, S1#?MODULE.consumers), + last_active = Ts}, + Effects1} + end. + +apply_enqueue(#{index := RaftIdx} = Meta, From, Seq, RawMsg, State0) -> + case maybe_enqueue(RaftIdx, From, Seq, RawMsg, [], State0) of + {ok, State1, Effects1} -> + State2 = append_to_master_index(RaftIdx, State1), + {State, ok, Effects} = checkout(Meta, State0, State2, Effects1), + {maybe_store_dehydrated_state(RaftIdx, State), ok, Effects}; + {duplicate, State, Effects} -> + {State, ok, Effects} + end. 
+ +drop_head(#?MODULE{ra_indexes = Indexes0} = State0, Effects0) -> + case take_next_msg(State0) of + {FullMsg = {_MsgId, {RaftIdxToDrop, {Header, Msg}}}, + State1} -> + Indexes = rabbit_fifo_index:delete(RaftIdxToDrop, Indexes0), + State2 = add_bytes_drop(Header, State1#?MODULE{ra_indexes = Indexes}), + State = case Msg of + 'empty' -> State2; + _ -> subtract_in_memory_counts(Header, State2) + end, + Effects = dead_letter_effects(maxlen, #{none => FullMsg}, + State, Effects0), + {State, Effects}; + {{'$prefix_msg', Header}, State1} -> + State2 = subtract_in_memory_counts(Header, add_bytes_drop(Header, State1)), + {State2, Effects0}; + {{'$empty_msg', Header}, State1} -> + State2 = add_bytes_drop(Header, State1), + {State2, Effects0}; + empty -> + {State0, Effects0} + end. + +enqueue(RaftIdx, RawMsg, #?MODULE{messages = Messages, + next_msg_num = NextMsgNum} = State0) -> + %% the initial header is an integer only - it will get expanded to a map + %% when the next required key is added + Header = message_size(RawMsg), + {State1, Msg} = + case evaluate_memory_limit(Header, State0) of + true -> + % indexed message with header map + {State0, {RaftIdx, {Header, 'empty'}}}; + false -> + {add_in_memory_counts(Header, State0), + {RaftIdx, {Header, RawMsg}}} % indexed message with header map + end, + State = add_bytes_enqueue(Header, State1), + State#?MODULE{messages = lqueue:in({NextMsgNum, Msg}, Messages), + next_msg_num = NextMsgNum + 1}. + +append_to_master_index(RaftIdx, + #?MODULE{ra_indexes = Indexes0} = State0) -> + State = incr_enqueue_count(State0), + Indexes = rabbit_fifo_index:append(RaftIdx, Indexes0), + State#?MODULE{ra_indexes = Indexes}. + + +incr_enqueue_count(#?MODULE{enqueue_count = EC, + cfg = #cfg{release_cursor_interval = {_Base, C}} + } = State0) when EC >= C-> + %% this will trigger a dehydrated version of the state to be stored + %% at this raft index for potential future snapshot generation + %% Q: Why don't we just stash the release cursor here? 
+ %% A: Because it needs to be the very last thing we do and we + %% first needs to run the checkout logic. + State0#?MODULE{enqueue_count = 0}; +incr_enqueue_count(#?MODULE{enqueue_count = C} = State) -> + State#?MODULE{enqueue_count = C + 1}. + +maybe_store_dehydrated_state(RaftIdx, + #?MODULE{cfg = + #cfg{release_cursor_interval = {Base, _}} + = Cfg, + ra_indexes = Indexes, + enqueue_count = 0, + release_cursors = Cursors0} = State0) -> + case rabbit_fifo_index:exists(RaftIdx, Indexes) of + false -> + %% the incoming enqueue must already have been dropped + State0; + true -> + Interval = case Base of + 0 -> 0; + _ -> + Total = messages_total(State0), + min(max(Total, Base), ?RELEASE_CURSOR_EVERY_MAX) + end, + State = State0#?MODULE{cfg = Cfg#cfg{release_cursor_interval = + {Base, Interval}}}, + Dehydrated = dehydrate_state(State), + Cursor = {release_cursor, RaftIdx, Dehydrated}, + Cursors = lqueue:in(Cursor, Cursors0), + State#?MODULE{release_cursors = Cursors} + end; +maybe_store_dehydrated_state(_RaftIdx, State) -> + State. + +enqueue_pending(From, + #enqueuer{next_seqno = Next, + pending = [{Next, RaftIdx, RawMsg} | Pending]} = Enq0, + State0) -> + State = enqueue(RaftIdx, RawMsg, State0), + Enq = Enq0#enqueuer{next_seqno = Next + 1, pending = Pending}, + enqueue_pending(From, Enq, State); +enqueue_pending(From, Enq, #?MODULE{enqueuers = Enqueuers0} = State) -> + State#?MODULE{enqueuers = Enqueuers0#{From => Enq}}. 
+
+%% Enqueue a message, deduplicating by the enqueuer's publisher sequence
+%% number. Returns {ok, State, Effects} when the message was (or will
+%% later be) enqueued and {duplicate, State, Effects} when it has been
+%% seen before.
+maybe_enqueue(RaftIdx, undefined, undefined, RawMsg, Effects, State0) ->
+    % direct enqueue without tracking
+    State = enqueue(RaftIdx, RawMsg, State0),
+    {ok, State, Effects};
+maybe_enqueue(RaftIdx, From, MsgSeqNo, RawMsg, Effects0,
+              #?MODULE{enqueuers = Enqueuers0} = State0) ->
+    case maps:get(From, Enqueuers0, undefined) of
+        undefined ->
+            % first message from this enqueuer: register it (with the
+            % default next_seqno) and retry, adding a process monitor
+            State1 = State0#?MODULE{enqueuers = Enqueuers0#{From => #enqueuer{}}},
+            {ok, State, Effects} = maybe_enqueue(RaftIdx, From, MsgSeqNo,
+                                                 RawMsg, Effects0, State1),
+            {ok, State, [{monitor, process, From} | Effects]};
+        #enqueuer{next_seqno = MsgSeqNo} = Enq0 ->
+            % it is the next expected seqno; enqueue it and then drain any
+            % pending out-of-order messages that are now in sequence
+            State1 = enqueue(RaftIdx, RawMsg, State0),
+            Enq = Enq0#enqueuer{next_seqno = MsgSeqNo + 1},
+            State = enqueue_pending(From, Enq, State1),
+            {ok, State, Effects0};
+        #enqueuer{next_seqno = Next,
+                  pending = Pending0} = Enq0
+          when MsgSeqNo > Next ->
+            % out of order delivery: stash it (sorted by seqno) until the
+            % gap is filled
+            Pending = [{MsgSeqNo, RaftIdx, RawMsg} | Pending0],
+            Enq = Enq0#enqueuer{pending = lists:sort(Pending)},
+            {ok, State0#?MODULE{enqueuers = Enqueuers0#{From => Enq}}, Effects0};
+        #enqueuer{next_seqno = Next} when MsgSeqNo =< Next ->
+            % duplicate delivery: signal it so the caller skips adding the
+            % raft index to ra_indexes (see apply_enqueue/5, which only
+            % appends to the master index on the ok path)
+            {duplicate, State0, Effects0}
+    end.
+
+%% Second element of a tuple.
+snd(T) ->
+    element(2, T).
+ +return(#{index := IncomingRaftIdx} = Meta, ConsumerId, Returned, + Effects0, State0) -> + {State1, Effects1} = maps:fold( + fun(MsgId, {Tag, _} = Msg, {S0, E0}) + when Tag == '$prefix_msg'; + Tag == '$empty_msg'-> + return_one(Meta, MsgId, 0, Msg, S0, E0, ConsumerId); + (MsgId, {MsgNum, Msg}, {S0, E0}) -> + return_one(Meta, MsgId, MsgNum, Msg, S0, E0, + ConsumerId) + end, {State0, Effects0}, Returned), + State2 = + case State1#?MODULE.consumers of + #{ConsumerId := Con0} -> + Con = Con0#consumer{credit = increase_credit(Con0, + map_size(Returned))}, + update_or_remove_sub(Meta, ConsumerId, Con, State1); + _ -> + State1 + end, + {State, ok, Effects} = checkout(Meta, State0, State2, Effects1), + update_smallest_raft_index(IncomingRaftIdx, State, Effects). + +% used to processes messages that are finished +complete(Meta, ConsumerId, Discarded, + #consumer{checked_out = Checked} = Con0, Effects, + #?MODULE{ra_indexes = Indexes0} = State0) -> + %% TODO optimise use of Discarded map here + MsgRaftIdxs = [RIdx || {_, {RIdx, _}} <- maps:values(Discarded)], + %% credit_mode = simple_prefetch should automatically top-up credit + %% as messages are simple_prefetch or otherwise returned + Con = Con0#consumer{checked_out = maps:without(maps:keys(Discarded), Checked), + credit = increase_credit(Con0, map_size(Discarded))}, + State1 = update_or_remove_sub(Meta, ConsumerId, Con, State0), + Indexes = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes0, + MsgRaftIdxs), + %% TODO: use maps:fold instead + State2 = lists:foldl(fun({_, {_, {Header, _}}}, Acc) -> + add_bytes_settle(Header, Acc); + ({'$prefix_msg', Header}, Acc) -> + add_bytes_settle(Header, Acc); + ({'$empty_msg', Header}, Acc) -> + add_bytes_settle(Header, Acc) + end, State1, maps:values(Discarded)), + {State2#?MODULE{ra_indexes = Indexes}, Effects}. 
+ +increase_credit(#consumer{lifetime = once, + credit = Credit}, _) -> + %% once consumers cannot increment credit + Credit; +increase_credit(#consumer{lifetime = auto, + credit_mode = credited, + credit = Credit}, _) -> + %% credit_mode: credit also doesn't automatically increment credit + Credit; +increase_credit(#consumer{credit = Current}, Credit) -> + Current + Credit. + +complete_and_checkout(#{index := IncomingRaftIdx} = Meta, MsgIds, ConsumerId, + #consumer{checked_out = Checked0} = Con0, + Effects0, State0) -> + Discarded = maps:with(MsgIds, Checked0), + {State2, Effects1} = complete(Meta, ConsumerId, Discarded, Con0, + Effects0, State0), + {State, ok, Effects} = checkout(Meta, State0, State2, Effects1), + update_smallest_raft_index(IncomingRaftIdx, State, Effects). + +dead_letter_effects(_Reason, _Discarded, + #?MODULE{cfg = #cfg{dead_letter_handler = undefined}}, + Effects) -> + Effects; +dead_letter_effects(Reason, Discarded, + #?MODULE{cfg = #cfg{dead_letter_handler = {Mod, Fun, Args}}}, + Effects) -> + RaftIdxs = maps:fold( + fun (_, {_, {RaftIdx, {_Header, 'empty'}}}, Acc) -> + [RaftIdx | Acc]; + (_, _, Acc) -> + Acc + end, [], Discarded), + [{log, RaftIdxs, + fun (Log) -> + Lookup = maps:from_list(lists:zip(RaftIdxs, Log)), + DeadLetters = maps:fold( + fun (_, {_, {RaftIdx, {_Header, 'empty'}}}, Acc) -> + {enqueue, _, _, Msg} = maps:get(RaftIdx, Lookup), + [{Reason, Msg} | Acc]; + (_, {_, {_, {_Header, Msg}}}, Acc) -> + [{Reason, Msg} | Acc]; + (_, _, Acc) -> + Acc + end, [], Discarded), + [{mod_call, Mod, Fun, Args ++ [DeadLetters]}] + end} | Effects]. + +cancel_consumer_effects(ConsumerId, + #?MODULE{cfg = #cfg{resource = QName}}, Effects) -> + [{mod_call, rabbit_quorum_queue, + cancel_consumer_handler, [QName, ConsumerId]} | Effects]. + +update_smallest_raft_index(Idx, State, Effects) -> + update_smallest_raft_index(Idx, ok, State, Effects). 
+
+%% Possibly emit a {release_cursor, ...} effect so Ra can truncate its
+%% log. When no raft indexes are live the cursor can jump straight to the
+%% incoming command's index; otherwise we may release a previously stored
+%% cursor that all live indexes have passed.
+update_smallest_raft_index(IncomingRaftIdx, Reply,
+                           #?MODULE{cfg = Cfg,
+                                    ra_indexes = Indexes,
+                                    release_cursors = Cursors0} = State0,
+                           Effects) ->
+    case rabbit_fifo_index:size(Indexes) of
+        0 ->
+            % there are no messages on queue anymore and no pending enqueues
+            % we can forward release_cursor all the way until
+            % the last received command, hooray
+            %% reset the release cursor interval
+            #cfg{release_cursor_interval = {Base, _}} = Cfg,
+            RCI = {Base, Base},
+            State = State0#?MODULE{cfg = Cfg#cfg{release_cursor_interval = RCI},
+                                   release_cursors = lqueue:new(),
+                                   enqueue_count = 0},
+            {State, Reply, Effects ++ [{release_cursor, IncomingRaftIdx, State}]};
+        _ ->
+            Smallest = rabbit_fifo_index:smallest(Indexes),
+            case find_next_cursor(Smallest, Cursors0) of
+                {empty, Cursors} ->
+                    {State0#?MODULE{release_cursors = Cursors}, Reply, Effects};
+                {Cursor, Cursors} ->
+                    %% we can emit a release cursor when we've passed the smallest
+                    %% release cursor available.
+                    {State0#?MODULE{release_cursors = Cursors}, Reply,
+                     Effects ++ [Cursor]}
+            end
+    end.
+
+%% Find the latest stored release cursor whose index is still below Idx,
+%% dequeuing every cursor it supersedes. Returns {Cursor | empty, Rest}.
+find_next_cursor(Idx, Cursors) ->
+    find_next_cursor(Idx, Cursors, empty).
+
+find_next_cursor(Smallest, Cursors0, Potential) ->
+    case lqueue:out(Cursors0) of
+        {{value, {_, Idx, _} = Cursor}, Cursors} when Idx < Smallest ->
+            %% we found one but it may not be the largest one
+            find_next_cursor(Smallest, Cursors, Cursor);
+        _ ->
+            %% head cursor (if any) is not yet passed: keep it queued by
+            %% returning the original, un-dequeued queue
+            {Potential, Cursors0}
+    end.
+
+%% Update Key in a message header, expanding a bare integer header (the
+%% initial size-only form) into a map first. Default is used when Key is
+%% absent (see maps:update_with/4).
+update_header(Key, UpdateFun, Default, Header)
+  when is_integer(Header) ->
+    update_header(Key, UpdateFun, Default, #{size => Header});
+update_header(Key, UpdateFun, Default, Header) ->
+    maps:update_with(Key, UpdateFun, Default, Header).

%% @private Return a single checked-out message to the queue, bumping its
%% delivery_count header. If the count now exceeds the configured
%% delivery_limit the message is completed (and, for messages with a raft
%% index, dead-lettered) instead of being requeued.
%%
%% First clause: '$prefix_msg' / '$empty_msg' entries, i.e. messages whose
%% payload is not held in this state (snapshot prefix or evicted payload).
%% These carry no payload here, so no dead-letter effect is produced.
return_one(Meta, MsgId, 0, {Tag, Header0},
           #?MODULE{returns = Returns,
                    consumers = Consumers,
                    cfg = #cfg{delivery_limit = DeliveryLimit}} = State0,
           Effects0, ConsumerId)
  when Tag == '$prefix_msg'; Tag == '$empty_msg' ->
    #consumer{checked_out = Checked} = Con0 = maps:get(ConsumerId, Consumers),
    Header = update_header(delivery_count, fun (C) -> C+1 end, 1, Header0),
    Msg0 = {Tag, Header},
    case maps:get(delivery_count, Header) of
        DeliveryCount when DeliveryCount > DeliveryLimit ->
            complete(Meta, ConsumerId, #{MsgId => Msg0}, Con0, Effects0, State0);
        _ ->
            %% this should not affect the release cursor in any way
            Con = Con0#consumer{checked_out = maps:remove(MsgId, Checked)},
            %% if the in-memory limit is now exceeded the payload reference is
            %% dropped ('$empty_msg'), otherwise in-memory counters are bumped
            {Msg, State1} = case Tag of
                                '$empty_msg' ->
                                    {Msg0, State0};
                                _ -> case evaluate_memory_limit(Header, State0) of
                                         true ->
                                             {{'$empty_msg', Header}, State0};
                                         false ->
                                             {Msg0, add_in_memory_counts(Header, State0)}
                                     end
                            end,
            {add_bytes_return(
               Header,
               State1#?MODULE{consumers = Consumers#{ConsumerId => Con},
                              returns = lqueue:in(Msg, Returns)}),
             Effects0}
    end;
%% Second clause: regular indexed messages ({RaftId, {Header, RawMsg}}).
return_one(Meta, MsgId, MsgNum, {RaftId, {Header0, RawMsg}},
           #?MODULE{returns = Returns,
                    consumers = Consumers,
                    cfg = #cfg{delivery_limit = DeliveryLimit}} = State0,
           Effects0, ConsumerId) ->
    #consumer{checked_out = Checked} = Con0 = maps:get(ConsumerId, Consumers),
    Header = update_header(delivery_count, fun (C) -> C+1 end, 1, Header0),
    Msg0 = {RaftId, {Header, RawMsg}},
    case maps:get(delivery_count, Header) of
        DeliveryCount when DeliveryCount > DeliveryLimit ->
            %% over the delivery limit: dead-letter and complete
            DlMsg = {MsgNum, Msg0},
            Effects = dead_letter_effects(delivery_limit, #{none => DlMsg},
                                          State0, Effects0),
            complete(Meta, ConsumerId, #{MsgId => DlMsg}, Con0, Effects, State0);
        _ ->
            Con = Con0#consumer{checked_out = maps:remove(MsgId, Checked)},
            %% this should not affect the release cursor in any way
            {Msg, State1} = case RawMsg of
                                'empty' ->
                                    {Msg0, State0};
                                _ ->
                                    case evaluate_memory_limit(Header, State0) of
                                        true ->
                                            %% drop the in-memory payload copy
                                            {{RaftId, {Header, 'empty'}}, State0};
                                        false ->
                                            {Msg0, add_in_memory_counts(Header, State0)}
                                    end
                            end,
            {add_bytes_return(
               Header,
               State1#?MODULE{consumers = Consumers#{ConsumerId => Con},
                              returns = lqueue:in({MsgNum, Msg}, Returns)}),
             Effects0}
    end.

%% @private Return every message currently checked out by `ConsumerId'.
return_all(Meta, #?MODULE{consumers = Cons} = State0, Effects0, ConsumerId,
           #consumer{checked_out = Checked0} = Con) ->
    %% need to sort the list so that we return messages in the order
    %% they were checked out
    Checked = lists:sort(maps:to_list(Checked0)),
    State = State0#?MODULE{consumers = Cons#{ConsumerId => Con}},
    lists:foldl(fun ({MsgId, {'$prefix_msg', _} = Msg}, {S, E}) ->
                        return_one(Meta, MsgId, 0, Msg, S, E, ConsumerId);
                    ({MsgId, {'$empty_msg', _} = Msg}, {S, E}) ->
                        return_one(Meta, MsgId, 0, Msg, S, E, ConsumerId);
                    ({MsgId, {MsgNum, Msg}}, {S, E}) ->
                        return_one(Meta, MsgId, MsgNum, Msg, S, E, ConsumerId)
                end, {State, Effects0}, Checked).

%% checkout new messages to consumers
checkout(#{index := Index} = Meta, OldState, State0, Effects0) ->
    {State1, _Result, Effects1} = checkout0(Meta, checkout_one(Meta, State0),
                                            Effects0, {#{}, #{}}),
    %% after checking out, re-evaluate the length/bytes limits; if the limit
    %% pass changed the state we may also be able to advance the release cursor
    case evaluate_limit(Index, false, OldState, State1, Effects1) of
        {State, true, Effects} ->
            update_smallest_raft_index(Index, State, Effects);
        {State, false, Effects} ->
            {State, ok, Effects}
    end.

%% @private Drive checkout_one/2 to exhaustion, accumulating per-consumer
%% deliveries. Messages whose payload is not in memory ({_, {Header, 'empty'}})
%% are collected into `LogAcc' and delivered via a `log' effect (payload read
%% back from the raft log); in-memory messages go into `SendAcc' and are
%% delivered directly via `send_msg' effects.
checkout0(Meta, {success, ConsumerId, MsgId, {RaftIdx, {Header, 'empty'}}, State},
          Effects, {SendAcc, LogAcc0}) ->
    DelMsg = {RaftIdx, {MsgId, Header}},
    LogAcc = maps:update_with(ConsumerId,
                              fun (M) -> [DelMsg | M] end,
                              [DelMsg], LogAcc0),
    checkout0(Meta, checkout_one(Meta, State), Effects, {SendAcc, LogAcc});
checkout0(Meta, {success, ConsumerId, MsgId, Msg, State}, Effects,
          {SendAcc0, LogAcc}) ->
    DelMsg = {MsgId, Msg},
    SendAcc = maps:update_with(ConsumerId,
                               fun (M) -> [DelMsg | M] end,
                               [DelMsg], SendAcc0),
    checkout0(Meta, checkout_one(Meta, State), Effects, {SendAcc, LogAcc});
checkout0(_Meta, {Activity, State0}, Effects0, {SendAcc, LogAcc}) ->
    %% no more deliveries possible - flush the accumulated effects
    Effects1 = case Activity of
                   nochange ->
                       append_send_msg_effects(
                         append_log_effects(Effects0, LogAcc), SendAcc);
                   inactive ->
                       [{aux, inactive}
                        | append_send_msg_effects(
                            append_log_effects(Effects0, LogAcc), SendAcc)]
               end,
    {State0, ok, lists:reverse(Effects1)}.

%% @private Enforce the max_length / max_bytes limits according to the
%% configured overflow strategy. Returns {State, LimitWasApplied, Effects};
%% the boolean is threaded through so callers know whether the state shrank.
evaluate_limit(_Index, Result, _BeforeState,
               #?MODULE{cfg = #cfg{max_length = undefined,
                                   max_bytes = undefined}} = State,
               Effects) ->
    %% no limits configured - nothing to do
    {State, Result, Effects};
evaluate_limit(Index, Result, BeforeState,
               #?MODULE{cfg = #cfg{overflow_strategy = Strategy},
                        enqueuers = Enqs0} = State0,
               Effects0) ->
    case is_over_limit(State0) of
        true when Strategy == drop_head ->
            %% drop from the head and re-check until below the limit
            {State, Effects} = drop_head(State0, Effects0),
            evaluate_limit(Index, true, BeforeState, State, Effects);
        true when Strategy == reject_publish ->
            %% generate send_msg effect for each enqueuer to let them know
            %% they need to block
            {Enqs, Effects} =
                maps:fold(
                  fun (P, #enqueuer{blocked = undefined} = E0, {Enqs, Acc}) ->
                          E = E0#enqueuer{blocked = Index},
                          {Enqs#{P => E},
                           [{send_msg, P, {queue_status, reject_publish},
                             [ra_event]} | Acc]};
                      (_P, _E, Acc) ->
                          %% already blocked - leave untouched
                          Acc
                  end, {Enqs0, Effects0}, Enqs0),
            {State0#?MODULE{enqueuers = Enqs}, Result, Effects};
        false when Strategy == reject_publish ->
            %% TODO: optimise as this case gets called for every command
            %% pretty much
            Before = is_below_soft_limit(BeforeState),
            case {Before, is_below_soft_limit(State0)} of
                {false, true} ->
                    %% we have moved below the lower (soft) limit, so
                    %% unblock all enqueuers
                    {Enqs, Effects} =
                        maps:fold(
                          fun (P, #enqueuer{} = E0, {Enqs, Acc}) ->
                                  E = E0#enqueuer{blocked = undefined},
                                  {Enqs#{P => E},
                                   [{send_msg, P, {queue_status, go}, [ra_event]}
                                    | Acc]};
                              (_P, _E, Acc) ->
                                  Acc
                          end, {Enqs0, Effects0}, Enqs0),
                    {State0#?MODULE{enqueuers = Enqs}, Result, Effects};
                _ ->
                    {State0, Result, Effects0}
            end;
        false ->
            {State0, Result, Effects0}
    end.

%% @private true when adding a message of the given size would exceed the
%% configured in-memory length/bytes limits (i.e. payload must not be kept
%% in memory). Accepts a size integer or a header map.
evaluate_memory_limit(_Header,
                      #?MODULE{cfg = #cfg{max_in_memory_length = undefined,
                                          max_in_memory_bytes = undefined}}) ->
    false;
evaluate_memory_limit(#{size := Size}, State) ->
    evaluate_memory_limit(Size, State);
evaluate_memory_limit(Size,
                      #?MODULE{cfg = #cfg{max_in_memory_length = MaxLength,
                                          max_in_memory_bytes = MaxBytes},
                               msg_bytes_in_memory = Bytes,
                               msgs_ready_in_memory = Length})
  when is_integer(Size) ->
    (Length >= MaxLength) orelse ((Bytes + Size) > MaxBytes).

%% @private Turn the per-consumer delivery accumulator into send_msg effects;
%% also emits an {aux, active} marker when anything was delivered.
append_send_msg_effects(Effects, AccMap) when map_size(AccMap) == 0 ->
    Effects;
append_send_msg_effects(Effects0, AccMap) ->
    Effects = maps:fold(fun (C, Msgs, Ef) ->
                                [send_msg_effect(C, lists:reverse(Msgs)) | Ef]
                        end, Effects0, AccMap),
    [{aux, active} | Effects].

%% @private Turn the per-consumer log accumulator into `log' effects.
append_log_effects(Effects0, AccMap) ->
    maps:fold(fun (C, Msgs, Ef) ->
                      [send_log_effect(C, lists:reverse(Msgs)) | Ef]
              end, Effects0, AccMap).

%% next message is determined as follows:
%% First we check if there are prefix returns
%% Then we check if there are current returns
%% then we check prefix msgs
%% then we check current messages
%%
%% When we return it is always done to the current return queue
%% for both prefix messages and current messages
take_next_msg(#?MODULE{prefix_msgs = {R, P}} = State) ->
    %% conversion of the legacy 2-tuple prefix_msgs format into the
    %% counted 4-tuple format, then retry
    take_next_msg(State#?MODULE{prefix_msgs = {length(R), R, length(P), P}});
take_next_msg(#?MODULE{prefix_msgs = {NumR, [{'$empty_msg', _} = Msg | Rem],
                                      NumP, P}} = State) ->
    %% there are prefix returns, these should be served first
    {Msg, State#?MODULE{prefix_msgs = {NumR-1, Rem, NumP, P}}};
take_next_msg(#?MODULE{prefix_msgs = {NumR, [Header | Rem], NumP, P}} = State) ->
    %% there are prefix returns, these should be served first
    {{'$prefix_msg', Header},
     State#?MODULE{prefix_msgs = {NumR-1, Rem, NumP, P}}};
take_next_msg(#?MODULE{returns = Returns,
                       messages = Messages0,
                       prefix_msgs = {NumR, R, NumP, P}} = State) ->
    %% use peek rather than out there as the most likely case is an empty
    %% queue
    case lqueue:peek(Returns) of
        {value, NextMsg} ->
            {NextMsg,
             State#?MODULE{returns = lqueue:drop(Returns)}};
        empty when P == [] ->
            %% no prefix messages - serve from the live message queue
            case lqueue:out(Messages0) of
                {empty, _} ->
                    empty;
                {{value, {_, _} = SeqMsg}, Messages} ->
                    {SeqMsg, State#?MODULE{messages = Messages}}
            end;
        empty ->
            [Msg | Rem] = P,
            case Msg of
                {Header, 'empty'} ->
                    %% There are prefix msgs
                    {{'$empty_msg', Header},
                     State#?MODULE{prefix_msgs = {NumR, R, NumP-1, Rem}}};
                Header ->
                    {{'$prefix_msg', Header},
                     State#?MODULE{prefix_msgs = {NumR, R, NumP-1, Rem}}}
            end
    end.

%% @private Effect that delivers in-memory messages directly to the consumer.
send_msg_effect({CTag, CPid}, Msgs) ->
    {send_msg, CPid, {delivery, CTag, Msgs}, [local, ra_event]}.

%% @private Effect that asks `ra' to read the enqueue commands back from the
%% log (for messages whose payload was not kept in memory) and then deliver
%% them to the consumer on the consumer's local node.
send_log_effect({CTag, CPid}, IdxMsgs) ->
    {RaftIdxs, Data} = lists:unzip(IdxMsgs),
    {log, RaftIdxs,
     fun(Log) ->
             Msgs = lists:zipwith(fun ({enqueue, _, _, Msg}, {MsgId, Header}) ->
                                          {MsgId, {Header, Msg}}
                                  end, Log, Data),
             [{send_msg, CPid, {delivery, CTag, Msgs}, [local, ra_event]}]
     end,
     {local, node(CPid)}}.

%% @private Like send_log_effect/2 but replies to a dequeue request instead
%% of delivering to a consumer.
reply_log_effect(RaftIdx, MsgId, Header, Ready, From) ->
    {log, [RaftIdx],
     fun([{enqueue, _, _, Msg}]) ->
             [{reply, From, {wrap_reply,
                             {dequeue, {MsgId, {Header, Msg}}, Ready}}}]
     end}.

%% @private Attempt to check a single message out to the next serviceable
%% consumer on the service queue. Returns
%% {success, ConsumerId, MsgId, Msg, State} or {nochange | inactive, State}.
checkout_one(Meta, #?MODULE{service_queue = SQ0,
                            messages = Messages0,
                            consumers = Cons0} = InitState) ->
    case priority_queue:out(SQ0) of
        {{value, ConsumerId}, SQ1} ->
            case take_next_msg(InitState) of
                {ConsumerMsg, State0} ->
                    %% there are consumers waiting to be serviced
                    %% process consumer checkout
                    case maps:find(ConsumerId, Cons0) of
                        {ok, #consumer{credit = 0}} ->
                            %% no credit but was still on queue
                            %% can happen when draining
                            %% recurse without consumer on queue
                            checkout_one(Meta, InitState#?MODULE{service_queue = SQ1});
                        {ok, #consumer{status = cancelled}} ->
                            checkout_one(Meta, InitState#?MODULE{service_queue = SQ1});
                        {ok, #consumer{status = suspected_down}} ->
                            checkout_one(Meta, InitState#?MODULE{service_queue = SQ1});
                        {ok, #consumer{checked_out = Checked0,
                                       next_msg_id = Next,
                                       credit = Credit,
                                       delivery_count = DelCnt} = Con0} ->
                            Checked = maps:put(Next, ConsumerMsg, Checked0),
                            Con = Con0#consumer{checked_out = Checked,
                                                next_msg_id = Next + 1,
                                                credit = Credit - 1,
                                                delivery_count = DelCnt + 1},
                            State1 = update_or_remove_sub(Meta,
                                                          ConsumerId, Con,
                                                          State0#?MODULE{service_queue = SQ1}),
                            %% update the byte/in-memory accounting and pick
                            %% the delivery representation. NB: the bindings
                            %% are intentionally asymmetric - messages with an
                            %% 'empty' payload keep their raft index (payload
                            %% will be read from the log), whereas in-memory
                            %% messages are delivered as the bare msg tuple.
                            {State, Msg} =
                                case ConsumerMsg of
                                    {'$prefix_msg', Header} ->
                                        {subtract_in_memory_counts(
                                           Header, add_bytes_checkout(Header, State1)),
                                         ConsumerMsg};
                                    {'$empty_msg', Header} ->
                                        {add_bytes_checkout(Header, State1),
                                         ConsumerMsg};
                                    {_, {_, {Header, 'empty'}} = M} ->
                                        {add_bytes_checkout(Header, State1),
                                         M};
                                    {_, {_, {Header, _} = M}} ->
                                        {subtract_in_memory_counts(
                                           Header,
                                           add_bytes_checkout(Header, State1)),
                                         M}
                                end,
                            {success, ConsumerId, Next, Msg, State};
                        error ->
                            %% consumer did not exist but was queued, recurse
                            checkout_one(Meta, InitState#?MODULE{service_queue = SQ1})
                    end;
                empty ->
                    {nochange, InitState}
            end;
        {empty, _} ->
            case lqueue:len(Messages0) of
                0 -> {nochange, InitState};
                _ -> {inactive, InitState}
            end
    end.

%% @private After a delivery, either keep the consumer in the consumers map
%% (requeueing it on the service queue while it has credit) or, for `once'
%% consumers with no credit and nothing unsettled, remove it entirely.
update_or_remove_sub(_Meta, ConsumerId, #consumer{lifetime = auto,
                                                  credit = 0} = Con,
                     #?MODULE{consumers = Cons} = State) ->
    State#?MODULE{consumers = maps:put(ConsumerId, Con, Cons)};
update_or_remove_sub(_Meta, ConsumerId, #consumer{lifetime = auto} = Con,
                     #?MODULE{consumers = Cons,
                              service_queue = ServiceQueue} = State) ->
    State#?MODULE{consumers = maps:put(ConsumerId, Con, Cons),
                  service_queue = uniq_queue_in(ConsumerId, Con, ServiceQueue)};
update_or_remove_sub(#{system_time := Ts},
                     ConsumerId, #consumer{lifetime = once,
                                           checked_out = Checked,
                                           credit = 0} = Con,
                     #?MODULE{consumers = Cons} = State) ->
    case maps:size(Checked) of
        0 ->
            % we're done with this consumer
            State#?MODULE{consumers = maps:remove(ConsumerId, Cons),
                          last_active = Ts};
        _ ->
            % there are unsettled items so need to keep around
            State#?MODULE{consumers = maps:put(ConsumerId, Con, Cons)}
    end;
update_or_remove_sub(_Meta, ConsumerId, #consumer{lifetime = once} = Con,
                     #?MODULE{consumers = Cons,
                              service_queue = ServiceQueue} = State) ->
    State#?MODULE{consumers = maps:put(ConsumerId, Con, Cons),
                  service_queue = uniq_queue_in(ConsumerId, Con, ServiceQueue)}.

%% @private Enqueue `Key' on the service queue at the consumer's priority,
%% unless it is already queued.
uniq_queue_in(Key, #consumer{priority = P}, Queue) ->
    % TODO: queue:member could surely be quite expensive, however the practical
    % number of unique consumers may not be large enough for it to matter
    case priority_queue:member(Key, Queue) of
        true ->
            Queue;
        false ->
            priority_queue:in(Key, P, Queue)
    end.

%% @private Add or update a consumer, honouring the queue's consumer
%% strategy (competing vs single_active).
update_consumer(ConsumerId, Meta, Spec, Priority,
                #?MODULE{cfg = #cfg{consumer_strategy = competing}} = State0) ->
    %% general case, single active consumer off
    update_consumer0(ConsumerId, Meta, Spec, Priority, State0);
update_consumer(ConsumerId, Meta, Spec, Priority,
                #?MODULE{consumers = Cons0,
                         cfg = #cfg{consumer_strategy = single_active}} = State0)
  when map_size(Cons0) == 0 ->
    %% single active consumer on, no one is consuming yet
    update_consumer0(ConsumerId, Meta, Spec, Priority, State0);
update_consumer(ConsumerId, Meta, {Life, Credit, Mode}, Priority,
                #?MODULE{cfg = #cfg{consumer_strategy = single_active},
                         waiting_consumers = WaitingConsumers0} = State0) ->
    %% single active consumer on and one active consumer already
    %% adding the new consumer to the waiting list
    Consumer = #consumer{lifetime = Life, meta = Meta,
                         priority = Priority,
                         credit = Credit, credit_mode = Mode},
    WaitingConsumers1 = WaitingConsumers0 ++ [{ConsumerId, Consumer}],
    State0#?MODULE{waiting_consumers = WaitingConsumers1}.

%% @private Insert a new consumer, or refresh an existing one's lifetime and
%% credit, then (re)queue it on the service queue if it has credit.
update_consumer0(ConsumerId, Meta, {Life, Credit, Mode}, Priority,
                 #?MODULE{consumers = Cons0,
                          service_queue = ServiceQueue0} = State0) ->
    %% TODO: this logic may not be correct for updating a pre-existing consumer
    Init = #consumer{lifetime = Life, meta = Meta,
                     priority = Priority,
                     credit = Credit, credit_mode = Mode},
    Cons = maps:update_with(ConsumerId,
                            fun(S) ->
                                    %% remove any in-flight messages from
                                    %% the credit update
                                    N = maps:size(S#consumer.checked_out),
                                    C = max(0, Credit - N),
                                    S#consumer{lifetime = Life, credit = C}
                            end, Init, Cons0),
    ServiceQueue = maybe_queue_consumer(ConsumerId, maps:get(ConsumerId, Cons),
                                        ServiceQueue0),
    State0#?MODULE{consumers = Cons, service_queue = ServiceQueue}.

%% @private Queue the consumer for service only when it has credit left.
maybe_queue_consumer(ConsumerId, #consumer{credit = Credit} = Con,
                     ServiceQueue0) ->
    case Credit > 0 of
        true ->
            % consumer needs service - check if already on service queue
            uniq_queue_in(ConsumerId, Con, ServiceQueue0);
        false ->
            ServiceQueue0
    end.

%% creates a dehydrated version of the current state to be cached and
%% potentially used for a snapshot at a later point.
%% Message payloads are replaced by their headers/sizes (see prefix_msgs in
%% rabbit_fifo.hrl) so the snapshot stays small while overflow calculations
%% and deterministic consumer ordering remain possible after recovery.
dehydrate_state(#?MODULE{messages = Messages,
                         consumers = Consumers,
                         returns = Returns,
                         prefix_msgs = {PRCnt, PrefRet0, PPCnt, PrefMsg0},
                         waiting_consumers = Waiting0} = State) ->
    RCnt = lqueue:len(Returns),
    %% TODO: optimise this function as far as possible
    PrefRet1 = lists:foldr(fun ({'$prefix_msg', Header}, Acc) ->
                                   [Header | Acc];
                               ({'$empty_msg', _} = Msg, Acc) ->
                                   [Msg | Acc];
                               ({_, {_, {Header, 'empty'}}}, Acc) ->
                                   [{'$empty_msg', Header} | Acc];
                               ({_, {_, {Header, _}}}, Acc) ->
                                   [Header | Acc]
                           end,
                           [],
                           lqueue:to_list(Returns)),
    PrefRet = PrefRet0 ++ PrefRet1,
    PrefMsgsSuff = dehydrate_messages(Messages, []),
    %% prefix messages are not populated in normal operation only after
    %% recovering from a snapshot
    PrefMsgs = PrefMsg0 ++ PrefMsgsSuff,
    Waiting = [{Cid, dehydrate_consumer(C)} || {Cid, C} <- Waiting0],
    State#?MODULE{messages = lqueue:new(),
                  ra_indexes = rabbit_fifo_index:empty(),
                  release_cursors = lqueue:new(),
                  consumers = maps:map(fun (_, C) ->
                                               dehydrate_consumer(C)
                                       end, Consumers),
                  returns = lqueue:new(),
                  prefix_msgs = {PRCnt + RCnt, PrefRet,
                                 PPCnt + lqueue:len(Messages), PrefMsgs},
                  waiting_consumers = Waiting}.

%% @private Reduce each queued message to either its header (payload was in
%% memory) or a {Header, 'empty'} marker (payload only in the raft log).
%% TODO make body recursive to avoid allocating lists:reverse call
dehydrate_messages(Msgs0, Acc0) ->
    {OutRes, Msgs} = lqueue:out(Msgs0),
    case OutRes of
        {value, {_MsgId, {_RaftId, {_, 'empty'} = Msg}}} ->
            dehydrate_messages(Msgs, [Msg | Acc0]);
        {value, {_MsgId, {_RaftId, {Header, _}}}} ->
            dehydrate_messages(Msgs, [Header | Acc0]);
        empty ->
            lists:reverse(Acc0)
    end.

%% @private Reduce a consumer's checked-out map entries to prefix/empty
%% message markers for inclusion in a dehydrated snapshot state.
dehydrate_consumer(#consumer{checked_out = Checked0} = Con) ->
    Checked = maps:map(fun (_, {'$prefix_msg', _} = M) ->
                               M;
                           (_, {'$empty_msg', _} = M) ->
                               M;
                           (_, {_, {_, {Header, 'empty'}}}) ->
                               {'$empty_msg', Header};
                           (_, {_, {_, {Header, _}}}) ->
                               {'$prefix_msg', Header}
                       end, Checked0),
    Con#consumer{checked_out = Checked}.

%% make the state suitable for equality comparison
normalize(#?MODULE{messages = Messages,
                   release_cursors = Cursors} = State) ->
    State#?MODULE{messages = lqueue:from_list(lqueue:to_list(Messages)),
                  release_cursors = lqueue:from_list(lqueue:to_list(Cursors))}.

%% @private true when the hard max_length/max_bytes limit is exceeded.
is_over_limit(#?MODULE{cfg = #cfg{max_length = undefined,
                                  max_bytes = undefined}}) ->
    false;
is_over_limit(#?MODULE{cfg = #cfg{max_length = MaxLength,
                                  max_bytes = MaxBytes},
                       msg_bytes_enqueue = BytesEnq} = State) ->
    messages_ready(State) > MaxLength orelse (BytesEnq > MaxBytes).

%% @private true when usage is below ?LOW_LIMIT (80%) of both configured
%% limits - used to decide when to unblock rejected publishers.
is_below_soft_limit(#?MODULE{cfg = #cfg{max_length = undefined,
                                        max_bytes = undefined}}) ->
    false;
is_below_soft_limit(#?MODULE{cfg = #cfg{max_length = MaxLength,
                                        max_bytes = MaxBytes},
                             msg_bytes_enqueue = BytesEnq} = State) ->
    is_below(MaxLength, messages_ready(State)) andalso
    is_below(MaxBytes, BytesEnq).

is_below(undefined, _Num) ->
    true;
is_below(Val, Num) when is_integer(Val) andalso is_integer(Num) ->
    Num =< trunc(Val * ?LOW_LIMIT).

%% Protocol command constructors - build the raft commands that clients
%% submit to this state machine.
-spec make_enqueue(option(pid()), option(msg_seqno()), raw_msg()) -> protocol().
make_enqueue(Pid, Seq, Msg) ->
    #enqueue{pid = Pid, seq = Seq, msg = Msg}.

-spec make_register_enqueuer(pid()) -> protocol().
make_register_enqueuer(Pid) ->
    #register_enqueuer{pid = Pid}.

-spec make_checkout(consumer_id(),
                    checkout_spec(), consumer_meta()) -> protocol().
make_checkout(ConsumerId, Spec, Meta) ->
    #checkout{consumer_id = ConsumerId,
              spec = Spec, meta = Meta}.

-spec make_settle(consumer_id(), [msg_id()]) -> protocol().
make_settle(ConsumerId, MsgIds) when is_list(MsgIds) ->
    #settle{consumer_id = ConsumerId, msg_ids = MsgIds}.

-spec make_return(consumer_id(), [msg_id()]) -> protocol().
make_return(ConsumerId, MsgIds) ->
    #return{consumer_id = ConsumerId, msg_ids = MsgIds}.

-spec make_discard(consumer_id(), [msg_id()]) -> protocol().
make_discard(ConsumerId, MsgIds) ->
    #discard{consumer_id = ConsumerId, msg_ids = MsgIds}.

-spec make_credit(consumer_id(), non_neg_integer(), non_neg_integer(),
                  boolean()) -> protocol().
make_credit(ConsumerId, Credit, DeliveryCount, Drain) ->
    #credit{consumer_id = ConsumerId,
            credit = Credit,
            delivery_count = DeliveryCount,
            drain = Drain}.

-spec make_purge() -> protocol().
make_purge() -> #purge{}.

-spec make_garbage_collection() -> protocol().
make_garbage_collection() -> #garbage_collection{}.

-spec make_purge_nodes([node()]) -> protocol().
make_purge_nodes(Nodes) ->
    #purge_nodes{nodes = Nodes}.

-spec make_update_config(config()) -> protocol().
make_update_config(Config) ->
    #update_config{config = Config}.

%% Byte-accounting helpers. Each accepts either a raw integer size or a
%% header map containing a `size' key (the map clause normalises and
%% re-dispatches to the integer clause).

%% @private Account for a newly enqueued message's bytes.
add_bytes_enqueue(Bytes,
                  #?MODULE{msg_bytes_enqueue = Enqueue} = State)
  when is_integer(Bytes) ->
    State#?MODULE{msg_bytes_enqueue = Enqueue + Bytes};
add_bytes_enqueue(#{size := Bytes}, State) ->
    add_bytes_enqueue(Bytes, State).

%% @private Account for a dropped (dead-lettered/overflowed) message.
add_bytes_drop(Bytes,
               #?MODULE{msg_bytes_enqueue = Enqueue} = State)
  when is_integer(Bytes) ->
    State#?MODULE{msg_bytes_enqueue = Enqueue - Bytes};
add_bytes_drop(#{size := Bytes}, State) ->
    add_bytes_drop(Bytes, State).

%% @private Move a message's bytes from the "ready" to the "checked out"
%% bucket on delivery.
add_bytes_checkout(Bytes,
                   #?MODULE{msg_bytes_checkout = Checkout,
                            msg_bytes_enqueue = Enqueue} = State)
  when is_integer(Bytes) ->
    State#?MODULE{msg_bytes_checkout = Checkout + Bytes,
                  msg_bytes_enqueue = Enqueue - Bytes};
add_bytes_checkout(#{size := Bytes}, State) ->
    add_bytes_checkout(Bytes, State).

%% @private Release a settled message's bytes from the checked-out bucket.
add_bytes_settle(Bytes,
                 #?MODULE{msg_bytes_checkout = Checkout} = State)
  when is_integer(Bytes) ->
    State#?MODULE{msg_bytes_checkout = Checkout - Bytes};
add_bytes_settle(#{size := Bytes}, State) ->
    add_bytes_settle(Bytes, State).

%% @private Move a returned message's bytes back from "checked out" to
%% "ready".
add_bytes_return(Bytes,
                 #?MODULE{msg_bytes_checkout = Checkout,
                          msg_bytes_enqueue = Enqueue} = State)
  when is_integer(Bytes) ->
    State#?MODULE{msg_bytes_checkout = Checkout - Bytes,
                  msg_bytes_enqueue = Enqueue + Bytes};
add_bytes_return(#{size := Bytes}, State) ->
    add_bytes_return(Bytes, State).

%% @private Track one more ready message whose payload is held in memory.
add_in_memory_counts(Bytes,
                     #?MODULE{msg_bytes_in_memory = InMemoryBytes,
                              msgs_ready_in_memory = InMemoryCount} = State)
  when is_integer(Bytes) ->
    State#?MODULE{msg_bytes_in_memory = InMemoryBytes + Bytes,
                  msgs_ready_in_memory = InMemoryCount + 1};
add_in_memory_counts(#{size := Bytes}, State) ->
    add_in_memory_counts(Bytes, State).

%% @private Inverse of add_in_memory_counts/2.
subtract_in_memory_counts(Bytes,
                          #?MODULE{msg_bytes_in_memory = InMemoryBytes,
                                   msgs_ready_in_memory = InMemoryCount} = State)
  when is_integer(Bytes) ->
    State#?MODULE{msg_bytes_in_memory = InMemoryBytes - Bytes,
                  msgs_ready_in_memory = InMemoryCount - 1};
subtract_in_memory_counts(#{size := Bytes}, State) ->
    subtract_in_memory_counts(Bytes, State).

%% @private Size in bytes of a raw message or message marker.
message_size(#basic_message{content = Content}) ->
    #content{payload_fragments_rev = PFR} = Content,
    iolist_size(PFR);
message_size({'$prefix_msg', H}) ->
    get_size_from_header(H);
message_size({'$empty_msg', H}) ->
    get_size_from_header(H);
message_size(B) when is_binary(B) ->
    byte_size(B);
message_size(Msg) ->
    %% probably only hit this for testing so ok to use erts_debug
    erts_debug:size(Msg).

%% @private Extract the size from either header form (integer or map).
get_size_from_header(Size) when is_integer(Size) ->
    Size;
get_size_from_header(#{size := B}) ->
    B.


%% @private All distinct nodes hosting a consumer, enqueuer or waiting
%% consumer of this queue. A map is used as a set to deduplicate.
all_nodes(#?MODULE{consumers = Cons0,
                   enqueuers = Enqs0,
                   waiting_consumers = WaitingConsumers0}) ->
    Nodes0 = maps:fold(fun({_, P}, _, Acc) ->
                               Acc#{node(P) => ok}
                       end, #{}, Cons0),
    Nodes1 = maps:fold(fun(P, _, Acc) ->
                               Acc#{node(P) => ok}
                       end, Nodes0, Enqs0),
    maps:keys(
      lists:foldl(fun({{_, P}, _}, Acc) ->
                          Acc#{node(P) => ok}
                  end, Nodes1, WaitingConsumers0)).

%% @private All consumer/enqueuer/waiting-consumer pids residing on `Node'.
all_pids_for(Node, #?MODULE{consumers = Cons0,
                            enqueuers = Enqs0,
                            waiting_consumers = WaitingConsumers0}) ->
    Cons = maps:fold(fun({_, P}, _, Acc)
                           when node(P) =:= Node ->
                             [P | Acc];
                         (_, _, Acc) -> Acc
                     end, [], Cons0),
    Enqs = maps:fold(fun(P, _, Acc)
                           when node(P) =:= Node ->
                             [P | Acc];
                         (_, _, Acc) -> Acc
                     end, Cons, Enqs0),
    lists:foldl(fun({{_, P}, _}, Acc)
                      when node(P) =:= Node ->
                        [P | Acc];
                    (_, Acc) -> Acc
                end, Enqs, WaitingConsumers0).

%% @private Like all_pids_for/2 but restricted to entities currently in the
%% `suspected_down' status.
suspected_pids_for(Node, #?MODULE{consumers = Cons0,
                                  enqueuers = Enqs0,
                                  waiting_consumers = WaitingConsumers0}) ->
    Cons = maps:fold(fun({_, P}, #consumer{status = suspected_down}, Acc)
                           when node(P) =:= Node ->
                             [P | Acc];
                         (_, _, Acc) -> Acc
                     end, [], Cons0),
    Enqs = maps:fold(fun(P, #enqueuer{status = suspected_down}, Acc)
                           when node(P) =:= Node ->
                             [P | Acc];
                         (_, _, Acc) -> Acc
                     end, Cons, Enqs0),
    lists:foldl(fun({{_, P},
                     #consumer{status = suspected_down}}, Acc)
                      when node(P) =:= Node ->
                        [P | Acc];
                    (_, Acc) -> Acc
                end, Enqs, WaitingConsumers0).

%% @private A queue expires when an `expires' interval is configured, the
%% inactivity interval has elapsed and it has no consumer that is not
%% suspected down. Returns false when expiry is not configured or the queue
%% was never active.
is_expired(Ts, #?MODULE{cfg = #cfg{expires = Expires},
                        last_active = LastActive,
                        consumers = Consumers})
  when is_number(LastActive) andalso is_number(Expires) ->
    %% TODO: should it be active consumers?
    Active = maps:filter(fun (_, #consumer{status = suspected_down}) ->
                                 false;
                             (_, _) ->
                                 true
                         end, Consumers),

    Ts > (LastActive + Expires) andalso maps:size(Active) == 0;
is_expired(_Ts, _State) ->
    false.
+ +get_priority_from_args(#{args := Args}) -> + case rabbit_misc:table_lookup(Args, <<"x-priority">>) of + {_Key, Value} -> + Value; + _ -> 0 + end; +get_priority_from_args(_) -> + 0. diff --git a/deps/rabbit/src/rabbit_fifo.hrl b/deps/rabbit/src/rabbit_fifo.hrl new file mode 100644 index 0000000000..a63483becd --- /dev/null +++ b/deps/rabbit/src/rabbit_fifo.hrl @@ -0,0 +1,210 @@ + +-type option(T) :: undefined | T. + +-type raw_msg() :: term(). +%% The raw message. It is opaque to rabbit_fifo. + +-type msg_in_id() :: non_neg_integer(). +% a queue scoped monotonically incrementing integer used to enforce order +% in the unassigned messages map + +-type msg_id() :: non_neg_integer(). +%% A consumer-scoped monotonically incrementing integer included with a +%% {@link delivery/0.}. Used to settle deliveries using +%% {@link rabbit_fifo_client:settle/3.} + +-type msg_seqno() :: non_neg_integer(). +%% A sender process scoped monotonically incrementing integer included +%% in enqueue messages. Used to ensure ordering of messages send from the +%% same process + +-type msg_header() :: msg_size() | + #{size := msg_size(), + delivery_count => non_neg_integer()}. +%% The message header: +%% delivery_count: the number of unsuccessful delivery attempts. +%% A non-zero value indicates a previous attempt. +%% If it only contains the size it can be condensed to an integer only + +-type msg() :: {msg_header(), raw_msg()}. +%% message with a header map. + +-type msg_size() :: non_neg_integer(). +%% the size in bytes of the msg payload + +-type indexed_msg() :: {ra:index(), msg()}. + +-type prefix_msg() :: {'$prefix_msg', msg_header()}. + +-type delivery_msg() :: {msg_id(), msg()}. +%% A tuple consisting of the message id and the headered message. + +-type consumer_tag() :: binary(). +%% An arbitrary binary tag used to distinguish between different consumers +%% set up by the same process. 
%% See: {@link rabbit_fifo_client:checkout/3.}

-type delivery() :: {delivery, consumer_tag(), [delivery_msg()]}.
%% Represents the delivery of one or more rabbit_fifo messages.

-type consumer_id() :: {consumer_tag(), pid()}.
%% The entity that receives messages. Uniquely identifies a consumer.

-type credit_mode() :: simple_prefetch | credited.
%% determines how credit is replenished

-type checkout_spec() :: {once | auto, Num :: non_neg_integer(),
                          credit_mode()} |
                         {dequeue, settled | unsettled} |
                         cancel.

-type consumer_meta() :: #{ack => boolean(),
                           username => binary(),
                           prefetch => non_neg_integer(),
                           args => list()}.
%% static meta data associated with a consumer

-type applied_mfa() :: {module(), atom(), list()}.
% represents a partially applied module call

-define(RELEASE_CURSOR_EVERY, 2048).
-define(RELEASE_CURSOR_EVERY_MAX, 3200000).
-define(USE_AVG_HALF_LIFE, 10000.0).
%% an average QQ without any message uses about 100KB so setting this limit
%% to ~10 times that should be relatively safe.
-define(GC_MEM_LIMIT_B, 2000000).

-define(MB, 1048576).
-define(LOW_LIMIT, 0.8).

-record(consumer,
        {meta = #{} :: consumer_meta(),
         checked_out = #{} :: #{msg_id() => {msg_in_id(), indexed_msg()}},
         next_msg_id = 0 :: msg_id(), % part of snapshot data
         %% max number of messages that can be sent
         %% decremented for each delivery
         %% NOTE: fixed `::' typespec separator here (was a single `:')
         credit = 0 :: non_neg_integer(),
         %% total number of checked out messages - ever
         %% incremented for each delivery
         delivery_count = 0 :: non_neg_integer(),
         %% the mode of how credit is incremented
         %% simple_prefetch: credit is re-filled as deliveries are settled
         %% or returned.
         %% credited: credit can only be changed by receiving a consumer_credit
         %% command: `{consumer_credit, ReceiverDeliveryCount, Credit}'
         credit_mode = simple_prefetch :: credit_mode(), % part of snapshot data
         lifetime = once :: once | auto,
         status = up :: up | suspected_down | cancelled,
         priority = 0 :: non_neg_integer()
        }).

-type consumer() :: #consumer{}.

-type consumer_strategy() :: competing | single_active.

-type milliseconds() :: non_neg_integer().

-record(enqueuer,
        {next_seqno = 1 :: msg_seqno(),
         % out of order enqueues - sorted list
         pending = [] :: [{msg_seqno(), ra:index(), raw_msg()}],
         status = up :: up |
                        suspected_down,
         %% it is useful to have a record of when this was blocked
         %% so that we can retry sending the block effect if
         %% the publisher did not receive the initial one
         blocked :: undefined | ra:index(),
         unused_1,
         unused_2
        }).

-record(cfg,
        {name :: atom(),
         resource :: rabbit_types:r('queue'),
         release_cursor_interval :: option({non_neg_integer(), non_neg_integer()}),
         dead_letter_handler :: option(applied_mfa()),
         become_leader_handler :: option(applied_mfa()),
         overflow_strategy = drop_head :: drop_head | reject_publish,
         max_length :: option(non_neg_integer()),
         max_bytes :: option(non_neg_integer()),
         %% whether single active consumer is on or not for this queue
         consumer_strategy = competing :: consumer_strategy(),
         %% the maximum number of unsuccessful delivery attempts permitted
         delivery_limit :: option(non_neg_integer()),
         max_in_memory_length :: option(non_neg_integer()),
         max_in_memory_bytes :: option(non_neg_integer()),
         expires :: undefined | milliseconds(),
         unused_1,
         unused_2
        }).

-type prefix_msgs() :: {list(), list()} |
                       {non_neg_integer(), list(),
                        non_neg_integer(), list()}.
+ +-record(rabbit_fifo, + {cfg :: #cfg{}, + % unassigned messages + messages = lqueue:new() :: lqueue:lqueue({msg_in_id(), indexed_msg()}), + % defines the next message id + next_msg_num = 1 :: msg_in_id(), + % queue of returned msg_in_ids - when checking out it picks from + returns = lqueue:new() :: lqueue:lqueue(prefix_msg() | + {msg_in_id(), indexed_msg()}), + % a counter of enqueues - used to trigger shadow copy points + enqueue_count = 0 :: non_neg_integer(), + % a map containing all the live processes that have ever enqueued + % a message to this queue as well as a cached value of the smallest + % ra_index of all pending enqueues + enqueuers = #{} :: #{pid() => #enqueuer{}}, + % master index of all enqueue raft indexes including pending + % enqueues + % rabbit_fifo_index can be slow when calculating the smallest + % index when there are large gaps but should be faster than gb_trees + % for normal appending operations as it's backed by a map + ra_indexes = rabbit_fifo_index:empty() :: rabbit_fifo_index:state(), + release_cursors = lqueue:new() :: lqueue:lqueue({release_cursor, + ra:index(), #rabbit_fifo{}}), + % consumers need to reflect consumer state at time of snapshot + % needs to be part of snapshot + consumers = #{} :: #{consumer_id() => #consumer{}}, + % consumers that require further service are queued here + % needs to be part of snapshot + service_queue = priority_queue:new() :: priority_queue:q(), + %% This is a special field that is only used for snapshots + %% It represents the queued messages at the time the + %% dehydrated snapshot state was cached. + %% As release_cursors are only emitted for raft indexes where all + %% prior messages no longer contribute to the current state we can + %% replace all message payloads with their sizes (to be used for + %% overflow calculations). + %% This is done so that consumers are still served in a deterministic + %% order on recovery. 
+ prefix_msgs = {0, [], 0, []} :: prefix_msgs(), + msg_bytes_enqueue = 0 :: non_neg_integer(), + msg_bytes_checkout = 0 :: non_neg_integer(), + %% waiting consumers, one is picked active consumer is cancelled or dies + %% used only when single active consumer is on + waiting_consumers = [] :: [{consumer_id(), consumer()}], + msg_bytes_in_memory = 0 :: non_neg_integer(), + msgs_ready_in_memory = 0 :: non_neg_integer(), + last_active :: undefined | non_neg_integer(), + unused_1, + unused_2 + }). + +-type config() :: #{name := atom(), + queue_resource := rabbit_types:r('queue'), + dead_letter_handler => applied_mfa(), + become_leader_handler => applied_mfa(), + release_cursor_interval => non_neg_integer(), + max_length => non_neg_integer(), + max_bytes => non_neg_integer(), + max_in_memory_length => non_neg_integer(), + max_in_memory_bytes => non_neg_integer(), + overflow_strategy => drop_head | reject_publish, + single_active_consumer_on => boolean(), + delivery_limit => non_neg_integer(), + expires => non_neg_integer(), + created => non_neg_integer() + }. diff --git a/deps/rabbit/src/rabbit_fifo_client.erl b/deps/rabbit/src/rabbit_fifo_client.erl new file mode 100644 index 0000000000..3990222b15 --- /dev/null +++ b/deps/rabbit/src/rabbit_fifo_client.erl @@ -0,0 +1,888 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +%% @doc Provides an easy to consume API for interacting with the {@link rabbit_fifo.} +%% state machine implementation running inside a `ra' raft system. +%% +%% Handles command tracking and other non-functional concerns. +-module(rabbit_fifo_client). 

-export([
         init/2,
         init/3,
         init/5,
         checkout/5,
         cancel_checkout/2,
         enqueue/2,
         enqueue/3,
         dequeue/3,
         settle/3,
         return/3,
         discard/3,
         credit/4,
         handle_ra_event/3,
         untracked_enqueue/2,
         purge/1,
         cluster_name/1,
         update_machine_state/2,
         pending_size/1,
         stat/1,
         stat/2
         ]).

-include_lib("rabbit_common/include/rabbit.hrl").

%% default maximum number of unapplied (in-flight) commands before the
%% client is considered slow - see the `slow' field below
-define(SOFT_LIMIT, 32).
-define(TIMER_TIME, 10000).

-type seq() :: non_neg_integer().
%% last_applied is initialised to -1
-type maybe_seq() :: integer().
-type action() :: {send_credit_reply, Available :: non_neg_integer()} |
                  {send_drained, CTagCredit ::
                   {rabbit_fifo:consumer_tag(), non_neg_integer()}}.
-type actions() :: [action()].

-type cluster_name() :: rabbit_types:r(queue).

%% per-consumer-tag delivery tracking (client side)
-record(consumer, {last_msg_id :: seq() | -1,
                   ack = false :: boolean(),
                   delivery_count = 0 :: non_neg_integer()}).

%% immutable session configuration
-record(cfg, {cluster_name :: cluster_name(),
              servers = [] :: [ra:server_id()],
              soft_limit = ?SOFT_LIMIT :: non_neg_integer(),
              block_handler = fun() -> ok end :: fun(() -> term()),
              unblock_handler = fun() -> ok end :: fun(() -> ok),
              timeout :: non_neg_integer(),
              version = 0 :: non_neg_integer()}).

%% mutable session state threaded through every API call
-record(state, {cfg :: #cfg{},
                leader :: undefined | ra:server_id(),
                queue_status :: undefined | go | reject_publish,
                next_seq = 0 :: seq(),
                %% Last applied is initialised to -1 to note that no command has yet been
                %% applied, but allowing to resend messages if the first ones on the sequence
                %% are lost (messages are sent from last_applied + 1)
                last_applied = -1 :: maybe_seq(),
                next_enqueue_seq = 1 :: seq(),
                %% indicates that we've exceeded the soft limit
                slow = false :: boolean(),
                unsent_commands = #{} :: #{rabbit_fifo:consumer_id() =>
                                           {[seq()], [seq()], [seq()]}},
                pending = #{} :: #{seq() =>
                                   {term(), rabbit_fifo:command()}},
                consumer_deliveries = #{} :: #{rabbit_fifo:consumer_tag() =>
                                               #consumer{}},
                timer_state :: term()
               }).

-opaque state() :: #state{}.

-export_type([state/0,
              actions/0]).

%% @doc Create the initial state for a new rabbit_fifo session. A state is
%% needed to interact with a rabbit_fifo queue using {@module}.
%% @param ClusterName the id of the cluster to interact with
%% @param Servers the known servers of the queue; if the current leader is
%% known, ensure the leader node is at the head of the list.
-spec init(cluster_name(), [ra:server_id()]) -> state().
init(ClusterName, Servers) ->
    init(ClusterName, Servers, ?SOFT_LIMIT).

%% @doc As {@link init/2} but with an explicit soft limit defining the max
%% number of pending (unapplied) commands before flow control kicks in.
-spec init(cluster_name(), [ra:server_id()], non_neg_integer()) -> state().
init(ClusterName = #resource{}, Servers, SoftLimit) ->
    %% net_ticktime is expressed in seconds; allow 5s of slack on top
    TickSeconds = application:get_env(kernel, net_ticktime, 60) + 5,
    #state{cfg = #cfg{cluster_name = ClusterName,
                      servers = Servers,
                      soft_limit = SoftLimit,
                      timeout = TickSeconds * 1000}}.

%% @doc As {@link init/3} but additionally installs handlers invoked when
%% the client enters (BlockFun) and leaves (UnblockFun) flow control.
-spec init(cluster_name(), [ra:server_id()], non_neg_integer(), fun(() -> ok),
           fun(() -> ok)) -> state().
init(ClusterName = #resource{}, Servers, SoftLimit, BlockFun, UnblockFun) ->
    %% net ticktime is in seconds
    TickSeconds = application:get_env(kernel, net_ticktime, 60) + 5,
    #state{cfg = #cfg{cluster_name = ClusterName,
                      servers = Servers,
                      block_handler = BlockFun,
                      unblock_handler = UnblockFun,
                      soft_limit = SoftLimit,
                      timeout = TickSeconds * 1000}}.


%% @doc Enqueues a message.
%% @param Correlation an arbitrary erlang term used to correlate this
%% command when it has been applied.
%% @param Msg an arbitrary erlang term representing the message.
%% @param State the current {@module} state.
%% @returns
%% `{ok | slow, State}' if the command was successfully sent; `slow' means
%% the pending-command limit is approaching and the caller should reduce
%% its sending rate. `{reject_publish, State}' means the queue refused the
%% message. {@module} assigns a sequence number to every raft command it
%% issues; applied sequence numbers come back via
%% {@link handle_ra_event/3. handle_ra_event/3}.
-spec enqueue(Correlation :: term(), Msg :: term(), State :: state()) ->
    {ok | slow | reject_publish, state()}.
enqueue(Correlation, Msg,
        #state{queue_status = undefined,
               next_enqueue_seq = 1,
               cfg = #cfg{timeout = Timeout}} = State0) ->
    %% very first enqueue: find out which machine version the leader runs
    {_, Node} = Server = pick_server(State0),
    case rpc:call(Node, ra_machine, version, [{machine, rabbit_fifo, #{}}]) of
        0 ->
            %% the leader is running the old version, so the enqueuer
            %% session cannot be initialised safely; fall back on the old
            %% behaviour
            enqueue(Correlation, Msg, State0#state{queue_status = go});
        1 ->
            %% the leader runs the new version: synchronously register this
            %% process as an enqueuer
            Reg = rabbit_fifo:make_register_enqueuer(self()),
            case ra:process_command(Server, Reg, Timeout) of
                {ok, reject_publish, _} ->
                    {reject_publish,
                     State0#state{queue_status = reject_publish}};
                {ok, ok, _} ->
                    enqueue(Correlation, Msg, State0#state{queue_status = go});
                {timeout, _} ->
                    %% on timeout it is safer to reject the message than to
                    %% remain uncertain
                    {reject_publish, State0};
                Err ->
                    exit(Err)
            end;
        {badrpc, nodedown} ->
            {reject_publish, State0}
    end;
enqueue(_Correlation, _Msg,
        #state{queue_status = reject_publish,
               cfg = #cfg{}} = State) ->
    {reject_publish, State};
enqueue(Correlation, Msg,
        #state{slow = WasSlow,
               queue_status = go,
               cfg = #cfg{block_handler = BlockFun}} = State0) ->
    Server = pick_server(State0),
    {Seq, State1} = next_enqueue_seq(State0),
    %% by default there is no correlation id
    Cmd = rabbit_fifo:make_enqueue(self(), Seq, Msg),
    case send_command(Server, Correlation, Cmd, low, State1) of
        {slow, State} when not WasSlow ->
            %% just crossed the soft limit: notify the owner and arm the
            %% retry timer
            BlockFun(),
            {slow, set_timer(State)};
        Any ->
            Any
    end.

%% @doc Enqueues a message without a correlation term.
%% @param Msg an arbitrary erlang term representing the message.
%% @param State the current {@module} state.
%% @returns see {@link enqueue/3}.
-spec enqueue(Msg :: term(), State :: state()) ->
    {ok | slow | reject_publish, state()}.
enqueue(Msg, State) ->
    enqueue(undefined, Msg, State).

%% @doc Dequeue a message from the queue.
%%
%% This is a synchronous call, i.e. it blocks until the command has been
%% accepted by the ra process or it times out.
%%
%% @param ConsumerTag a unique tag to identify this particular consumer.
%% @param Settlement either `settled' or `unsettled'; `settled' means no
%% further settlement needs to be done.
%% @param State the {@module} state.
%%
%% @returns `{ok, MsgsReady, Delivery, State}', `{empty, State}' or
%% `{error | timeout, term()}'
-spec dequeue(rabbit_fifo:consumer_tag(),
              Settlement :: settled | unsettled, state()) ->
    {ok, non_neg_integer(), term(), non_neg_integer()}
    | {empty, state()} | {error | timeout, term()}.
dequeue(ConsumerTag, Settlement,
        #state{cfg = #cfg{timeout = Timeout,
                          cluster_name = QName}} = State0) ->
    Server = pick_server(State0),
    ConsumerId = consumer_id(ConsumerTag),
    Checkout = rabbit_fifo:make_checkout(ConsumerId,
                                         {dequeue, Settlement},
                                         #{}),
    case ra:process_command(Server, Checkout, Timeout) of
        {ok, {dequeue, empty}, Leader} ->
            {empty, State0#state{leader = Leader}};
        {ok, {dequeue, {MsgId, {MsgHeader, Msg0}}, MsgsReady}, Leader} ->
            Count = case MsgHeader of
                        #{delivery_count := C} -> C;
                        _ -> 0
                    end,
            IsDelivered = Count > 0,
            Msg = add_delivery_count_header(Msg0, Count),
            {ok, MsgsReady,
             {QName, qref(Leader), MsgId, IsDelivered, Msg},
             State0#state{leader = Leader}};
        {ok, {error, _} = Err, _Leader} ->
            Err;
        Err ->
            Err
    end.

%% Stamp the x-delivery-count header on a redelivered basic message;
%% anything that is not a #basic_message{} passes through untouched.
add_delivery_count_header(#basic_message{} = Msg, Count)
  when is_integer(Count) ->
    rabbit_basic:add_header(<<"x-delivery-count">>, long, Count, Msg);
add_delivery_count_header(Msg, _Count) ->
    Msg.


%% @doc Settle messages: permanently removes them from the queue.
%% @param ConsumerTag the tag uniquely identifying the consumer.
%% @param MsgIds the message ids received with the
%% {@link rabbit_fifo:delivery/0.}
%% @param State the {@module} state
%% @returns `{State, []}'; the command is sent immediately when not in the
%% slow state, and stashed for later flushing otherwise.
-spec settle(rabbit_fifo:consumer_tag(), [rabbit_fifo:msg_id()], state()) ->
    {state(), list()}.
settle(ConsumerTag, [_|_] = MsgIds, #state{slow = false} = State0) ->
    Server = pick_server(State0),
    Cmd = rabbit_fifo:make_settle(consumer_id(ConsumerTag), MsgIds),
    %% settles are not subject to flow control: fold slow into ok here
    {_, State} = send_command(Server, undefined, Cmd, normal, State0),
    {State, []};
settle(ConsumerTag, [_|_] = MsgIds,
       #state{unsent_commands = Unsent0} = State0) ->
    ConsumerId = consumer_id(ConsumerTag),
    %% soft limit reached: stash the command; it is flushed once enough
    %% applied notifications have been seen (see handle_ra_event/3)
    Unsent = maps:update_with(ConsumerId,
                              fun ({Settles, Returns, Discards}) ->
                                      {Settles ++ MsgIds, Returns, Discards}
                              end, {MsgIds, [], []}, Unsent0),
    {State0#state{unsent_commands = Unsent}, []}.

%% @doc Return messages to the queue.
%% @param ConsumerTag the tag uniquely identifying the consumer.
%% @param MsgIds the message ids to return, as received from
%% {@link rabbit_fifo:delivery/0.}
%% @param State the {@module} state
%% @returns `{State, []}'; the command is sent immediately when not in the
%% slow state, and stashed for later flushing otherwise.
-spec return(rabbit_fifo:consumer_tag(), [rabbit_fifo:msg_id()], state()) ->
    {state(), list()}.
return(ConsumerTag, [_|_] = MsgIds, #state{slow = false} = State0) ->
    Server = pick_server(State0),
    %% TODO: make rabbit_fifo return support lists of message ids
    Cmd = rabbit_fifo:make_return(consumer_id(ConsumerTag), MsgIds),
    {_, State} = send_command(Server, undefined, Cmd, normal, State0),
    {State, []};
return(ConsumerTag, [_|_] = MsgIds,
       #state{unsent_commands = Unsent0} = State0) ->
    ConsumerId = consumer_id(ConsumerTag),
    %% soft limit reached: stash the command; it is flushed once enough
    %% applied notifications have been seen (see handle_ra_event/3)
    Unsent = maps:update_with(ConsumerId,
                              fun ({Settles, Returns, Discards}) ->
                                      {Settles, Returns ++ MsgIds, Discards}
                              end, {[], MsgIds, []}, Unsent0),
    {State0#state{unsent_commands = Unsent}, []}.

%% @doc Discards checked out messages.
%% If the queue has a dead_letter_handler configured this will be called.
%% @param ConsumerTag the tag uniquely identifying the consumer.
%% @param MsgIds the message ids to discard,
%% from {@link rabbit_fifo:delivery/0.}
%% @param State the {@module} state
%% @returns `{State, []}'; the command is sent immediately when not in the
%% slow state, and stashed for later flushing otherwise.
-spec discard(rabbit_fifo:consumer_tag(), [rabbit_fifo:msg_id()], state()) ->
    {state(), list()}.
discard(ConsumerTag, [_|_] = MsgIds, #state{slow = false} = State0) ->
    Server = pick_server(State0),
    Cmd = rabbit_fifo:make_discard(consumer_id(ConsumerTag), MsgIds),
    %% discards are not subject to flow control: fold slow into ok here
    {_, State} = send_command(Server, undefined, Cmd, normal, State0),
    {State, []};
discard(ConsumerTag, [_|_] = MsgIds,
        #state{unsent_commands = Unsent0} = State0) ->
    ConsumerId = consumer_id(ConsumerTag),
    %% soft limit reached: stash the command; it is flushed once enough
    %% applied notifications have been seen (see handle_ra_event/3)
    Unsent = maps:update_with(ConsumerId,
                              fun ({Settles, Returns, Discards}) ->
                                      {Settles, Returns, Discards ++ MsgIds}
                              end, {[], [], MsgIds}, Unsent0),
    {State0#state{unsent_commands = Unsent}, []}.

%% @doc Register with the rabbit_fifo queue to "checkout" messages as they
%% become available.
%%
%% This is a synchronous call, i.e. it blocks until the command has been
%% accepted by the ra process or it times out.
%%
%% @param ConsumerTag a unique tag to identify this particular consumer.
%% @param NumUnsettled the maximum number of in-flight messages. Once this
%% number of messages has been received but not settled no further messages
%% will be delivered to the consumer.
%% @param CreditMode the credit mode to use for the checkout:
%% simple_prefetch: credit is auto topped up as deliveries are settled;
%% credited: credit is only increased by sending credit to the queue.
%% @param Meta consumer metadata; the `ack' key (default true) controls
%% whether deliveries require manual acknowledgement.
%% @param State the {@module} state.
%%
%% @returns `{ok, State}' or `{error | timeout, term()}'
-spec checkout(rabbit_fifo:consumer_tag(),
               NumUnsettled :: non_neg_integer(),
               CreditMode :: rabbit_fifo:credit_mode(),
               Meta :: rabbit_fifo:consumer_meta(),
               state()) -> {ok, state()} | {error | timeout, term()}.
checkout(ConsumerTag, NumUnsettled, CreditMode, Meta,
         #state{consumer_deliveries = CDels0} = State0) ->
    Servers = sorted_servers(State0),
    ConsumerId = {ConsumerTag, self()},
    Cmd = rabbit_fifo:make_checkout(ConsumerId,
                                    {auto, NumUnsettled, CreditMode},
                                    Meta),
    %% whether the consumer manually acknowledges; defaults to true
    Ack = maps:get(ack, Meta, true),
    SDels = maps:update_with(ConsumerTag,
                             fun (C) -> C#consumer{ack = Ack} end,
                             #consumer{last_msg_id = -1,
                                       ack = Ack},
                             CDels0),
    try_process_command(Servers, Cmd,
                        State0#state{consumer_deliveries = SDels}).

%% @doc Provide credit to the queue.
%%
%% This only has an effect if the consumer uses credit mode: credited.
%% @param ConsumerTag a unique tag to identify this particular consumer.
%% @param Credit the amount of credit to provide to the queue
%% @param Drain tells the queue to use up any credit that cannot be
%% immediately fulfilled (i.e. there are not enough messages on queue to
%% use up all the provided credit).
-spec credit(rabbit_fifo:consumer_tag(),
             Credit :: non_neg_integer(),
             Drain :: boolean(),
             state()) ->
    {state(), actions()}.
credit(ConsumerTag, Credit, Drain,
       #state{consumer_deliveries = CDels} = State0) ->
    ConsumerId = consumer_id(ConsumerTag),
    %% the last received msg id + 1 doubles as the delivery count
    %% (msg ids are 0-indexed)
    C = maps:get(ConsumerTag, CDels, #consumer{last_msg_id = -1}),
    Server = pick_server(State0),
    Cmd = rabbit_fifo:make_credit(ConsumerId, Credit,
                                  C#consumer.last_msg_id + 1, Drain),
    %% credit commands are not flow controlled: fold slow into ok here
    {_, State} = send_command(Server, undefined, Cmd, normal, State0),
    {State, []}.

%% @doc Cancels a checkout with the rabbit_fifo queue for the consumer tag.
%%
%% This is a synchronous call, i.e. it blocks until the command has been
%% accepted by the ra process or it times out.
%%
%% @param ConsumerTag a unique tag to identify this particular consumer.
%% @param State the {@module} state.
%%
%% @returns `{ok, State}' or `{error | timeout, term()}'
-spec cancel_checkout(rabbit_fifo:consumer_tag(), state()) ->
    {ok, state()} | {error | timeout, term()}.
cancel_checkout(ConsumerTag, #state{consumer_deliveries = CDels} = State0) ->
    Servers = sorted_servers(State0),
    ConsumerId = {ConsumerTag, self()},
    Cmd = rabbit_fifo:make_checkout(ConsumerId, cancel, #{}),
    State = State0#state{consumer_deliveries = maps:remove(ConsumerTag, CDels)},
    try_process_command(Servers, Cmd, State).

%% @doc Purges all the messages from a rabbit_fifo queue and returns the
%% number of messages purged.
-spec purge(ra:server_id()) ->
    {ok, non_neg_integer()} | {error | timeout, term()}.
purge(Server) ->
    case ra:process_command(Server, rabbit_fifo:make_purge()) of
        {ok, {purge, Reply}, _} ->
            {ok, Reply};
        Err ->
            Err
    end.

%% @doc Number of commands sent but not yet applied.
-spec pending_size(state()) -> non_neg_integer().
pending_size(#state{pending = Pend}) ->
    maps:size(Pend).

%% @doc Query ready/checked-out counters from the given server with a
%% short default timeout.
-spec stat(ra:server_id()) ->
    {ok, non_neg_integer(), non_neg_integer()}
    | {error | timeout, term()}.
stat(Leader) ->
    %% short timeout as we don't want to spend too long if it is going to
    %% fail anyway
    stat(Leader, 250).

-spec stat(ra:server_id(), non_neg_integer()) ->
    {ok, non_neg_integer(), non_neg_integer()}
    | {error | timeout, term()}.
stat(Leader, Timeout) ->
    case ra:local_query(Leader, fun rabbit_fifo:query_stat/1, Timeout) of
        {ok, {_, {R, C}}, _} -> {ok, R, C};
        {error, _} = Error   -> Error;
        {timeout, _} = Error -> Error
    end.

%% @doc returns the cluster name
-spec cluster_name(state()) -> cluster_name().
cluster_name(#state{cfg = #cfg{cluster_name = ClusterName}}) ->
    ClusterName.

%% Push a new configuration into the running state machine.
update_machine_state(Server, Conf) ->
    case ra:process_command(Server, rabbit_fifo:make_update_config(Conf)) of
        {ok, ok, _} ->
            ok;
        Err ->
            Err
    end.

%% @doc Handles incoming `ra_events'.
%% Events carry both internal "bookkeeping" events emitted by the `ra'
%% leader as well as `rabbit_fifo' emitted events such as message
%% deliveries. All ra events need to be handled by {@module} to ensure
%% bookkeeping, resends and flow control is correctly handled.
%%
%% If the `ra_event' contains a `rabbit_fifo' generated message it will be
%% returned for further processing.
%%
%% Example:
%%
%% ```
%% receive
%%     {ra_event, From, Evt} ->
%%         case rabbit_fifo_client:handle_ra_event(From, Evt, State0) of
%%             {internal, _Seq, State} -> State;
%%             {{delivery, _ConsumerTag, Msgs}, State} ->
%%                  handle_messages(Msgs),
%%                  ...
%%         end
%% end
%% '''
%%
%% @param From the {@link ra:server_id().} of the sending process.
%% @param Event the body of the `ra_event'.
%% @param State the current {@module} state.
%%
%% @returns
%% `{internal, AppliedCorrelations, State}' if the event contained an
%% internally handled event such as a notification and a correlation was
%% included with the command (e.g. the correlation terms passed to
%% `enqueue/3' are returned here).
%%
%% `{RaFifoEvent, State}' if the event contained a client message generated
%% by the `rabbit_fifo' state machine, such as a delivery:
%% `{delivery, ConsumerTag, [{MsgId, {MsgHeader, Msg}}]}'
%% <li>`ConsumerTag' the binary tag passed to {@link checkout/5.}</li>
%% <li>`MsgId' a consumer scoped monotonically incrementing id that can be
%% used to {@link settle/3.} (roughly: AMQP 0.9.1 ack) messages once
%% finished with them.</li>
%%
%% `eol' when the queue has reached end of life.
-spec handle_ra_event(ra:server_id(), ra_server_proc:ra_event_body(),
                      state()) ->
    {internal, Correlators :: [term()], actions(), state()} |
    {rabbit_fifo:client_msg(), state()} | eol.
handle_ra_event(From, {applied, Seqs},
                #state{cfg = #cfg{cluster_name = QRef,
                                  soft_limit = SftLmt,
                                  unblock_handler = UnblockFun}} = State0) ->
    {Corrs, Actions0, State1} =
        lists:foldl(fun seq_applied/2,
                    {[], [], State0#state{leader = From}},
                    Seqs),
    Actions = case Corrs of
                  [] ->
                      lists:reverse(Actions0);
                  _ ->
                      [{settled, QRef, Corrs} | lists:reverse(Actions0)]
              end,
    case maps:size(State1#state.pending) < SftLmt of
        true when State1#state.slow == true ->
            %% just exited the soft-limit state: flush any stashed commands
            %% and cancel the retry timer.
            %% TODO: strictly the timer should only be cancelled when the
            %% channel exits flow state (which depends on the state of all
            %% queues the channel is interacting with), but the fact the
            %% queue has just applied suggests it is ok to cancel here
            State2 = cancel_timer(State1#state{slow = false,
                                               unsent_commands = #{}}),
            %% build up the list of stashed commands to issue
            Commands =
                maps:fold(
                  fun (Cid, {Settled, Returns, Discards}, Acc) ->
                          add_command(Cid, settle, Settled,
                                      add_command(Cid, return, Returns,
                                                  add_command(Cid, discard,
                                                              Discards, Acc)))
                  end, [], State1#state.unsent_commands),
            Server = pick_server(State2),
            %% send all the settlements, returns and discards
            State = lists:foldl(fun (C, S0) ->
                                        case send_command(Server, undefined,
                                                          C, normal, S0) of
                                            {T, S} when T =/= error ->
                                                S
                                        end
                                end, State2, Commands),
            UnblockFun(),
            {ok, State, Actions};
        _ ->
            {ok, State1, Actions}
    end;
handle_ra_event(From, {machine, {delivery, _ConsumerTag, _} = Del}, State0) ->
    handle_delivery(From, Del, State0);
handle_ra_event(_, {machine, {queue_status, Status}}, #state{} = State) ->
    %% simply record the queue status
    {ok, State#state{queue_status = Status}, []};
handle_ra_event(Leader, {machine, leader_change},
                #state{leader = Leader} = State) ->
    %% leader already known
    {ok, State, []};
handle_ra_event(Leader, {machine, leader_change}, State0) ->
    %% record the new leader and resend any pending commands
    State = resend_all_pending(State0#state{leader = Leader}),
    {ok, State, []};
handle_ra_event(_From, {rejected, {not_leader, undefined, _Seq}}, State0) ->
    %% TODO: how should these be handled? re-sent on timer or try random
    {ok, State0, []};
handle_ra_event(_From, {rejected, {not_leader, Leader, Seq}}, State0) ->
    State = resend(Seq, State0#state{leader = Leader}),
    {ok, State, []};
handle_ra_event(_, timeout, #state{cfg = #cfg{servers = Servers}} = State0) ->
    case find_leader(Servers) of
        undefined ->
            %% still no leader: set the timer again
            {ok, set_timer(State0), []};
        Leader ->
            State = resend_all_pending(State0#state{leader = Leader}),
            {ok, State, []}
    end;
handle_ra_event(_Leader, {machine, eol}, _State0) ->
    eol.

%% @doc Attempts to enqueue a message using cast semantics. This provides
%% no guarantees or retries if the message fails to achieve consensus or if
%% the server sent to happens not to be available. If the message is sent
%% to a follower it will attempt to deliver it to the leader, if known,
%% else it will drop the message.
%%
%% NB: only use this for non-critical enqueues where a full
%% rabbit_fifo_client state cannot be maintained.
%%
%% @param Servers the known servers in the cluster.
%% @param Msg the message to enqueue.
%%
%% @returns `ok'
-spec untracked_enqueue([ra:server_id()], term()) ->
    ok.
untracked_enqueue([Server | _], Msg) ->
    Cmd = rabbit_fifo:make_enqueue(undefined, undefined, Msg),
    ok = ra:pipeline_command(Server, Cmd),
    ok.

%% Internal

%% Try each server in turn until one accepts the command, returning the
%% last error if none does.
try_process_command([Server | Rem], Cmd, State) ->
    case ra:process_command(Server, Cmd, 30000) of
        {ok, _, Leader} ->
            {ok, State#state{leader = Leader}};
        Err when Rem =:= [] ->
            Err;
        _ ->
            try_process_command(Rem, Cmd, State)
    end.

%% Fold over an applied {Seq, MaybeAction} pair: collect correlation terms
%% and actions, and resend any commands in the gap since last_applied.
seq_applied({Seq, MaybeAction},
            {Corrs, Actions0, #state{last_applied = Last} = State0})
  when Seq > Last ->
    State1 = do_resends(Last + 1, Seq - 1, State0),
    {Actions, State} = maybe_add_action(MaybeAction, Actions0, State1),
    case maps:take(Seq, State#state.pending) of
        {{undefined, _}, Pending} ->
            %% no correlation term was attached to this command
            {Corrs, Actions, State#state{pending = Pending,
                                         last_applied = Seq}};
        {{Corr, _}, Pending} ->
            {[Corr | Corrs], Actions, State#state{pending = Pending,
                                                  last_applied = Seq}};
        error ->
            %% must have already been resent or removed for some other
            %% reason; still need to update last_applied or we may
            %% inadvertently resend stuff later
            {Corrs, Actions, State#state{last_applied = Seq}}
    end;
seq_applied(_Seq, Acc) ->
    Acc.

%% Accumulate machine actions attached to applied commands.
maybe_add_action(ok, Acc, State) ->
    {Acc, State};
maybe_add_action({multi, Actions}, Acc0, State0) ->
    lists:foldl(fun (Act, {Acc, State}) ->
                        maybe_add_action(Act, Acc, State)
                end, {Acc0, State0}, Actions);
maybe_add_action({send_drained, {Tag, Credit}} = Action, Acc,
                 #state{consumer_deliveries = CDels} = State) ->
    %% add the drained credit to the consumer's delivery count
    C = maps:get(Tag, CDels),
    {[Action | Acc],
     State#state{consumer_deliveries =
                     update_consumer(Tag, C#consumer.last_msg_id,
                                     Credit, C, CDels)}};
maybe_add_action(Action, Acc, State) ->
    %% anything else is assumed to be an action
    {[Action | Acc], State}.

%% Resend every pending command in the inclusive range [From, To].
do_resends(From, To, State) when From =< To ->
    lists:foldl(fun resend/2, State, lists:seq(From, To));
do_resends(_, _, State) ->
    State.

%% Resend a single pending command under a fresh sequence number.
resend(OldSeq, #state{pending = Pending0, leader = Leader} = State) ->
    case maps:take(OldSeq, Pending0) of
        {{Corr, Cmd}, Pending} ->
            %% resends aren't subject to flow control here
            resend_command(Leader, Corr, Cmd, State#state{pending = Pending});
        error ->
            State
    end.

%% Resend all pending commands in ascending sequence order.
resend_all_pending(#state{pending = Pend} = State) ->
    Seqs = lists:sort(maps:keys(Pend)),
    lists:foldl(fun resend/2, State, Seqs).

%% When Ack is false the client must settle deliveries on behalf of the
%% consumer before passing them on.
maybe_auto_ack(true, Deliver, State0) ->
    %% manual ack is enabled
    {ok, State0, [Deliver]};
maybe_auto_ack(false, {deliver, Tag, _Ack, Msgs} = Deliver, State0) ->
    %% we have to auto ack these deliveries
    MsgIds = [I || {_, _, I, _, _} <- Msgs],
    {State, Actions} = settle(Tag, MsgIds, State0),
    {ok, State, [Deliver] ++ Actions}.

%% Process a machine-emitted delivery, recovering any messages missing
%% from the consumer's contiguous msg-id sequence and dropping duplicates.
handle_delivery(Leader, {delivery, Tag, [{FstId, _} | _] = IdMsgs},
                #state{cfg = #cfg{cluster_name = QName},
                       consumer_deliveries = CDels0} = State0) ->
    QRef = qref(Leader),
    {LastId, _} = lists:last(IdMsgs),
    Consumer = #consumer{ack = Ack} = maps:get(Tag, CDels0),
    %% format as a deliver action
    Del = {deliver, Tag, Ack, transform_msgs(QName, QRef, IdMsgs)},
    %% TODO: remove potential default allocation
    case Consumer of
        #consumer{last_msg_id = Prev} = C
          when FstId =:= Prev + 1 ->
            %% the expected next id: deliver as-is
            maybe_auto_ack(Ack, Del,
                           State0#state{consumer_deliveries =
                                            update_consumer(Tag, LastId,
                                                            length(IdMsgs), C,
                                                            CDels0)});
        #consumer{last_msg_id = Prev} = C
          when FstId > Prev + 1 ->
            %% ids Prev+1 .. FstId-1 have not been seen: that gap contains
            %% FstId - Prev - 1 messages.
            %% (Fixed: previously computed as FstId - Prev + 1, which
            %% overcounted the gap by 2 and skewed the delivery count used
            %% for credit accounting.)
            NumMissing = FstId - Prev - 1,
            %% there may actually be fewer missing messages returned than
            %% expected. This can happen when the node the channel is on
            %% gets disconnected from the node the leader is on and then
            %% reconnected afterwards. When the node is disconnected the
            %% leader will return all checked out messages to the main
            %% queue to ensure they don't get stuck in case the node never
            %% comes back.
            case get_missing_deliveries(Leader, Prev + 1, FstId - 1, Tag) of
                {protocol_error, _, _, _} = Err ->
                    Err;
                Missing ->
                    XDel = {deliver, Tag, Ack,
                            transform_msgs(QName, QRef, Missing ++ IdMsgs)},
                    maybe_auto_ack(Ack, XDel,
                                   State0#state{consumer_deliveries =
                                                    update_consumer(
                                                      Tag, LastId,
                                                      length(IdMsgs) + NumMissing,
                                                      C, CDels0)})
            end;
        #consumer{last_msg_id = Prev}
          when FstId =< Prev ->
            %% duplicate delivery: drop the already-seen prefix and retry
            case lists:dropwhile(fun ({Id, _}) -> Id =< Prev end, IdMsgs) of
                [] ->
                    {ok, State0, []};
                IdMsgs2 ->
                    handle_delivery(Leader, {delivery, Tag, IdMsgs2}, State0)
            end;
        C when FstId =:= 0 ->
            %% the very first delivery
            maybe_auto_ack(Ack, Del,
                           State0#state{consumer_deliveries =
                                            update_consumer(
                                              Tag, LastId,
                                              length(IdMsgs),
                                              C#consumer{last_msg_id = LastId},
                                              CDels0)})
    end.

%% Turn raw {MsgId, {MsgHeader, Msg}} pairs into the tuples delivered to
%% channels, stamping the redelivered flag and x-delivery-count header.
transform_msgs(QName, QRef, Msgs) ->
    lists:map(
      fun ({MsgId, {MsgHeader, Msg0}}) ->
              {Msg, Redelivered} =
                  case MsgHeader of
                      #{delivery_count := C} ->
                          {add_delivery_count_header(Msg0, C), true};
                      _ ->
                          {Msg0, false}
                  end,
              {QName, QRef, MsgId, Redelivered, Msg}
      end, Msgs).

%% Advance a consumer's last seen msg id and bump its delivery count.
update_consumer(Tag, LastId, DelCntIncr,
                #consumer{delivery_count = D} = C, Consumers) ->
    maps:put(Tag,
             C#consumer{last_msg_id = LastId,
                        delivery_count = D + DelCntIncr},
             Consumers).

%% Query the leader for checked-out messages in the id range [From, To]
%% that this client never received.
get_missing_deliveries(Leader, From, To, ConsumerTag) ->
    ConsumerId = consumer_id(ConsumerTag),
    Query = fun (State) ->
                    rabbit_fifo:get_checked_out(ConsumerId, From, To, State)
            end,
    case ra:local_query(Leader, Query) of
        {ok, {_, Missing}, _} ->
            Missing;
        {error, Error} ->
            {protocol_error, internal_error,
             "Cannot query missing deliveries from ~p: ~p",
             [Leader, Error]};
        {timeout, _} ->
            {protocol_error, internal_error,
             "Cannot query missing deliveries from ~p: timeout",
             [Leader]}
    end.

%% The server to address: the last known leader, or the first configured
%% server when no leader is known yet.
pick_server(#state{leader = undefined,
                   cfg = #cfg{servers = [N | _]}}) ->
    %% TODO: pick random rather than first?
    N;
pick_server(#state{leader = Leader}) ->
    Leader.

%% Servers ordered so the last known leader is tried first.
sorted_servers(#state{leader = undefined,
                      cfg = #cfg{servers = Servers}}) ->
    Servers;
sorted_servers(#state{leader = Leader,
                      cfg = #cfg{servers = Servers}}) ->
    [Leader | lists:delete(Leader, Servers)].

%% Allocate the next raft command sequence number.
next_seq(#state{next_seq = Seq} = State) ->
    {Seq, State#state{next_seq = Seq + 1}}.

%% Allocate the next enqueue sequence number.
next_enqueue_seq(#state{next_enqueue_seq = Seq} = State) ->
    {Seq, State#state{next_enqueue_seq = Seq + 1}}.

consumer_id(ConsumerTag) ->
    {ConsumerTag, self()}.

%% Pipeline a command and track it as pending; the returned tag is `slow'
%% when the number of pending commands has reached the soft limit.
send_command(Server, Correlation, Command, Priority,
             #state{pending = Pending,
                    cfg = #cfg{soft_limit = SftLmt}} = State0) ->
    {Seq, State} = next_seq(State0),
    ok = ra:pipeline_command(Server, Command, Seq, Priority),
    Tag = case maps:size(Pending) >= SftLmt of
              true -> slow;
              false -> ok
          end,
    {Tag, State#state{pending = Pending#{Seq => {Correlation, Command}},
                      slow = Tag =:= slow}}.

%% Re-pipeline a previously sent command under a fresh sequence number,
%% bypassing flow control.
resend_command(Server, Correlation, Command,
               #state{pending = Pending} = State0) ->
    {Seq, State} = next_seq(State0),
    ok = ra:pipeline_command(Server, Command, Seq),
    State#state{pending = Pending#{Seq => {Correlation, Command}}}.

%% Prepend the appropriate rabbit_fifo command for a non-empty id list.
add_command(_, _, [], Acc) ->
    Acc;
add_command(Cid, settle, MsgIds, Acc) ->
    [rabbit_fifo:make_settle(Cid, MsgIds) | Acc];
add_command(Cid, return, MsgIds, Acc) ->
    [rabbit_fifo:make_return(Cid, MsgIds) | Acc];
add_command(Cid, discard, MsgIds, Acc) ->
    [rabbit_fifo:make_discard(Cid, MsgIds) | Acc].

%% Arm the retry timer; on expiry a timeout event is dispatched back to
%% this process as a queue_event gen_cast.
set_timer(#state{leader = Leader0,
                 cfg = #cfg{servers = [Server | _],
                            cluster_name = QName}} = State) ->
    Leader = case Leader0 of
                 undefined -> Server;
                 _ -> Leader0
             end,
    Ref = erlang:send_after(?TIMER_TIME, self(),
                            {'$gen_cast',
                             {queue_event, QName, {Leader, timeout}}}),
    State#state{timer_state = Ref}.

%% Cancel the retry timer, if armed; cancellation is async and the result
%% is discarded.
cancel_timer(#state{timer_state = undefined} = State) ->
    State;
cancel_timer(#state{timer_state = Ref} = State) ->
    _ = erlang:cancel_timer(Ref, [{async, true}, {info, false}]),
    State#state{timer_state = undefined}.

%% Walk the configured servers until one reports a leader.
find_leader([]) ->
    undefined;
find_leader([Server | Rest]) ->
    case ra:members(Server, 500) of
        {ok, _, Leader} -> Leader;
        _ -> find_leader(Rest)
    end.

%% Extract the queue reference from a server id tuple or plain reference.
qref({Ref, _}) -> Ref;
qref(Ref) -> Ref.

%% ---------------------------------------------------------------------------
%% deps/rabbit/src/rabbit_fifo_index.erl
%%
%% An index of integer (raft) keys kept as a map together with cached
%% smallest/largest keys. Insertion is append-only; deletion is expected
%% to be mostly fifo-ish.
%% ---------------------------------------------------------------------------
-module(rabbit_fifo_index).

-export([empty/0,
         exists/2,
         append/2,
         delete/2,
         size/1,
         smallest/1,
         map/2]).

-compile({no_auto_import, [size/1]}).

%% the empty atom is a lot smaller (4 bytes) than e.g. `undefined` (13 bytes).
%% This matters as the data map gets persisted as part of the snapshot
-define(NIL, '').

-record(?MODULE, {data = #{} :: #{integer() => ?NIL},
                  smallest :: undefined | non_neg_integer(),
                  largest :: undefined | non_neg_integer()}).

-opaque state() :: #?MODULE{}.

-export_type([state/0]).

-spec empty() -> state().
empty() ->
    #?MODULE{}.

-spec exists(integer(), state()) -> boolean().
exists(Key, #?MODULE{data = Data}) ->
    maps:is_key(Key, Data).

%% only integer keys are supported; the key must exceed the current
%% largest (append-only)
-spec append(integer(), state()) -> state().
append(Key,
       #?MODULE{data = Data,
                smallest = Smallest,
                largest = Largest} = State)
  when Key > Largest orelse Largest =:= undefined ->
    State#?MODULE{data = maps:put(Key, ?NIL, Data),
                  smallest = ra_lib:default(Smallest, Key),
                  largest = Key}.

-spec delete(Index :: integer(), state()) -> state().
%% Delete a key; when it is the cached smallest key, scan forward for the
%% next remaining key to keep the smallest cache accurate.
delete(Smallest, #?MODULE{data = Data0,
                          largest = Largest,
                          smallest = Smallest} = State) ->
    Data = maps:remove(Smallest, Data0),
    case find_next(Smallest + 1, Largest, Data) of
        undefined ->
            %% the index is now empty
            State#?MODULE{data = Data,
                          smallest = undefined,
                          largest = undefined};
        Next ->
            State#?MODULE{data = Data, smallest = Next}
    end;
delete(Key, #?MODULE{data = Data} = State) ->
    State#?MODULE{data = maps:remove(Key, Data)}.

-spec size(state()) -> non_neg_integer().
size(#?MODULE{data = Data}) ->
    maps:size(Data).

-spec smallest(state()) -> undefined | integer().
smallest(#?MODULE{smallest = Smallest}) ->
    Smallest.

%% Map F over all key/value pairs in the index.
-spec map(fun(), state()) -> state().
map(F, #?MODULE{data = Data} = State) ->
    State#?MODULE{data = maps:map(F, Data)}.


%% internal

%% Return the next existing key in [Next, Last], or undefined.
find_next(Next, Last, _Map) when Next > Last ->
    undefined;
find_next(Next, Last, Map) ->
    case Map of
        #{Next := _} ->
            Next;
        _ ->
            % in degenerate cases the range here could be very large
            % and hence this could be very slow
            % the typical case should ideally be better
            % assuming fifo-ish deletion of entries
            find_next(Next + 1, Last, Map)
    end.

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").

append_test() ->
    S0 = empty(),
    false = exists(99, S0),
    undefined = smallest(S0),
    0 = size(S0),
    S1 = append(1, S0),
    false = exists(99, S1),
    true = exists(1, S1),
    1 = size(S1),
    1 = smallest(S1),
    S2 = append(2, S1),
    true = exists(2, S2),
    2 = size(S2),
    1 = smallest(S2),
    S3 = delete(1, S2),
    2 = smallest(S3),
    1 = size(S3),
    S5 = delete(2, S3),
    undefined = smallest(S5),
    %% was `0 = size(S0)', which asserted on the initial (trivially empty)
    %% state and never verified that the final index is actually empty
    0 = size(S5),
    ok.

-endif.
diff --git a/deps/rabbit/src/rabbit_fifo_v0.erl b/deps/rabbit/src/rabbit_fifo_v0.erl new file mode 100644 index 0000000000..a61f42616d --- /dev/null +++ b/deps/rabbit/src/rabbit_fifo_v0.erl @@ -0,0 +1,1961 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at https://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_fifo_v0). + +-behaviour(ra_machine). + +-compile(inline_list_funcs). +-compile(inline). +-compile({no_auto_import, [apply/3]}). + +-include("rabbit_fifo_v0.hrl"). +-include_lib("rabbit_common/include/rabbit.hrl"). + +-export([ + init/1, + apply/3, + state_enter/2, + tick/2, + overview/1, + get_checked_out/4, + %% aux + init_aux/1, + handle_aux/6, + % queries + query_messages_ready/1, + query_messages_checked_out/1, + query_messages_total/1, + query_processes/1, + query_ra_indexes/1, + query_consumer_count/1, + query_consumers/1, + query_stat/1, + query_single_active_consumer/1, + query_in_memory_usage/1, + usage/1, + + zero/1, + + %% misc + dehydrate_state/1, + normalize/1, + normalize_for_v1/1, + %% getters for coversions + get_field/2, + get_cfg_field/2, + + %% protocol helpers + make_enqueue/3, + make_checkout/3, + make_settle/2, + make_return/2, + make_discard/2, + make_credit/4, + make_purge/0, + make_purge_nodes/1, + make_update_config/1 + ]). 
+ +%% command records representing all the protocol actions that are supported +-record(enqueue, {pid :: option(pid()), + seq :: option(msg_seqno()), + msg :: raw_msg()}). +-record(checkout, {consumer_id :: consumer_id(), + spec :: checkout_spec(), + meta :: consumer_meta()}). +-record(settle, {consumer_id :: consumer_id(), + msg_ids :: [msg_id()]}). +-record(return, {consumer_id :: consumer_id(), + msg_ids :: [msg_id()]}). +-record(discard, {consumer_id :: consumer_id(), + msg_ids :: [msg_id()]}). +-record(credit, {consumer_id :: consumer_id(), + credit :: non_neg_integer(), + delivery_count :: non_neg_integer(), + drain :: boolean()}). +-record(purge, {}). +-record(purge_nodes, {nodes :: [node()]}). +-record(update_config, {config :: config()}). + +-opaque protocol() :: + #enqueue{} | + #checkout{} | + #settle{} | + #return{} | + #discard{} | + #credit{} | + #purge{} | + #purge_nodes{} | + #update_config{}. + +-type command() :: protocol() | ra_machine:builtin_command(). +%% all the command types supported by ra fifo + +-type client_msg() :: delivery(). +%% the messages `rabbit_fifo' can send to consumers. + +-opaque state() :: #?STATE{}. + +-export_type([protocol/0, + delivery/0, + command/0, + credit_mode/0, + consumer_tag/0, + consumer_meta/0, + consumer_id/0, + client_msg/0, + msg/0, + msg_id/0, + msg_seqno/0, + delivery_msg/0, + state/0, + config/0]). + +-spec init(config()) -> state(). +init(#{name := Name, + queue_resource := Resource} = Conf) -> + update_config(Conf, #?STATE{cfg = #cfg{name = Name, + resource = Resource}}). 
+
+%% Merge a (possibly partial) config map into the machine state's #cfg{}.
+%% Missing keys fall back to defaults; single_active_consumer_on selects
+%% the consumer strategy atom stored in the config record.
+update_config(Conf, State) ->
+    DLH = maps:get(dead_letter_handler, Conf, undefined),
+    BLH = maps:get(become_leader_handler, Conf, undefined),
+    SHI = maps:get(release_cursor_interval, Conf, ?RELEASE_CURSOR_EVERY),
+    MaxLength = maps:get(max_length, Conf, undefined),
+    MaxBytes = maps:get(max_bytes, Conf, undefined),
+    MaxMemoryLength = maps:get(max_in_memory_length, Conf, undefined),
+    MaxMemoryBytes = maps:get(max_in_memory_bytes, Conf, undefined),
+    DeliveryLimit = maps:get(delivery_limit, Conf, undefined),
+    ConsumerStrategy = case maps:get(single_active_consumer_on, Conf, false) of
+                           true ->
+                               single_active;
+                           false ->
+                               competing
+                       end,
+    Cfg = State#?STATE.cfg,
+    %% release_cursor_interval is stored as a {Base, Current} pair; older
+    %% states may still carry a bare integer or undefined, so preserve the
+    %% current value across the three historical formats.
+    SHICur = case State#?STATE.cfg of
+                 #cfg{release_cursor_interval = {_, C}} ->
+                     C;
+                 #cfg{release_cursor_interval = undefined} ->
+                     SHI;
+                 #cfg{release_cursor_interval = C} ->
+                     C
+             end,
+
+    State#?STATE{cfg = Cfg#cfg{release_cursor_interval = {SHI, SHICur},
+                               dead_letter_handler = DLH,
+                               become_leader_handler = BLH,
+                               max_length = MaxLength,
+                               max_bytes = MaxBytes,
+                               max_in_memory_length = MaxMemoryLength,
+                               max_in_memory_bytes = MaxMemoryBytes,
+                               consumer_strategy = ConsumerStrategy,
+                               delivery_limit = DeliveryLimit}}.
+
+%% Constant-zero query helper (used where a query function is required).
+zero(_) ->
+    0.
+
+% msg_ids are scoped per consumer
+% ra_indexes holds all raft indexes for enqueues currently on queue
+-spec apply(ra_machine:command_meta_data(), command(), state()) ->
+    {state(), Reply :: term(), ra_machine:effects()} |
+    {state(), Reply :: term()}.
+apply(Metadata, #enqueue{pid = From, seq = Seq, + msg = RawMsg}, State00) -> + apply_enqueue(Metadata, From, Seq, RawMsg, State00); +apply(Meta, + #settle{msg_ids = MsgIds, consumer_id = ConsumerId}, + #?STATE{consumers = Cons0} = State) -> + case Cons0 of + #{ConsumerId := Con0} -> + % need to increment metrics before completing as any snapshot + % states taken need to include them + complete_and_checkout(Meta, MsgIds, ConsumerId, + Con0, [], State); + _ -> + {State, ok} + + end; +apply(Meta, #discard{msg_ids = MsgIds, consumer_id = ConsumerId}, + #?STATE{consumers = Cons0} = State0) -> + case Cons0 of + #{ConsumerId := Con0} -> + Discarded = maps:with(MsgIds, Con0#consumer.checked_out), + Effects = dead_letter_effects(rejected, Discarded, State0, []), + complete_and_checkout(Meta, MsgIds, ConsumerId, Con0, + Effects, State0); + _ -> + {State0, ok} + end; +apply(Meta, #return{msg_ids = MsgIds, consumer_id = ConsumerId}, + #?STATE{consumers = Cons0} = State) -> + case Cons0 of + #{ConsumerId := #consumer{checked_out = Checked0}} -> + Returned = maps:with(MsgIds, Checked0), + return(Meta, ConsumerId, Returned, [], State); + _ -> + {State, ok} + end; +apply(Meta, #credit{credit = NewCredit, delivery_count = RemoteDelCnt, + drain = Drain, consumer_id = ConsumerId}, + #?STATE{consumers = Cons0, + service_queue = ServiceQueue0, + waiting_consumers = Waiting0} = State0) -> + case Cons0 of + #{ConsumerId := #consumer{delivery_count = DelCnt} = Con0} -> + %% this can go below 0 when credit is reduced + C = max(0, RemoteDelCnt + NewCredit - DelCnt), + %% grant the credit + Con1 = Con0#consumer{credit = C}, + ServiceQueue = maybe_queue_consumer(ConsumerId, Con1, + ServiceQueue0), + Cons = maps:put(ConsumerId, Con1, Cons0), + {State1, ok, Effects} = + checkout(Meta, State0#?STATE{service_queue = ServiceQueue, + consumers = Cons}, []), + Response = {send_credit_reply, messages_ready(State1)}, + %% by this point all checkouts for the updated credit value + %% should be 
processed so we can evaluate the drain + case Drain of + false -> + %% just return the result of the checkout + {State1, Response, Effects}; + true -> + Con = #consumer{credit = PostCred} = + maps:get(ConsumerId, State1#?STATE.consumers), + %% add the outstanding credit to the delivery count + DeliveryCount = Con#consumer.delivery_count + PostCred, + Consumers = maps:put(ConsumerId, + Con#consumer{delivery_count = DeliveryCount, + credit = 0}, + State1#?STATE.consumers), + Drained = Con#consumer.credit, + {CTag, _} = ConsumerId, + {State1#?STATE{consumers = Consumers}, + %% returning a multi response with two client actions + %% for the channel to execute + {multi, [Response, {send_drained, {CTag, Drained}}]}, + Effects} + end; + _ when Waiting0 /= [] -> + %% there are waiting consuemrs + case lists:keytake(ConsumerId, 1, Waiting0) of + {value, {_, Con0 = #consumer{delivery_count = DelCnt}}, Waiting} -> + %% the consumer is a waiting one + %% grant the credit + C = max(0, RemoteDelCnt + NewCredit - DelCnt), + Con = Con0#consumer{credit = C}, + State = State0#?STATE{waiting_consumers = + [{ConsumerId, Con} | Waiting]}, + {State, {send_credit_reply, messages_ready(State)}}; + false -> + {State0, ok} + end; + _ -> + %% credit for unknown consumer - just ignore + {State0, ok} + end; +apply(_, #checkout{spec = {dequeue, _}}, + #?STATE{cfg = #cfg{consumer_strategy = single_active}} = State0) -> + {State0, {error, unsupported}}; +apply(#{from := From} = Meta, #checkout{spec = {dequeue, Settlement}, + meta = ConsumerMeta, + consumer_id = ConsumerId}, + #?STATE{consumers = Consumers} = State0) -> + Exists = maps:is_key(ConsumerId, Consumers), + case messages_ready(State0) of + 0 -> + {State0, {dequeue, empty}}; + _ when Exists -> + %% a dequeue using the same consumer_id isn't possible at this point + {State0, {dequeue, empty}}; + Ready -> + State1 = update_consumer(ConsumerId, ConsumerMeta, + {once, 1, simple_prefetch}, + State0), + {success, _, MsgId, Msg, State2} = 
checkout_one(State1), + {State, Effects} = case Settlement of + unsettled -> + {_, Pid} = ConsumerId, + {State2, [{monitor, process, Pid}]}; + settled -> + %% immediately settle the checkout + {State3, _, Effects0} = + apply(Meta, make_settle(ConsumerId, [MsgId]), + State2), + {State3, Effects0} + end, + case Msg of + {RaftIdx, {Header, 'empty'}} -> + %% TODO add here new log effect with reply + {State, '$ra_no_reply', + reply_log_effect(RaftIdx, MsgId, Header, Ready - 1, From)}; + _ -> + {State, {dequeue, {MsgId, Msg}, Ready-1}, Effects} + end + end; +apply(Meta, #checkout{spec = cancel, consumer_id = ConsumerId}, State0) -> + {State, Effects} = cancel_consumer(ConsumerId, State0, [], consumer_cancel), + checkout(Meta, State, Effects); +apply(Meta, #checkout{spec = Spec, meta = ConsumerMeta, + consumer_id = {_, Pid} = ConsumerId}, + State0) -> + State1 = update_consumer(ConsumerId, ConsumerMeta, Spec, State0), + checkout(Meta, State1, [{monitor, process, Pid}]); +apply(#{index := RaftIdx}, #purge{}, + #?STATE{ra_indexes = Indexes0, + returns = Returns, + messages = Messages} = State0) -> + Total = messages_ready(State0), + Indexes1 = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes0, + [I || {I, _} <- lists:sort(maps:values(Messages))]), + Indexes = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes1, + [I || {_, {I, _}} <- lqueue:to_list(Returns)]), + {State, _, Effects} = + update_smallest_raft_index(RaftIdx, + State0#?STATE{ra_indexes = Indexes, + messages = #{}, + returns = lqueue:new(), + msg_bytes_enqueue = 0, + prefix_msgs = {0, [], 0, []}, + low_msg_num = undefined, + msg_bytes_in_memory = 0, + msgs_ready_in_memory = 0}, + []), + %% as we're not checking out after a purge (no point) we have to + %% reverse the effects ourselves + {State, {purge, Total}, + lists:reverse([garbage_collection | Effects])}; +apply(Meta, {down, Pid, noconnection}, + #?STATE{consumers = Cons0, + cfg = #cfg{consumer_strategy = single_active}, + waiting_consumers = Waiting0, + 
enqueuers = Enqs0} = State0) -> + Node = node(Pid), + %% if the pid refers to an active or cancelled consumer, + %% mark it as suspected and return it to the waiting queue + {State1, Effects0} = + maps:fold(fun({_, P} = Cid, C0, {S0, E0}) + when node(P) =:= Node -> + %% the consumer should be returned to waiting + %% and checked out messages should be returned + Effs = consumer_update_active_effects( + S0, Cid, C0, false, suspected_down, E0), + Checked = C0#consumer.checked_out, + Credit = increase_credit(C0, maps:size(Checked)), + {St, Effs1} = return_all(S0, Effs, + Cid, C0#consumer{credit = Credit}), + %% if the consumer was cancelled there is a chance it got + %% removed when returning hence we need to be defensive here + Waiting = case St#?STATE.consumers of + #{Cid := C} -> + Waiting0 ++ [{Cid, C}]; + _ -> + Waiting0 + end, + {St#?STATE{consumers = maps:remove(Cid, St#?STATE.consumers), + waiting_consumers = Waiting}, + Effs1}; + (_, _, S) -> + S + end, {State0, []}, Cons0), + WaitingConsumers = update_waiting_consumer_status(Node, State1, + suspected_down), + + %% select a new consumer from the waiting queue and run a checkout + State2 = State1#?STATE{waiting_consumers = WaitingConsumers}, + {State, Effects1} = activate_next_consumer(State2, Effects0), + + %% mark any enquers as suspected + Enqs = maps:map(fun(P, E) when node(P) =:= Node -> + E#enqueuer{status = suspected_down}; + (_, E) -> E + end, Enqs0), + Effects = [{monitor, node, Node} | Effects1], + checkout(Meta, State#?STATE{enqueuers = Enqs}, Effects); +apply(Meta, {down, Pid, noconnection}, + #?STATE{consumers = Cons0, + enqueuers = Enqs0} = State0) -> + %% A node has been disconnected. 
This doesn't necessarily mean that + %% any processes on this node are down, they _may_ come back so here + %% we just mark them as suspected (effectively deactivated) + %% and return all checked out messages to the main queue for delivery to any + %% live consumers + %% + %% all pids for the disconnected node will be marked as suspected not just + %% the one we got the `down' command for + Node = node(Pid), + + {State, Effects1} = + maps:fold( + fun({_, P} = Cid, #consumer{checked_out = Checked0, + status = up} = C0, + {St0, Eff}) when node(P) =:= Node -> + Credit = increase_credit(C0, map_size(Checked0)), + C = C0#consumer{status = suspected_down, + credit = Credit}, + {St, Eff0} = return_all(St0, Eff, Cid, C), + Eff1 = consumer_update_active_effects(St, Cid, C, false, + suspected_down, Eff0), + {St, Eff1}; + (_, _, {St, Eff}) -> + {St, Eff} + end, {State0, []}, Cons0), + Enqs = maps:map(fun(P, E) when node(P) =:= Node -> + E#enqueuer{status = suspected_down}; + (_, E) -> E + end, Enqs0), + + % Monitor the node so that we can "unsuspect" these processes when the node + % comes back, then re-issue all monitors and discover the final fate of + % these processes + Effects = case maps:size(State#?STATE.consumers) of + 0 -> + [{aux, inactive}, {monitor, node, Node}]; + _ -> + [{monitor, node, Node}] + end ++ Effects1, + checkout(Meta, State#?STATE{enqueuers = Enqs}, Effects); +apply(Meta, {down, Pid, _Info}, State0) -> + {State, Effects} = handle_down(Pid, State0), + checkout(Meta, State, Effects); +apply(Meta, {nodeup, Node}, #?STATE{consumers = Cons0, + enqueuers = Enqs0, + service_queue = SQ0} = State0) -> + %% A node we are monitoring has come back. 
+ %% If we have suspected any processes of being + %% down we should now re-issue the monitors for them to detect if they're + %% actually down or not + Monitors = [{monitor, process, P} + || P <- suspected_pids_for(Node, State0)], + + Enqs1 = maps:map(fun(P, E) when node(P) =:= Node -> + E#enqueuer{status = up}; + (_, E) -> E + end, Enqs0), + ConsumerUpdateActiveFun = consumer_active_flag_update_function(State0), + %% mark all consumers as up + {Cons1, SQ, Effects1} = + maps:fold(fun({_, P} = ConsumerId, C, {CAcc, SQAcc, EAcc}) + when (node(P) =:= Node) and + (C#consumer.status =/= cancelled) -> + EAcc1 = ConsumerUpdateActiveFun(State0, ConsumerId, + C, true, up, EAcc), + update_or_remove_sub(ConsumerId, + C#consumer{status = up}, CAcc, + SQAcc, EAcc1); + (_, _, Acc) -> + Acc + end, {Cons0, SQ0, Monitors}, Cons0), + Waiting = update_waiting_consumer_status(Node, State0, up), + State1 = State0#?STATE{consumers = Cons1, + enqueuers = Enqs1, + service_queue = SQ, + waiting_consumers = Waiting}, + {State, Effects} = activate_next_consumer(State1, Effects1), + checkout(Meta, State, Effects); +apply(_, {nodedown, _Node}, State) -> + {State, ok}; +apply(_, #purge_nodes{nodes = Nodes}, State0) -> + {State, Effects} = lists:foldl(fun(Node, {S, E}) -> + purge_node(Node, S, E) + end, {State0, []}, Nodes), + {State, ok, Effects}; +apply(Meta, #update_config{config = Conf}, State) -> + checkout(Meta, update_config(Conf, State), []). + +purge_node(Node, State, Effects) -> + lists:foldl(fun(Pid, {S0, E0}) -> + {S, E} = handle_down(Pid, S0), + {S, E0 ++ E} + end, {State, Effects}, all_pids_for(Node, State)). 
+
+%% any downs that are not noconnection
+%% Flush the dead pid's pending enqueues into the queue, drop it from the
+%% enqueuers map, run waiting-consumer cleanup, then cancel (with reason
+%% `down') every consumer registered under that pid. Returns {State, Effects}.
+handle_down(Pid, #?STATE{consumers = Cons0,
+                         enqueuers = Enqs0} = State0) ->
+    % Remove any enqueuer for the same pid and enqueue any pending messages
+    % This should be ok as we won't see any more enqueues from this pid
+    State1 = case maps:take(Pid, Enqs0) of
+                 {#enqueuer{pending = Pend}, Enqs} ->
+                     lists:foldl(fun ({_, RIdx, RawMsg}, S) ->
+                                         enqueue(RIdx, RawMsg, S)
+                                 end, State0#?STATE{enqueuers = Enqs}, Pend);
+                 error ->
+                     State0
+             end,
+    {Effects1, State2} = handle_waiting_consumer_down(Pid, State1),
+    % return checked out messages to main queue
+    % Find the consumers for the down pid
+    DownConsumers = maps:keys(
+                      maps:filter(fun({_, P}, _) -> P =:= Pid end, Cons0)),
+    lists:foldl(fun(ConsumerId, {S, E}) ->
+                        cancel_consumer(ConsumerId, S, E, down)
+                    end, {State2, Effects1}, DownConsumers).
+
+%% Returns the fun used to emit consumer-activity effects on nodeup:
+%% under the competing strategy every consumer change is reported; under
+%% single_active the activity flag is managed elsewhere, so it is a no-op.
+consumer_active_flag_update_function(#?STATE{cfg = #cfg{consumer_strategy = competing}}) ->
+    fun(State, ConsumerId, Consumer, Active, ActivityStatus, Effects) ->
+            consumer_update_active_effects(State, ConsumerId, Consumer, Active,
+                                           ActivityStatus, Effects)
+    end;
+consumer_active_flag_update_function(#?STATE{cfg = #cfg{consumer_strategy = single_active}}) ->
+    fun(_, _, _, _, _, Effects) ->
+            Effects
+    end.
+
+%% Remove waiting (single-active) consumers owned by a downed pid and
+%% collect their cancellation effects. Under the competing strategy, or
+%% with no waiting consumers, there is nothing to do.
+%% Returns {Effects, State} (note: reversed relative to most helpers here).
+handle_waiting_consumer_down(_Pid,
+                             #?STATE{cfg = #cfg{consumer_strategy = competing}} = State) ->
+    {[], State};
+handle_waiting_consumer_down(_Pid,
+                             #?STATE{cfg = #cfg{consumer_strategy = single_active},
+                                     waiting_consumers = []} = State) ->
+    {[], State};
+handle_waiting_consumer_down(Pid,
+                             #?STATE{cfg = #cfg{consumer_strategy = single_active},
+                                     waiting_consumers = WaitingConsumers0} = State0) ->
+    % get cancel effects for down waiting consumers
+    Down = lists:filter(fun({{_, P}, _}) -> P =:= Pid end,
+                        WaitingConsumers0),
+    %% NOTE(review): the fun argument `Effects` shadows the outer binding
+    %% of the same name (harmless here, but triggers a compiler warning)
+    Effects = lists:foldl(fun ({ConsumerId, _}, Effects) ->
+                                  cancel_consumer_effects(ConsumerId, State0,
+                                                          Effects)
+                          end, [], Down),
+    % update state to have only up waiting consumers
+    StillUp = lists:filter(fun({{_, P}, _}) -> P =/= Pid end,
+                           WaitingConsumers0),
+    State = State0#?STATE{waiting_consumers = StillUp},
+    {Effects, State}.
+
+%% Set Status on every waiting consumer whose pid lives on Node, dropping
+%% cancelled consumers from the returned list entirely.
+update_waiting_consumer_status(Node,
+                               #?STATE{waiting_consumers = WaitingConsumers},
+                               Status) ->
+    [begin
+         case node(Pid) of
+             Node ->
+                 {ConsumerId, Consumer#consumer{status = Status}};
+             _ ->
+                 {ConsumerId, Consumer}
+         end
+     end || {{_, Pid} = ConsumerId, Consumer} <- WaitingConsumers,
+            Consumer#consumer.status =/= cancelled].
+
+-spec state_enter(ra_server:ra_state(), state()) -> ra_machine:effects().
+%% ra_machine callback: emit effects when this member changes Raft state.
+%% Note the leader clause only matches when prefix_msgs is the empty
+%% {0, [], 0, []} tuple; a leader state with non-empty prefix messages
+%% falls through to the final catch-all clause.
+state_enter(leader, #?STATE{consumers = Cons,
+                            enqueuers = Enqs,
+                            waiting_consumers = WaitingConsumers,
+                            cfg = #cfg{name = Name,
+                                       resource = Resource,
+                                       become_leader_handler = BLH},
+                            prefix_msgs = {0, [], 0, []}
+                           }) ->
+    % return effects to monitor all current consumers and enqueuers
+    Pids = lists:usort(maps:keys(Enqs)
+        ++ [P || {_, P} <- maps:keys(Cons)]
+        ++ [P || {{_, P}, _} <- WaitingConsumers]),
+    Mons = [{monitor, process, P} || P <- Pids],
+    Nots = [{send_msg, P, leader_change, ra_event} || P <- Pids],
+    NodeMons = lists:usort([{monitor, node, node(P)} || P <- Pids]),
+    FHReservation = [{mod_call, rabbit_quorum_queue, file_handle_leader_reservation, [Resource]}],
+    Effects = Mons ++ Nots ++ NodeMons ++ FHReservation,
+    case BLH of
+        undefined ->
+            Effects;
+        {Mod, Fun, Args} ->
+            %% the become-leader handler is an {M, F, A} called with the
+            %% queue name appended to its argument list
+            [{mod_call, Mod, Fun, Args ++ [Name]} | Effects]
+    end;
+state_enter(eol, #?STATE{enqueuers = Enqs,
+                         consumers = Custs0,
+                         waiting_consumers = WaitingConsumers0}) ->
+    %% end-of-life: notify every known client process and release the
+    %% file handle reservation
+    Custs = maps:fold(fun({_, P}, V, S) -> S#{P => V} end, #{}, Custs0),
+    WaitingConsumers1 = lists:foldl(fun({{_, P}, V}, Acc) -> Acc#{P => V} end,
+                                    #{}, WaitingConsumers0),
+    AllConsumers = maps:merge(Custs, WaitingConsumers1),
+    [{send_msg, P, eol, ra_event}
+     || P <- maps:keys(maps:merge(Enqs, AllConsumers))] ++
+    [{mod_call, rabbit_quorum_queue, file_handle_release_reservation, []}];
+state_enter(State, #?STATE{cfg = #cfg{resource = _Resource}}) when State =/= leader ->
+    FHReservation = {mod_call, rabbit_quorum_queue, file_handle_other_reservation, []},
+    [FHReservation];
+ state_enter(_, _) ->
+    %% catch all as not handling all states
+    [].
+%% ra_machine tick callback: publish queue metrics via rabbit_quorum_queue.
+tick(_Ts, #?STATE{cfg = #cfg{name = Name,
+                             resource = QName},
+                  msg_bytes_enqueue = EnqueueBytes,
+                  msg_bytes_checkout = CheckoutBytes} = State) ->
+    Metrics = {Name,
+               messages_ready(State),
+               num_checked_out(State), % checked out
+               messages_total(State),
+               query_consumer_count(State), % Consumers
+               EnqueueBytes,
+               CheckoutBytes},
+    [{mod_call, rabbit_quorum_queue,
+      handle_tick, [QName, Metrics, all_nodes(State)]}].
+
+%% Summarise configuration and counters for introspection/CLI tooling.
+-spec overview(state()) -> map().
+overview(#?STATE{consumers = Cons,
+                 enqueuers = Enqs,
+                 release_cursors = Cursors,
+                 enqueue_count = EnqCount,
+                 msg_bytes_enqueue = EnqueueBytes,
+                 msg_bytes_checkout = CheckoutBytes,
+                 cfg = Cfg} = State) ->
+    Conf = #{name => Cfg#cfg.name,
+             resource => Cfg#cfg.resource,
+             release_cursor_interval => Cfg#cfg.release_cursor_interval,
+             dead_lettering_enabled => undefined =/= Cfg#cfg.dead_letter_handler,
+             max_length => Cfg#cfg.max_length,
+             max_bytes => Cfg#cfg.max_bytes,
+             consumer_strategy => Cfg#cfg.consumer_strategy,
+             max_in_memory_length => Cfg#cfg.max_in_memory_length,
+             max_in_memory_bytes => Cfg#cfg.max_in_memory_bytes},
+    #{type => ?MODULE,
+      config => Conf,
+      num_consumers => maps:size(Cons),
+      num_checked_out => num_checked_out(State),
+      num_enqueuers => maps:size(Enqs),
+      num_ready_messages => messages_ready(State),
+      num_messages => messages_total(State),
+      num_release_cursors => lqueue:len(Cursors),
+      %% NOTE(review): "crusor" is a typo, but this is a runtime map key —
+      %% renaming would break any consumer of this overview map; confirm
+      %% before changing
+      release_crusor_enqueue_counter => EnqCount,
+      enqueue_message_bytes => EnqueueBytes,
+      checkout_message_bytes => CheckoutBytes}.
+
+%% Return the checked-out messages of a consumer whose msg_ids fall in
+%% the inclusive range From..To, as {MsgId, Msg} pairs.
+-spec get_checked_out(consumer_id(), msg_id(), msg_id(), state()) ->
+    [delivery_msg()].
+get_checked_out(Cid, From, To, #?STATE{consumers = Consumers}) ->
+    case Consumers of
+        #{Cid := #consumer{checked_out = Checked}} ->
+            [{K, snd(snd(maps:get(K, Checked)))}
+             || K <- lists:seq(From, To),
+                maps:is_key(K, Checked)];
+        _ ->
+            []
+    end.
+
+%% last raft index at which a full GC sweep was performed (aux state)
+-record(aux_gc, {last_raft_idx = 0 :: ra:index()}).
+%% per-member auxiliary (non-replicated) state: usage stats + GC tracking
+-record(aux, {name :: atom(),
+              utilisation :: term(),
+              gc = #aux_gc{} :: #aux_gc{}}).
+
+%% Create the aux state; also ensures the shared, public ETS table used
+%% to expose per-queue utilisation figures exists.
+init_aux(Name) when is_atom(Name) ->
+    %% TODO: catch specific exception throw if table already exists
+    ok = ra_machine_ets:create_table(rabbit_fifo_usage,
+                                     [named_table, set, public,
+                                      {write_concurrency, true}]),
+    Now = erlang:monotonic_time(micro_seconds),
+    #aux{name = Name,
+         utilisation = {inactive, Now, 1, 1.0}}.
+
+%% ra_machine aux callback: update the utilisation tracker on
+%% active/inactive casts, publish it and evaluate GC on tick, and do
+%% nothing on eval.
+handle_aux(_RaState, cast, Cmd, #aux{name = Name,
+                                     utilisation = Use0} = State0,
+           Log, MacState) ->
+    State = case Cmd of
+                _ when Cmd == active orelse Cmd == inactive ->
+                    State0#aux{utilisation = update_use(Use0, Cmd)};
+                tick ->
+                    true = ets:insert(rabbit_fifo_usage,
+                                      {Name, utilisation(Use0)}),
+                    eval_gc(Log, MacState, State0);
+                eval ->
+                    State0
+            end,
+    {no_reply, State, Log}.
+
+%% Force a full garbage collection when the queue is empty, process memory
+%% exceeds ?GC_MEM_LIMIT_B and we have not already swept at this raft index.
+eval_gc(Log, #?STATE{cfg = #cfg{resource = QR}} = MacState,
+        #aux{gc = #aux_gc{last_raft_idx = LastGcIdx} = Gc} = AuxState) ->
+    {Idx, _} = ra_log:last_index_term(Log),
+    {memory, Mem} = erlang:process_info(self(), memory),
+    case messages_total(MacState) of
+        0 when Idx > LastGcIdx andalso
+               Mem > ?GC_MEM_LIMIT_B ->
+            garbage_collect(),
+            {memory, MemAfter} = erlang:process_info(self(), memory),
+            rabbit_log:debug("~s: full GC sweep complete. "
+                             "Process memory changed from ~.2fMB to ~.2fMB.",
+                             [rabbit_misc:rs(QR), Mem/?MB, MemAfter/?MB]),
+            AuxState#aux{gc = Gc#aux_gc{last_raft_idx = Idx}};
+        _ ->
+            AuxState
+    end.
+
+%%% Queries
+
+query_messages_ready(State) ->
+    messages_ready(State).
+
+%% total number of messages currently checked out across all consumers
+query_messages_checked_out(#?STATE{consumers = Consumers}) ->
+    maps:fold(fun (_, #consumer{checked_out = C}, S) ->
+                      maps:size(C) + S
+              end, 0, Consumers).
+
+query_messages_total(State) ->
+    messages_total(State).
+
+%% all pids known to the queue (enqueuers and consumers, deduplicated)
+query_processes(#?STATE{enqueuers = Enqs, consumers = Cons0}) ->
+    Cons = maps:fold(fun({_, P}, V, S) -> S#{P => V} end, #{}, Cons0),
+    maps:keys(maps:merge(Enqs, Cons)).
+
+
+query_ra_indexes(#?STATE{ra_indexes = RaIndexes}) ->
+    RaIndexes.
+
+%% active plus waiting consumers
+query_consumer_count(#?STATE{consumers = Consumers,
+                             waiting_consumers = WaitingConsumers}) ->
+    maps:size(Consumers) + length(WaitingConsumers).
+
+%% Build a map of ConsumerId => consumer-info tuple covering both active
+%% and waiting consumers (cancelled ones are skipped). The activity flag
+%% and status depend on the configured consumer strategy.
+query_consumers(#?STATE{consumers = Consumers,
+                        waiting_consumers = WaitingConsumers,
+                        cfg = #cfg{consumer_strategy = ConsumerStrategy}} = State) ->
+    ActiveActivityStatusFun =
+        case ConsumerStrategy of
+            competing ->
+                %% competing: every consumer is active unless suspected down
+                fun(_ConsumerId,
+                    #consumer{status = Status}) ->
+                        case Status of
+                            suspected_down  ->
+                                {false, Status};
+                            _ ->
+                                {true, Status}
+                        end
+                end;
+            single_active ->
+                %% single_active: only the elected consumer is active,
+                %% everyone else reports as waiting
+                SingleActiveConsumer = query_single_active_consumer(State),
+                fun({Tag, Pid} = _Consumer, _) ->
+                        case SingleActiveConsumer of
+                            {value, {Tag, Pid}} ->
+                                {true, single_active};
+                            _ ->
+                                {false, waiting}
+                        end
+                end
+        end,
+    FromConsumers =
+        maps:fold(fun (_, #consumer{status = cancelled}, Acc) ->
+                          Acc;
+                      ({Tag, Pid}, #consumer{meta = Meta} = Consumer, Acc) ->
+                          {Active, ActivityStatus} =
+                              ActiveActivityStatusFun({Tag, Pid}, Consumer),
+                          maps:put({Tag, Pid},
+                                   {Pid, Tag,
+                                    maps:get(ack, Meta, undefined),
+                                    maps:get(prefetch, Meta, undefined),
+                                    Active,
+                                    ActivityStatus,
+                                    maps:get(args, Meta, []),
+                                    maps:get(username, Meta, undefined)},
+                                   Acc)
+                  end, #{}, Consumers),
+    FromWaitingConsumers =
+        lists:foldl(fun ({_, #consumer{status = cancelled}}, Acc) ->
+                            Acc;
+                        ({{Tag, Pid}, #consumer{meta = Meta} = Consumer}, Acc) ->
+                            {Active, ActivityStatus} =
+                                ActiveActivityStatusFun({Tag, Pid}, Consumer),
+                            maps:put({Tag, Pid},
+                                     {Pid, Tag,
+                                      maps:get(ack, Meta, undefined),
+                                      maps:get(prefetch, Meta, undefined),
+                                      Active,
+                                      ActivityStatus,
+                                      maps:get(args, Meta, []),
+                                      maps:get(username, Meta, undefined)},
+                                     Acc)
+                    end, #{}, WaitingConsumers),
+    maps:merge(FromConsumers, FromWaitingConsumers).
+
+%% With single-active strategy the consumers map holds at most the one
+%% elected consumer; anything else is an inconsistency. Returns `disabled'
+%% when the strategy is competing.
+query_single_active_consumer(#?STATE{cfg = #cfg{consumer_strategy = single_active},
+                                     consumers = Consumers}) ->
+    case maps:size(Consumers) of
+        0 ->
+            {error, no_value};
+        1 ->
+            {value, lists:nth(1, maps:keys(Consumers))};
+        _
+          ->
+            {error, illegal_size}
+    end ;
+query_single_active_consumer(_) ->
+    disabled.
+
+%% {ReadyMessages, ConsumerCount}
+query_stat(#?STATE{consumers = Consumers} = State) ->
+    {messages_ready(State), maps:size(Consumers)}.
+
+query_in_memory_usage(#?STATE{msg_bytes_in_memory = Bytes,
+                              msgs_ready_in_memory = Length}) ->
+    {Length, Bytes}.
+
+%% Read the published utilisation figure for a queue from the shared ETS
+%% table; 0.0 when nothing has been published yet.
+-spec usage(atom()) -> float().
+usage(Name) when is_atom(Name) ->
+    case ets:lookup(rabbit_fifo_usage, Name) of
+        [] -> 0.0;
+        [{_, Use}] -> Use
+    end.
+
+%%% Internal
+
+messages_ready(#?STATE{messages = M,
+                       prefix_msgs = {RCnt, _R, PCnt, _P},
+                       returns = R}) ->
+
+    %% prefix message counts are carried as plain integers (RCnt/PCnt),
+    %% so no list traversal of the prefix messages is needed here
+    maps:size(M) + lqueue:len(R) + RCnt + PCnt.
+
+messages_total(#?STATE{ra_indexes = I,
+                       prefix_msgs = {RCnt, _R, PCnt, _P}}) ->
+    rabbit_fifo_index:size(I) + RCnt + PCnt.
+
+%% Utilisation tracker state machine. Shapes:
+%%   {active, Since, Avg} | {inactive, Since, ActiveDuration, Avg}
+%% Transitions on active/inactive events; repeated same-state events are
+%% no-ops.
+update_use({inactive, _, _, _} = CUInfo, inactive) ->
+    CUInfo;
+update_use({active, _, _} = CUInfo, active) ->
+    CUInfo;
+update_use({active, Since, Avg}, inactive) ->
+    Now = erlang:monotonic_time(micro_seconds),
+    {inactive, Now, Now - Since, Avg};
+update_use({inactive, Since, Active, Avg}, active) ->
+    Now = erlang:monotonic_time(micro_seconds),
+    {active, Now, use_avg(Active, Now - Since, Avg)}.
+
+%% Current utilisation reading, folding in the time spent in the present
+%% state up to now.
+utilisation({active, Since, Avg}) ->
+    use_avg(erlang:monotonic_time(micro_seconds) - Since, 0, Avg);
+utilisation({inactive, Since, Active, Avg}) ->
+    use_avg(Active, erlang:monotonic_time(micro_seconds) - Since, Avg).
+
+use_avg(0, 0, Avg) ->
+    Avg;
+use_avg(Active, Inactive, Avg) ->
+    Time = Inactive + Active,
+    moving_average(Time, ?USE_AVG_HALF_LIFE, Active / Time, Avg).
+
+%% Exponentially-weighted moving average with a configurable half-life:
+%% the old value's weight decays by half every HalfLife time units.
+moving_average(_Time, _, Next, undefined) ->
+    Next;
+moving_average(Time, HalfLife, Next, Current) ->
+    Weight = math:exp(Time * math:log(0.5) / HalfLife),
+    Next * (1 - Weight) + Current * Weight.
+
+%% total checked-out messages across all consumers
+num_checked_out(#?STATE{consumers = Cons}) ->
+    maps:fold(fun (_, #consumer{checked_out = C}, Acc) ->
+                      maps:size(C) + Acc
+              end, 0, Cons).
+
+%% Cancel a consumer. Under single-active strategy, cancelling the active
+%% consumer promotes the next waiting one; cancelling a waiting consumer
+%% merely removes it from the waiting list.
+cancel_consumer(ConsumerId,
+                #?STATE{cfg = #cfg{consumer_strategy = competing}} = State,
+                Effects, Reason) ->
+    cancel_consumer0(ConsumerId, State, Effects, Reason);
+cancel_consumer(ConsumerId,
+                #?STATE{cfg = #cfg{consumer_strategy = single_active},
+                        waiting_consumers = []} = State,
+                Effects, Reason) ->
+    %% single active consumer on, no consumers are waiting
+    cancel_consumer0(ConsumerId, State, Effects, Reason);
+cancel_consumer(ConsumerId,
+                #?STATE{consumers = Cons0,
+                        cfg = #cfg{consumer_strategy = single_active},
+                        waiting_consumers = Waiting0} = State0,
+                Effects0, Reason) ->
+    %% single active consumer on, consumers are waiting
+    case maps:is_key(ConsumerId, Cons0) of
+        true ->
+            % The active consumer is to be removed
+            {State1, Effects1} = cancel_consumer0(ConsumerId, State0,
+                                                  Effects0, Reason),
+            activate_next_consumer(State1, Effects1);
+        false ->
+            % The cancelled consumer is not active or cancelled
+            % Just remove it from idle_consumers
+            Waiting = lists:keydelete(ConsumerId, 1, Waiting0),
+            Effects = cancel_consumer_effects(ConsumerId, State0, Effects0),
+            % A waiting consumer isn't supposed to have any checked out messages,
+            % so nothing special to do here
+            {State0#?STATE{waiting_consumers = Waiting}, Effects}
+    end.
+
+%% Prepend a mod_call effect notifying rabbit_quorum_queue of a consumer's
+%% new active flag / activity status, carrying the consumer's metadata.
+consumer_update_active_effects(#?STATE{cfg = #cfg{resource = QName}},
+                               ConsumerId, #consumer{meta = Meta},
+                               Active, ActivityStatus,
+                               Effects) ->
+    Ack = maps:get(ack, Meta, undefined),
+    Prefetch = maps:get(prefetch, Meta, undefined),
+    Args = maps:get(args, Meta, []),
+    [{mod_call, rabbit_quorum_queue, update_consumer_handler,
+      [QName, ConsumerId, false, Ack, Prefetch, Active, ActivityStatus, Args]}
+      | Effects].
+
+%% Cancel a known consumer: return/park its checked-out messages as the
+%% reason dictates, emit cancellation effects, and flag the queue as
+%% inactive when the last consumer is gone. No-op if already removed.
+cancel_consumer0(ConsumerId, #?STATE{consumers = C0} = S0, Effects0, Reason) ->
+    case C0 of
+        #{ConsumerId := Consumer} ->
+            {S, Effects2} = maybe_return_all(ConsumerId, Consumer, S0,
+                                             Effects0, Reason),
+            %% The effects are emitted before the consumer is actually removed
+            %% if the consumer has unacked messages. This is a bit weird but
+            %% in line with what classic queues do (from an external point of
+            %% view)
+            Effects = cancel_consumer_effects(ConsumerId, S, Effects2),
+            case maps:size(S#?STATE.consumers) of
+                0 ->
+                    {S, [{aux, inactive} | Effects]};
+                _ ->
+                    {S, Effects}
+            end;
+        _ ->
+            %% already removed: do nothing
+            {S0, Effects0}
+    end.
+
+%% Single-active helper: if no consumer in the active map is `up', promote
+%% the first `up' waiting consumer (queueing it for service and emitting
+%% an activity effect); otherwise leave the state untouched.
+activate_next_consumer(#?STATE{consumers = Cons,
+                               waiting_consumers = Waiting0} = State0,
+                       Effects0) ->
+    case maps:filter(fun (_, #consumer{status = S}) -> S == up end, Cons) of
+        Up when map_size(Up) == 0 ->
+            %% there are no active consumer in the consumer map
+            case lists:filter(fun ({_, #consumer{status = Status}}) ->
+                                      Status == up
+                              end, Waiting0) of
+                [{NextConsumerId, NextConsumer} | _] ->
+                    %% there is a potential next active consumer
+                    Remaining = lists:keydelete(NextConsumerId, 1, Waiting0),
+                    #?STATE{service_queue = ServiceQueue} = State0,
+                    ServiceQueue1 = maybe_queue_consumer(NextConsumerId,
+                                                         NextConsumer,
+                                                         ServiceQueue),
+                    State = State0#?STATE{consumers = Cons#{NextConsumerId => NextConsumer},
+                                          service_queue = ServiceQueue1,
+                                          waiting_consumers = Remaining},
+                    Effects = consumer_update_active_effects(State, NextConsumerId,
+                                                             NextConsumer, true,
+                                                             single_active, Effects0),
+                    {State, Effects};
+                [] ->
+                    %% nobody eligible - the queue has no active consumer
+                    {State0, [{aux, inactive} | Effects0]}
+            end;
+        _ ->
+            {State0, Effects0}
+    end.
+
+
+
+%% On consumer_cancel: keep the consumer around as cancelled with zero
+%% credit (its unacked messages stay checked out until settled).
+%% On down: return all checked-out messages and remove the consumer.
+maybe_return_all(ConsumerId, Consumer,
+                 #?STATE{consumers = C0,
+                         service_queue = SQ0} = S0,
+                 Effects0, Reason) ->
+    case Reason of
+        consumer_cancel ->
+            {Cons, SQ, Effects1} =
+                update_or_remove_sub(ConsumerId,
+                                     Consumer#consumer{lifetime = once,
+                                                       credit = 0,
+                                                       status = cancelled},
+                                     C0, SQ0, Effects0),
+            {S0#?STATE{consumers = Cons,
+                       service_queue = SQ}, Effects1};
+        down ->
+            {S1, Effects1} = return_all(S0, Effects0, ConsumerId, Consumer),
+            {S1#?STATE{consumers = maps:remove(ConsumerId, S1#?STATE.consumers)},
+             Effects1}
+    end.
+
+%% Apply an #enqueue{} command: record the message (deduplicating by
+%% sequence number), index its raft entry, run checkout, and possibly
+%% stash a dehydrated snapshot state at this raft index.
+apply_enqueue(#{index := RaftIdx} = Meta, From, Seq, RawMsg, State0) ->
+    case maybe_enqueue(RaftIdx, From, Seq, RawMsg, [], State0) of
+        {ok, State1, Effects1} ->
+            State2 = append_to_master_index(RaftIdx, State1),
+            {State, ok, Effects} = checkout(Meta, State2, Effects1),
+            {maybe_store_dehydrated_state(RaftIdx, State), ok, Effects};
+        {duplicate, State, Effects} ->
+            {State, ok, Effects}
+    end.

%% Drop the message at the head of the queue (max-length/max-bytes
%% overflow), adjusting byte and in-memory counters and emitting
%% dead-letter effects for it where applicable.
drop_head(#?STATE{ra_indexes = Indexes0} = State0, Effects0) ->
    case take_next_msg(State0) of
        {FullMsg = {_MsgId, {RaftIdxToDrop, {Header, Msg}}},
         State1} ->
            Indexes = rabbit_fifo_index:delete(RaftIdxToDrop, Indexes0),
            State2 = add_bytes_drop(Header, State1#?STATE{ra_indexes = Indexes}),
            %% only messages whose body is held in memory affect the
            %% in-memory counters
            State = case Msg of
                        'empty' -> State2;
                        _ -> subtract_in_memory_counts(Header, State2)
                    end,
            Effects = dead_letter_effects(maxlen, #{none => FullMsg},
                                          State, Effects0),
            {State, Effects};
        {{'$prefix_msg', Header}, State1} ->
            State2 = subtract_in_memory_counts(Header, add_bytes_drop(Header, State1)),
            {State2, Effects0};
        {{'$empty_msg', Header}, State1} ->
            State2 = add_bytes_drop(Header, State1),
            {State2, Effects0};
        empty ->
            {State0, Effects0}
    end.

%% Add a raw message to the queue, deciding (via the in-memory limits)
%% whether to keep the body in memory or store only its header and read
%% the body back from the raft log at delivery time.
enqueue(RaftIdx, RawMsg, #?STATE{messages = Messages,
                                 low_msg_num = LowMsgNum,
                                 next_msg_num = NextMsgNum} = State0) ->
    %% the initial header is an integer only - it will get expanded to a map
    %% when the next required key is added
    Header = message_size(RawMsg),
    {State1, Msg} =
        case evaluate_memory_limit(Header, State0) of
            true ->
                % indexed message with header map
                {State0, {RaftIdx, {Header, 'empty'}}};
            false ->
                {add_in_memory_counts(Header, State0),
                 {RaftIdx, {Header, RawMsg}}} % indexed message with header map
        end,
    State = add_bytes_enqueue(Header, State1),
    State#?STATE{messages = Messages#{NextMsgNum => Msg},
                 %% this is probably only done to record it when low_msg_num
                 %% is undefined
                 low_msg_num = min(LowMsgNum, NextMsgNum),
                 next_msg_num = NextMsgNum + 1}.

%% Record the raft index of a newly applied enqueue and bump the
%% enqueue counter used for release-cursor generation.
append_to_master_index(RaftIdx,
                       #?STATE{ra_indexes = Indexes0} = State0) ->
    State = incr_enqueue_count(State0),
    Indexes = rabbit_fifo_index:append(RaftIdx, Indexes0),
    State#?STATE{ra_indexes = Indexes}.


%% Bump the enqueue counter; when it hits the configured release-cursor
%% interval it is reset to 0, which is the trigger checked by
%% maybe_store_dehydrated_state/2.
incr_enqueue_count(#?STATE{enqueue_count = C,
                           cfg = #cfg{release_cursor_interval = {_Base, C}}
                           } = State0) ->
    %% this will trigger a dehydrated version of the state to be stored
    %% at this raft index for potential future snapshot generation
    %% Q: Why don't we just stash the release cursor here?
    %% A: Because it needs to be the very last thing we do and we
    %% first needs to run the checkout logic.
    State0#?STATE{enqueue_count = 0};
incr_enqueue_count(#?STATE{cfg = #cfg{release_cursor_interval = C} = Cfg}
                   = State0)
  when is_integer(C) ->
    %% conversion to new release cursor interval format
    State = State0#?STATE{cfg = Cfg#cfg{release_cursor_interval = {C, C}}},
    incr_enqueue_count(State);
incr_enqueue_count(#?STATE{enqueue_count = C} = State) ->
    State#?STATE{enqueue_count = C + 1}.

%% When the enqueue counter has just wrapped (== 0), store a dehydrated
%% copy of the state as a release cursor so Ra can later snapshot at
%% this index. The effective interval is scaled with the queue backlog,
%% capped at ?RELEASE_CURSOR_EVERY_MAX.
maybe_store_dehydrated_state(RaftIdx,
                             #?STATE{cfg =
                                         #cfg{release_cursor_interval = {Base, _}}
                                     = Cfg,
                                     ra_indexes = Indexes,
                                     enqueue_count = 0,
                                     release_cursors = Cursors0} = State0) ->
    case rabbit_fifo_index:exists(RaftIdx, Indexes) of
        false ->
            %% the incoming enqueue must already have been dropped
            State0;
        true ->
            Interval = case Base of
                           0 -> 0;
                           _ ->
                               Total = messages_total(State0),
                               min(max(Total, Base),
                                   ?RELEASE_CURSOR_EVERY_MAX)
                       end,
            State = convert_prefix_msgs(
                      State0#?STATE{cfg = Cfg#cfg{release_cursor_interval =
                                                      {Base, Interval}}}),
            Dehydrated = dehydrate_state(State),
            Cursor = {release_cursor, RaftIdx, Dehydrated},
            Cursors = lqueue:in(Cursor, Cursors0),
            State#?STATE{release_cursors = Cursors}
    end;
maybe_store_dehydrated_state(RaftIdx,
                             #?STATE{cfg =
                                         #cfg{release_cursor_interval = C} = Cfg}
                             = State0)
  when is_integer(C) ->
    %% convert to new format
    State = State0#?STATE{cfg = Cfg#cfg{release_cursor_interval = {C, C}}},
    maybe_store_dehydrated_state(RaftIdx, State);
maybe_store_dehydrated_state(_RaftIdx, State) ->
    State.

%% Drain an enqueuer's pending (out-of-order) messages that are now in
%% sequence, enqueueing each and advancing next_seqno, then store the
%% updated enqueuer record in the state.
enqueue_pending(From,
                #enqueuer{next_seqno = Next,
                          pending = [{Next, RaftIdx, RawMsg} | Pending]} = Enq0,
                State0) ->
    State = enqueue(RaftIdx, RawMsg, State0),
    Enq = Enq0#enqueuer{next_seqno = Next + 1, pending = Pending},
    enqueue_pending(From, Enq, State);
enqueue_pending(From, Enq, #?STATE{enqueuers = Enqueuers0} = State) ->
    State#?STATE{enqueuers = Enqueuers0#{From => Enq}}.

%% Enqueue with per-sender sequence tracking: in-order messages are
%% applied immediately, out-of-order ones are buffered in the enqueuer's
%% pending list, and already-seen seqnos are reported as duplicates.
%% Untracked enqueues (no pid/seqno) are applied directly.
maybe_enqueue(RaftIdx, undefined, undefined, RawMsg, Effects, State0) ->
    % direct enqueue without tracking
    State = enqueue(RaftIdx, RawMsg, State0),
    {ok, State, Effects};
maybe_enqueue(RaftIdx, From, MsgSeqNo, RawMsg, Effects0,
              #?STATE{enqueuers = Enqueuers0} = State0) ->
    case maps:get(From, Enqueuers0, undefined) of
        undefined ->
            %% first message from this enqueuer: register it, retry, and
            %% monitor the sender process
            State1 = State0#?STATE{enqueuers = Enqueuers0#{From => #enqueuer{}}},
            {ok, State, Effects} = maybe_enqueue(RaftIdx, From, MsgSeqNo,
                                                 RawMsg, Effects0, State1),
            {ok, State, [{monitor, process, From} | Effects]};
        #enqueuer{next_seqno = MsgSeqNo} = Enq0 ->
            % it is the next expected seqno
            State1 = enqueue(RaftIdx, RawMsg, State0),
            Enq = Enq0#enqueuer{next_seqno = MsgSeqNo + 1},
            State = enqueue_pending(From, Enq, State1),
            {ok, State, Effects0};
        #enqueuer{next_seqno = Next,
                  pending = Pending0} = Enq0
          when MsgSeqNo > Next ->
            % out of order delivery
            Pending = [{MsgSeqNo, RaftIdx, RawMsg} | Pending0],
            Enq = Enq0#enqueuer{pending = lists:sort(Pending)},
            {ok, State0#?STATE{enqueuers = Enqueuers0#{From => Enq}}, Effects0};
        #enqueuer{next_seqno = Next} when MsgSeqNo =< Next ->
            % duplicate delivery - remove the raft index from the ra_indexes
            % map as it was added earlier
            {duplicate, State0, Effects0}
    end.

%% Second element of a 2-tuple.
snd(T) ->
    element(2, T).

%% Apply a 'return' command: put the given checked-out messages back on
%% the returns queue, top up the consumer's credit where applicable and
%% run checkout / release-cursor bookkeeping.
return(#{index := IncomingRaftIdx} = Meta, ConsumerId, Returned,
       Effects0, #?STATE{service_queue = SQ0} = State0) ->
    {State1, Effects1} = maps:fold(
                           fun(MsgId, {Tag, _} = Msg, {S0, E0})
                                 when Tag == '$prefix_msg';
                                      Tag == '$empty_msg'->
                                   return_one(MsgId, 0, Msg, S0, E0, ConsumerId);
                              (MsgId, {MsgNum, Msg}, {S0, E0}) ->
                                   return_one(MsgId, MsgNum, Msg, S0, E0,
                                              ConsumerId)
                           end, {State0, Effects0}, Returned),
    {State2, Effects3} =
        case State1#?STATE.consumers of
            #{ConsumerId := Con0} = Cons0 ->
                Con = Con0#consumer{credit = increase_credit(Con0,
                                                             map_size(Returned))},
                {Cons, SQ, Effects2} = update_or_remove_sub(ConsumerId, Con,
                                                            Cons0, SQ0, Effects1),
                {State1#?STATE{consumers = Cons,
                               service_queue = SQ}, Effects2};
            _ ->
                %% the consumer no longer exists; nothing more to update
                {State1, Effects1}
        end,
    {State, ok, Effects} = checkout(Meta, State2, Effects3),
    update_smallest_raft_index(IncomingRaftIdx, State, Effects).

% used to process messages that are finished
complete(ConsumerId, Discarded,
         #consumer{checked_out = Checked} = Con0, Effects0,
         #?STATE{consumers = Cons0, service_queue = SQ0,
                 ra_indexes = Indexes0} = State0) ->
    %% TODO optimise use of Discarded map here
    MsgRaftIdxs = [RIdx || {_, {RIdx, _}} <- maps:values(Discarded)],
    %% credit_mode = simple_prefetch should automatically top-up credit
    %% as messages are simple_prefetch or otherwise returned
    Con = Con0#consumer{checked_out = maps:without(maps:keys(Discarded), Checked),
                        credit = increase_credit(Con0, map_size(Discarded))},
    {Cons, SQ, Effects} = update_or_remove_sub(ConsumerId, Con, Cons0,
                                               SQ0, Effects0),
    Indexes = lists:foldl(fun rabbit_fifo_index:delete/2, Indexes0,
                          MsgRaftIdxs),
    %% TODO: use maps:fold instead
    State1 = lists:foldl(fun({_, {_, {Header, _}}}, Acc) ->
                                 add_bytes_settle(Header, Acc);
                            ({'$prefix_msg', Header}, Acc) ->
                                 add_bytes_settle(Header, Acc);
                            ({'$empty_msg', Header}, Acc) ->
                                 add_bytes_settle(Header, Acc)
                         end, State0, maps:values(Discarded)),

    {State1#?STATE{consumers = Cons,
                   ra_indexes = Indexes,
                   service_queue = SQ}, Effects}.

%% Work out a consumer's new credit after Credit messages were settled
%% or returned; 'once' and 'credited' consumers never auto-replenish.
increase_credit(#consumer{lifetime = once,
                          credit = Credit}, _) ->
    %% once consumers cannot increment credit
    Credit;
increase_credit(#consumer{lifetime = auto,
                          credit_mode = credited,
                          credit = Credit}, _) ->
    %% credit_mode: credit also doesn't automatically increment credit
    Credit;
increase_credit(#consumer{credit = Current}, Credit) ->
    Current + Credit.

%% Settle/discard the given message ids for a consumer, then run
%% checkout and maybe advance the release cursor.
complete_and_checkout(#{index := IncomingRaftIdx} = Meta, MsgIds, ConsumerId,
                      #consumer{checked_out = Checked0} = Con0,
                      Effects0, State0) ->
    Discarded = maps:with(MsgIds, Checked0),
    {State2, Effects1} = complete(ConsumerId, Discarded, Con0,
                                  Effects0, State0),
    {State, ok, Effects} = checkout(Meta, State2, Effects1),
    update_smallest_raft_index(IncomingRaftIdx, State, Effects).

%% Build a {log, ...} effect that reads the raft entries of any
%% body-evicted ('empty') messages and hands all dead-lettered messages
%% to the configured dead-letter handler MFA (no-op when unconfigured).
dead_letter_effects(_Reason, _Discarded,
                    #?STATE{cfg = #cfg{dead_letter_handler = undefined}},
                    Effects) ->
    Effects;
dead_letter_effects(Reason, Discarded,
                    #?STATE{cfg = #cfg{dead_letter_handler = {Mod, Fun, Args}}},
                    Effects) ->
    %% collect raft indexes of messages whose bodies are not in memory
    RaftIdxs = maps:fold(
                 fun (_, {_, {RaftIdx, {_Header, 'empty'}}}, Acc) ->
                         [RaftIdx | Acc];
                     (_, _, Acc) ->
                         Acc
                 end, [], Discarded),
    [{log, RaftIdxs,
      fun (Log) ->
              Lookup = maps:from_list(lists:zip(RaftIdxs, Log)),
              DeadLetters = maps:fold(
                              fun (_, {_, {RaftIdx, {_Header, 'empty'}}}, Acc) ->
                                      %% body not in memory: take it from
                                      %% the fetched raft log entry
                                      {enqueue, _, _, Msg} = maps:get(RaftIdx, Lookup),
                                      [{Reason, Msg} | Acc];
                                  (_, {_, {_, {_Header, Msg}}}, Acc) ->
                                      [{Reason, Msg} | Acc];
                                  (_, _, Acc) ->
                                      Acc
                              end, [], Discarded),
              [{mod_call, Mod, Fun, Args ++ [DeadLetters]}]
      end} | Effects].

%% Effect notifying rabbit_quorum_queue that a consumer was cancelled.
cancel_consumer_effects(ConsumerId,
                        #?STATE{cfg = #cfg{resource = QName}}, Effects) ->
    [{mod_call, rabbit_quorum_queue,
      cancel_consumer_handler, [QName, ConsumerId]} | Effects].

%% If no live raft indexes remain, emit a release cursor right at the
%% incoming index; otherwise emit the newest stored cursor that is older
%% than the smallest live index (if any).
update_smallest_raft_index(IncomingRaftIdx,
                           #?STATE{ra_indexes = Indexes,
                                   release_cursors = Cursors0} = State0,
                           Effects) ->
    case rabbit_fifo_index:size(Indexes) of
        0 ->
            % there are no messages on queue anymore and no pending enqueues
            % we can forward release_cursor all the way until
            % the last received command, hooray
            State = State0#?STATE{release_cursors = lqueue:new()},
            {State, ok, Effects ++ [{release_cursor, IncomingRaftIdx, State}]};
        _ ->
            Smallest = rabbit_fifo_index:smallest(Indexes),
            case find_next_cursor(Smallest, Cursors0) of
                {empty, Cursors} ->
                    {State0#?STATE{release_cursors = Cursors},
                     ok, Effects};
                {Cursor, Cursors} ->
                    %% we can emit a release cursor we've passed the smallest
                    %% release cursor available.
                    {State0#?STATE{release_cursors = Cursors}, ok,
                     Effects ++ [Cursor]}
            end
    end.

%% Pop stored release cursors with index < Idx, returning the last such
%% cursor (or 'empty') together with the remaining cursor queue.
find_next_cursor(Idx, Cursors) ->
    find_next_cursor(Idx, Cursors, empty).

find_next_cursor(Smallest, Cursors0, Potential) ->
    case lqueue:out(Cursors0) of
        {{value, {_, Idx, _} = Cursor}, Cursors} when Idx < Smallest ->
            %% we found one but it may not be the largest one
            find_next_cursor(Smallest, Cursors, Cursor);
        _ ->
            {Potential, Cursors0}
    end.

%% Update a key in a message header, first expanding the condensed
%% integer-only (size) header into its map form when needed.
update_header(Key, UpdateFun, Default, Header)
  when is_integer(Header) ->
    update_header(Key, UpdateFun, Default, #{size => Header});
update_header(Key, UpdateFun, Default, Header) ->
    maps:update_with(Key, UpdateFun, Default, Header).


%% Return a single message to the returns queue, or settle/dead-letter
%% it when its delivery count exceeds the delivery limit. First clause
%% handles prefix/empty (snapshot-derived) messages, the second regular
%% indexed messages.
return_one(MsgId, 0, {Tag, Header0},
           #?STATE{returns = Returns,
                   consumers = Consumers,
                   cfg = #cfg{delivery_limit = DeliveryLimit}} = State0,
           Effects0, ConsumerId)
  when Tag == '$prefix_msg'; Tag == '$empty_msg' ->
    #consumer{checked_out = Checked} = Con0 = maps:get(ConsumerId, Consumers),
    Header = update_header(delivery_count, fun (C) -> C+1 end, 1, Header0),
    Msg0 = {Tag, Header},
    case maps:get(delivery_count, Header) of
        DeliveryCount when DeliveryCount > DeliveryLimit ->
            complete(ConsumerId, #{MsgId => Msg0}, Con0, Effects0, State0);
        _ ->
            %% this should not affect the release cursor in any way
            Con = Con0#consumer{checked_out = maps:remove(MsgId, Checked)},
            %% if keeping the body in memory would now exceed the
            %% in-memory limits, downgrade to an '$empty_msg'
            {Msg, State1} = case Tag of
                                '$empty_msg' ->
                                    {Msg0, State0};
                                _ -> case evaluate_memory_limit(Header, State0) of
                                         true ->
                                             {{'$empty_msg', Header}, State0};
                                         false ->
                                             {Msg0, add_in_memory_counts(Header, State0)}
                                     end
                            end,
            {add_bytes_return(
               Header,
               State1#?STATE{consumers = Consumers#{ConsumerId => Con},
                             returns = lqueue:in(Msg, Returns)}),
             Effects0}
    end;
return_one(MsgId, MsgNum, {RaftId, {Header0, RawMsg}},
           #?STATE{returns = Returns,
                   consumers = Consumers,
                   cfg = #cfg{delivery_limit = DeliveryLimit}} = State0,
           Effects0, ConsumerId) ->
    #consumer{checked_out = Checked} = Con0 = maps:get(ConsumerId, Consumers),
    Header = update_header(delivery_count, fun (C) -> C+1 end, 1, Header0),
    Msg0 = {RaftId, {Header, RawMsg}},
    case maps:get(delivery_count, Header) of
        DeliveryCount when DeliveryCount > DeliveryLimit ->
            DlMsg = {MsgNum, Msg0},
            Effects = dead_letter_effects(delivery_limit, #{none => DlMsg},
                                          State0, Effects0),
            complete(ConsumerId, #{MsgId => DlMsg}, Con0, Effects, State0);
        _ ->
            Con = Con0#consumer{checked_out = maps:remove(MsgId, Checked)},
            %% this should not affect the release cursor in any way
            {Msg, State1} = case RawMsg of
                                'empty' ->
                                    {Msg0, State0};
                                _ ->
                                    case evaluate_memory_limit(Header, State0) of
                                        true ->
                                            {{RaftId, {Header, 'empty'}}, State0};
                                        false ->
                                            {Msg0, add_in_memory_counts(Header, State0)}
                                    end
                            end,
            {add_bytes_return(
               Header,
               State1#?STATE{consumers = Consumers#{ConsumerId => Con},
                             returns = lqueue:in({MsgNum, Msg}, Returns)}),
             Effects0}
    end.

%% Return every checked-out message of a consumer (e.g. the consumer
%% process went down).
return_all(#?STATE{consumers = Cons} = State0, Effects0, ConsumerId,
           #consumer{checked_out = Checked0} = Con) ->
    %% need to sort the list so that we return messages in the order
    %% they were checked out
    Checked = lists:sort(maps:to_list(Checked0)),
    State = State0#?STATE{consumers = Cons#{ConsumerId => Con}},
    lists:foldl(fun ({MsgId, {'$prefix_msg', _} = Msg}, {S, E}) ->
                        return_one(MsgId, 0, Msg, S, E, ConsumerId);
                    ({MsgId, {'$empty_msg', _} = Msg}, {S, E}) ->
                        return_one(MsgId, 0, Msg, S, E, ConsumerId);
                    ({MsgId, {MsgNum, Msg}}, {S, E}) ->
                        return_one(MsgId, MsgNum, Msg, S, E, ConsumerId)
                end, {State, Effects0}, Checked).

%% checkout new messages to consumers
checkout(#{index := Index}, State0, Effects0) ->
    {State1, _Result, Effects1} = checkout0(checkout_one(State0),
                                            Effects0, {#{}, #{}}),
    %% enforce max-length/max-bytes afterwards; if anything was dropped
    %% we may also be able to advance the release cursor
    case evaluate_limit(false, State1, Effects1) of
        {State, true, Effects} ->
            update_smallest_raft_index(Index, State, Effects);
        {State, false, Effects} ->
            {State, ok, Effects}
    end.

%% Drive checkout_one/1 to a fixpoint, accumulating per-consumer lists
%% of in-memory deliveries (SendAcc) and deliveries whose bodies must be
%% read from the raft log (LogAcc), then flush both as effects.
checkout0({success, ConsumerId, MsgId, {RaftIdx, {Header, 'empty'}}, State},
          Effects, {SendAcc, LogAcc0}) ->
    %% body not in memory: defer delivery to a {log, ...} effect
    DelMsg = {RaftIdx, {MsgId, Header}},
    LogAcc = maps:update_with(ConsumerId,
                              fun (M) -> [DelMsg | M] end,
                              [DelMsg], LogAcc0),
    checkout0(checkout_one(State), Effects, {SendAcc, LogAcc});
checkout0({success, ConsumerId, MsgId, Msg, State}, Effects,
          {SendAcc0, LogAcc}) ->
    DelMsg = {MsgId, Msg},
    SendAcc = maps:update_with(ConsumerId,
                               fun (M) -> [DelMsg | M] end,
                               [DelMsg], SendAcc0),
    checkout0(checkout_one(State), Effects, {SendAcc, LogAcc});
checkout0({Activity, State0}, Effects0, {SendAcc, LogAcc}) ->
    %% no (more) messages can be checked out: flush the accumulators
    Effects1 = case Activity of
                   nochange ->
                       append_send_msg_effects(
                         append_log_effects(Effects0, LogAcc), SendAcc);
                   inactive ->
                       [{aux, inactive}
                        | append_send_msg_effects(
                            append_log_effects(Effects0, LogAcc), SendAcc)]
               end,
    {State0, ok, lists:reverse(Effects1)}.

%% Drop messages from the head while the queue exceeds max_length or
%% max_bytes; Result becomes 'true' if anything was dropped.
evaluate_limit(Result,
               #?STATE{cfg = #cfg{max_length = undefined,
                                  max_bytes = undefined}} = State,
               Effects) ->
    {State, Result, Effects};
evaluate_limit(Result, State00, Effects0) ->
    State0 = convert_prefix_msgs(State00),
    case is_over_limit(State0) of
        true ->
            {State, Effects} = drop_head(State0, Effects0),
            evaluate_limit(true, State, Effects);
        false ->
            {State0, Result, Effects0}
    end.

%% Decide whether a message body of the given size must be kept out of
%% memory, based on max_in_memory_length / max_in_memory_bytes.
evaluate_memory_limit(_Header,
                      #?STATE{cfg = #cfg{max_in_memory_length = undefined,
                                         max_in_memory_bytes = undefined}}) ->
    false;
evaluate_memory_limit(#{size := Size}, State) ->
    evaluate_memory_limit(Size, State);
evaluate_memory_limit(Size,
                      #?STATE{cfg = #cfg{max_in_memory_length = MaxLength,
                                         max_in_memory_bytes = MaxBytes},
                              msg_bytes_in_memory = Bytes,
                              msgs_ready_in_memory = Length})
  when is_integer(Size) ->
    (Length >= MaxLength) orelse ((Bytes + Size) > MaxBytes).

%% Turn the accumulated per-consumer delivery lists into send_msg
%% effects; also emits {aux, active} when anything was delivered.
append_send_msg_effects(Effects, AccMap) when map_size(AccMap) == 0 ->
    Effects;
append_send_msg_effects(Effects0, AccMap) ->
    Effects = maps:fold(fun (C, Msgs, Ef) ->
                                [send_msg_effect(C, lists:reverse(Msgs)) | Ef]
                        end, Effects0, AccMap),
    [{aux, active} | Effects].

%% Turn the accumulated per-consumer "read body from log" lists into
%% {log, ...} effects.
append_log_effects(Effects0, AccMap) ->
    maps:fold(fun (C, Msgs, Ef) ->
                      [send_log_effect(C, lists:reverse(Msgs)) | Ef]
              end, Effects0, AccMap).

%% next message is determined as follows:
%% First we check if there are prefix returns
%% Then we check if there are current returns
%% then we check prefix msgs
%% then we check current messages
%%
%% When we return it is always done to the current return queue
%% for both prefix messages and current messages
take_next_msg(#?STATE{prefix_msgs = {R, P}} = State) ->
    %% conversion
    take_next_msg(State#?STATE{prefix_msgs = {length(R), R, length(P), P}});
take_next_msg(#?STATE{prefix_msgs = {NumR, [{'$empty_msg', _} = Msg | Rem],
                                     NumP, P}} = State) ->
    %% there are prefix returns, these should be served first
    {Msg, State#?STATE{prefix_msgs = {NumR-1, Rem, NumP, P}}};
take_next_msg(#?STATE{prefix_msgs = {NumR, [Header | Rem], NumP, P}} = State) ->
    %% there are prefix returns, these should be served first
    {{'$prefix_msg', Header},
     State#?STATE{prefix_msgs = {NumR-1, Rem, NumP, P}}};
take_next_msg(#?STATE{returns = Returns,
                      low_msg_num = Low0,
                      messages = Messages0,
                      prefix_msgs = {NumR, R, NumP, P}} = State) ->
    %% use peek rather than out there as the most likely case is an empty
    %% queue
    case lqueue:peek(Returns) of
        {value, NextMsg} ->
            {NextMsg,
             State#?STATE{returns = lqueue:drop(Returns)}};
        empty when P == [] ->
            case Low0 of
                undefined ->
                    empty;
                _ ->
                    {Msg, Messages} = maps:take(Low0, Messages0),
                    case maps:size(Messages) of
                        0 ->
                            {{Low0, Msg},
                             State#?STATE{messages = Messages,
                                          low_msg_num = undefined}};
                        _ ->
                            {{Low0, Msg},
                             State#?STATE{messages = Messages,
                                          low_msg_num = Low0 + 1}}
                    end
            end;
        empty ->
            [Msg | Rem] = P,
            case Msg of
                {Header, 'empty'} ->
                    %% There are prefix msgs
                    {{'$empty_msg', Header},
                     State#?STATE{prefix_msgs = {NumR, R, NumP-1, Rem}}};
                Header ->
                    {{'$prefix_msg', Header},
                     State#?STATE{prefix_msgs = {NumR, R, NumP-1, Rem}}}
            end
    end.

%% Effect delivering in-memory messages straight to the consumer channel.
send_msg_effect({CTag, CPid}, Msgs) ->
    {send_msg, CPid, {delivery, CTag, Msgs}, [local, ra_event]}.

%% Effect that reads message bodies from the raft log (preferably on the
%% consumer's node) and then delivers them.
send_log_effect({CTag, CPid}, IdxMsgs) ->
    {RaftIdxs, Data} = lists:unzip(IdxMsgs),
    {log, RaftIdxs,
     fun(Log) ->
             Msgs = lists:zipwith(fun ({enqueue, _, _, Msg}, {MsgId, Header}) ->
                                          {MsgId, {Header, Msg}}
                                  end, Log, Data),
             [{send_msg, CPid, {delivery, CTag, Msgs}, [local, ra_event]}]
     end,
     {local, node(CPid)}}.

%% Effect replying to a dequeue call with a body read from the raft log.
reply_log_effect(RaftIdx, MsgId, Header, Ready, From) ->
    {log, [RaftIdx],
     fun([{enqueue, _, _, Msg}]) ->
             [{reply, From, {wrap_reply,
                             {dequeue, {MsgId, {Header, Msg}}, Ready}}}]
     end}.

%% Attempt to check a single message out to the consumer at the head of
%% the service queue. Returns {success, ConsumerId, MsgId, Msg, State}
%% or {nochange | inactive, State}.
checkout_one(#?STATE{service_queue = SQ0,
                     messages = Messages0,
                     consumers = Cons0} = InitState) ->
    case queue:peek(SQ0) of
        {value, ConsumerId} ->
            case take_next_msg(InitState) of
                {ConsumerMsg, State0} ->
                    SQ1 = queue:drop(SQ0),
                    %% there are consumers waiting to be serviced
                    %% process consumer checkout
                    case maps:find(ConsumerId, Cons0) of
                        {ok, #consumer{credit = 0}} ->
                            %% no credit but was still on queue
                            %% can happen when draining
                            %% recurse without consumer on queue
                            checkout_one(InitState#?STATE{service_queue = SQ1});
                        {ok, #consumer{status = cancelled}} ->
                            checkout_one(InitState#?STATE{service_queue = SQ1});
                        {ok, #consumer{status = suspected_down}} ->
                            checkout_one(InitState#?STATE{service_queue = SQ1});
                        {ok, #consumer{checked_out = Checked0,
                                       next_msg_id = Next,
                                       credit = Credit,
                                       delivery_count = DelCnt} = Con0} ->
                            Checked = maps:put(Next, ConsumerMsg, Checked0),
                            Con = Con0#consumer{checked_out = Checked,
                                                next_msg_id = Next + 1,
                                                credit = Credit - 1,
                                                delivery_count = DelCnt + 1},
                            {Cons, SQ, []} = % we expect no effects
                                update_or_remove_sub(ConsumerId, Con,
                                                     Cons0, SQ1, []),
                            State1 = State0#?STATE{service_queue = SQ,
                                                   consumers = Cons},
                            %% adjust byte/in-memory accounting depending
                            %% on the message's representation
                            {State, Msg} =
                                case ConsumerMsg of
                                    {'$prefix_msg', Header} ->
                                        {subtract_in_memory_counts(
                                           Header, add_bytes_checkout(Header, State1)),
                                         ConsumerMsg};
                                    {'$empty_msg', Header} ->
                                        {add_bytes_checkout(Header, State1),
                                         ConsumerMsg};
                                    {_, {_, {Header, 'empty'}} = M} ->
                                        {add_bytes_checkout(Header, State1),
                                         M};
                                    {_, {_, {Header, _} = M}} ->
                                        {subtract_in_memory_counts(
                                           Header,
                                           add_bytes_checkout(Header, State1)),
                                         M}
                                end,
                            {success, ConsumerId, Next, Msg, State};
                        error ->
                            %% consumer did not exist but was queued, recurse
                            checkout_one(InitState#?STATE{service_queue = SQ1})
                    end;
                empty ->
                    {nochange, InitState}
            end;
        empty ->
            case maps:size(Messages0) of
                0 -> {nochange, InitState};
                _ -> {inactive, InitState}
            end
    end.

%% Store an updated consumer and (re-)queue it on the service queue when
%% it still has credit; fully settled 'once' consumers are removed.
update_or_remove_sub(ConsumerId, #consumer{lifetime = auto,
                                           credit = 0} = Con,
                     Cons, ServiceQueue, Effects) ->
    {maps:put(ConsumerId, Con, Cons), ServiceQueue, Effects};
update_or_remove_sub(ConsumerId, #consumer{lifetime = auto} = Con,
                     Cons, ServiceQueue, Effects) ->
    {maps:put(ConsumerId, Con, Cons),
     uniq_queue_in(ConsumerId, ServiceQueue), Effects};
update_or_remove_sub(ConsumerId, #consumer{lifetime = once,
                                           checked_out = Checked,
                                           credit = 0} = Con,
                     Cons, ServiceQueue, Effects) ->
    case maps:size(Checked) of
        0 ->
            % we're done with this consumer
            % TODO: demonitor consumer pid but _only_ if there are no other
            % monitors for this pid
            {maps:remove(ConsumerId, Cons), ServiceQueue, Effects};
        _ ->
            % there are unsettled items so need to keep around
            {maps:put(ConsumerId, Con, Cons), ServiceQueue, Effects}
    end;
update_or_remove_sub(ConsumerId, #consumer{lifetime = once} = Con,
                     Cons, ServiceQueue, Effects) ->
    {maps:put(ConsumerId, Con, Cons),
     uniq_queue_in(ConsumerId, ServiceQueue), Effects}.

%% Enqueue Key on the service queue unless it is already present.
uniq_queue_in(Key, Queue) ->
    % TODO: queue:member could surely be quite expensive, however the practical
    % number of unique consumers may not be large enough for it to matter
    case queue:member(Key, Queue) of
        true ->
            Queue;
        false ->
            queue:in(Key, Queue)
    end.

%% Add or update a consumer according to the queue's consumer strategy:
%% with single_active only the first consumer becomes active and later
%% ones are appended to the waiting list.
update_consumer(ConsumerId, Meta, Spec,
                #?STATE{cfg = #cfg{consumer_strategy = competing}} = State0) ->
    %% general case, single active consumer off
    update_consumer0(ConsumerId, Meta, Spec, State0);
update_consumer(ConsumerId, Meta, Spec,
                #?STATE{consumers = Cons0,
                        cfg = #cfg{consumer_strategy = single_active}} = State0)
  when map_size(Cons0) == 0 ->
    %% single active consumer on, no one is consuming yet
    update_consumer0(ConsumerId, Meta, Spec, State0);
update_consumer(ConsumerId, Meta, {Life, Credit, Mode},
                #?STATE{cfg = #cfg{consumer_strategy = single_active},
                        waiting_consumers = WaitingConsumers0} = State0) ->
    %% single active consumer on and one active consumer already
    %% adding the new consumer to the waiting list
    Consumer = #consumer{lifetime = Life, meta = Meta,
                         credit = Credit, credit_mode = Mode},
    WaitingConsumers1 = WaitingConsumers0 ++ [{ConsumerId, Consumer}],
    State0#?STATE{waiting_consumers = WaitingConsumers1}.

%% Insert a new consumer, or update an existing one's lifetime/credit
%% (net of in-flight messages), then maybe put it on the service queue.
update_consumer0(ConsumerId, Meta, {Life, Credit, Mode},
                 #?STATE{consumers = Cons0,
                         service_queue = ServiceQueue0} = State0) ->
    %% TODO: this logic may not be correct for updating a pre-existing consumer
    Init = #consumer{lifetime = Life, meta = Meta,
                     credit = Credit, credit_mode = Mode},
    Cons = maps:update_with(ConsumerId,
                            fun(S) ->
                                    %% remove any in-flight messages from
                                    %% the credit update
                                    N = maps:size(S#consumer.checked_out),
                                    C = max(0, Credit - N),
                                    S#consumer{lifetime = Life, credit = C}
                            end, Init, Cons0),
    ServiceQueue = maybe_queue_consumer(ConsumerId, maps:get(ConsumerId, Cons),
                                        ServiceQueue0),
    State0#?STATE{consumers = Cons, service_queue = ServiceQueue}.

%% Put the consumer on the service queue if it has any credit left.
maybe_queue_consumer(ConsumerId, #consumer{credit = Credit},
                     ServiceQueue0) ->
    case Credit > 0 of
        true ->
            % consumer needs service - check if already on service queue
            uniq_queue_in(ConsumerId, ServiceQueue0);
        false ->
            ServiceQueue0
    end.

%% Convert the old 2-tuple prefix_msgs format into the counted 4-tuple
%% format {NumReturns, Returns, NumMsgs, Msgs}.
convert_prefix_msgs(#?STATE{prefix_msgs = {R, P}} = State) ->
    State#?STATE{prefix_msgs = {length(R), R, length(P), P}};
convert_prefix_msgs(State) ->
    State.

%% creates a dehydrated version of the current state to be cached and
%% potentially used for a snapshot at a later point: message bodies and
%% indexes are dropped and messages collapse into prefix/header form
dehydrate_state(#?STATE{messages = Messages,
                        consumers = Consumers,
                        returns = Returns,
                        low_msg_num = Low,
                        next_msg_num = Next,
                        prefix_msgs = {PRCnt, PrefRet0, PPCnt, PrefMsg0},
                        waiting_consumers = Waiting0} = State) ->
    RCnt = lqueue:len(Returns),
    %% TODO: optimise this function as far as possible
    PrefRet1 = lists:foldr(fun ({'$prefix_msg', Header}, Acc) ->
                                   [Header | Acc];
                               ({'$empty_msg', _} = Msg, Acc) ->
                                   [Msg | Acc];
                               ({_, {_, {Header, 'empty'}}}, Acc) ->
                                   [{'$empty_msg', Header} | Acc];
                               ({_, {_, {Header, _}}}, Acc) ->
                                   [Header | Acc]
                           end,
                           [],
                           lqueue:to_list(Returns)),
    PrefRet = PrefRet0 ++ PrefRet1,
    PrefMsgsSuff = dehydrate_messages(Low, Next - 1, Messages, []),
    %% prefix messages are not populated in normal operation only after
    %% recovering from a snapshot
    PrefMsgs = PrefMsg0 ++ PrefMsgsSuff,
    Waiting = [{Cid, dehydrate_consumer(C)} || {Cid, C} <- Waiting0],
    State#?STATE{messages = #{},
                 ra_indexes = rabbit_fifo_index:empty(),
                 release_cursors = lqueue:new(),
                 low_msg_num = undefined,
                 consumers = maps:map(fun (_, C) ->
                                              dehydrate_consumer(C)
                                      end, Consumers),
                 returns = lqueue:new(),
                 prefix_msgs = {PRCnt + RCnt, PrefRet,
                                PPCnt + maps:size(Messages), PrefMsgs},
                 waiting_consumers = Waiting}.

%% Walk message numbers Next down to Low, turning each stored message
%% into its prefix representation: header only for in-memory bodies, or
%% the {Header, 'empty'} placeholder as-is.
dehydrate_messages(Low, Next, _Msgs, Acc)
  when Next < Low ->
    Acc;
dehydrate_messages(Low, Next, Msgs, Acc0) ->
    Acc = case maps:get(Next, Msgs) of
              {_RaftIdx, {_, 'empty'} = Msg} ->
                  [Msg | Acc0];
              {_RaftIdx, {Header, _}} ->
                  [Header | Acc0]
          end,
    dehydrate_messages(Low, Next - 1, Msgs, Acc).

%% Strip a consumer's checked-out messages down to their prefix forms.
dehydrate_consumer(#consumer{checked_out = Checked0} = Con) ->
    Checked = maps:map(fun (_, {'$prefix_msg', _} = M) ->
                               M;
                           (_, {'$empty_msg', _} = M) ->
                               M;
                           (_, {_, {_, {Header, 'empty'}}}) ->
                               {'$empty_msg', Header};
                           (_, {_, {_, {Header, _}}}) ->
                               {'$prefix_msg', Header}
                       end, Checked0),
    Con#consumer{checked_out = Checked}.

%% make the state suitable for equality comparison
normalize(#?STATE{release_cursors = Cursors} = State) ->
    State#?STATE{release_cursors = lqueue:from_list(lqueue:to_list(Cursors))}.

%% 'true' when the ready message count or total enqueued bytes exceed
%% the configured max_length / max_bytes limits.
is_over_limit(#?STATE{cfg = #cfg{max_length = undefined,
                                 max_bytes = undefined}}) ->
    false;
is_over_limit(#?STATE{cfg = #cfg{max_length = MaxLength,
                                 max_bytes = MaxBytes},
                      msg_bytes_enqueue = BytesEnq} = State) ->

    messages_ready(State) > MaxLength orelse (BytesEnq > MaxBytes).

%% Normalise release_cursor_interval and prefix_msgs to their newest v0
%% representations before handing the state to the v1 module.
normalize_for_v1(#?STATE{cfg = Cfg} = State) ->
    %% run all v0 conversions so that v1 does not have to have this code
    RCI = case Cfg of
              #cfg{release_cursor_interval = {_, _} = R} ->
                  R;
              #cfg{release_cursor_interval = undefined} ->
                  {?RELEASE_CURSOR_EVERY, ?RELEASE_CURSOR_EVERY};
              #cfg{release_cursor_interval = C} ->
                  {?RELEASE_CURSOR_EVERY, C}
          end,
    convert_prefix_msgs(
      State#?STATE{cfg = Cfg#cfg{release_cursor_interval = RCI}}).

%% Dynamic accessors into the state and cfg record tuples, by field name.
get_field(Field, State) ->
    Fields = record_info(fields, ?STATE),
    Index = record_index_of(Field, Fields),
    element(Index, State).

get_cfg_field(Field, #?STATE{cfg = Cfg} ) ->
    Fields = record_info(fields, cfg),
    Index = record_index_of(Field, Fields),
    element(Index, Cfg).

%% Field position within a record tuple (element 1 is the record tag,
%% hence the search starts counting at 2).
record_index_of(F, Fields) ->
    index_of(2, F, Fields).

%% Position of F in Fields, counting from N; exits with
%% {field_not_found, F} when absent.
index_of(_, F, []) ->
    exit({field_not_found, F});
index_of(N, F, [F | _]) ->
    N;
index_of(N, F, [_ | T]) ->
    index_of(N+1, F, T).

%% Constructors for the protocol command records this state machine
%% accepts.
-spec make_enqueue(option(pid()), option(msg_seqno()), raw_msg()) -> protocol().
make_enqueue(Pid, Seq, Msg) ->
    #enqueue{pid = Pid, seq = Seq, msg = Msg}.
-spec make_checkout(consumer_id(),
                    checkout_spec(), consumer_meta()) -> protocol().
make_checkout(ConsumerId, Spec, Meta) ->
    #checkout{consumer_id = ConsumerId,
              spec = Spec, meta = Meta}.

-spec make_settle(consumer_id(), [msg_id()]) -> protocol().
make_settle(ConsumerId, MsgIds) ->
    #settle{consumer_id = ConsumerId, msg_ids = MsgIds}.

-spec make_return(consumer_id(), [msg_id()]) -> protocol().
make_return(ConsumerId, MsgIds) ->
    #return{consumer_id = ConsumerId, msg_ids = MsgIds}.

-spec make_discard(consumer_id(), [msg_id()]) -> protocol().
make_discard(ConsumerId, MsgIds) ->
    #discard{consumer_id = ConsumerId, msg_ids = MsgIds}.

-spec make_credit(consumer_id(), non_neg_integer(), non_neg_integer(),
                  boolean()) -> protocol().
make_credit(ConsumerId, Credit, DeliveryCount, Drain) ->
    #credit{consumer_id = ConsumerId,
            credit = Credit,
            delivery_count = DeliveryCount,
            drain = Drain}.

-spec make_purge() -> protocol().
make_purge() -> #purge{}.

-spec make_purge_nodes([node()]) -> protocol().
make_purge_nodes(Nodes) ->
    #purge_nodes{nodes = Nodes}.

-spec make_update_config(config()) -> protocol().
make_update_config(Config) ->
    #update_config{config = Config}.

%% Byte accounting helpers. A header is either a bare integer size or a
%% map with a 'size' key; the map clauses unwrap and recurse.
add_bytes_enqueue(Bytes,
                  #?STATE{msg_bytes_enqueue = Enqueue} = State)
  when is_integer(Bytes) ->
    State#?STATE{msg_bytes_enqueue = Enqueue + Bytes};
add_bytes_enqueue(#{size := Bytes}, State) ->
    add_bytes_enqueue(Bytes, State).

add_bytes_drop(Bytes,
               #?STATE{msg_bytes_enqueue = Enqueue} = State)
  when is_integer(Bytes) ->
    State#?STATE{msg_bytes_enqueue = Enqueue - Bytes};
add_bytes_drop(#{size := Bytes}, State) ->
    add_bytes_drop(Bytes, State).

%% Move Bytes from the 'enqueued' bucket to the 'checked out' bucket.
add_bytes_checkout(Bytes,
                   #?STATE{msg_bytes_checkout = Checkout,
                           msg_bytes_enqueue = Enqueue } = State)
  when is_integer(Bytes) ->
    State#?STATE{msg_bytes_checkout = Checkout + Bytes,
                 msg_bytes_enqueue = Enqueue - Bytes};
add_bytes_checkout(#{size := Bytes}, State) ->
    add_bytes_checkout(Bytes, State).

%% Remove settled bytes from the 'checked out' bucket.
add_bytes_settle(Bytes,
                 #?STATE{msg_bytes_checkout = Checkout} = State)
  when is_integer(Bytes) ->
    State#?STATE{msg_bytes_checkout = Checkout - Bytes};
add_bytes_settle(#{size := Bytes}, State) ->
    add_bytes_settle(Bytes, State).

%% Move returned bytes back from 'checked out' to 'enqueued'.
add_bytes_return(Bytes,
                 #?STATE{msg_bytes_checkout = Checkout,
                         msg_bytes_enqueue = Enqueue} = State)
  when is_integer(Bytes) ->
    State#?STATE{msg_bytes_checkout = Checkout - Bytes,
                 msg_bytes_enqueue = Enqueue + Bytes};
add_bytes_return(#{size := Bytes}, State) ->
    add_bytes_return(Bytes, State).

%% Track one more ready message whose body is held in memory.
add_in_memory_counts(Bytes,
                     #?STATE{msg_bytes_in_memory = InMemoryBytes,
                             msgs_ready_in_memory = InMemoryCount} = State)
  when is_integer(Bytes) ->
    State#?STATE{msg_bytes_in_memory = InMemoryBytes + Bytes,
                 msgs_ready_in_memory = InMemoryCount + 1};
add_in_memory_counts(#{size := Bytes}, State) ->
    add_in_memory_counts(Bytes, State).

%% Inverse of add_in_memory_counts/2.
subtract_in_memory_counts(Bytes,
                          #?STATE{msg_bytes_in_memory = InMemoryBytes,
                                  msgs_ready_in_memory = InMemoryCount} = State)
  when is_integer(Bytes) ->
    State#?STATE{msg_bytes_in_memory = InMemoryBytes - Bytes,
                 msgs_ready_in_memory = InMemoryCount - 1};
subtract_in_memory_counts(#{size := Bytes}, State) ->
    subtract_in_memory_counts(Bytes, State).

%% Payload size in bytes for the various message representations.
message_size(#basic_message{content = Content}) ->
    #content{payload_fragments_rev = PFR} = Content,
    iolist_size(PFR);
message_size({'$prefix_msg', H}) ->
    get_size_from_header(H);
message_size({'$empty_msg', H}) ->
    get_size_from_header(H);
message_size(B) when is_binary(B) ->
    byte_size(B);
message_size(Msg) ->
    %% probably only hit this for testing so ok to use erts_debug
    erts_debug:size(Msg).

%% Extract the payload size from either header representation (bare
%% integer or map with a 'size' key).
get_size_from_header(Size) when is_integer(Size) ->
    Size;
get_size_from_header(#{size := B}) ->
    B.


%% Distinct nodes hosting any consumer, enqueuer or waiting consumer.
all_nodes(#?STATE{consumers = Cons0,
                  enqueuers = Enqs0,
                  waiting_consumers = WaitingConsumers0}) ->
    %% a map is used as a set to deduplicate node names
    Nodes0 = maps:fold(fun({_, P}, _, Acc) ->
                               Acc#{node(P) => ok}
                       end, #{}, Cons0),
    Nodes1 = maps:fold(fun(P, _, Acc) ->
                               Acc#{node(P) => ok}
                       end, Nodes0, Enqs0),
    maps:keys(
      lists:foldl(fun({{_, P}, _}, Acc) ->
                          Acc#{node(P) => ok}
                  end, Nodes1, WaitingConsumers0)).

%% All consumer/enqueuer/waiting-consumer pids residing on Node.
all_pids_for(Node, #?STATE{consumers = Cons0,
                           enqueuers = Enqs0,
                           waiting_consumers = WaitingConsumers0}) ->
    Cons = maps:fold(fun({_, P}, _, Acc)
                           when node(P) =:= Node ->
                             [P | Acc];
                        (_, _, Acc) -> Acc
                     end, [], Cons0),
    Enqs = maps:fold(fun(P, _, Acc)
                           when node(P) =:= Node ->
                             [P | Acc];
                        (_, _, Acc) -> Acc
                     end, Cons, Enqs0),
    lists:foldl(fun({{_, P}, _}, Acc)
                      when node(P) =:= Node ->
                        [P | Acc];
                   (_, Acc) -> Acc
                end, Enqs, WaitingConsumers0).

%% Like all_pids_for/2 but restricted to entities already marked
%% suspected_down.
suspected_pids_for(Node, #?STATE{consumers = Cons0,
                                 enqueuers = Enqs0,
                                 waiting_consumers = WaitingConsumers0}) ->
    Cons = maps:fold(fun({_, P}, #consumer{status = suspected_down}, Acc)
                           when node(P) =:= Node ->
                             [P | Acc];
                        (_, _, Acc) -> Acc
                     end, [], Cons0),
    Enqs = maps:fold(fun(P, #enqueuer{status = suspected_down}, Acc)
                           when node(P) =:= Node ->
                             [P | Acc];
                        (_, _, Acc) -> Acc
                     end, Cons, Enqs0),
    lists:foldl(fun({{_, P},
                     #consumer{status = suspected_down}}, Acc)
                      when node(P) =:= Node ->
                        [P | Acc];
                   (_, Acc) -> Acc
                end, Enqs, WaitingConsumers0).
diff --git a/deps/rabbit/src/rabbit_fifo_v0.hrl b/deps/rabbit/src/rabbit_fifo_v0.hrl
new file mode 100644
index 0000000000..333ccb4d77
--- /dev/null
+++ b/deps/rabbit/src/rabbit_fifo_v0.hrl
@@ -0,0 +1,195 @@

-type option(T) :: undefined | T.

-type raw_msg() :: term().
%% The raw message. It is opaque to rabbit_fifo.

-type msg_in_id() :: non_neg_integer().
% a queue scoped monotonically incrementing integer used to enforce order
% in the unassigned messages map

-type msg_id() :: non_neg_integer().
%% A consumer-scoped monotonically incrementing integer included with a
%% {@link delivery/0.}. Used to settle deliveries using
%% {@link rabbit_fifo_client:settle/3.}

-type msg_seqno() :: non_neg_integer().
%% A sender process scoped monotonically incrementing integer included
%% in enqueue messages. Used to ensure ordering of messages sent from the
%% same process

-type msg_header() :: msg_size() |
                      #{size := msg_size(),
                        delivery_count => non_neg_integer()}.
%% The message header:
%% delivery_count: the number of unsuccessful delivery attempts.
%%                 A non-zero value indicates a previous attempt.
%% If it only contains the size it can be condensed to an integer only

-type msg() :: {msg_header(), raw_msg()}.
%% message with a header map.

-type msg_size() :: non_neg_integer().
%% the size in bytes of the msg payload

-type indexed_msg() :: {ra:index(), msg()}.
%% a message paired with the raft index of the enqueue command that
%% created it

-type prefix_msg() :: {'$prefix_msg', msg_header()}.
%% placeholder for a message whose payload was dropped from a dehydrated
%% snapshot; only the header (size) is retained

-type delivery_msg() :: {msg_id(), msg()}.
%% A tuple consisting of the message id and the headered message.

-type consumer_tag() :: binary().
%% An arbitrary binary tag used to distinguish between different consumers
%% set up by the same process. See: {@link rabbit_fifo_client:checkout/3.}

-type delivery() :: {delivery, consumer_tag(), [delivery_msg()]}.
%% Represents the delivery of one or more rabbit_fifo messages.

-type consumer_id() :: {consumer_tag(), pid()}.
%% The entity that receives messages. Uniquely identifies a consumer.

-type credit_mode() :: simple_prefetch | credited.
%% determines how credit is replenished

-type checkout_spec() :: {once | auto, Num :: non_neg_integer(),
                          credit_mode()} |
                         {dequeue, settled | unsettled} |
                         cancel.
%% how a consumer wishes to receive messages: a standing (once/auto)
%% checkout with an initial credit, a one-shot dequeue, or cancellation

-type consumer_meta() :: #{ack => boolean(),
                           username => binary(),
                           prefetch => non_neg_integer(),
                           args => list()}.
%% static meta data associated with a consumer


-type applied_mfa() :: {module(), atom(), list()}.
% represents a partially applied module call

-define(RELEASE_CURSOR_EVERY, 64000).
-define(RELEASE_CURSOR_EVERY_MAX, 3200000).
-define(USE_AVG_HALF_LIFE, 10000.0).
%% an average QQ without any message uses about 100KB so setting this limit
%% to ~10 times that should be relatively safe.
-define(GC_MEM_LIMIT_B, 2000000).

-define(MB, 1048576).
%% record (and rabbit_fifo module) name used for the queue state record
-define(STATE, rabbit_fifo).

-record(consumer,
        {meta = #{} :: consumer_meta(),
         checked_out = #{} :: #{msg_id() => {msg_in_id(), indexed_msg()}},
         next_msg_id = 0 :: msg_id(), % part of snapshot data
         %% max number of messages that can be sent
         %% decremented for each delivery
         %% FIX: the field type annotation used a single `:' which is not
         %% valid record syntax; it must be `::'.
         credit = 0 :: non_neg_integer(),
         %% total number of checked out messages - ever
         %% incremented for each delivery
         delivery_count = 0 :: non_neg_integer(),
         %% the mode of how credit is incremented
         %% simple_prefetch: credit is re-filled as deliveries are settled
         %% or returned.
         %% credited: credit can only be changed by receiving a consumer_credit
         %% command: `{consumer_credit, ReceiverDeliveryCount, Credit}'
         credit_mode = simple_prefetch :: credit_mode(), % part of snapshot data
         lifetime = once :: once | auto,
         status = up :: up | suspected_down | cancelled
        }).

-type consumer() :: #consumer{}.

-type consumer_strategy() :: competing | single_active.

-record(enqueuer,
        {next_seqno = 1 :: msg_seqno(),
         % out of order enqueues - sorted list
         pending = [] :: [{msg_seqno(), ra:index(), raw_msg()}],
         status = up :: up | suspected_down
        }).

%% Immutable (per queue) configuration, split out of the state record so
%% it is not copied on every state update.
-record(cfg,
        {name :: atom(),
         resource :: rabbit_types:r('queue'),
         release_cursor_interval ::
             undefined | non_neg_integer() |
             {non_neg_integer(), non_neg_integer()},
         dead_letter_handler :: option(applied_mfa()),
         become_leader_handler :: option(applied_mfa()),
         max_length :: option(non_neg_integer()),
         max_bytes :: option(non_neg_integer()),
         %% whether single active consumer is on or not for this queue
         consumer_strategy = competing :: consumer_strategy(),
         %% the maximum number of unsuccessful delivery attempts permitted
         delivery_limit :: option(non_neg_integer()),
         max_in_memory_length :: option(non_neg_integer()),
         max_in_memory_bytes :: option(non_neg_integer())
        }).

%% Snapshot-time summary of queued messages: either {Returns, Messages}
%% or the extended 4-tuple form carrying their counts as well.
-type prefix_msgs() :: {list(), list()} |
                       {non_neg_integer(), list(),
                        non_neg_integer(), list()}.

-record(?STATE,
        {cfg :: #cfg{},
         % unassigned messages
         messages = #{} :: #{msg_in_id() => indexed_msg()},
         % defines the lowest message in id available in the messages map
         % that isn't a return
         low_msg_num :: option(msg_in_id()),
         % defines the next message in id to be added to the messages map
         next_msg_num = 1 :: msg_in_id(),
         % list of returned msg_in_ids - when checking out it picks from
         % this list first before taking low_msg_num
         returns = lqueue:new() :: lqueue:lqueue(prefix_msg() |
                                                 {msg_in_id(), indexed_msg()}),
         % a counter of enqueues - used to trigger shadow copy points
         enqueue_count = 0 :: non_neg_integer(),
         % a map containing all the live processes that have ever enqueued
         % a message to this queue as well as a cached value of the smallest
         % ra_index of all pending enqueues
         enqueuers = #{} :: #{pid() => #enqueuer{}},
         % master index of all enqueue raft indexes including pending
         % enqueues
         % rabbit_fifo_index can be slow when calculating the smallest
         % index when there are large gaps but should be faster than gb_trees
         % for normal appending operations as it's backed by a map
         ra_indexes = rabbit_fifo_index:empty() :: rabbit_fifo_index:state(),
         release_cursors = lqueue:new() :: lqueue:lqueue({release_cursor,
                                                          ra:index(), #?STATE{}}),
         % consumers need to reflect consumer state at time of snapshot
         % needs to be part of snapshot
         consumers = #{} :: #{consumer_id() => #consumer{}},
         % consumers that require further service are queued here
         % needs to be part of snapshot
         service_queue = queue:new() :: queue:queue(consumer_id()),
         %% This is a special field that is only used for snapshots
         %% It represents the queued messages at the time the
         %% dehydrated snapshot state was cached.
         %% As release_cursors are only emitted for raft indexes where all
         %% prior messages no longer contribute to the current state we can
         %% replace all message payloads with their sizes (to be used for
         %% overflow calculations).
         %% This is done so that consumers are still served in a deterministic
         %% order on recovery.
         prefix_msgs = {0, [], 0, []} :: prefix_msgs(),
         msg_bytes_enqueue = 0 :: non_neg_integer(),
         msg_bytes_checkout = 0 :: non_neg_integer(),
         %% waiting consumers, one is picked when the active consumer is
         %% cancelled or dies
         %% used only when single active consumer is on
         waiting_consumers = [] :: [{consumer_id(), consumer()}],
         msg_bytes_in_memory = 0 :: non_neg_integer(),
         msgs_ready_in_memory = 0 :: non_neg_integer()
        }).

%% Configuration map accepted when initialising a rabbit_fifo machine.
-type config() :: #{name := atom(),
                    queue_resource := rabbit_types:r('queue'),
                    dead_letter_handler => applied_mfa(),
                    become_leader_handler => applied_mfa(),
                    release_cursor_interval => non_neg_integer(),
                    max_length => non_neg_integer(),
                    max_bytes => non_neg_integer(),
                    max_in_memory_length => non_neg_integer(),
                    max_in_memory_bytes => non_neg_integer(),
                    single_active_consumer_on => boolean(),
                    delivery_limit => non_neg_integer()}.
diff --git a/deps/rabbit/src/rabbit_file.erl b/deps/rabbit/src/rabbit_file.erl
new file mode 100644
index 0000000000..f8263d9e77
--- /dev/null
+++ b/deps/rabbit/src/rabbit_file.erl
@@ -0,0 +1,321 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2011-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% File-system helpers. Most operations are routed through
%% file_handle_cache:with_handle/1,2 so that file-descriptor usage is
%% accounted for, and use prim_file to avoid the file_server bottleneck.
-module(rabbit_file).

-include_lib("kernel/include/file.hrl").

-export([is_file/1, is_dir/1, file_size/1, ensure_dir/1, wildcard/2, list_dir/1]).
-export([read_term_file/1, write_term_file/2, write_file/2, write_file/3]).
-export([append_file/2, ensure_parent_dirs_exist/1]).
-export([rename/2, delete/1, recursive_delete/1, recursive_copy/2]).
-export([lock_file/1]).
-export([read_file_info/1]).
-export([filename_as_a_directory/1]).

-import(file_handle_cache, [with_handle/1, with_handle/2]).

-define(TMP_EXT, ".tmp").

%%----------------------------------------------------------------------------

-type ok_or_error() :: rabbit_types:ok_or_error(any()).

%%----------------------------------------------------------------------------

-spec is_file((file:filename())) -> boolean().

%% NOTE: returns true for regular files AND directories ("path exists"),
%% not only for regular files.
is_file(File) ->
    case read_file_info(File) of
        {ok, #file_info{type=regular}}   -> true;
        {ok, #file_info{type=directory}} -> true;
        _                                -> false
    end.

-spec is_dir((file:filename())) -> boolean().

is_dir(Dir) -> is_dir_internal(read_file_info(Dir)).

%% Variant that bypasses the file_handle_cache accounting; for use inside
%% code already running under with_handle/1.
is_dir_no_handle(Dir) -> is_dir_internal(prim_file:read_file_info(Dir)).

is_dir_internal({ok, #file_info{type=directory}}) -> true;
is_dir_internal(_)                                -> false.

-spec file_size((file:filename())) -> non_neg_integer().

%% Size in bytes; 0 when the file does not exist or cannot be stat'ed.
file_size(File) ->
    case read_file_info(File) of
        {ok, #file_info{size=Size}} -> Size;
        _                           -> 0
    end.

-spec ensure_dir((file:filename())) -> ok_or_error().

%% Create every missing parent directory of File (File itself is treated
%% as a file name, like filelib:ensure_dir/1).
ensure_dir(File) -> with_handle(fun () -> ensure_dir_internal(File) end).

ensure_dir_internal("/") ->
    ok;
ensure_dir_internal(File) ->
    Dir = filename:dirname(File),
    case is_dir_no_handle(Dir) of
        true  -> ok;
        %% make_dir's result is returned as-is, so a concurrent creation
        %% may surface as {error, eexist} here.
        false -> ensure_dir_internal(Dir),
                 prim_file:make_dir(Dir)
    end.

-spec wildcard(string(), file:filename()) -> [file:filename()].

%% List the entries of Dir whose names fully match the regex Pattern
%% (anchored). Returns [] if the directory cannot be listed.
wildcard(Pattern, Dir) ->
    case list_dir(Dir) of
        {ok, Files} -> {ok, RE} = re:compile(Pattern, [anchored]),
                       [File || File <- Files,
                                match =:= re:run(File, RE, [{capture, none}])];
        {error, _}  -> []
    end.

-spec list_dir(file:filename()) ->
          rabbit_types:ok_or_error2([file:filename()], any()).

list_dir(Dir) -> with_handle(fun () -> prim_file:list_dir(Dir) end).

read_file_info(File) ->
    with_handle(fun () -> prim_file:read_file_info(File) end).

-spec read_term_file
        (file:filename()) -> {'ok', [any()]} | rabbit_types:error(any()).

%% Read a file of dot-terminated Erlang terms (like file:consult/1).
%% Any failure inside the happy-path matches is converted back into the
%% original error tuple via the badmatch catch.
read_term_file(File) ->
    try
        {ok, Data} = with_handle(fun () -> prim_file:read_file(File) end),
        {ok, Tokens, _} = erl_scan:string(binary_to_list(Data)),
        TokenGroups = group_tokens(Tokens),
        {ok, [begin
                  {ok, Term} = erl_parse:parse_term(Tokens1),
                  Term
              end || Tokens1 <- TokenGroups]}
    catch
        error:{badmatch, Error} -> Error
    end.

%% Split a flat token list into one token list per term, each ending with
%% its `dot' token.
group_tokens(Ts) -> [lists:reverse(G) || G <- group_tokens([], Ts)].

group_tokens([], [])                    -> [];
group_tokens(Cur, [])                   -> [Cur];
group_tokens(Cur, [T = {dot, _} | Ts])  -> [[T | Cur] | group_tokens([], Ts)];
group_tokens(Cur, [T | Ts])             -> group_tokens([T | Cur], Ts).

-spec write_term_file(file:filename(), [any()]) -> ok_or_error().

%% Write terms in a format read_term_file/1 can read back.
write_term_file(File, Terms) ->
    write_file(File, list_to_binary([io_lib:format("~w.~n", [Term]) ||
                                        Term <- Terms])).

-spec write_file(file:filename(), iodata()) -> ok_or_error().

write_file(Path, Data) -> write_file(Path, Data, []).

-spec write_file(file:filename(), iodata(), [any()]) -> ok_or_error().

%% Durable write: the data is written to a temporary file, fsync'ed and
%% then renamed over Path, so readers never observe a partial file.
write_file(Path, Data, Modes) ->
    Modes1 = [binary, write | (Modes -- [binary, write])],
    case make_binary(Data) of
        Bin when is_binary(Bin) -> write_file1(Path, Bin, Modes1);
        {error, _} = E          -> E
    end.

%% make_binary/1 is based on the corresponding function in the
%% kernel/file.erl module of the Erlang R14B02 release, which is
%% licensed under the EPL.

make_binary(Bin) when is_binary(Bin) ->
    Bin;
make_binary(List) ->
    try
        iolist_to_binary(List)
    catch error:Reason ->
            {error, Reason}
    end.

write_file1(Path, Bin, Modes) ->
    try
        with_synced_copy(Path, Modes,
                         fun (Hdl) ->
                                 ok = prim_file:write(Hdl, Bin)
                         end)
    catch
        error:{badmatch, Error} -> Error;
        _:{error, Error}        -> {error, Error}
    end.

%% Run Fun on a handle to Path ++ ".tmp", fsync it, then atomically rename
%% the temporary file over Path. Append mode is incompatible with this
%% write-then-rename scheme and is rejected.
with_synced_copy(Path, Modes, Fun) ->
    case lists:member(append, Modes) of
        true ->
            {error, append_not_supported, Path};
        false ->
            with_handle(
              fun () ->
                      Bak = Path ++ ?TMP_EXT,
                      case prim_file:open(Bak, Modes) of
                          {ok, Hdl} ->
                              try
                                  Result = Fun(Hdl),
                                  ok = prim_file:sync(Hdl),
                                  ok = prim_file:rename(Bak, Path),
                                  Result
                              after
                                  prim_file:close(Hdl)
                              end;
                          {error, _} = E -> E
                      end
              end)
    end.

%% TODO the semantics of this function are rather odd. But see bug 25021.

-spec append_file(file:filename(), string()) -> ok_or_error().

append_file(File, Suffix) ->
    case read_file_info(File) of
        {ok, FInfo}     -> append_file(File, FInfo#file_info.size, Suffix);
        {error, enoent} -> append_file(File, 0, Suffix);
        Error           -> Error
    end.

append_file(_, _, "") ->
    ok;
%% empty source: just ensure the target [File ++ Suffix] exists
append_file(File, 0, Suffix) ->
    with_handle(fun () ->
                        case prim_file:open([File, Suffix], [append]) of
                            {ok, Fd} -> prim_file:close(Fd);
                            Error    -> Error
                        end
                end);
%% non-empty source: append File's contents onto [File ++ Suffix]
append_file(File, _, Suffix) ->
    case with_handle(2, fun () ->
                                file:copy(File, {[File, Suffix], [append]})
                        end) of
        {ok, _BytesCopied} -> ok;
        Error              -> Error
    end.

-spec ensure_parent_dirs_exist(string()) -> 'ok'.

%% Ensure the parent directories of Filename exist; throws on failure.
ensure_parent_dirs_exist(Filename) ->
    case ensure_dir(Filename) of
        ok              -> ok;
        {error, Reason} ->
            throw({error, {cannot_create_parent_dirs, Filename, Reason}})
    end.

-spec rename(file:filename(), file:filename()) -> ok_or_error().

rename(Old, New) -> with_handle(fun () -> prim_file:rename(Old, New) end).

%% FIX: spec previously declared `[file:filename()]' but delete/1 takes a
%% single filename (recursive_delete/1 is the list-taking variant).
-spec delete(file:filename()) -> ok_or_error().

delete(File) -> with_handle(fun () -> prim_file:delete(File) end).

-spec recursive_delete([file:filename()]) ->
          rabbit_types:ok_or_error({file:filename(), any()}).

%% Delete each path (file or directory tree); stops at the first error.
recursive_delete(Files) ->
    with_handle(
      fun () -> lists:foldl(fun (Path, ok) -> recursive_delete1(Path);
                                (_Path, {error, _Err} = Error) -> Error
                            end, ok, Files)
      end).

recursive_delete1(Path) ->
    %% `andalso' (short-circuit) instead of the strict `and not(...)':
    %% avoids the read_link syscall when Path is not a directory.
    %% Symlinks are deleted, never followed.
    case is_dir_no_handle(Path) andalso not is_symlink_no_handle(Path) of
        false -> case prim_file:delete(Path) of
                     ok              -> ok;
                     {error, enoent} -> ok; %% Path doesn't exist anyway
                     {error, Err}    -> {error, {Path, Err}}
                 end;
        true  -> case prim_file:list_dir(Path) of
                     {ok, FileNames} ->
                         case lists:foldl(
                                fun (FileName, ok) ->
                                        recursive_delete1(
                                          filename:join(Path, FileName));
                                    (_FileName, Error) ->
                                        Error
                                end, ok, FileNames) of
                             ok ->
                                 case prim_file:del_dir(Path) of
                                     ok           -> ok;
                                     {error, Err} -> {error, {Path, Err}}
                                 end;
                             {error, _Err} = Error ->
                                 Error
                         end;
                     {error, Err} ->
                         {error, {Path, Err}}
                 end
    end.

is_symlink_no_handle(File) ->
    case prim_file:read_link(File) of
        {ok, _} -> true;
        _       -> false
    end.

-spec recursive_copy(file:filename(), file:filename()) ->
          rabbit_types:ok_or_error({file:filename(), file:filename(), any()}).

recursive_copy(Src, Dest) ->
    %% Note that this uses the 'file' module and, hence, shouldn't be
    %% run on many processes at once.
    case is_dir(Src) of
        false -> case file:copy(Src, Dest) of
                     {ok, _Bytes}    -> ok;
                     {error, enoent} -> ok; %% Path doesn't exist anyway
                     {error, Err}    -> {error, {Src, Dest, Err}}
                 end;
        true  -> case file:list_dir(Src) of
                     {ok, FileNames} ->
                         case file:make_dir(Dest) of
                             ok ->
                                 lists:foldl(
                                   fun (FileName, ok) ->
                                           recursive_copy(
                                             filename:join(Src, FileName),
                                             filename:join(Dest, FileName));
                                       (_FileName, Error) ->
                                           Error
                                   end, ok, FileNames);
                             {error, Err} ->
                                 {error, {Src, Dest, Err}}
                         end;
                     {error, Err} ->
                         {error, {Src, Dest, Err}}
                 end
    end.

%% TODO: When we stop supporting Erlang prior to R14, this should be
%% replaced with file:open [write, exclusive]

-spec lock_file(file:filename()) -> rabbit_types:ok_or_error('eexist').

lock_file(Path) ->
    case is_file(Path) of
        true  -> {error, eexist};
        false -> with_handle(
                   fun () -> {ok, Lock} = prim_file:open(Path, [write]),
                             ok = prim_file:close(Lock)
                   end)
    end.

-spec filename_as_a_directory(file:filename()) -> file:filename().

%% Ensure FileName ends with exactly one trailing "/".
%% FIX: lists:last/1 returns a character code (an integer); the previous
%% pattern "/" is a one-element *list* and could never match, so names
%% already ending in a slash were given a second one ("dir//").
filename_as_a_directory(FileName) ->
    case lists:last(FileName) of
        $/ ->
            FileName;
        _ ->
            FileName ++ "/"
    end.
diff --git a/deps/rabbit/src/rabbit_framing.erl b/deps/rabbit/src/rabbit_framing.erl
new file mode 100644
index 0000000000..42927b2b68
--- /dev/null
+++ b/deps/rabbit/src/rabbit_framing.erl
@@ -0,0 +1,36 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% TODO auto-generate

-module(rabbit_framing).

-export_type([protocol/0,
              amqp_field_type/0, amqp_property_type/0,
              amqp_table/0, amqp_array/0, amqp_value/0,
              amqp_method_name/0, amqp_method/0, amqp_method_record/0,
              amqp_method_field_name/0, amqp_property_record/0,
              amqp_exception/0, amqp_exception_code/0, amqp_class_id/0]).

-type protocol() :: 'rabbit_framing_amqp_0_8' | 'rabbit_framing_amqp_0_9_1'.

%% Expands `-?protocol_type(t()).' into
%%   `-type t() :: rabbit_framing_amqp_0_8:t() | rabbit_framing_amqp_0_9_1:t().'
%% i.e. each re-exported type is the union of the two protocol modules' types.
-define(protocol_type(T), type(T :: rabbit_framing_amqp_0_8:T |
                                    rabbit_framing_amqp_0_9_1:T)).

-?protocol_type(amqp_field_type()).
-?protocol_type(amqp_property_type()).
-?protocol_type(amqp_table()).
-?protocol_type(amqp_array()).
-?protocol_type(amqp_value()).
-?protocol_type(amqp_method_name()).
-?protocol_type(amqp_method()).
-?protocol_type(amqp_method_record()).
-?protocol_type(amqp_method_field_name()).
-?protocol_type(amqp_property_record()).
-?protocol_type(amqp_exception()).
-?protocol_type(amqp_exception_code()).
-?protocol_type(amqp_class_id()).
diff --git a/deps/rabbit/src/rabbit_guid.erl b/deps/rabbit/src/rabbit_guid.erl
new file mode 100644
index 0000000000..01e6464332
--- /dev/null
+++ b/deps/rabbit/src/rabbit_guid.erl
@@ -0,0 +1,181 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% GUID generation server. Holds a per-boot serial number (persisted on
%% disk) that, combined with the node name and a unique reference, seeds
%% globally unique identifiers.
-module(rabbit_guid).

-behaviour(gen_server).

-export([start_link/0]).
-export([filename/0]).
-export([gen/0, gen_secure/0, string/2, binary/2, to_string/1]).

-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
         code_change/3]).

-define(SERVER, ?MODULE).
-define(SERIAL_FILENAME, "rabbit_serial").

%% serial: the integer read from (and incremented in) the serial file at boot
-record(state, {serial}).

%%----------------------------------------------------------------------------

-export_type([guid/0]).

-type guid() :: binary().

%%----------------------------------------------------------------------------

-spec start_link() -> rabbit_types:ok_pid_or_error().

start_link() ->
    %% the serial is read+incremented on disk before the server starts,
    %% so each node incarnation gets a distinct serial
    gen_server:start_link({local, ?SERVER}, ?MODULE,
                          [update_disk_serial()], []).

%% We use this to detect a (possibly rather old) Mnesia directory,
%% since it has existed since at least 1.7.0 (as far back as I cared
%% to go).

-spec filename() -> string().

filename() ->
    filename:join(rabbit_mnesia:dir(), ?SERIAL_FILENAME).

%% Read the persisted serial, write back Serial + 1, and return the value
%% that was read. Throws on unreadable/unwritable serial file.
update_disk_serial() ->
    Filename = filename(),
    Serial = case rabbit_file:read_term_file(Filename) of
                 {ok, [Num]}     -> Num;
                 {ok, []}        -> 0; %% [1]
                 {error, enoent} -> 0;
                 {error, Reason} ->
                     throw({error, {cannot_read_serial_file, Filename, Reason}})
             end,
    case rabbit_file:write_term_file(Filename, [Serial + 1]) of
        ok -> ok;
        {error, Reason1} ->
            throw({error, {cannot_write_serial_file, Filename, Reason1}})
    end,
    Serial.
%% [1] a couple of users have reported startup failures due to the
%% file being empty, presumably as a result of filesystem
%% corruption. While rabbit doesn't cope with that in general, in this
%% specific case we can be more accommodating.

%% Generate an un-hashed guid.
fresh() ->
    %% We don't use erlang:now() here because a) it may return
    %% duplicates when the system clock has been rewound prior to a
    %% restart, or ids were generated at a high rate (which causes
    %% now() to move ahead of the system time), and b) it is really
    %% slow since it takes a global lock and makes a system call.
    %%
    %% A persisted serial number, the node, and a unique reference
    %% (per node incarnation) uniquely identifies a process in space
    %% and time.
    Serial = gen_server:call(?SERVER, serial, infinity),
    {Serial, node(), make_ref()}.

advance_blocks({B1, B2, B3, B4}, I) ->
    %% To produce a new set of blocks, we create a new 32bit block
    %% hashing {B1, I}. The new hash is used as last block, and the
    %% other three blocks are XORed with it.
    %%
    %% Doing this is convenient because it avoids cascading conflicts,
    %% while being very fast. The conflicts are avoided by propagating
    %% the changes through all the blocks at each round by XORing, so
    %% the only occasion in which a collision will take place is when
    %% all 4 blocks are the same and the counter is the same.
    %%
    %% The range (2^32) is provided explicitly since phash uses 2^27
    %% by default.
    B5 = erlang:phash2({B1, I}, 4294967296),
    {{(B2 bxor B5), (B3 bxor B5), (B4 bxor B5), B5}, I+1}.

%% generate a GUID. This function should be used when performance is a
%% priority and predictability is not an issue. Otherwise use
%% gen_secure/0.

-spec gen() -> guid().

gen() ->
    %% We hash a fresh GUID with md5, split it in 4 blocks, and each
    %% time we need a new guid we rotate them producing a new hash
    %% with the aid of the counter. Look at the comments in
    %% advance_blocks/2 for details.
    %% NOTE: the block state is cached in the calling process's process
    %% dictionary under key 'guid'.
    case get(guid) of
        undefined -> <<B1:32, B2:32, B3:32, B4:32>> = Res =
                         erlang:md5(term_to_binary(fresh())),
                     put(guid, {{B1, B2, B3, B4}, 0}),
                     Res;
        {BS, I}   -> {{B1, B2, B3, B4}, _} = S = advance_blocks(BS, I),
                     put(guid, S),
                     <<B1:32, B2:32, B3:32, B4:32>>
    end.

%% generate a non-predictable GUID.
%%
%% The id is only unique within a single cluster and as long as the
%% serial store hasn't been deleted.
%%
%% If you are not concerned with predictability, gen/0 is faster.

-spec gen_secure() -> guid().

gen_secure() ->
    %% Here instead of hashing once we hash the GUID and the counter
    %% each time, so that the GUID is not predictable.
    G = case get(guid_secure) of
            undefined -> {fresh(), 0};
            {S, I}    -> {S, I+1}
        end,
    put(guid_secure, G),
    erlang:md5(term_to_binary(G)).

%% generate a readable string representation of a GUID.
%%
%% employs base64url encoding, which is safer in more contexts than
%% plain base64.

-spec string(guid() | string(), any()) -> string().

%% "Prefix-<base64url(G)>"; Prefix may be a list or a binary.
string(G, Prefix) when is_list(Prefix) ->
    Prefix ++ "-" ++ rabbit_misc:base64url(G);
string(G, Prefix) when is_binary(Prefix) ->
    binary_to_list(Prefix) ++ "-" ++ rabbit_misc:base64url(G).

-spec binary(guid() | string(), any()) -> binary().

binary(G, Prefix) ->
    list_to_binary(string(G, Prefix)).

%% copied from https://stackoverflow.com/questions/1657204/erlang-uuid-generator
%% Render a 16-byte GUID in the canonical 8-4-4-4-12 hex (UUID-like) form.
to_string(<<TL:32, TM:16, THV:16, CSR:8, CSL:8, N:48>>) ->
    lists:flatten(
      io_lib:format("~8.16.0b-~4.16.0b-~4.16.0b-~2.16.0b~2.16.0b-~12.16.0b",
                    [TL, TM, THV, CSR, CSL, N])).

%%----------------------------------------------------------------------------
%% gen_server callbacks: the server only hands out the boot-time serial.

init([Serial]) ->
    {ok, #state{serial = Serial}}.

handle_call(serial, _From, State = #state{serial = Serial}) ->
    {reply, Serial, State};

%% NOTE(review): unknown calls get {noreply, ...} with no later reply, so
%% such callers block until their call times out — presumably intentional
%% (only 'serial' is a valid request); confirm before changing.
handle_call(_Request, _From, State) ->
    {noreply, State}.

handle_cast(_Msg, State) ->
    {noreply, State}.

handle_info(_Info, State) ->
    {noreply, State}.

terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
diff --git a/deps/rabbit/src/rabbit_health_check.erl b/deps/rabbit/src/rabbit_health_check.erl
new file mode 100644
index 0000000000..4674ca7d8e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_health_check.erl
@@ -0,0 +1,80 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Deprecated node health check (see local/0 for the deprecation notice).
-module(rabbit_health_check).

%% External API
-export([node/1, node/2]).

%% Internal API
-export([local/0]).

%%----------------------------------------------------------------------------
%% External functions
%%----------------------------------------------------------------------------

-spec node(node(), timeout()) -> ok | {badrpc, term()} | {error_string, string()}.

node(Node) ->
    %% same default as in CLI
    node(Node, 70000).
%% Run the health check on Node via RPC with the given timeout.
node(Node, Timeout) ->
    rabbit_misc:rpc_call(Node, rabbit_health_check, local, [], Timeout).

-spec local() -> ok | {error_string, string()}.

local() ->
    rabbit_log:warning("rabbitmqctl node_health_check and its HTTP API counterpart are DEPRECATED. "
                       "See https://www.rabbitmq.com/monitoring.html#health-checks for replacement options."),
    run_checks([list_channels, list_queues, alarms, rabbit_node_monitor]).

%%----------------------------------------------------------------------------
%% Internal functions
%%----------------------------------------------------------------------------

%% Run each named check in order, stopping at the first failure.
run_checks([]) ->
    ok;
run_checks([C|Cs]) ->
    case node_health_check(C) of
        ok ->
            run_checks(Cs);
        Error ->
            Error
    end.

%% Single-clause cases below are deliberate: an unexpected return value
%% crashes the check (let-it-crash) rather than being reported politely.
node_health_check(list_channels) ->
    case rabbit_channel:info_local([pid]) of
        L when is_list(L) ->
            ok
    end;

node_health_check(list_queues) ->
    health_check_queues(rabbit_vhost:list_names());

node_health_check(rabbit_node_monitor) ->
    case rabbit_node_monitor:partitions() of
        [] ->
            ok;
        L when is_list(L), length(L) > 0 ->
            ErrorMsg = io_lib:format("cluster partition in effect: ~p", [L]),
            {error_string, ErrorMsg}
    end;

node_health_check(alarms) ->
    case proplists:get_value(alarms, rabbit:status()) of
        [] ->
            ok;
        Alarms ->
            ErrorMsg = io_lib:format("resource alarm(s) in effect:~p", [Alarms]),
            {error_string, ErrorMsg}
    end.

%% List queues in every vhost; crashes if info_local/1 misbehaves.
health_check_queues([]) ->
    ok;
health_check_queues([VHost|RestVHosts]) ->
    case rabbit_amqqueue:info_local(VHost) of
        L when is_list(L) ->
            health_check_queues(RestVHosts)
    end.
diff --git a/deps/rabbit/src/rabbit_lager.erl b/deps/rabbit/src/rabbit_lager.erl
new file mode 100644
index 0000000000..3cbc5e431d
--- /dev/null
+++ b/deps/rabbit/src/rabbit_lager.erl
@@ -0,0 +1,723 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Glue between RabbitMQ's logging configuration and the lager logging
%% framework: builds lager handlers/sinks from rabbit's config and manages
%% log levels and high-water marks at runtime.
-module(rabbit_lager).

-include_lib("rabbit_common/include/rabbit_log.hrl").

%% API
-export([start_logger/0, log_locations/0, fold_sinks/2,
         broker_is_started/0, set_log_level/1]).

%% For test purposes
-export([configure_lager/0]).

-export_type([log_location/0]).

-type log_location() :: string().

%% (Re)start lager with the generated configuration, announce each sink
%% that has file backends, and verify every configured log file is usable.
start_logger() ->
    ok = maybe_remove_logger_handler(),
    ok = app_utils:stop_applications([lager, syslog]),
    ok = ensure_lager_configured(),
    ok = app_utils:start_applications([lager]),
    fold_sinks(
      fun
          (_, [], Acc) ->
              Acc;
          (SinkName, _, Acc) ->
              lager:log(SinkName, info, self(),
                        "Log file opened with Lager", []),
              Acc
      end, ok),
    ensure_log_working().

%% Called once the broker is up: restore the error_logger high-water mark
%% that was raised during boot (x100 when the most verbose level is debug).
broker_is_started() ->
    {ok, HwmCurrent} = application:get_env(lager, error_logger_hwm),
    {ok, HwmOrig0} = application:get_env(lager, error_logger_hwm_original),
    HwmOrig = case get_most_verbose_log_level() of
                  debug -> HwmOrig0 * 100;
                  _     -> HwmOrig0
              end,
    case HwmOrig =:= HwmCurrent of
        false ->
            ok = application:set_env(lager, error_logger_hwm, HwmOrig),
            Handlers = gen_event:which_handlers(lager_event),
            lists:foreach(fun(Handler) ->
                                  lager:set_loghwm(Handler, HwmOrig)
                          end, Handlers),
            ok;
        _ ->
            ok
    end.

%% Validate Level against lager's known levels before applying it.
set_log_level(Level) ->
    IsValidLevel = lists:member(Level, lager_util:levels()),
    set_log_level(IsValidLevel, Level).

set_log_level(true, Level) ->
    SinksAndHandlers = [{Sink, gen_event:which_handlers(Sink)} ||
                           Sink <- lager:list_all_sinks()],
    DefaultHwm = application:get_env(lager, error_logger_hwm_original, 50),
    %% debug emits far more messages, so raise the high-water mark with it
    Hwm = case Level of
              debug -> DefaultHwm * 100;
              _     -> DefaultHwm
          end,
    application:set_env(lager, error_logger_hwm, Hwm),
    set_sink_log_level(SinksAndHandlers, Level, Hwm);
set_log_level(_, Level) ->
    {error, {invalid_log_level, Level}}.

%% Apply Level/Hwm to every handler of every sink.
set_sink_log_level([], _Level, _Hwm) ->
    ok;
set_sink_log_level([{Sink, Handlers}|Rest], Level, Hwm) ->
    set_sink_handler_log_level(Sink, Handlers, Level, Hwm),
    set_sink_log_level(Rest, Level, Hwm).

set_sink_handler_log_level(_Sink, [], _Level, _Hwm) ->
    ok;
%% handler registered as a bare module name
set_sink_handler_log_level(Sink, [Handler|Rest], Level, Hwm)
  when is_atom(Handler) andalso is_integer(Hwm) ->
    lager:set_loghwm(Sink, Handler, undefined, Hwm),
    ok = lager:set_loglevel(Sink, Handler, undefined, Level),
    set_sink_handler_log_level(Sink, Rest, Level, Hwm);
%% handler registered as {Module, Id}
set_sink_handler_log_level(Sink, [{Handler, Id}|Rest], Level, Hwm) ->
    lager:set_loghwm(Sink, Handler, Id, Hwm),
    ok = lager:set_loglevel(Sink, Handler, Id, Level),
    set_sink_handler_log_level(Sink, Rest, Level, Hwm);
%% anything else is skipped
set_sink_handler_log_level(Sink, [_|Rest], Level, Hwm) ->
    set_sink_handler_log_level(Sink, Rest, Level, Hwm).

%% Sorted, deduplicated list of all configured log destinations
%% (file paths and "<stdout>").
log_locations() ->
    ensure_lager_configured(),
    DefaultHandlers = application:get_env(lager, handlers, []),
    Sinks = application:get_env(lager, extra_sinks, []),
    ExtraHandlers = [proplists:get_value(handlers, Props, [])
                     || {_, Props} <- Sinks],
    lists:sort(log_locations1([DefaultHandlers | ExtraHandlers], [])).

log_locations1([Handlers | Rest], Locations) ->
    Locations1 = log_locations2(Handlers, Locations),
    log_locations1(Rest, Locations1);
log_locations1([], Locations) ->
    Locations.

%% Accumulate the log destination of each handler, deduplicating as we go.
log_locations2([{lager_file_backend, Settings} | Rest], Locations) ->
    FileName = lager_file_name1(Settings),
    Locations1 = case lists:member(FileName, Locations) of
                     false -> [FileName | Locations];
                     true  -> Locations
                 end,
    log_locations2(Rest, Locations1);
log_locations2([{lager_console_backend, _} | Rest], Locations) ->
    Locations1 = case lists:member("<stdout>", Locations) of
                     false -> ["<stdout>" | Locations];
                     true  -> Locations
                 end,
    log_locations2(Rest, Locations1);
log_locations2([_ | Rest], Locations) ->
    log_locations2(Rest, Locations);
log_locations2([], Locations) ->
    Locations.

%% Fold Fun(SinkName, FileNames, Acc) over every running sink, where
%% FileNames are the files of the sink's lager_file_backend handlers
%% ([] for sinks that log elsewhere, e.g. console only).
fold_sinks(Fun, Acc) ->
    Handlers = lager_config:global_get(handlers),
    Sinks = dict:to_list(lists:foldl(
                           fun
                               ({{lager_file_backend, F}, _, S}, Dict) ->
                                   dict:append(S, F, Dict);
                               ({_, _, S}, Dict) ->
                                   %% FIX: the branches were swapped. The old
                                   %% code stored [] when the sink was ALREADY
                                   %% present (wiping any file names collected
                                   %% so far) and never registered sinks with
                                   %% only non-file handlers. Register the
                                   %% sink with an empty file list only when
                                   %% it is not yet in the accumulator.
                                   case dict:is_key(S, Dict) of
                                       true  -> Dict;
                                       false -> dict:store(S, [], Dict)
                                   end
                           end,
                           dict:new(), Handlers)),
    fold_sinks(Sinks, Fun, Acc).

fold_sinks([{SinkName, FileNames} | Rest], Fun, Acc) ->
    Acc1 = Fun(SinkName, FileNames, Acc),
    fold_sinks(Rest, Fun, Acc1);
fold_sinks([], _, Acc) ->
    Acc.

%% Verify that every configured handler's log file exists/is reachable;
%% throws {error, {cannot_log_to_file, ...}} otherwise.
ensure_log_working() ->
    {ok, Handlers} = application:get_env(lager, handlers),
    [ ensure_lager_handler_file_exist(Handler)
      || Handler <- Handlers ],
    Sinks = application:get_env(lager, extra_sinks, []),
    ensure_extra_sinks_working(Sinks, list_expected_sinks()).

ensure_extra_sinks_working(Sinks, [SinkName | Rest]) ->
    case proplists:get_value(SinkName, Sinks) of
        undefined -> throw({error, {cannot_log_to_file, unknown,
                                    rabbit_log_lager_event_sink_undefined}});
        Sink ->
            SinkHandlers = proplists:get_value(handlers, Sink, []),
            [ ensure_lager_handler_file_exist(Handler)
              || Handler <- SinkHandlers ]
    end,
    ensure_extra_sinks_working(Sinks, Rest);
ensure_extra_sinks_working(_Sinks, []) ->
    ok.

%% If Handler is a file backend, check its log file; otherwise no-op.
ensure_lager_handler_file_exist(Handler) ->
    case lager_file_name(Handler) of
        false    -> ok;
        FileName -> ensure_logfile_exist(FileName)
    end.

%% Extract the expanded file path from a handler spec, or false when the
%% handler does not write to a file.
lager_file_name({lager_file_backend, Settings}) ->
    lager_file_name1(Settings);
lager_file_name(_) ->
    false.

%% Accepts the proplist form as well as the legacy tuple forms of
%% lager_file_backend settings; throws on anything else.
lager_file_name1(Settings) when is_list(Settings) ->
    {file, FileName} = proplists:lookup(file, Settings),
    lager_util:expand_path(FileName);
lager_file_name1({FileName, _}) -> lager_util:expand_path(FileName);
lager_file_name1({FileName, _, _, _, _}) -> lager_util:expand_path(FileName);
lager_file_name1(_) ->
    throw({error, {cannot_log_to_file, unknown,
                   lager_file_backend_config_invalid}}).


%% Throws {error, {cannot_log_to_file, ...}} if the file cannot be stat'ed.
ensure_logfile_exist(FileName) ->
    LogFile = lager_util:expand_path(FileName),
    case rabbit_file:read_file_info(LogFile) of
        {ok,_}       -> ok;
        {error, Err} -> throw({error, {cannot_log_to_file, LogFile, Err}})
    end.

%% Configure lager once; subsequent calls are no-ops.
ensure_lager_configured() ->
    case lager_configured() of
        false -> configure_lager();
        true  -> ok
    end.

%% Lager should have handlers and sinks
%% Error logger forwarding to syslog should be disabled
lager_configured() ->
    Sinks = lager:list_all_sinks(),
    ExpectedSinks = list_expected_sinks(),
    application:get_env(lager, handlers) =/= undefined
    andalso
    lists:all(fun(S) -> lists:member(S, Sinks) end, ExpectedSinks)
    andalso
    application:get_env(syslog, syslog_error_logger) =/= undefined.
%% One-shot lager setup: loads lager, fills in every environment key it
%% is sensitive to (redirect flags, log root, colouring), translates the
%% RabbitMQ 'rabbit.log' configuration into lager handlers and sinks,
%% and tunes the error_logger high-water mark. Mutates application
%% environment throughout; statement order matters.
configure_lager() ->
    ok = app_utils:load_applications([lager]),
    %% Turn off reformatting for error_logger messages
    case application:get_env(lager, error_logger_redirect) of
        undefined -> application:set_env(lager, error_logger_redirect, true);
        _         -> ok
    end,
    case application:get_env(lager, error_logger_format_raw) of
        undefined -> application:set_env(lager, error_logger_format_raw, true);
        _         -> ok
    end,
    case application:get_env(lager, log_root) of
        undefined ->
            %% Setting env var to 'undefined' is different from not
            %% setting it at all, and lager is sensitive to this
            %% difference.
            case application:get_env(rabbit, lager_log_root) of
                {ok, Value} ->
                    ok = application:set_env(lager, log_root, Value);
                _ ->
                    ok
            end;
        _ -> ok
    end,
    case application:get_env(lager, colored) of
        undefined ->
            UseColor = rabbit_prelaunch_early_logging:use_colored_logging(),
            application:set_env(lager, colored, UseColor);
        _ ->
            ok
    end,
    %% Set rabbit.log config variable based on environment.
    prepare_rabbit_log_config(),
    %% Configure syslog library.
    ok = configure_syslog_error_logger(),
    %% At this point we should have rabbit.log application variable
    %% configured to generate RabbitMQ log handlers.
    GeneratedHandlers = generate_lager_handlers(),

    %% If there are lager handlers configured,
    %% both lager and generate RabbitMQ handlers are used.
    %% This is because it's hard to decide clear preference rules.
    %% RabbitMQ handlers can be set to [] to use only lager handlers.
    Handlers = case application:get_env(lager, handlers, undefined) of
        undefined -> GeneratedHandlers;
        LagerHandlers ->
            %% Remove handlers generated in previous starts
            FormerRabbitHandlers = application:get_env(lager, rabbit_handlers, []),
            GeneratedHandlers ++ remove_rabbit_handlers(LagerHandlers,
                                                        FormerRabbitHandlers)
    end,

    ok = application:set_env(lager, handlers, Handlers),
    %% Remember which handlers we generated so the next start can strip
    %% them out again (see remove_rabbit_handlers/2).
    ok = application:set_env(lager, rabbit_handlers, GeneratedHandlers),

    %% Setup extra sink/handlers. If they are not configured, redirect
    %% messages to the default sink. To know the list of expected extra
    %% sinks, we look at the 'lager_extra_sinks' compilation option.
    LogConfig = application:get_env(rabbit, log, []),
    LogLevels = application:get_env(rabbit, log_levels, []),
    Categories = proplists:get_value(categories, LogConfig, []),
    %% Reconcile the modern 'rabbit.log' categories with the deprecated
    %% 'rabbit.log_levels' parameter; categories win when both are set.
    CategoriesConfig0 = case {Categories, LogLevels} of
        {[], []} -> [];
        {[], LogLevels} ->
            io:format("Using deprecated config parameter 'log_levels'. "
                      "Please update your configuration file according to "
                      "https://rabbitmq.com/logging.html"),
            lists:map(fun({Name, Level}) -> {Name, [{level, Level}]} end,
                      LogLevels);
        {Categories, []} ->
            Categories;
        {Categories, _} ->
            io:format("Using the deprecated config parameter 'rabbit.log_levels' together "
                      "with a new parameter for log categories."
                      " 'rabbit.log_levels' will be ignored. Please remove it from the config. More at "
                      "https://rabbitmq.com/logging.html"),
            Categories
    end,
    %% Per-category log levels passed on the command line / environment
    %% (prelaunch context) override the configuration file.
    LogLevelsFromContext = case rabbit_prelaunch:get_context() of
                               #{log_levels := LL} -> LL;
                               _                   -> undefined
                           end,
    Fun = fun
              (global, _, CC) ->
                  CC;
              (color, _, CC) ->
                  CC;
              (CategoryS, LogLevel, CC) ->
                  Category = list_to_atom(CategoryS),
                  CCEntry = proplists:get_value(
                              Category, CC, []),
                  CCEntry1 = lists:ukeymerge(
                               1,
                               [{level, LogLevel}],
                               lists:ukeysort(1, CCEntry)),
                  lists:keystore(
                    Category, 1, CC, {Category, CCEntry1})
          end,
    CategoriesConfig = case LogLevelsFromContext of
                           undefined ->
                               CategoriesConfig0;
                           _ ->
                               maps:fold(Fun,
                                         CategoriesConfig0,
                                         LogLevelsFromContext)
                       end,
    SinkConfigs = lists:map(
                    fun({Name, Config}) ->
                            {rabbit_log:make_internal_sink_name(Name), Config}
                    end,
                    CategoriesConfig),
    LagerSinks = application:get_env(lager, extra_sinks, []),
    GeneratedSinks = generate_lager_sinks(
                       [error_logger_lager_event | list_expected_sinks()],
                       SinkConfigs),
    Sinks = merge_lager_sink_handlers(LagerSinks, GeneratedSinks, []),
    ok = application:set_env(lager, extra_sinks, Sinks),

    %% Raise the error_logger high-water mark to at least 1000 msg/s,
    %% remembering the original value so it can be restored later.
    case application:get_env(lager, error_logger_hwm) of
        undefined ->
            ok = application:set_env(lager, error_logger_hwm, 1000),
            % NB: 50 is the default value in lager.app.src
            ok = application:set_env(lager, error_logger_hwm_original, 50);
        {ok, Val} when is_integer(Val) andalso Val < 1000 ->
            ok = application:set_env(lager, error_logger_hwm, 1000),
            ok = application:set_env(lager, error_logger_hwm_original, Val);
        {ok, Val} when is_integer(Val) ->
            ok = application:set_env(lager, error_logger_hwm_original, Val),
            ok
    end,
    ok.

configure_syslog_error_logger() ->
    %% Disable error_logger forwarding to syslog if it's not configured
    case application:get_env(syslog, syslog_error_logger) of
        undefined ->
            application:set_env(syslog, syslog_error_logger, false);
        _ -> ok
    end.
%% Returns Handlers minus any handler already present in FormerHandlers
%% (i.e. generated by RabbitMQ on a previous start), so that restarts do
%% not accumulate duplicate handlers.
remove_rabbit_handlers(Handlers, FormerHandlers) ->
    lists:filter(fun(Handler) ->
                     not lists:member(Handler, FormerHandlers)
                 end,
                 Handlers).

%% Builds lager handler definitions from the 'rabbit.log' application
%% environment; the 'categories' key is handled separately (as sinks).
generate_lager_handlers() ->
    LogConfig = application:get_env(rabbit, log, []),
    LogHandlersConfig = lists:keydelete(categories, 1, LogConfig),
    generate_lager_handlers(LogHandlersConfig).

%% Translates each configured output (file/console/syslog/exchange)
%% into zero or one lager handler definition.
generate_lager_handlers(LogHandlersConfig) ->
    lists:flatmap(
    fun
        ({file, HandlerConfig}) ->
            case proplists:get_value(file, HandlerConfig, false) of
                %% No file name: file output is disabled.
                false -> [];
                FileName when is_list(FileName) ->
                    Backend = lager_backend(file),
                    generate_handler(Backend, HandlerConfig)
            end;
        ({Other, HandlerConfig}) when
              Other =:= console; Other =:= syslog; Other =:= exchange ->
            case proplists:get_value(enabled, HandlerConfig, false) of
                false -> [];
                true ->
                    Backend = lager_backend(Other),
                    generate_handler(Backend,
                                     lists:keydelete(enabled, 1, HandlerConfig))
            end
    end,
    LogHandlersConfig).

%% Maps a RabbitMQ log output kind to its lager backend module.
lager_backend(file) -> lager_file_backend;
lager_backend(console) -> lager_console_backend;
lager_backend(syslog) -> syslog_lager_backend;
lager_backend(exchange) -> lager_exchange_backend.

%% Syslog backend is using an old API for configuration and
%% does not support proplists.
generate_handler(syslog_lager_backend=Backend, HandlerConfig) ->
    %% The default log level is set to `debug` because the actual
    %% filtering is made at the sink level. We want to accept all
    %% messages here.
    DefaultConfigVal = debug,
    Level = proplists:get_value(level, HandlerConfig, DefaultConfigVal),
    ok = configure_handler_backend(Backend),
    [{Backend,
      [Level,
       %% presumably the positional syslog options left at their
       %% defaults — TODO confirm against syslog_lager_backend docs
       {},
       {lager_default_formatter, syslog_formatter_config()}]}];
generate_handler(Backend, HandlerConfig) ->
    %% Explicit settings win over the backend's defaults: on key
    %% clashes, ukeymerge keeps elements from its first list.
    [{Backend,
      lists:ukeymerge(1, lists:ukeysort(1, HandlerConfig),
                      lists:ukeysort(1, default_handler_config(Backend)))}].
%% Starts whatever application a handler backend needs before it can be
%% installed. Only the syslog backend requires this.
configure_handler_backend(syslog_lager_backend) ->
    {ok, _} = application:ensure_all_started(syslog),
    ok;
configure_handler_backend(_Backend) ->
    ok.

%% Default configuration proplist for each generated lager backend.
%% Every backend is created at `debug` because the actual filtering is
%% made at the sink level; we want to accept all messages here. The
%% file backend additionally disables lager's own rotation (date/size).
default_handler_config(lager_console_backend) ->
    base_handler_config(console);
default_handler_config(lager_exchange_backend) ->
    base_handler_config(exchange);
default_handler_config(lager_file_backend) ->
    base_handler_config(file) ++ [{date, ""},
                                  {size, 0}].

%% Shared part of every backend default: accept-everything level plus
%% the formatter appropriate for the given output kind.
base_handler_config(Output) ->
    [{level, debug},
     {formatter_config, default_config_value({formatter_config, Output})}].
%% Resolves the default value of a configuration key.
%%
%% For 'level': picks the most verbose level mentioned by any output in
%% the 'rabbit.log' environment, falling back to 'info'.
default_config_value(level) ->
    LogConfig = application:get_env(rabbit, log, []),
    FoldFun = fun
                  ({_, Cfg}, LL) when is_list(Cfg) ->
                      NewLL = proplists:get_value(level, Cfg, LL),
                      case LL of
                          undefined ->
                              NewLL;
                          _ ->
                              %% Keep whichever of the two levels is
                              %% more verbose (higher in lager's
                              %% numeric ordering).
                              MoreVerbose = lager_util:level_to_num(NewLL) > lager_util:level_to_num(LL),
                              case MoreVerbose of
                                  true -> NewLL;
                                  false -> LL
                              end
                      end;
                  (_, LL) ->
                      LL
              end,
    FoundLL = lists:foldl(FoldFun, undefined, LogConfig),
    case FoundLL of
        undefined -> info;
        _ -> FoundLL
    end;
%% Console formatter: coloured output needs an ANSI reset before EOL.
default_config_value({formatter_config, console}) ->
    EOL = case application:get_env(lager, colored) of
              {ok, true} -> "\e[0m\r\n";
              _ -> "\r\n"
          end,
    [date, " ", time, " ", color, "[", severity, "] ",
     {pid, ""},
     " ", message, EOL];
default_config_value({formatter_config, _}) ->
    [date, " ", time, " ", color, "[", severity, "] ",
     {pid, ""},
     " ", message, "\n"].

%% Formatter for the syslog backend: no date/time prefix (presumably
%% syslog supplies its own timestamp — confirm against syslog config).
syslog_formatter_config() ->
    [color, "[", severity, "] ",
     {pid, ""},
     " ", message, "\n"].

%% Translates the environment-derived settings (stored by prelaunch as
%% 'rabbit.lager_default_file' / 'rabbit.lager_upgrade_file') into the
%% 'rabbit.log' application environment.
prepare_rabbit_log_config() ->
    %% If RABBIT_LOGS is not set, we should ignore it.
    DefaultFile = application:get_env(rabbit, lager_default_file, undefined),
    %% If RABBIT_UPGRADE_LOGS is not set, we should ignore it.
    UpgradeFile = application:get_env(rabbit, lager_upgrade_file, undefined),
    case DefaultFile of
        undefined -> ok;
        %% 'false' is set upstream when file logging was explicitly
        %% disabled — assumption; confirm against the prelaunch code.
        false ->
            set_env_default_log_disabled();
        tty ->
            set_env_default_log_console();
        FileName when is_list(FileName) ->
            case rabbit_prelaunch:get_context() of
                %% The user explicitly sets $RABBITMQ_LOGS;
                %% we should override a file location even
                %% if it's set in rabbitmq.config
                #{var_origins := #{main_log_file := environment}} ->
                    set_env_default_log_file(FileName, override);
                _ ->
                    set_env_default_log_file(FileName, keep)
            end
    end,

    %% Upgrade log file never overrides the value set in rabbitmq.config
    case UpgradeFile of
        %% No special env for upgrade logs - redirect to the default sink
        undefined -> ok;
        %% Redirect logs to default output.
        DefaultFile -> ok;
        UpgradeFileName when is_list(UpgradeFileName) ->
            set_env_upgrade_log_file(UpgradeFileName)
    end.

set_env_default_log_disabled() ->
    %% Disabling all the logs.
    ok = application:set_env(rabbit, log, []).

%% Enables console logging and removes the file handler.
set_env_default_log_console() ->
    LogConfig = application:get_env(rabbit, log, []),
    ConsoleConfig = proplists:get_value(console, LogConfig, []),
    LogConfigConsole =
        lists:keystore(console, 1, LogConfig,
                       {console, lists:keystore(enabled, 1, ConsoleConfig,
                                                {enabled, true})}),
    %% Remove the file handler - disable logging to file
    LogConfigConsoleNoFile = lists:keydelete(file, 1, LogConfigConsole),
    ok = application:set_env(rabbit, log, LogConfigConsoleNoFile).
%% Stores FileName as the default log file in the 'rabbit.log'
%% environment. When a file is already configured, Override decides
%% whether the new name wins ('override', used when $RABBITMQ_LOGS was
%% set explicitly) or the configured value is kept ('keep').
set_env_default_log_file(FileName, Override) ->
    LogConfig = application:get_env(rabbit, log, []),
    FileConfig = proplists:get_value(file, LogConfig, []),
    NewLogConfig = case proplists:get_value(file, FileConfig, undefined) of
                       undefined ->
                           lists:keystore(file, 1, LogConfig,
                                          {file, lists:keystore(file, 1, FileConfig,
                                                                {file, FileName})});
                       _ConfiguredFileName ->
                           case Override of
                               override ->
                                   lists:keystore(
                                       file, 1, LogConfig,
                                       {file, lists:keystore(file, 1, FileConfig,
                                                             {file, FileName})});
                               keep ->
                                   LogConfig
                           end
                   end,
    ok = application:set_env(rabbit, log, NewLogConfig).

%% Points the 'upgrade' log category at FileName, unless that category
%% already has a file configured (the configured value always wins).
set_env_upgrade_log_file(FileName) ->
    LogConfig = application:get_env(rabbit, log, []),
    SinksConfig = proplists:get_value(categories, LogConfig, []),
    UpgradeSinkConfig = proplists:get_value(upgrade, SinksConfig, []),
    %% Look the existing file up in the 'upgrade' category itself.
    %% (Previously this looked for a 'file' key at the categories level,
    %% which never holds one, so the guard never fired and a configured
    %% upgrade log file was always overridden — contradicting the
    %% "never overrides rabbitmq.config" intent.)
    NewLogConfig = case proplists:get_value(file, UpgradeSinkConfig, undefined) of
                       undefined ->
                           lists:keystore(
                               categories, 1, LogConfig,
                               {categories,
                                lists:keystore(
                                    upgrade, 1, SinksConfig,
                                    {upgrade,
                                     lists:keystore(file, 1, UpgradeSinkConfig,
                                                    {file, FileName})})});
                       %% No change. We don't want to override the configured value.
                       _File -> LogConfig
                   end,
    ok = application:set_env(rabbit, log, NewLogConfig).
%% Builds a lager sink definition for each expected sink name. A sink
%% with its own file gets a dedicated file backend (plus level-adjusted
%% copies of the non-file handlers); a sink without one simply forwards
%% everything to the default lager sink.
generate_lager_sinks(SinkNames, SinkConfigs) ->
    LogLevels = case rabbit_prelaunch:get_context() of
                    #{log_levels := LL} -> LL;
                    _ -> undefined
                end,
    DefaultLogLevel = case LogLevels of
                          #{global := LogLevel} ->
                              LogLevel;
                          _ ->
                              default_config_value(level)
                      end,
    lists:map(fun(SinkName) ->
        SinkConfig = proplists:get_value(SinkName, SinkConfigs, []),
        SinkHandlers = case proplists:get_value(file, SinkConfig, false) of
            %% If no file defined - forward everything to the default backend
            false ->
                ForwarderLevel = proplists:get_value(level,
                                                     SinkConfig,
                                                     DefaultLogLevel),
                [{lager_forwarder_backend,
                  [lager_util:make_internal_sink_name(lager), ForwarderLevel]}];
            %% If a file defined - add a file backend to handlers and remove all default file backends.
            File ->
                %% The sink level defaults to the global default level
                %% so a sink-local file inherits the overall verbosity.
                Level = proplists:get_value(level, SinkConfig, DefaultLogLevel),
                DefaultGeneratedHandlers = application:get_env(lager, rabbit_handlers, []),
                SinkFileHandlers = case proplists:get_value(lager_file_backend, DefaultGeneratedHandlers, undefined) of
                    undefined ->
                        %% Create a new file handler from scratch.
                        FileLevel = proplists:get_value(level, SinkConfig, DefaultLogLevel),
                        generate_lager_handlers([{file, [{file, File}, {level, FileLevel}]}]);
                    FileHandler ->
                        %% Reuse the generated default file handler,
                        %% swapping in this sink's file name (and its
                        %% level, if the sink wants more verbosity).
                        FileHandlerChanges = case handler_level_more_verbose(FileHandler, Level) of
                            true -> [{file, File}, {level, Level}];
                            false -> [{file, File}]
                        end,

                        [{lager_file_backend,
                          lists:ukeymerge(1, FileHandlerChanges,
                                          lists:ukeysort(1, FileHandler))}]
                end,
                %% Remove all file handlers.
                AllLagerHandlers = application:get_env(lager, handlers, []),
                HandlersWithoutFile = lists:filter(
                    fun({lager_file_backend, _}) -> false;
                       ({_, _}) -> true
                    end,
                    AllLagerHandlers),
                %% Set level for handlers which are more verbose.
                %% We don't increase verbosity in sinks so it works like forwarder backend.
                HandlersWithoutFileWithLevel = lists:map(fun({Name, Handler}) ->
                    case handler_level_more_verbose(Handler, Level) of
                        true -> {Name, lists:keystore(level, 1, Handler, {level, Level})};
                        false -> {Name, Handler}
                    end
                end,
                HandlersWithoutFile),

                HandlersWithoutFileWithLevel ++ SinkFileHandlers
        end,
        {SinkName, [{handlers, SinkHandlers}, {rabbit_handlers, SinkHandlers}]}
    end,
    SinkNames).

%% True when Handler's configured level is more verbose than Level
%% (per lager's numeric level ordering).
handler_level_more_verbose(Handler, Level) ->
    HandlerLevel = proplists:get_value(level, Handler, default_config_value(level)),
    lager_util:level_to_num(HandlerLevel) > lager_util:level_to_num(Level).

%% Merges user-configured extra sinks with the ones RabbitMQ generated,
%% deduplicating handlers generated by previous starts; any generated
%% sink not present in the user configuration is appended as-is.
merge_lager_sink_handlers([{Name, Sink} | RestSinks], GeneratedSinks, Agg) ->
    %% rabbitmq/rabbitmq-server#2044.
    %% We have to take into account that a sink's
    %% handler backend may need additional configuration here.
    %% {rabbit_log_federation_lager_event, [
    %%     {handlers, [
    %%         {lager_forwarder_backend, [lager_event,inherit]},
    %%         {syslog_lager_backend, [debug]}
    %%     ]},
    %%     {rabbit_handlers, [
    %%         {lager_forwarder_backend, [lager_event,inherit]}
    %%     ]}
    %% ]}
    case lists:keytake(Name, 1, GeneratedSinks) of
        {value, {Name, GenSink}, RestGeneratedSinks} ->
            Handlers = proplists:get_value(handlers, Sink, []),
            GenHandlers = proplists:get_value(handlers, GenSink, []),
            FormerRabbitHandlers = proplists:get_value(rabbit_handlers, Sink, []),

            %% Remove handlers defined in previous starts
            ConfiguredHandlers = remove_rabbit_handlers(Handlers, FormerRabbitHandlers),
            NewHandlers = GenHandlers ++ ConfiguredHandlers,
            ok = maybe_configure_handler_backends(NewHandlers),
            MergedSink = lists:keystore(rabbit_handlers, 1,
                                        lists:keystore(handlers, 1, Sink,
                                                       {handlers, NewHandlers}),
                                        {rabbit_handlers, GenHandlers}),
            merge_lager_sink_handlers(
                RestSinks,
                RestGeneratedSinks,
                [{Name, MergedSink} | Agg]);
        false ->
            merge_lager_sink_handlers(
                RestSinks,
                GeneratedSinks,
                [{Name, Sink} | Agg])
    end;
merge_lager_sink_handlers([], GeneratedSinks, Agg) -> GeneratedSinks ++ Agg.

%% Starts any application the given handler backends depend on.
maybe_configure_handler_backends([]) ->
    ok;
maybe_configure_handler_backends([{Backend, _}|Backends]) ->
    ok = configure_handler_backend(Backend),
    maybe_configure_handler_backends(Backends).

list_expected_sinks() ->
    rabbit_prelaunch_early_logging:list_expected_sinks().

%% Removes the OTP 'logger' default handler so it does not duplicate
%% lager's output. Tolerates old OTP releases and CLI contexts where
%% the handler is absent.
maybe_remove_logger_handler() ->
    M = logger,
    F = remove_handler,
    try
        ok = erlang:apply(M, F, [default])
    catch
        error:undef ->
            % OK since the logger module only exists in OTP 21.1 or later
            ok;
        error:{badmatch, {error, {not_found, default}}} ->
            % OK - this error happens when running a CLI command
            ok;
        Err:Reason ->
            error_logger:error_msg("calling ~p:~p failed: ~p:~p~n",
                                   [M, F, Err, Reason])
    end.

%% Scans every handler (default and extra sinks) and returns the most
%% verbose level mentioned by any of them.
get_most_verbose_log_level() ->
    {ok, HandlersA} = application:get_env(lager, handlers),
    {ok, ExtraSinks} = application:get_env(lager, extra_sinks),
    HandlersB = lists:append(
                  [H || {_, Keys} <- ExtraSinks,
                        {handlers, H} <- Keys]),
    get_most_verbose_log_level(HandlersA ++ HandlersB,
                               lager_util:level_to_num(none)).

get_most_verbose_log_level([{_, Props} | Rest], MostVerbose) ->
    LogLevel = proplists:get_value(level, Props, info),
    LogLevelNum = lager_util:level_to_num(LogLevel),
    case LogLevelNum > MostVerbose of
        true ->
            get_most_verbose_log_level(Rest, LogLevelNum);
        false ->
            get_most_verbose_log_level(Rest, MostVerbose)
    end;
get_most_verbose_log_level([], MostVerbose) ->
    lager_util:num_to_level(MostVerbose).
diff --git a/deps/rabbit/src/rabbit_limiter.erl b/deps/rabbit/src/rabbit_limiter.erl
new file mode 100644
index 0000000000..d3803957d3
--- /dev/null
+++ b/deps/rabbit/src/rabbit_limiter.erl
@@ -0,0 +1,448 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0.
If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +%% The purpose of the limiter is to stem the flow of messages from +%% queues to channels, in order to act upon various protocol-level +%% flow control mechanisms, specifically AMQP 0-9-1's basic.qos +%% prefetch_count, our consumer prefetch extension, and AMQP 1.0's +%% link (aka consumer) credit mechanism. +%% +%% Each channel has an associated limiter process, created with +%% start_link/1, which it passes to queues on consumer creation with +%% rabbit_amqqueue:basic_consume/10, and rabbit_amqqueue:basic_get/4. +%% The latter isn't strictly necessary, since basic.get is not +%% subject to limiting, but it means that whenever a queue knows about +%% a channel, it also knows about its limiter, which is less fiddly. +%% +%% The limiter process holds state that is, in effect, shared between +%% the channel and all queues from which the channel is +%% consuming. Essentially all these queues are competing for access to +%% a single, limited resource - the ability to deliver messages via +%% the channel - and it is the job of the limiter process to mediate +%% that access. +%% +%% The limiter process is separate from the channel process for two +%% reasons: separation of concerns, and efficiency. Channels can get +%% very busy, particularly if they are also dealing with publishes. +%% With a separate limiter process all the aforementioned access +%% mediation can take place without touching the channel. +%% +%% For efficiency, both the channel and the queues keep some local +%% state, initialised from the limiter pid with new/1 and client/1, +%% respectively. In particular this allows them to avoid any +%% interaction with the limiter process when it is 'inactive', i.e. no +%% protocol-level flow control is taking place. 
+%% +%% This optimisation does come at the cost of some complexity though: +%% when a limiter becomes active, the channel needs to inform all its +%% consumer queues of this change in status. It does this by invoking +%% rabbit_amqqueue:activate_limit_all/2. Note that there is no inverse +%% transition, i.e. once a queue has been told about an active +%% limiter, it is not subsequently told when that limiter becomes +%% inactive. In practice it is rare for that to happen, though we +%% could optimise this case in the future. +%% +%% Consumer credit (for AMQP 1.0) and per-consumer prefetch (for AMQP +%% 0-9-1) are treated as essentially the same thing, but with the +%% exception that per-consumer prefetch gets an auto-topup when +%% acknowledgments come in. +%% +%% The bookkeeping for this is local to queues, so it is not necessary +%% to store information about it in the limiter process. But for +%% abstraction we hide it from the queue behind the limiter API, and +%% it therefore becomes part of the queue local state. +%% +%% The interactions with the limiter are as follows: +%% +%% 1. Channels tell the limiter about basic.qos prefetch counts - +%% that's what the limit_prefetch/3, unlimit_prefetch/1, +%% get_prefetch_limit/1 API functions are about. They also tell the +%% limiter queue state (via the queue) about consumer credit +%% changes and message acknowledgement - that's what credit/5 and +%% ack_from_queue/3 are for. +%% +%% 2. Queues also tell the limiter queue state about the queue +%% becoming empty (via drained/1) and consumers leaving (via +%% forget_consumer/2). +%% +%% 3. Queues register with the limiter - this happens as part of +%% activate/1. +%% +%% 4. The limiter process maintains an internal counter of 'messages +%% sent but not yet acknowledged', called the 'volume'. +%% +%% 5. Queues ask the limiter for permission (with can_send/3) whenever +%% they want to deliver a message to a channel. 
The limiter checks +%% whether a) the volume has not yet reached the prefetch limit, +%% and b) whether the consumer has enough credit. If so it +%% increments the volume and tells the queue to proceed. Otherwise +%% it marks the queue as requiring notification (see below) and +%% tells the queue not to proceed. +%% +%% 6. A queue that has been told to proceed (by the return value of +%% can_send/3) sends the message to the channel. Conversely, a +%% queue that has been told not to proceed, will not attempt to +%% deliver that message, or any future messages, to the +%% channel. This is accomplished by can_send/3 capturing the +%% outcome in the local state, where it can be accessed with +%% is_suspended/1. +%% +%% 7. When a channel receives an ack it tells the limiter (via ack/2) +%% how many messages were ack'ed. The limiter process decrements +%% the volume and if it falls below the prefetch_count then it +%% notifies (through rabbit_amqqueue:resume/2) all the queues +%% requiring notification, i.e. all those that had a can_send/3 +%% request denied. +%% +%% 8. Upon receipt of such a notification, queues resume delivery to +%% the channel, i.e. they will once again start asking limiter, as +%% described in (5). +%% +%% 9. When a queue has no more consumers associated with a particular +%% channel, it deactivates use of the limiter with deactivate/1, +%% which alters the local state such that no further interactions +%% with the limiter process take place until a subsequent +%% activate/1. + +-module(rabbit_limiter). + +-include("rabbit.hrl"). + +-behaviour(gen_server2). + +-export([start_link/1]). +%% channel API +-export([new/1, limit_prefetch/3, unlimit_prefetch/1, is_active/1, + get_prefetch_limit/1, ack/2, pid/1]). +%% queue API +-export([client/1, activate/1, can_send/3, resume/1, deactivate/1, + is_suspended/1, is_consumer_blocked/2, credit/5, ack_from_queue/3, + drained/1, forget_consumer/2]). 
%% callbacks
-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2,
         handle_info/2, prioritise_call/4]).

%%----------------------------------------------------------------------------

%% Channel-local view of the limiter (see new/1).
-record(lstate, {pid, prefetch_limited}).
%% Queue-local view of the limiter (see client/1); 'credits' holds the
%% per-consumer-tag credit bookkeeping.
-record(qstate, {pid, state, credits}).

-type lstate() :: #lstate{pid :: pid(),
                          prefetch_limited :: boolean()}.
-type qstate() :: #qstate{pid :: pid(),
                          state :: 'dormant' | 'active' | 'suspended'}.

-type credit_mode() :: 'manual' | 'drain' | 'auto'.

%%----------------------------------------------------------------------------

%% Limiter process state.
-record(lim, {prefetch_count = 0,
              ch_pid,
              %% 'Notify' is a boolean that indicates whether a queue should be
              %% notified of a change in the limit or volume that may allow it to
              %% deliver more messages via the limiter's channel.
              queues = maps:new(), % QPid -> {MonitorRef, Notify}
              volume = 0}).

%% mode is of type credit_mode()
-record(credit, {credit = 0, mode}).

%%----------------------------------------------------------------------------
%% API
%%----------------------------------------------------------------------------

-spec start_link(rabbit_types:proc_name()) ->
          rabbit_types:ok_pid_or_error().

start_link(ProcName) -> gen_server2:start_link(?MODULE, [ProcName], []).

-spec new(pid()) -> lstate().

%% Registers the calling channel with the limiter and returns the
%% channel-local state.
new(Pid) ->
    %% this a 'call' to ensure that it is invoked at most once.
    ok = gen_server:call(Pid, {new, self()}, infinity),
    #lstate{pid = Pid, prefetch_limited = false}.

-spec limit_prefetch(lstate(), non_neg_integer(), non_neg_integer()) ->
          lstate().

%% Applies a basic.qos prefetch count; UnackedCount seeds the limiter's
%% volume when no limit was previously in force.
limit_prefetch(L, PrefetchCount, UnackedCount) when PrefetchCount > 0 ->
    ok = gen_server:call(
           L#lstate.pid,
           {limit_prefetch, PrefetchCount, UnackedCount}, infinity),
    L#lstate{prefetch_limited = true}.

-spec unlimit_prefetch(lstate()) -> lstate().

unlimit_prefetch(L) ->
    ok = gen_server:call(L#lstate.pid, unlimit_prefetch, infinity),
    L#lstate{prefetch_limited = false}.

-spec is_active(lstate()) -> boolean().

is_active(#lstate{prefetch_limited = Limited}) -> Limited.

-spec get_prefetch_limit(lstate()) -> non_neg_integer().

%% 0 means "unlimited"; avoids a round-trip when no limit is set.
get_prefetch_limit(#lstate{prefetch_limited = false}) -> 0;
get_prefetch_limit(L) ->
    gen_server:call(L#lstate.pid, get_prefetch_limit, infinity).

-spec ack(lstate(), non_neg_integer()) -> 'ok'.

%% Reports AckCount acknowledged messages; a no-op while unlimited.
ack(#lstate{prefetch_limited = false}, _AckCount) -> ok;
ack(L, AckCount) -> gen_server:cast(L#lstate.pid, {ack, AckCount}).

-spec pid(lstate()) -> pid().

pid(#lstate{pid = Pid}) -> Pid.

-spec client(pid()) -> qstate().

%% Creates the queue-local view of the limiter; starts out 'dormant'
%% (no interaction with the limiter process).
client(Pid) -> #qstate{pid = Pid, state = dormant, credits = gb_trees:empty()}.

-spec activate(qstate()) -> qstate().

%% Registers the queue with the limiter the first time limiting becomes
%% relevant; idempotent for already-active/suspended clients.
activate(L = #qstate{state = dormant}) ->
    ok = gen_server:cast(L#qstate.pid, {register, self()}),
    L#qstate{state = active};
activate(L) -> L.

-spec can_send(qstate(), boolean(), rabbit_types:ctag()) ->
          {'continue' | 'suspend', qstate()}.

%% Asks permission to deliver one message for consumer CTag. Consults
%% the local credit state first, then (when active) the limiter
%% process. 'suspend' means the queue must stop delivering until
%% resume/1 is invoked.
can_send(L = #qstate{pid = Pid, state = State, credits = Credits},
         AckRequired, CTag) ->
    case is_consumer_blocked(L, CTag) of
        false -> case (State =/= active orelse
                       safe_call(Pid, {can_send, self(), AckRequired}, true)) of
                     true  -> Credits1 = decrement_credit(CTag, Credits),
                              {continue, L#qstate{credits = Credits1}};
                     false -> {suspend, L#qstate{state = suspended}}
                 end;
        true  -> {suspend, L}
    end.

%% Returns ExitValue instead of crashing if the limiter is already dead
%% (e.g. the channel is going down).
safe_call(Pid, Msg, ExitValue) ->
    rabbit_misc:with_exit_handler(
      fun () -> ExitValue end,
      fun () -> gen_server2:call(Pid, Msg, infinity) end).

-spec resume(qstate()) -> qstate().

%% Called when the limiter allows delivery again after a 'suspend'.
resume(L = #qstate{state = suspended}) ->
    L#qstate{state = active};
resume(L) -> L.

-spec deactivate(qstate()) -> qstate().

%% Unregisters the queue once it has no more consumers on this channel.
deactivate(L = #qstate{state = dormant}) -> L;
deactivate(L) ->
    ok = gen_server:cast(L#qstate.pid, {unregister, self()}),
    L#qstate{state = dormant}.

-spec is_suspended(qstate()) -> boolean().

is_suspended(#qstate{state = suspended}) -> true;
is_suspended(#qstate{})                  -> false.

-spec is_consumer_blocked(qstate(), rabbit_types:ctag()) -> boolean().

%% A consumer is blocked when it has a credit entry with zero credit;
%% consumers without an entry are not credit-controlled at all.
is_consumer_blocked(#qstate{credits = Credits}, CTag) ->
    case gb_trees:lookup(CTag, Credits) of
        none                                    -> false;
        {value, #credit{credit = C}} when C > 0 -> false;
        {value, #credit{}}                      -> true
    end.

-spec credit
        (qstate(), rabbit_types:ctag(), non_neg_integer(), credit_mode(),
         boolean()) ->
            {boolean(), qstate()}.

%% Sets consumer credit. When the queue is empty and the mode is
%% 'drain', the credit is consumed immediately and the returned boolean
%% is true (a 'drained' event must be emitted).
credit(Limiter = #qstate{credits = Credits}, CTag, Crd, Mode, IsEmpty) ->
    {Res, Cr} =
        case IsEmpty andalso Mode =:= drain of
            true  -> {true,  #credit{credit = 0,   mode = manual}};
            false -> {false, #credit{credit = Crd, mode = Mode}}
        end,
    {Res, Limiter#qstate{credits = enter_credit(CTag, Cr, Credits)}}.

-spec ack_from_queue(qstate(), rabbit_types:ctag(), non_neg_integer()) ->
          {boolean(), qstate()}.

%% Auto-mode consumers get their credit topped up on ack; the returned
%% boolean is true when this unblocks a previously blocked consumer.
ack_from_queue(Limiter = #qstate{credits = Credits}, CTag, Credit) ->
    {Credits1, Unblocked} =
        case gb_trees:lookup(CTag, Credits) of
            {value, C = #credit{mode = auto, credit = C0}} ->
                {update_credit(CTag, C#credit{credit = C0 + Credit}, Credits),
                 C0 =:= 0 andalso Credit =/= 0};
            _ ->
                {Credits, false}
        end,
    {Unblocked, Limiter#qstate{credits = Credits1}}.

-spec drained(qstate()) ->
          {[{rabbit_types:ctag(), non_neg_integer()}], qstate()}.
%% The queue became empty: every consumer in 'drain' mode has its
%% remaining credit consumed and switched back to 'manual'. Returns the
%% {CTag, RemainingCredit} pairs that need 'drained' notifications.
drained(Limiter = #qstate{credits = Credits}) ->
    Drain = fun(C) -> C#credit{credit = 0, mode = manual} end,
    {CTagCredits, Credits2} =
        rabbit_misc:gb_trees_fold(
          fun (CTag, C = #credit{credit = Crd, mode = drain}, {Acc, Creds0}) ->
                  {[{CTag, Crd} | Acc], update_credit(CTag, Drain(C), Creds0)};
              (_CTag,  #credit{credit = _Crd, mode = _Mode}, {Acc, Creds0}) ->
                  {Acc, Creds0}
          end, {[], Credits}, Credits),
    {CTagCredits, Limiter#qstate{credits = Credits2}}.

-spec forget_consumer(qstate(), rabbit_types:ctag()) -> qstate().

%% Drops all credit bookkeeping for a cancelled consumer.
forget_consumer(Limiter = #qstate{credits = Credits}, CTag) ->
    Limiter#qstate{credits = gb_trees:delete_any(CTag, Credits)}.

%%----------------------------------------------------------------------------
%% Queue-local code
%%----------------------------------------------------------------------------

%% We want to do all the AMQP 1.0-ish link level credit calculations
%% in the queue (to do them elsewhere introduces a ton of
%% races). However, it's a big chunk of code that is conceptually very
%% linked to the limiter concept. So we get the queue to hold a bit of
%% state for us (#qstate.credits), and maintain a fiction that the
%% limiter is making the decisions...

%% Consumes one unit of credit for CTag, if it has a credit entry.
decrement_credit(CTag, Credits) ->
    case gb_trees:lookup(CTag, Credits) of
        {value, C = #credit{credit = Credit}} ->
            update_credit(CTag, C#credit{credit = Credit - 1}, Credits);
        none ->
            Credits
    end.

%% Insert-or-update; see ensure_credit_invariant/1.
enter_credit(CTag, C, Credits) ->
    gb_trees:enter(CTag, ensure_credit_invariant(C), Credits).

%% Update of an existing entry; see ensure_credit_invariant/1.
update_credit(CTag, C, Credits) ->
    gb_trees:update(CTag, ensure_credit_invariant(C), Credits).

ensure_credit_invariant(C = #credit{credit = 0, mode = drain}) ->
    %% Using up all credit implies no need to send a 'drained' event
    C#credit{mode = manual};
ensure_credit_invariant(C) ->
    C.
%%----------------------------------------------------------------------------
%% gen_server callbacks
%%----------------------------------------------------------------------------

init([ProcName]) -> ?store_proc_name(ProcName),
                    ?LG_PROCESS_TYPE(limiter),
                    {ok, #lim{}}.

%% Channels call get_prefetch_limit synchronously on the hot path, so
%% let it jump the queue ahead of other limiter traffic.
prioritise_call(get_prefetch_limit, _From, _Len, _State) -> 9;
prioritise_call(_Msg, _From, _Len, _State) -> 0.

%% 'new' may only succeed once: a second registration fails to match
%% ch_pid = undefined and crashes the caller.
handle_call({new, ChPid}, _From, State = #lim{ch_pid = undefined}) ->
    {reply, ok, State#lim{ch_pid = ChPid}};

%% First limit applied: seed the volume with the channel's current
%% unacked count.
handle_call({limit_prefetch, PrefetchCount, UnackedCount}, _From,
            State = #lim{prefetch_count = 0}) ->
    {reply, ok, maybe_notify(State, State#lim{prefetch_count = PrefetchCount,
                                              volume = UnackedCount})};
%% Subsequent limit changes keep the running volume.
handle_call({limit_prefetch, PrefetchCount, _UnackedCount}, _From, State) ->
    {reply, ok, maybe_notify(State, State#lim{prefetch_count = PrefetchCount})};

handle_call(unlimit_prefetch, _From, State) ->
    {reply, ok, maybe_notify(State, State#lim{prefetch_count = 0,
                                              volume = 0})};

handle_call(get_prefetch_limit, _From,
            State = #lim{prefetch_count = PrefetchCount}) ->
    {reply, PrefetchCount, State};

%% Grants or denies one delivery. Only deliveries that require an ack
%% consume prefetch volume; a denied queue is marked for notification.
handle_call({can_send, QPid, AckRequired}, _From,
            State = #lim{volume = Volume}) ->
    case prefetch_limit_reached(State) of
        true  -> {reply, false, limit_queue(QPid, State)};
        false -> {reply, true, State#lim{volume = if AckRequired -> Volume + 1;
                                                     true        -> Volume
                                                  end}}
    end.

handle_cast({ack, Count}, State = #lim{volume = Volume}) ->
    %% Clamp at zero: an ack burst can exceed the tracked volume (e.g.
    %% for deliveries made while no limit was in force), and a negative
    %% volume would distort prefetch_limit_reached/1. The previous code
    %% only special-cased Volume == 0 and could otherwise go negative.
    NewVolume = erlang:max(Volume - Count, 0),
    {noreply, maybe_notify(State, State#lim{volume = NewVolume})};

handle_cast({register, QPid}, State) ->
    {noreply, remember_queue(QPid, State)};

handle_cast({unregister, QPid}, State) ->
    {noreply, forget_queue(QPid, State)}.

%% A registered queue died: stop tracking it.
handle_info({'DOWN', _MonitorRef, _Type, QPid, _Info}, State) ->
    {noreply, forget_queue(QPid, State)}.

terminate(_, _) ->
    ok.

code_change(_, State, _) ->
    {ok, State}.
+ +%%---------------------------------------------------------------------------- +%% Internal plumbing +%%---------------------------------------------------------------------------- + +maybe_notify(OldState, NewState) -> + case prefetch_limit_reached(OldState) andalso + not prefetch_limit_reached(NewState) of + true -> notify_queues(NewState); + false -> NewState + end. + +prefetch_limit_reached(#lim{prefetch_count = Limit, volume = Volume}) -> + Limit =/= 0 andalso Volume >= Limit. + +remember_queue(QPid, State = #lim{queues = Queues}) -> + case maps:is_key(QPid, Queues) of + false -> MRef = erlang:monitor(process, QPid), + State#lim{queues = maps:put(QPid, {MRef, false}, Queues)}; + true -> State + end. + +forget_queue(QPid, State = #lim{queues = Queues}) -> + case maps:find(QPid, Queues) of + {ok, {MRef, _}} -> true = erlang:demonitor(MRef), + State#lim{queues = maps:remove(QPid, Queues)}; + error -> State + end. + +limit_queue(QPid, State = #lim{queues = Queues}) -> + UpdateFun = fun ({MRef, _}) -> {MRef, true} end, + State#lim{queues = maps:update_with(QPid, UpdateFun, Queues)}. + +notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) -> + {QList, NewQueues} = + maps:fold(fun (_QPid, {_, false}, Acc) -> Acc; + (QPid, {MRef, true}, {L, D}) -> + {[QPid | L], maps:put(QPid, {MRef, false}, D)} + end, {[], Queues}, Queues), + case length(QList) of + 0 -> ok; + 1 -> ok = rabbit_amqqueue:resume(hd(QList), ChPid); %% common case + L -> + %% We randomly vary the position of queues in the list, + %% thus ensuring that each queue has an equal chance of + %% being notified first. + {L1, L2} = lists:split(rand:uniform(L), QList), + [[ok = rabbit_amqqueue:resume(Q, ChPid) || Q <- L3] + || L3 <- [L2, L1]], + ok + end, + State#lim{queues = NewQueues}. 
diff --git a/deps/rabbit/src/rabbit_log_tail.erl b/deps/rabbit/src/rabbit_log_tail.erl
new file mode 100644
index 0000000000..c3faad07fc
--- /dev/null
+++ b/deps/rabbit/src/rabbit_log_tail.erl
@@ -0,0 +1,102 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_log_tail).

-export([tail_n_lines/2]).
-export([init_tail_stream/4]).

%% Chunk size (in bytes) used both for streaming reads and as the
%% per-batch size guess when reading a file backwards.
-define(GUESS_OFFSET, 200).

%% Spawns a reader process that follows Filename (starting at EOF) and
%% streams newly appended chunks to Pid as {Ref, Data, confinue} /
%% {Ref, Data, finished} messages, for at most Duration seconds
%% ('infinity' to stream until the consumer dies). Replies {ok, Ref}
%% once the file is open, {error, Reason} on open failure or timeout.
init_tail_stream(Filename, Pid, Ref, Duration) ->
    RPCProc = self(),
    Reader = spawn(fun() ->
                   %% tie the reader's lifetime to the consumer's
                   link(Pid),
                   case file:open(Filename, [read, binary]) of
                       {ok, File} ->
                           TimeLimit = case Duration of
                               infinity -> infinity;
                               _ -> erlang:system_time(second) + Duration
                           end,
                           {ok, _} = file:position(File, eof),
                           RPCProc ! {Ref, opened},
                           read_loop(File, Pid, Ref, TimeLimit);
                       {error, _} = Err ->
                           RPCProc ! {Ref, Err}
                   end
            end),
    receive
        {Ref, opened} -> {ok, Ref};
        {Ref, {error, Err}} -> {error, Err}
    after 5000 ->
        %% reader never opened the file in time; kill it and give up
        exit(Reader, timeout),
        {error, timeout}
    end.

read_loop(File, Pid, Ref, TimeLimit) ->
    case is_integer(TimeLimit) andalso erlang:system_time(second) > TimeLimit of
        true -> Pid ! {Ref, <<>>, finished};
        false ->
            case file:read(File, ?GUESS_OFFSET) of
                {ok, Data} ->
                    %% NOTE(review): 'confinue' looks like a typo for
                    %% 'continue', but the atom is part of the streaming
                    %% protocol — the consumers of this stream must match
                    %% the same atom, so confirm every receiver before
                    %% renaming it.
                    Pid ! {Ref, Data, confinue},
                    read_loop(File, Pid, Ref, TimeLimit);
                eof ->
                    %% poll once a second for newly appended data
                    timer:sleep(1000),
                    read_loop(File, Pid, Ref, TimeLimit);
                {error, _} = Err ->
                    Pid ! {Ref, Err, finished}
            end
    end.

%% Returns (up to) the last N lines of Filename as a list of binaries
%% (newlines stripped), or {error, Reason} if the file cannot be opened.
tail_n_lines(Filename, N) ->
    case file:open(Filename, [read, binary]) of
        {ok, File} ->
            {ok, Eof} = file:position(File, eof),
            %% Eof may move. Only read up to the current one.
            Result = reverse_read_n_lines(N, N, File, Eof, Eof),
            file:close(File),
            Result;
        {error, _} = Error -> Error
    end.
+ +reverse_read_n_lines(N, OffsetN, File, Position, Eof) -> + GuessPosition = offset(Position, OffsetN), + case read_lines_from_position(File, GuessPosition, Eof) of + {ok, Lines} -> + NLines = length(Lines), + case {NLines >= N, GuessPosition == 0} of + %% Take only N lines if there is more + {true, _} -> lists:nthtail(NLines - N, Lines); + %% Safe to assume that NLines is less then N + {_, true} -> Lines; + %% Adjust position + _ -> + reverse_read_n_lines(N, N - NLines + 1, File, GuessPosition, Eof) + end; + {error, _} = Error -> Error + end. + +read_from_position(File, GuessPosition, Eof) -> + file:pread(File, GuessPosition, max(0, Eof - GuessPosition)). + +read_lines_from_position(File, GuessPosition, Eof) -> + case read_from_position(File, GuessPosition, Eof) of + {ok, Data} -> + Lines = binary:split(Data, <<"\n">>, [global, trim]), + case {GuessPosition, Lines} of + %% If position is 0 - there are no partial lines + {0, _} -> {ok, Lines}; + %% Remove first line as it can be partial + {_, [_ | Rest]} -> {ok, Rest}; + {_, []} -> {ok, []} + end; + {error, _} = Error -> Error + end. + +offset(Base, N) -> + max(0, Base - N * ?GUESS_OFFSET). diff --git a/deps/rabbit/src/rabbit_looking_glass.erl b/deps/rabbit/src/rabbit_looking_glass.erl new file mode 100644 index 0000000000..00b1b6d46b --- /dev/null +++ b/deps/rabbit/src/rabbit_looking_glass.erl @@ -0,0 +1,48 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_looking_glass). + +-ignore_xref([{lg, trace, 4}]). +-ignore_xref([{maps, from_list, 1}]). + +-export([boot/0]). +-export([connections/0]). 
+ +boot() -> + case os:getenv("RABBITMQ_TRACER") of + false -> + ok; + Value -> + Input = parse_value(Value), + rabbit_log:info( + "Enabling Looking Glass profiler, input value: ~p", + [Input] + ), + {ok, _} = application:ensure_all_started(looking_glass), + lg:trace( + Input, + lg_file_tracer, + "traces.lz4", + maps:from_list([ + {mode, profile}, + {process_dump, true}, + {running, true}, + {send, true}] + ) + ) + end. + +parse_value(Value) -> + [begin + [Mod, Fun] = string:tokens(C, ":"), + {callback, list_to_atom(Mod), list_to_atom(Fun)} + end || C <- string:tokens(Value, ",")]. + +connections() -> + Pids = [Pid || {{conns_sup, _}, Pid} <- ets:tab2list(ranch_server)], + ['_', {scope, Pids}]. diff --git a/deps/rabbit/src/rabbit_maintenance.erl b/deps/rabbit/src/rabbit_maintenance.erl new file mode 100644 index 0000000000..e5434dc888 --- /dev/null +++ b/deps/rabbit/src/rabbit_maintenance.erl @@ -0,0 +1,354 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_maintenance). + +-include("rabbit.hrl"). + +-export([ + is_enabled/0, + drain/0, + revive/0, + mark_as_being_drained/0, + unmark_as_being_drained/0, + is_being_drained_local_read/1, + is_being_drained_consistent_read/1, + status_local_read/1, + status_consistent_read/1, + filter_out_drained_nodes_local_read/1, + filter_out_drained_nodes_consistent_read/1, + suspend_all_client_listeners/0, + resume_all_client_listeners/0, + close_all_client_connections/0, + primary_replica_transfer_candidate_nodes/0, + random_primary_replica_transfer_candidate_node/1, + transfer_leadership_of_quorum_queues/1, + transfer_leadership_of_classic_mirrored_queues/1, + status_table_name/0, + status_table_definition/0 +]). + +-define(TABLE, rabbit_node_maintenance_states). 
+-define(FEATURE_FLAG, maintenance_mode_status). +-define(DEFAULT_STATUS, regular). +-define(DRAINING_STATUS, draining). + +-type maintenance_status() :: ?DEFAULT_STATUS | ?DRAINING_STATUS. +-type mnesia_table() :: atom(). + +-export_type([ + maintenance_status/0 +]). + +%% +%% API +%% + +-spec status_table_name() -> mnesia_table(). +status_table_name() -> + ?TABLE. + +-spec status_table_definition() -> list(). +status_table_definition() -> + maps:to_list(#{ + record_name => node_maintenance_state, + attributes => record_info(fields, node_maintenance_state) + }). + +-spec is_enabled() -> boolean(). +is_enabled() -> + rabbit_feature_flags:is_enabled(?FEATURE_FLAG). + +-spec drain() -> ok. +drain() -> + case is_enabled() of + true -> do_drain(); + false -> rabbit_log:warning("Feature flag `~s` is not enabled, draining is a no-op", [?FEATURE_FLAG]) + end. + +-spec do_drain() -> ok. +do_drain() -> + rabbit_log:alert("This node is being put into maintenance (drain) mode"), + mark_as_being_drained(), + rabbit_log:info("Marked this node as undergoing maintenance"), + suspend_all_client_listeners(), + rabbit_log:alert("Suspended all listeners and will no longer accept client connections"), + {ok, NConnections} = close_all_client_connections(), + %% allow plugins to react e.g. 
by closing their protocol connections
    rabbit_event:notify(maintenance_connections_closed, #{
        reason => <<"node is being put into maintenance">>
    }),
    rabbit_log:alert("Closed ~b local client connections", [NConnections]),

    TransferCandidates = primary_replica_transfer_candidate_nodes(),
    ReadableCandidates = readable_candidate_list(TransferCandidates),
    rabbit_log:info("Node will transfer primary replicas of its queues to ~b peers: ~s",
                    [length(TransferCandidates), ReadableCandidates]),
    transfer_leadership_of_classic_mirrored_queues(TransferCandidates),
    transfer_leadership_of_quorum_queues(TransferCandidates),
    stop_local_quorum_queue_followers(),

    %% allow plugins to react
    rabbit_event:notify(maintenance_draining, #{
        reason => <<"node is being put into maintenance">>
    }),
    rabbit_log:alert("Node is ready to be shut down for maintenance or upgrade"),

    ok.

-spec revive() -> ok.
revive() ->
    case is_enabled() of
        true  -> do_revive();
        false -> rabbit_log:warning("Feature flag `~s` is not enabled, reviving is a no-op", [?FEATURE_FLAG])
    end.

-spec do_revive() -> ok.
%% Reverses do_drain/0: restarts local quorum queue replicas, resumes
%% client listeners and clears the maintenance status flag.
do_revive() ->
    rabbit_log:alert("This node is being revived from maintenance (drain) mode"),
    revive_local_quorum_queue_replicas(),
    resume_all_client_listeners(),
    %% BUG FIX: this alert used to be emitted twice, the first time
    %% *before* the listeners were actually resumed. Log it exactly
    %% once, after the fact.
    rabbit_log:alert("Resumed all listeners and will accept client connections again"),
    unmark_as_being_drained(),
    rabbit_log:info("Marked this node as back from maintenance and ready to serve clients"),

    %% allow plugins to react
    rabbit_event:notify(maintenance_revived, #{}),

    ok.

-spec mark_as_being_drained() -> boolean().
%% Persists the 'draining' status for this node; returns whether the
%% mnesia transaction committed.
mark_as_being_drained() ->
    rabbit_log:debug("Marking the node as undergoing maintenance"),
    set_maintenance_status_status(?DRAINING_STATUS).

-spec unmark_as_being_drained() -> boolean().
+unmark_as_being_drained() -> + rabbit_log:debug("Unmarking the node as undergoing maintenance"), + set_maintenance_status_status(?DEFAULT_STATUS). + +set_maintenance_status_status(Status) -> + Res = mnesia:transaction(fun () -> + case mnesia:wread({?TABLE, node()}) of + [] -> + Row = #node_maintenance_state{ + node = node(), + status = Status + }, + mnesia:write(?TABLE, Row, write); + [Row0] -> + Row = Row0#node_maintenance_state{ + node = node(), + status = Status + }, + mnesia:write(?TABLE, Row, write) + end + end), + case Res of + {atomic, ok} -> true; + _ -> false + end. + + +-spec is_being_drained_local_read(node()) -> boolean(). +is_being_drained_local_read(Node) -> + Status = status_local_read(Node), + Status =:= ?DRAINING_STATUS. + +-spec is_being_drained_consistent_read(node()) -> boolean(). +is_being_drained_consistent_read(Node) -> + Status = status_consistent_read(Node), + Status =:= ?DRAINING_STATUS. + +-spec status_local_read(node()) -> maintenance_status(). +status_local_read(Node) -> + case catch mnesia:dirty_read(?TABLE, Node) of + [] -> ?DEFAULT_STATUS; + [#node_maintenance_state{node = Node, status = Status}] -> + Status; + _ -> ?DEFAULT_STATUS + end. + +-spec status_consistent_read(node()) -> maintenance_status(). +status_consistent_read(Node) -> + case mnesia:transaction(fun() -> mnesia:read(?TABLE, Node) end) of + {atomic, []} -> ?DEFAULT_STATUS; + {atomic, [#node_maintenance_state{node = Node, status = Status}]} -> + Status; + {atomic, _} -> ?DEFAULT_STATUS; + {aborted, _Reason} -> ?DEFAULT_STATUS + end. + + -spec filter_out_drained_nodes_local_read([node()]) -> [node()]. +filter_out_drained_nodes_local_read(Nodes) -> + lists:filter(fun(N) -> not is_being_drained_local_read(N) end, Nodes). + +-spec filter_out_drained_nodes_consistent_read([node()]) -> [node()]. +filter_out_drained_nodes_consistent_read(Nodes) -> + lists:filter(fun(N) -> not is_being_drained_consistent_read(N) end, Nodes). 
+ +-spec suspend_all_client_listeners() -> rabbit_types:ok_or_error(any()). + %% Pauses all listeners on the current node except for + %% Erlang distribution (clustering and CLI tools). + %% A respausedumed listener will not accept any new client connections + %% but previously established connections won't be interrupted. +suspend_all_client_listeners() -> + Listeners = rabbit_networking:node_client_listeners(node()), + rabbit_log:info("Asked to suspend ~b client connection listeners. " + "No new client connections will be accepted until these listeners are resumed!", [length(Listeners)]), + Results = lists:foldl(local_listener_fold_fun(fun ranch:suspend_listener/1), [], Listeners), + lists:foldl(fun ok_or_first_error/2, ok, Results). + + -spec resume_all_client_listeners() -> rabbit_types:ok_or_error(any()). + %% Resumes all listeners on the current node except for + %% Erlang distribution (clustering and CLI tools). + %% A resumed listener will accept new client connections. +resume_all_client_listeners() -> + Listeners = rabbit_networking:node_client_listeners(node()), + rabbit_log:info("Asked to resume ~b client connection listeners. " + "New client connections will be accepted from now on", [length(Listeners)]), + Results = lists:foldl(local_listener_fold_fun(fun ranch:resume_listener/1), [], Listeners), + lists:foldl(fun ok_or_first_error/2, ok, Results). + + -spec close_all_client_connections() -> {'ok', non_neg_integer()}. +close_all_client_connections() -> + Pids = rabbit_networking:local_connections(), + rabbit_networking:close_connections(Pids, "Node was put into maintenance mode"), + {ok, length(Pids)}. + +-spec transfer_leadership_of_quorum_queues([node()]) -> ok. 
%% Triggers a leader election for every quorum queue whose leader is
%% hosted on this node, by stopping the local Ra server so it is
%% excluded from the election. No-op (with a warning) when there are
%% no eligible transfer candidates.
transfer_leadership_of_quorum_queues([]) ->
    rabbit_log:warning("Skipping leadership transfer of quorum queues: no candidate "
                       "(online, not under maintenance) nodes to transfer to!");
transfer_leadership_of_quorum_queues(_TransferCandidates) ->
    %% we only transfer leadership for QQs that have local leaders
    Queues = rabbit_amqqueue:list_local_leaders(),
    rabbit_log:info("Will transfer leadership of ~b quorum queues with current leader on this node",
                    [length(Queues)]),
    [begin
         Name = amqqueue:get_name(Q),
         rabbit_log:debug("Will trigger a leader election for local quorum queue ~s",
                          [rabbit_misc:rs(Name)]),
         %% we trigger an election and exclude this node from the list of candidates
         %% by simply shutting its local QQ replica (Ra server) down
         RaLeader = amqqueue:get_pid(Q),
         rabbit_log:debug("Will stop Ra server ~p", [RaLeader]),
         case ra:stop_server(RaLeader) of
             ok ->
                 rabbit_log:debug("Successfully stopped Ra server ~p", [RaLeader]);
             {error, nodedown} ->
                 %% BUG FIX: the format string contains a ~p placeholder but
                 %% no argument list was supplied, which would make this log
                 %% call itself fail with badarg.
                 rabbit_log:error("Failed to stop Ra server ~p: target node was reported as down",
                                  [RaLeader])
         end
     end || Q <- Queues],
    rabbit_log:info("Leadership transfer for quorum queues hosted on this node has been initiated").

-spec transfer_leadership_of_classic_mirrored_queues([node()]) -> ok.
+ transfer_leadership_of_classic_mirrored_queues([]) -> + rabbit_log:warning("Skipping leadership transfer of classic mirrored queues: no candidate " + "(online, not under maintenance) nodes to transfer to!"); +transfer_leadership_of_classic_mirrored_queues(TransferCandidates) -> + Queues = rabbit_amqqueue:list_local_mirrored_classic_queues(), + ReadableCandidates = readable_candidate_list(TransferCandidates), + rabbit_log:info("Will transfer leadership of ~b classic mirrored queues hosted on this node to these peer nodes: ~s", + [length(Queues), ReadableCandidates]), + + [begin + Name = amqqueue:get_name(Q), + case random_primary_replica_transfer_candidate_node(TransferCandidates) of + {ok, Pick} -> + rabbit_log:debug("Will transfer leadership of local queue ~s to node ~s", + [rabbit_misc:rs(Name), Pick]), + case rabbit_mirror_queue_misc:transfer_leadership(Q, Pick) of + {migrated, _} -> + rabbit_log:debug("Successfully transferred leadership of queue ~s to node ~s", + [rabbit_misc:rs(Name), Pick]); + Other -> + rabbit_log:warning("Could not transfer leadership of queue ~s to node ~s: ~p", + [rabbit_misc:rs(Name), Pick, Other]) + end; + undefined -> + rabbit_log:warning("Could not transfer leadership of queue ~s: no suitable candidates?", + [Name]) + end + end || Q <- Queues], + rabbit_log:info("Leadership transfer for local classic mirrored queues is complete"). + +-spec stop_local_quorum_queue_followers() -> ok. 
%% Stops the local follower (non-leader) Ra replicas of all quorum
%% queues on this node so that they are not considered during any
%% leader election that happens while the node is drained.
stop_local_quorum_queue_followers() ->
    Queues = rabbit_amqqueue:list_local_followers(),
    rabbit_log:info("Will stop local follower replicas of ~b quorum queues on this node",
                    [length(Queues)]),
    [begin
         Name = amqqueue:get_name(Q),
         rabbit_log:debug("Will stop a local follower replica of quorum queue ~s",
                          [rabbit_misc:rs(Name)]),
         %% shut down Ra nodes so that they are not considered for leader election
         {RegisteredName, _LeaderNode} = amqqueue:get_pid(Q),
         RaNode = {RegisteredName, node()},
         rabbit_log:debug("Will stop Ra server ~p", [RaNode]),
         case ra:stop_server(RaNode) of
             ok ->
                 rabbit_log:debug("Successfully stopped Ra server ~p", [RaNode]);
             {error, nodedown} ->
                 %% BUG FIX: the format string contains a ~p placeholder but
                 %% no argument list was supplied, which would make this log
                 %% call itself fail with badarg.
                 rabbit_log:error("Failed to stop Ra server ~p: target node was reported as down",
                                  [RaNode])
         end
     end || Q <- Queues],
    rabbit_log:info("Stopped all local replicas of quorum queues hosted on this node").

-spec primary_replica_transfer_candidate_nodes() -> [node()].
%% All running cluster peers, minus this node and minus any peer that
%% is itself being drained (consistent read of the status table).
primary_replica_transfer_candidate_nodes() ->
    filter_out_drained_nodes_consistent_read(rabbit_nodes:all_running() -- [node()]).

-spec random_primary_replica_transfer_candidate_node([node()]) -> {ok, node()} | undefined.
random_primary_replica_transfer_candidate_node([]) ->
    undefined;
random_primary_replica_transfer_candidate_node(Candidates) ->
    %% phash2(_, N) yields a value in [0, N), so Nth + 1 is a valid
    %% 1-based index for lists:nth/2.
    Nth = erlang:phash2(erlang:monotonic_time(), length(Candidates)),
    Candidate = lists:nth(Nth + 1, Candidates),
    {ok, Candidate}.
%% Restarts the local Ra replica of every quorum queue this node hosts
%% as a follower; an already-running replica is not an error.
revive_local_quorum_queue_replicas() ->
    Queues = rabbit_amqqueue:list_local_followers(),
    [begin
         Name = amqqueue:get_name(Q),
         rabbit_log:debug("Will trigger a leader election for local quorum queue ~s",
                          [rabbit_misc:rs(Name)]),
         %% start local QQ replica (Ra server) of this queue
         {Prefix, _Node} = amqqueue:get_pid(Q),
         RaServer = {Prefix, node()},
         rabbit_log:debug("Will start Ra server ~p", [RaServer]),
         case ra:restart_server(RaServer) of
             ok ->
                 rabbit_log:debug("Successfully restarted Ra server ~p", [RaServer]);
             {error, {already_started, _Pid}} ->
                 rabbit_log:debug("Ra server ~p is already running", [RaServer]);
             {error, nodedown} ->
                 %% BUG FIX: the format string contains a ~p placeholder but
                 %% no argument list was supplied, which would make this log
                 %% call itself fail with badarg.
                 rabbit_log:error("Failed to restart Ra server ~p: target node was reported as down",
                                  [RaServer])
         end
     end || Q <- Queues],
    rabbit_log:info("Restart of local quorum queue replicas is complete").

%%
%% Implementation
%%

%% Returns a fold fun that applies Fun to the ranch ref of every
%% listener hosted on this node, skipping listeners of remote nodes.
local_listener_fold_fun(Fun) ->
    fun(#listener{node = Node, ip_address = Addr, port = Port}, Acc) when Node =:= node() ->
            RanchRef = rabbit_networking:ranch_ref(Addr, Port),
            [Fun(RanchRef) | Acc];
       (_, Acc) ->
            Acc
    end.

%% Folds a list of results down to 'ok' or the first {error, _} seen.
ok_or_first_error(ok, Acc) ->
    Acc;
ok_or_first_error({error, _} = Err, _Acc) ->
    Err.

%% Human-readable, comma-separated rendering of a node list for logging.
readable_candidate_list(Nodes) ->
    string:join(lists:map(fun rabbit_data_coercion:to_list/1, Nodes), ", ").
diff --git a/deps/rabbit/src/rabbit_memory_monitor.erl b/deps/rabbit/src/rabbit_memory_monitor.erl
new file mode 100644
index 0000000000..5934a97cff
--- /dev/null
+++ b/deps/rabbit/src/rabbit_memory_monitor.erl
@@ -0,0 +1,259 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%


+%% This module handles the node-wide memory statistics.
+%% It receives statistics from all queues, counts the desired +%% queue length (in seconds), and sends this information back to +%% queues. + +-module(rabbit_memory_monitor). + +-behaviour(gen_server2). + +-export([start_link/0, register/2, deregister/1, + report_ram_duration/2, stop/0, conserve_resources/3, memory_use/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(process, {pid, reported, sent, callback, monitor}). + +-record(state, {timer, %% 'internal_update' timer + queue_durations, %% ets #process + queue_duration_sum, %% sum of all queue_durations + queue_duration_count, %% number of elements in sum + desired_duration, %% the desired queue duration + disk_alarm %% disable paging, disk alarm has fired + }). + +-define(SERVER, ?MODULE). +-define(TABLE_NAME, ?MODULE). + +%% If all queues are pushed to disk (duration 0), then the sum of +%% their reported lengths will be 0. If memory then becomes available, +%% unless we manually intervene, the sum will remain 0, and the queues +%% will never get a non-zero duration. Thus when the mem use is < +%% SUM_INC_THRESHOLD, increase the sum artificially by SUM_INC_AMOUNT. +-define(SUM_INC_THRESHOLD, 0.95). +-define(SUM_INC_AMOUNT, 1.0). + +-define(EPSILON, 0.000001). %% less than this and we clamp to 0 + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +-spec start_link() -> rabbit_types:ok_pid_or_error(). + +start_link() -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [], []). + +-spec register(pid(), {atom(),atom(),[any()]}) -> 'ok'. + +register(Pid, MFA = {_M, _F, _A}) -> + gen_server2:call(?SERVER, {register, Pid, MFA}, infinity). + +-spec deregister(pid()) -> 'ok'. + +deregister(Pid) -> + gen_server2:cast(?SERVER, {deregister, Pid}). 
+ +-spec report_ram_duration + (pid(), float() | 'infinity') -> number() | 'infinity'. + +report_ram_duration(Pid, QueueDuration) -> + gen_server2:call(?SERVER, + {report_ram_duration, Pid, QueueDuration}, infinity). + +-spec stop() -> 'ok'. + +stop() -> + gen_server2:cast(?SERVER, stop). + +%% Paging should be enabled/disabled only in response to disk resource alarms +%% for the current node. +conserve_resources(Pid, disk, {_, Conserve, Node}) when node(Pid) =:= Node -> + gen_server2:cast(Pid, {disk_alarm, Conserve}); +conserve_resources(_Pid, _Source, _Conserve) -> + ok. + +memory_use(Type) -> + vm_memory_monitor:get_memory_use(Type). + +%%---------------------------------------------------------------------------- +%% Gen_server callbacks +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, Interval} = application:get_env(rabbit, memory_monitor_interval), + {ok, TRef} = timer:send_interval(Interval, update), + + Ets = ets:new(?TABLE_NAME, [set, private, {keypos, #process.pid}]), + Alarms = rabbit_alarm:register(self(), {?MODULE, conserve_resources, []}), + {ok, internal_update( + #state { timer = TRef, + queue_durations = Ets, + queue_duration_sum = 0.0, + queue_duration_count = 0, + desired_duration = infinity, + disk_alarm = lists:member(disk, Alarms)})}. 
+ +handle_call({report_ram_duration, Pid, QueueDuration}, From, + State = #state { queue_duration_sum = Sum, + queue_duration_count = Count, + queue_durations = Durations, + desired_duration = SendDuration }) -> + + [Proc = #process { reported = PrevQueueDuration }] = + ets:lookup(Durations, Pid), + + gen_server2:reply(From, SendDuration), + + {Sum1, Count1} = + case {PrevQueueDuration, QueueDuration} of + {infinity, infinity} -> {Sum, Count}; + {infinity, _} -> {Sum + QueueDuration, Count + 1}; + {_, infinity} -> {Sum - PrevQueueDuration, Count - 1}; + {_, _} -> {Sum - PrevQueueDuration + QueueDuration, + Count} + end, + true = ets:insert(Durations, Proc #process { reported = QueueDuration, + sent = SendDuration }), + {noreply, State #state { queue_duration_sum = zero_clamp(Sum1), + queue_duration_count = Count1 }}; + +handle_call({register, Pid, MFA}, _From, + State = #state { queue_durations = Durations }) -> + MRef = erlang:monitor(process, Pid), + true = ets:insert(Durations, #process { pid = Pid, reported = infinity, + sent = infinity, callback = MFA, + monitor = MRef }), + {reply, ok, State}; + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast({disk_alarm, Alarm}, State = #state{disk_alarm = Alarm}) -> + {noreply, State}; + +handle_cast({disk_alarm, Alarm}, State) -> + {noreply, internal_update(State#state{disk_alarm = Alarm})}; + +handle_cast({deregister, Pid}, State) -> + {noreply, internal_deregister(Pid, true, State)}; + +handle_cast(stop, State) -> + {stop, normal, State}; + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(update, State) -> + {noreply, internal_update(State)}; + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) -> + {noreply, internal_deregister(Pid, false, State)}; + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, #state { timer = TRef }) -> + timer:cancel(TRef), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
+ + +%%---------------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------------- + +zero_clamp(Sum) when Sum < ?EPSILON -> 0.0; +zero_clamp(Sum) -> Sum. + +internal_deregister(Pid, Demonitor, + State = #state { queue_duration_sum = Sum, + queue_duration_count = Count, + queue_durations = Durations }) -> + case ets:lookup(Durations, Pid) of + [] -> State; + [#process { reported = PrevQueueDuration, monitor = MRef }] -> + true = case Demonitor of + true -> erlang:demonitor(MRef); + false -> true + end, + {Sum1, Count1} = + case PrevQueueDuration of + infinity -> {Sum, Count}; + _ -> {zero_clamp(Sum - PrevQueueDuration), + Count - 1} + end, + true = ets:delete(Durations, Pid), + State #state { queue_duration_sum = Sum1, + queue_duration_count = Count1 } + end. + +internal_update(State = #state{queue_durations = Durations, + desired_duration = DesiredDurationAvg, + disk_alarm = DiskAlarm}) -> + DesiredDurationAvg1 = desired_duration_average(State), + ShouldInform = should_inform_predicate(DiskAlarm), + case ShouldInform(DesiredDurationAvg, DesiredDurationAvg1) of + true -> inform_queues(ShouldInform, DesiredDurationAvg1, Durations); + false -> ok + end, + State#state{desired_duration = DesiredDurationAvg1}. + +desired_duration_average(#state{disk_alarm = true}) -> + infinity; +desired_duration_average(#state{disk_alarm = false, + queue_duration_sum = Sum, + queue_duration_count = Count}) -> + {ok, LimitThreshold} = + application:get_env(rabbit, vm_memory_high_watermark_paging_ratio), + MemoryRatio = memory_use(ratio), + if MemoryRatio =:= infinity -> + 0.0; + MemoryRatio < LimitThreshold orelse Count == 0 -> + infinity; + MemoryRatio < ?SUM_INC_THRESHOLD -> + ((Sum + ?SUM_INC_AMOUNT) / Count) / MemoryRatio; + true -> + (Sum / Count) / MemoryRatio + end. 
+ +inform_queues(ShouldInform, DesiredDurationAvg, Durations) -> + true = + ets:foldl( + fun (Proc = #process{reported = QueueDuration, + sent = PrevSendDuration, + callback = {M, F, A}}, true) -> + case ShouldInform(PrevSendDuration, DesiredDurationAvg) + andalso ShouldInform(QueueDuration, DesiredDurationAvg) of + true -> ok = erlang:apply( + M, F, A ++ [DesiredDurationAvg]), + ets:insert( + Durations, + Proc#process{sent = DesiredDurationAvg}); + false -> true + end + end, true, Durations). + +%% In normal use, we only inform queues immediately if the desired +%% duration has decreased, we want to ensure timely paging. +should_inform_predicate(false) -> fun greater_than/2; +%% When the disk alarm has gone off though, we want to inform queues +%% immediately if the desired duration has *increased* - we want to +%% ensure timely stopping paging. +should_inform_predicate(true) -> fun (D1, D2) -> greater_than(D2, D1) end. + +greater_than(infinity, infinity) -> false; +greater_than(infinity, _D2) -> true; +greater_than(_D1, infinity) -> false; +greater_than(D1, D2) -> D1 > D2. diff --git a/deps/rabbit/src/rabbit_metrics.erl b/deps/rabbit/src/rabbit_metrics.erl new file mode 100644 index 0000000000..10418e3884 --- /dev/null +++ b/deps/rabbit/src/rabbit_metrics.erl @@ -0,0 +1,45 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_metrics). + +-behaviour(gen_server). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-define(SERVER, ?MODULE). + +%%---------------------------------------------------------------------------- +%% Starts the raw metrics storage and owns the ETS tables. 
+%%---------------------------------------------------------------------------- + +-spec start_link() -> rabbit_types:ok_pid_or_error(). + +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +init([]) -> + rabbit_core_metrics:init(), + {ok, none}. + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(_Msg, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl b/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl new file mode 100644 index 0000000000..91a7c3ddc8 --- /dev/null +++ b/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl @@ -0,0 +1,460 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_mirror_queue_coordinator). + +-export([start_link/4, get_gm/1, ensure_monitoring/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3, handle_pre_hibernate/1]). + +-export([joined/2, members_changed/3, handle_msg/3, handle_terminate/2]). + +-behaviour(gen_server2). +-behaviour(gm). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include("amqqueue.hrl"). +-include("gm_specs.hrl"). + +-record(state, { q, + gm, + monitors, + death_fun, + depth_fun + }). + +%%---------------------------------------------------------------------------- +%% +%% Mirror Queues +%% +%% A queue with mirrors consists of the following: +%% +%% #amqqueue{ pid, slave_pids } +%% | | +%% +----------+ +-------+--------------+-----------...etc... +%% | | | +%% V V V +%% amqqueue_process---+ mirror-----+ mirror-----+ ...etc... 
+%% | BQ = master----+ | | BQ = vq | | BQ = vq | +%% | | BQ = vq | | +-+-------+ +-+-------+ +%% | +-+-------+ | | | +%% +-++-----|---------+ | | (some details elided) +%% || | | | +%% || coordinator-+ | | +%% || +-+---------+ | | +%% || | | | +%% || gm-+ -- -- -- -- gm-+- -- -- -- gm-+- -- --...etc... +%% || +--+ +--+ +--+ +%% || +%% consumers +%% +%% The master is merely an implementation of bq, and thus is invoked +%% through the normal bq interface by the amqqueue_process. The mirrors +%% meanwhile are processes in their own right (as is the +%% coordinator). The coordinator and all mirrors belong to the same gm +%% group. Every member of a gm group receives messages sent to the gm +%% group. Because the master is the bq of amqqueue_process, it doesn't +%% have sole control over its mailbox, and as a result, the master +%% itself cannot be passed messages directly (well, it could by via +%% the amqqueue:run_backing_queue callback but that would induce +%% additional unnecessary loading on the master queue process), yet it +%% needs to react to gm events, such as the death of mirrors. Thus the +%% master creates the coordinator, and it is the coordinator that is +%% the gm callback module and event handler for the master. +%% +%% Consumers are only attached to the master. Thus the master is +%% responsible for informing all mirrors when messages are fetched from +%% the bq, when they're acked, and when they're requeued. +%% +%% The basic goal is to ensure that all mirrors performs actions on +%% their bqs in the same order as the master. Thus the master +%% intercepts all events going to its bq, and suitably broadcasts +%% these events on the gm. The mirrors thus receive two streams of +%% events: one stream is via the gm, and one stream is from channels +%% directly. Whilst the stream via gm is guaranteed to be consistently +%% seen by all mirrors , the same is not true of the stream via +%% channels. 
For example, in the event of an unexpected death of a +%% channel during a publish, only some of the mirrors may receive that +%% publish. As a result of this problem, the messages broadcast over +%% the gm contain published content, and thus mirrors can operate +%% successfully on messages that they only receive via the gm. +%% +%% The key purpose of also sending messages directly from the channels +%% to the mirrors is that without this, in the event of the death of +%% the master, messages could be lost until a suitable mirror is +%% promoted. However, that is not the only reason. A mirror cannot send +%% confirms for a message until it has seen it from the +%% channel. Otherwise, it might send a confirm to a channel for a +%% message that it might *never* receive from that channel. This can +%% happen because new mirrors join the gm ring (and thus receive +%% messages from the master) before inserting themselves in the +%% queue's mnesia record (which is what channels look at for routing). +%% As it turns out, channels will simply ignore such bogus confirms, +%% but relying on that would introduce a dangerously tight coupling. +%% +%% Hence the mirrors have to wait until they've seen both the publish +%% via gm, and the publish via the channel before they issue the +%% confirm. Either form of publish can arrive first, and a mirror can +%% be upgraded to the master at any point during this +%% process. Confirms continue to be issued correctly, however. +%% +%% Because the mirror is a full process, it impersonates parts of the +%% amqqueue API. However, it does not need to implement all parts: for +%% example, no ack or consumer-related message can arrive directly at +%% a mirror from a channel: it is only publishes that pass both +%% directly to the mirrors and go via gm. +%% +%% Slaves can be added dynamically. 
When this occurs, there is no +%% attempt made to sync the current contents of the master with the +%% new mirror, thus the mirror will start empty, regardless of the state +%% of the master. Thus the mirror needs to be able to detect and ignore +%% operations which are for messages it has not received: because of +%% the strict FIFO nature of queues in general, this is +%% straightforward - all new publishes that the new mirror receives via +%% gm should be processed as normal, but fetches which are for +%% messages the mirror has never seen should be ignored. Similarly, +%% acks for messages the mirror never fetched should be +%% ignored. Similarly, we don't republish rejected messages that we +%% haven't seen. Eventually, as the master is consumed from, the +%% messages at the head of the queue which were there before the slave +%% joined will disappear, and the mirror will become fully synced with +%% the state of the master. +%% +%% The detection of the sync-status is based on the depth of the BQs, +%% where the depth is defined as the sum of the length of the BQ (as +%% per BQ:len) and the messages pending an acknowledgement. When the +%% depth of the mirror is equal to the master's, then the mirror is +%% synchronised. We only store the difference between the two for +%% simplicity. Comparing the length is not enough since we need to +%% take into account rejected messages which will make it back into +%% the master queue but can't go back in the mirror, since we don't +%% want "holes" in the mirror queue. Note that the depth, and the +%% length likewise, must always be shorter on the mirror - we assert +%% that in various places. In case mirrors are joined to an empty queue +%% which only goes on to receive publishes, they start by asking the +%% master to broadcast its depth. 
This is enough for mirrors to always +%% be able to work out when their head does not differ from the master +%% (and is much simpler and cheaper than getting the master to hang on +%% to the guid of the msg at the head of its queue). When a mirror is +%% promoted to a master, it unilaterally broadcasts its depth, in +%% order to solve the problem of depth requests from new mirrors being +%% unanswered by a dead master. +%% +%% Obviously, due to the async nature of communication across gm, the +%% mirrors can fall behind. This does not matter from a sync pov: if +%% they fall behind and the master dies then a) no publishes are lost +%% because all publishes go to all mirrors anyway; b) the worst that +%% happens is that acks get lost and so messages come back to +%% life. This is no worse than normal given you never get confirmation +%% that an ack has been received (not quite true with QoS-prefetch, +%% but close enough for jazz). +%% +%% Because acktags are issued by the bq independently, and because +%% there is no requirement for the master and all mirrors to use the +%% same bq, all references to msgs going over gm is by msg_id. Thus +%% upon acking, the master must convert the acktags back to msg_ids +%% (which happens to be what bq:ack returns), then sends the msg_ids +%% over gm, the mirrors must convert the msg_ids to acktags (a mapping +%% the mirrors themselves must maintain). +%% +%% When the master dies, a mirror gets promoted. This will be the +%% eldest mirror, and thus the hope is that that mirror is most likely +%% to be sync'd with the master. The design of gm is that the +%% notification of the death of the master will only appear once all +%% messages in-flight from the master have been fully delivered to all +%% members of the gm group. 
Thus at this point, the mirror that gets +%% promoted cannot broadcast different events in a different order +%% than the master for the same msgs: there is no possibility for the +%% same msg to be processed by the old master and the new master - if +%% it was processed by the old master then it will have been processed +%% by the mirror before the mirror was promoted, and vice versa. +%% +%% Upon promotion, all msgs pending acks are requeued as normal, the +%% mirror constructs state suitable for use in the master module, and +%% then dynamically changes into an amqqueue_process with the master +%% as the bq, and the slave's bq as the master's bq. Thus the very +%% same process that was the mirror is now a full amqqueue_process. +%% +%% It is important that we avoid memory leaks due to the death of +%% senders (i.e. channels) and partial publications. A sender +%% publishing a message may fail mid way through the publish and thus +%% only some of the mirrors will receive the message. We need the +%% mirrors to be able to detect this and tidy up as necessary to avoid +%% leaks. If we just had the master monitoring all senders then we +%% would have the possibility that a sender appears and only sends the +%% message to a few of the mirrors before dying. Those mirrors would +%% then hold on to the message, assuming they'll receive some +%% instruction eventually from the master. Thus we have both mirrors +%% and the master monitor all senders they become aware of. But there +%% is a race: if the mirror receives a DOWN of a sender, how does it +%% know whether or not the master is going to send it instructions +%% regarding those messages? +%% +%% Whilst the master monitors senders, it can't access its mailbox +%% directly, so it delegates monitoring to the coordinator. When the +%% coordinator receives a DOWN message from a sender, it informs the +%% master via a callback. 
This allows the master to do any tidying
+%% necessary, but more importantly allows the master to broadcast a
+%% sender_death message to all the mirrors, saying the sender has
+%% died. Once the mirrors receive the sender_death message, they know
+%% that they're not going to receive any more instructions from the gm
+%% regarding that sender. However, it is possible that the coordinator
+%% receives the DOWN and communicates that to the master before the
+%% master has finished receiving and processing publishes from the
+%% sender. This turns out not to be a problem: the sender has actually
+%% died, and so will not need to receive confirms or other feedback,
+%% and should further messages be "received" from the sender, the
+%% master will ask the coordinator to set up a new monitor, and
+%% will continue to process the messages normally. Mirrors may thus
+%% receive publishes via gm from previously declared "dead" senders,
+%% but again, this is fine: should the mirror have just thrown out the
+%% message it had received directly from the sender (due to receiving
+%% a sender_death message via gm), it will be able to cope with the
+%% publication purely from the master via gm.
+%%
+%% When a mirror receives a DOWN message for a sender, if it has not
+%% received the sender_death message from the master via gm already,
+%% then it will wait 20 seconds before broadcasting a request for
+%% confirmation from the master that the sender really has died.
+%% Should a sender have only sent a publish to mirrors, this allows
+%% mirrors to inform the master of the previous existence of the
+%% sender. The master will thus monitor the sender, receive the DOWN,
+%% and subsequently broadcast the sender_death message, allowing the
+%% mirrors to tidy up. This process can repeat for the same sender:
+%% consider one mirror receives the publication, then the DOWN, then
+%% asks for confirmation of death, then the master broadcasts the
+%% sender_death message. 
Only then does another mirror receive the +%% publication and thus set up its monitoring. Eventually that slave +%% too will receive the DOWN, ask for confirmation and the master will +%% monitor the sender again, receive another DOWN, and send out +%% another sender_death message. Given the 20 second delay before +%% requesting death confirmation, this is highly unlikely, but it is a +%% possibility. +%% +%% When the 20 second timer expires, the mirror first checks to see +%% whether it still needs confirmation of the death before requesting +%% it. This prevents unnecessary traffic on gm as it allows one +%% broadcast of the sender_death message to satisfy many mirrors. +%% +%% If we consider the promotion of a mirror at this point, we have two +%% possibilities: that of the mirror that has received the DOWN and is +%% thus waiting for confirmation from the master that the sender +%% really is down; and that of the mirror that has not received the +%% DOWN. In the first case, in the act of promotion to master, the new +%% master will monitor again the dead sender, and after it has +%% finished promoting itself, it should find another DOWN waiting, +%% which it will then broadcast. This will allow mirrors to tidy up as +%% normal. In the second case, we have the possibility that +%% confirmation-of-sender-death request has been broadcast, but that +%% it was broadcast before the master failed, and that the mirror being +%% promoted does not know anything about that sender, and so will not +%% monitor it on promotion. Thus a mirror that broadcasts such a +%% request, at the point of broadcasting it, recurses, setting another +%% 20 second timer. As before, on expiry of the timer, the mirrors +%% checks to see whether it still has not received a sender_death +%% message for the dead sender, and if not, broadcasts a death +%% confirmation request. 
Thus this ensures that even when a master +%% dies and the new mirror has no knowledge of the dead sender, it will +%% eventually receive a death confirmation request, shall monitor the +%% dead sender, receive the DOWN and broadcast the sender_death +%% message. +%% +%% The preceding commentary deals with the possibility of mirrors +%% receiving publications from senders which the master does not, and +%% the need to prevent memory leaks in such scenarios. The inverse is +%% also possible: a partial publication may cause only the master to +%% receive a publication. It will then publish the message via gm. The +%% mirrors will receive it via gm, will publish it to their BQ and will +%% set up monitoring on the sender. They will then receive the DOWN +%% message and the master will eventually publish the corresponding +%% sender_death message. The mirror will then be able to tidy up its +%% state as normal. +%% +%% Recovery of mirrored queues is straightforward: as nodes die, the +%% remaining nodes record this, and eventually a situation is reached +%% in which only one node is alive, which is the master. This is the +%% only node which, upon recovery, will resurrect a mirrored queue: +%% nodes which die and then rejoin as a mirror will start off empty as +%% if they have no mirrored content at all. This is not surprising: to +%% achieve anything more sophisticated would require the master and +%% recovering mirror to be able to check to see whether they agree on +%% the last seen state of the queue: checking depth alone is not +%% sufficient in this case. +%% +%% For more documentation see the comments in bug 23554. +%% +%%---------------------------------------------------------------------------- + +-spec start_link + (amqqueue:amqqueue(), pid() | 'undefined', + rabbit_mirror_queue_master:death_fun(), + rabbit_mirror_queue_master:depth_fun()) -> + rabbit_types:ok_pid_or_error(). 
+ +start_link(Queue, GM, DeathFun, DepthFun) -> + gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, DepthFun], []). + +-spec get_gm(pid()) -> pid(). + +get_gm(CPid) -> + gen_server2:call(CPid, get_gm, infinity). + +-spec ensure_monitoring(pid(), [pid()]) -> 'ok'. + +ensure_monitoring(CPid, Pids) -> + gen_server2:cast(CPid, {ensure_monitoring, Pids}). + +%% --------------------------------------------------------------------------- +%% gen_server +%% --------------------------------------------------------------------------- + +init([Q, GM, DeathFun, DepthFun]) when ?is_amqqueue(Q) -> + QueueName = amqqueue:get_name(Q), + ?store_proc_name(QueueName), + GM1 = case GM of + undefined -> + {ok, GM2} = gm:start_link( + QueueName, ?MODULE, [self()], + fun rabbit_misc:execute_mnesia_transaction/1), + receive {joined, GM2, _Members} -> + ok + end, + GM2; + _ -> + true = link(GM), + GM + end, + {ok, #state { q = Q, + gm = GM1, + monitors = pmon:new(), + death_fun = DeathFun, + depth_fun = DepthFun }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call(get_gm, _From, State = #state { gm = GM }) -> + reply(GM, State). + +handle_cast({gm_deaths, DeadGMPids}, State = #state{q = Q}) when ?amqqueue_pid_runs_on_local_node(Q) -> + QueueName = amqqueue:get_name(Q), + MPid = amqqueue:get_pid(Q), + case rabbit_mirror_queue_misc:remove_from_queue( + QueueName, MPid, DeadGMPids) of + {ok, MPid, DeadPids, ExtraNodes} -> + rabbit_mirror_queue_misc:report_deaths(MPid, true, QueueName, + DeadPids), + rabbit_mirror_queue_misc:add_mirrors(QueueName, ExtraNodes, async), + noreply(State); + {ok, _MPid0, DeadPids, _ExtraNodes} -> + %% see rabbitmq-server#914; + %% Different mirror is now master, stop current coordinator normally. + %% Initiating queue is now mirror and the least we could do is report + %% deaths which we 'think' we saw. + %% NOTE: Reported deaths here, could be inconsistent. 
+ rabbit_mirror_queue_misc:report_deaths(MPid, false, QueueName, + DeadPids), + {stop, shutdown, State}; + {error, not_found} -> + {stop, normal, State}; + {error, {not_synced, _}} -> + rabbit_log:error("Mirror queue ~p in unexpected state." + " Promoted to master but already a master.", + [QueueName]), + error(unexpected_mirrored_state) + end; + +handle_cast(request_depth, State = #state{depth_fun = DepthFun, q = QArg}) when ?is_amqqueue(QArg) -> + QName = amqqueue:get_name(QArg), + MPid = amqqueue:get_pid(QArg), + case rabbit_amqqueue:lookup(QName) of + {ok, QFound} when ?amqqueue_pid_equals(QFound, MPid) -> + ok = DepthFun(), + noreply(State); + _ -> + {stop, shutdown, State} + end; + +handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Mons }) -> + noreply(State #state { monitors = pmon:monitor_all(Pids, Mons) }); + +handle_cast({delete_and_terminate, {shutdown, ring_shutdown}}, State) -> + {stop, normal, State}; +handle_cast({delete_and_terminate, Reason}, State) -> + {stop, Reason, State}. + +handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, + State = #state { monitors = Mons, + death_fun = DeathFun }) -> + noreply(case pmon:is_monitored(Pid, Mons) of + false -> State; + true -> ok = DeathFun(Pid), + State #state { monitors = pmon:erase(Pid, Mons) } + end); + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +terminate(_Reason, #state{}) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +handle_pre_hibernate(State = #state { gm = GM }) -> + %% Since GM notifications of deaths are lazy we might not get a + %% timely notification of mirror death if policy changes when + %% everything is idle. So cause some activity just before we + %% sleep. This won't cause us to go into perpetual motion as the + %% heartbeat does not wake up coordinator or mirrors. + gm:broadcast(GM, hibernate_heartbeat), + {hibernate, State}. 
+ +%% --------------------------------------------------------------------------- +%% GM +%% --------------------------------------------------------------------------- + +joined([CPid], Members) -> + CPid ! {joined, self(), Members}, + ok. + +members_changed([_CPid], _Births, []) -> + ok; +members_changed([CPid], _Births, Deaths) -> + ok = gen_server2:cast(CPid, {gm_deaths, Deaths}). + +handle_msg([CPid], _From, request_depth = Msg) -> + ok = gen_server2:cast(CPid, Msg); +handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) -> + ok = gen_server2:cast(CPid, Msg); +handle_msg([_CPid], _From, {delete_and_terminate, _Reason}) -> + %% We tell GM to stop, but we don't instruct the coordinator to + %% stop yet. The GM will first make sure all pending messages were + %% actually delivered. Then it calls handle_terminate/2 below so the + %% coordinator is stopped. + %% + %% If we stop the coordinator right now, remote mirrors could see the + %% coordinator DOWN before delete_and_terminate was delivered to all + %% GMs. One of those GM would be promoted as the master, and this GM + %% would hang forever, waiting for other GMs to stop. + {stop, {shutdown, ring_shutdown}}; +handle_msg([_CPid], _From, _Msg) -> + ok. + +handle_terminate([CPid], Reason) -> + ok = gen_server2:cast(CPid, {delete_and_terminate, Reason}), + ok. + +%% --------------------------------------------------------------------------- +%% Others +%% --------------------------------------------------------------------------- + +noreply(State) -> + {noreply, State, hibernate}. + +reply(Reply, State) -> + {reply, Reply, State, hibernate}. diff --git a/deps/rabbit/src/rabbit_mirror_queue_master.erl b/deps/rabbit/src/rabbit_mirror_queue_master.erl new file mode 100644 index 0000000000..71146e1ce2 --- /dev/null +++ b/deps/rabbit/src/rabbit_mirror_queue_master.erl @@ -0,0 +1,578 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. 
If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_mirror_queue_master). + +-export([init/3, terminate/2, delete_and_terminate/2, + purge/1, purge_acks/1, publish/6, publish_delivered/5, + batch_publish/4, batch_publish_delivered/4, + discard/4, fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3, + len/1, is_empty/1, depth/1, drain_confirmed/1, + dropwhile/2, fetchwhile/4, set_ram_duration_target/2, ram_duration/1, + needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1, + msg_rates/1, info/2, invoke/3, is_duplicate/2, set_queue_mode/2, + zip_msgs_and_acks/4, handle_info/2]). + +-export([start/2, stop/1, delete_crashed/1]). + +-export([promote_backing_queue_state/8, sender_death_fun/0, depth_fun/0]). + +-export([init_with_existing_bq/3, stop_mirroring/1, sync_mirrors/3]). + +-behaviour(rabbit_backing_queue). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include("amqqueue.hrl"). + +-record(state, { name, + gm, + coordinator, + backing_queue, + backing_queue_state, + seen_status, + confirmed, + known_senders, + wait_timeout + }). + +-export_type([death_fun/0, depth_fun/0, stats_fun/0]). + +-type death_fun() :: fun ((pid()) -> 'ok'). +-type depth_fun() :: fun (() -> 'ok'). +-type stats_fun() :: fun ((any()) -> 'ok'). +-type master_state() :: #state { name :: rabbit_amqqueue:name(), + gm :: pid(), + coordinator :: pid(), + backing_queue :: atom(), + backing_queue_state :: any(), + seen_status :: map(), + confirmed :: [rabbit_guid:guid()], + known_senders :: sets:set() + }. + +%% For general documentation of HA design, see +%% rabbit_mirror_queue_coordinator + +%% --------------------------------------------------------------------------- +%% Backing queue +%% --------------------------------------------------------------------------- + +-spec start(_, _) -> no_return(). 
+start(_Vhost, _DurableQueues) -> + %% This will never get called as this module will never be + %% installed as the default BQ implementation. + exit({not_valid_for_generic_backing_queue, ?MODULE}). + +-spec stop(_) -> no_return(). +stop(_Vhost) -> + %% Same as start/1. + exit({not_valid_for_generic_backing_queue, ?MODULE}). + +-spec delete_crashed(_) -> no_return(). +delete_crashed(_QName) -> + exit({not_valid_for_generic_backing_queue, ?MODULE}). + +init(Q, Recover, AsyncCallback) -> + {ok, BQ} = application:get_env(backing_queue_module), + BQS = BQ:init(Q, Recover, AsyncCallback), + State = #state{gm = GM} = init_with_existing_bq(Q, BQ, BQS), + ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), + State. + +-spec init_with_existing_bq(amqqueue:amqqueue(), atom(), any()) -> + master_state(). + +init_with_existing_bq(Q0, BQ, BQS) when ?is_amqqueue(Q0) -> + QName = amqqueue:get_name(Q0), + case rabbit_mirror_queue_coordinator:start_link( + Q0, undefined, sender_death_fun(), depth_fun()) of + {ok, CPid} -> + GM = rabbit_mirror_queue_coordinator:get_gm(CPid), + Self = self(), + Fun = fun () -> + [Q1] = mnesia:read({rabbit_queue, QName}), + true = amqqueue:is_amqqueue(Q1), + GMPids0 = amqqueue:get_gm_pids(Q1), + GMPids1 = [{GM, Self} | GMPids0], + Q2 = amqqueue:set_gm_pids(Q1, GMPids1), + Q3 = amqqueue:set_state(Q2, live), + %% amqqueue migration: + %% The amqqueue was read from this transaction, no + %% need to handle migration. + ok = rabbit_amqqueue:store_queue(Q3) + end, + ok = rabbit_misc:execute_mnesia_transaction(Fun), + {_MNode, SNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q0), + %% We need synchronous add here (i.e. do not return until the + %% mirror is running) so that when queue declaration is finished + %% all mirrors are up; we don't want to end up with unsynced mirrors + %% just by declaring a new queue. But add can't be synchronous all + %% the time as it can be called by mirrors and that's + %% deadlock-prone. 
+ rabbit_mirror_queue_misc:add_mirrors(QName, SNodes, sync), + #state{name = QName, + gm = GM, + coordinator = CPid, + backing_queue = BQ, + backing_queue_state = BQS, + seen_status = #{}, + confirmed = [], + known_senders = sets:new(), + wait_timeout = rabbit_misc:get_env(rabbit, slave_wait_timeout, 15000)}; + {error, Reason} -> + %% The GM can shutdown before the coordinator has started up + %% (lost membership or missing group), thus the start_link of + %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process + % is trapping exists + throw({coordinator_not_started, Reason}) + end. + +-spec stop_mirroring(master_state()) -> {atom(), any()}. + +stop_mirroring(State = #state { coordinator = CPid, + backing_queue = BQ, + backing_queue_state = BQS }) -> + unlink(CPid), + stop_all_slaves(shutdown, State), + {BQ, BQS}. + +-spec sync_mirrors(stats_fun(), stats_fun(), master_state()) -> + {'ok', master_state()} | {stop, any(), master_state()}. + +sync_mirrors(HandleInfo, EmitStats, + State = #state { name = QName, + gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + Log = fun (Fmt, Params) -> + rabbit_mirror_queue_misc:log_info( + QName, "Synchronising: " ++ Fmt ++ "~n", Params) + end, + Log("~p messages to synchronise", [BQ:len(BQS)]), + {ok, Q} = rabbit_amqqueue:lookup(QName), + SPids = amqqueue:get_slave_pids(Q), + SyncBatchSize = rabbit_mirror_queue_misc:sync_batch_size(Q), + Log("batch size: ~p", [SyncBatchSize]), + Ref = make_ref(), + Syncer = rabbit_mirror_queue_sync:master_prepare(Ref, QName, Log, SPids), + gm:broadcast(GM, {sync_start, Ref, Syncer, SPids}), + S = fun(BQSN) -> State#state{backing_queue_state = BQSN} end, + case rabbit_mirror_queue_sync:master_go( + Syncer, Ref, Log, HandleInfo, EmitStats, SyncBatchSize, BQ, BQS) of + {cancelled, BQS1} -> Log(" synchronisation cancelled ", []), + {ok, S(BQS1)}; + {shutdown, R, BQS1} -> {stop, R, S(BQS1)}; + {sync_died, R, BQS1} -> Log("~p", [R]), + {ok, S(BQS1)}; + {already_synced, 
BQS1} -> {ok, S(BQS1)}; + {ok, BQS1} -> Log("complete", []), + {ok, S(BQS1)} + end. + +terminate({shutdown, dropped} = Reason, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + %% Backing queue termination - this node has been explicitly + %% dropped. Normally, non-durable queues would be tidied up on + %% startup, but there's a possibility that we will be added back + %% in without this node being restarted. Thus we must do the full + %% blown delete_and_terminate now, but only locally: we do not + %% broadcast delete_and_terminate. + State#state{backing_queue_state = BQ:delete_and_terminate(Reason, BQS)}; + +terminate(Reason, + State = #state { name = QName, + backing_queue = BQ, + backing_queue_state = BQS }) -> + %% Backing queue termination. The queue is going down but + %% shouldn't be deleted. Most likely safe shutdown of this + %% node. + {ok, Q} = rabbit_amqqueue:lookup(QName), + SSPids = amqqueue:get_sync_slave_pids(Q), + case SSPids =:= [] andalso + rabbit_policy:get(<<"ha-promote-on-shutdown">>, Q) =/= <<"always">> of + true -> %% Remove the whole queue to avoid data loss + rabbit_mirror_queue_misc:log_warning( + QName, "Stopping all nodes on master shutdown since no " + "synchronised mirror (replica) is available~n", []), + stop_all_slaves(Reason, State); + false -> %% Just let some other mirror take over. + ok + end, + State #state { backing_queue_state = BQ:terminate(Reason, BQS) }. + +delete_and_terminate(Reason, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + stop_all_slaves(Reason, State), + State#state{backing_queue_state = BQ:delete_and_terminate(Reason, BQS)}. + +stop_all_slaves(Reason, #state{name = QName, gm = GM, wait_timeout = WT}) -> + {ok, Q} = rabbit_amqqueue:lookup(QName), + SPids = amqqueue:get_slave_pids(Q), + rabbit_mirror_queue_misc:stop_all_slaves(Reason, SPids, QName, GM, WT). 
+ +purge(State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {drop, 0, BQ:len(BQS), false}), + {Count, BQS1} = BQ:purge(BQS), + {Count, State #state { backing_queue_state = BQS1 }}. + +-spec purge_acks(_) -> no_return(). +purge_acks(_State) -> exit({not_implemented, {?MODULE, purge_acks}}). + +publish(Msg = #basic_message { id = MsgId }, MsgProps, IsDelivered, ChPid, Flow, + State = #state { gm = GM, + seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS }) -> + false = maps:is_key(MsgId, SS), %% ASSERTION + ok = gm:broadcast(GM, {publish, ChPid, Flow, MsgProps, Msg}, + rabbit_basic:msg_size(Msg)), + BQS1 = BQ:publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQS), + ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). + +batch_publish(Publishes, ChPid, Flow, + State = #state { gm = GM, + seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS }) -> + {Publishes1, false, MsgSizes} = + lists:foldl(fun ({Msg = #basic_message { id = MsgId }, + MsgProps, _IsDelivered}, {Pubs, false, Sizes}) -> + {[{Msg, MsgProps, true} | Pubs], %% [0] + false = maps:is_key(MsgId, SS), %% ASSERTION + Sizes + rabbit_basic:msg_size(Msg)} + end, {[], false, 0}, Publishes), + Publishes2 = lists:reverse(Publishes1), + ok = gm:broadcast(GM, {batch_publish, ChPid, Flow, Publishes2}, + MsgSizes), + BQS1 = BQ:batch_publish(Publishes2, ChPid, Flow, BQS), + ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). +%% [0] When the mirror process handles the publish command, it sets the +%% IsDelivered flag to true, so to avoid iterating over the messages +%% again at the mirror, we do it here. 
+ +publish_delivered(Msg = #basic_message { id = MsgId }, MsgProps, + ChPid, Flow, State = #state { gm = GM, + seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS }) -> + false = maps:is_key(MsgId, SS), %% ASSERTION + ok = gm:broadcast(GM, {publish_delivered, ChPid, Flow, MsgProps, Msg}, + rabbit_basic:msg_size(Msg)), + {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, Flow, BQS), + State1 = State #state { backing_queue_state = BQS1 }, + {AckTag, ensure_monitoring(ChPid, State1)}. + +batch_publish_delivered(Publishes, ChPid, Flow, + State = #state { gm = GM, + seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS }) -> + {false, MsgSizes} = + lists:foldl(fun ({Msg = #basic_message { id = MsgId }, _MsgProps}, + {false, Sizes}) -> + {false = maps:is_key(MsgId, SS), %% ASSERTION + Sizes + rabbit_basic:msg_size(Msg)} + end, {false, 0}, Publishes), + ok = gm:broadcast(GM, {batch_publish_delivered, ChPid, Flow, Publishes}, + MsgSizes), + {AckTags, BQS1} = BQ:batch_publish_delivered(Publishes, ChPid, Flow, BQS), + State1 = State #state { backing_queue_state = BQS1 }, + {AckTags, ensure_monitoring(ChPid, State1)}. + +discard(MsgId, ChPid, Flow, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + seen_status = SS }) -> + false = maps:is_key(MsgId, SS), %% ASSERTION + ok = gm:broadcast(GM, {discard, ChPid, Flow, MsgId}), + ensure_monitoring(ChPid, + State #state { backing_queue_state = + BQ:discard(MsgId, ChPid, Flow, BQS) }). + +dropwhile(Pred, State = #state{backing_queue = BQ, + backing_queue_state = BQS }) -> + Len = BQ:len(BQS), + {Next, BQS1} = BQ:dropwhile(Pred, BQS), + {Next, drop(Len, false, State #state { backing_queue_state = BQS1 })}. + +fetchwhile(Pred, Fun, Acc, State = #state{backing_queue = BQ, + backing_queue_state = BQS }) -> + Len = BQ:len(BQS), + {Next, Acc1, BQS1} = BQ:fetchwhile(Pred, Fun, Acc, BQS), + {Next, Acc1, drop(Len, true, State #state { backing_queue_state = BQS1 })}. 
+ +drain_confirmed(State = #state { backing_queue = BQ, + backing_queue_state = BQS, + seen_status = SS, + confirmed = Confirmed }) -> + {MsgIds, BQS1} = BQ:drain_confirmed(BQS), + {MsgIds1, SS1} = + lists:foldl( + fun (MsgId, {MsgIdsN, SSN}) -> + %% We will never see 'discarded' here + case maps:find(MsgId, SSN) of + error -> + {[MsgId | MsgIdsN], SSN}; + {ok, published} -> + %% It was published when we were a mirror, + %% and we were promoted before we saw the + %% publish from the channel. We still + %% haven't seen the channel publish, and + %% consequently we need to filter out the + %% confirm here. We will issue the confirm + %% when we see the publish from the channel. + {MsgIdsN, maps:put(MsgId, confirmed, SSN)}; + {ok, confirmed} -> + %% Well, confirms are racy by definition. + {[MsgId | MsgIdsN], SSN} + end + end, {[], SS}, MsgIds), + {Confirmed ++ MsgIds1, State #state { backing_queue_state = BQS1, + seen_status = SS1, + confirmed = [] }}. + +fetch(AckRequired, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {Result, BQS1} = BQ:fetch(AckRequired, BQS), + State1 = State #state { backing_queue_state = BQS1 }, + {Result, case Result of + empty -> State1; + {_MsgId, _IsDelivered, _AckTag} -> drop_one(AckRequired, State1) + end}. + +drop(AckRequired, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {Result, BQS1} = BQ:drop(AckRequired, BQS), + State1 = State #state { backing_queue_state = BQS1 }, + {Result, case Result of + empty -> State1; + {_MsgId, _AckTag} -> drop_one(AckRequired, State1) + end}. + +ack(AckTags, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + {MsgIds, BQS1} = BQ:ack(AckTags, BQS), + case MsgIds of + [] -> ok; + _ -> ok = gm:broadcast(GM, {ack, MsgIds}) + end, + {MsgIds, State #state { backing_queue_state = BQS1 }}. 
%% Requeue previously fetched messages and broadcast the requeued ids
%% to the mirrors.
requeue(AckTags, State = #state{gm                  = GM,
                                backing_queue       = BQMod,
                                backing_queue_state = BQState}) ->
    {MsgIds, BQState1} = BQMod:requeue(AckTags, BQState),
    ok = gm:broadcast(GM, {requeue, MsgIds}),
    {MsgIds, State#state{backing_queue_state = BQState1}}.

%% The functions below are straight delegations to the backing queue
%% module; no GM traffic is involved.

ackfold(MsgFun, Acc, State = #state{backing_queue       = BQMod,
                                    backing_queue_state = BQState}, AckTags) ->
    {Acc1, BQState1} = BQMod:ackfold(MsgFun, Acc, BQState, AckTags),
    {Acc1, State#state{backing_queue_state = BQState1}}.

fold(Fun, Acc, State = #state{backing_queue       = BQMod,
                              backing_queue_state = BQState}) ->
    {Result, BQState1} = BQMod:fold(Fun, Acc, BQState),
    {Result, State#state{backing_queue_state = BQState1}}.

len(#state{backing_queue = BQMod, backing_queue_state = BQState}) ->
    BQMod:len(BQState).

is_empty(#state{backing_queue = BQMod, backing_queue_state = BQState}) ->
    BQMod:is_empty(BQState).

depth(#state{backing_queue = BQMod, backing_queue_state = BQState}) ->
    BQMod:depth(BQState).

set_ram_duration_target(Target,
                        State = #state{backing_queue       = BQMod,
                                       backing_queue_state = BQState}) ->
    State#state{backing_queue_state =
                    BQMod:set_ram_duration_target(Target, BQState)}.

ram_duration(State = #state{backing_queue       = BQMod,
                            backing_queue_state = BQState}) ->
    {Result, BQState1} = BQMod:ram_duration(BQState),
    {Result, State#state{backing_queue_state = BQState1}}.

needs_timeout(#state{backing_queue = BQMod, backing_queue_state = BQState}) ->
    BQMod:needs_timeout(BQState).

timeout(State = #state{backing_queue       = BQMod,
                       backing_queue_state = BQState}) ->
    State#state{backing_queue_state = BQMod:timeout(BQState)}.

handle_pre_hibernate(State = #state{backing_queue       = BQMod,
                                    backing_queue_state = BQState}) ->
    State#state{backing_queue_state = BQMod:handle_pre_hibernate(BQState)}.

handle_info(Msg, State = #state{backing_queue       = BQMod,
                                backing_queue_state = BQState}) ->
    State#state{backing_queue_state = BQMod:handle_info(Msg, BQState)}.

resume(State = #state{backing_queue       = BQMod,
                      backing_queue_state = BQState}) ->
    State#state{backing_queue_state = BQMod:resume(BQState)}.
msg_rates(#state{backing_queue = BQ, backing_queue_state = BQS}) ->
    BQ:msg_rates(BQS).

%% backing_queue_status is augmented with mirror-specific counters;
%% every other info item is delegated untouched.
info(backing_queue_status,
     State = #state{backing_queue = BQ, backing_queue_state = BQS}) ->
    BQ:info(backing_queue_status, BQS) ++
        [{mirror_seen,    maps:size(State#state.seen_status)},
         {mirror_senders, sets:size(State#state.known_senders)}];
info(Item, #state{backing_queue = BQ, backing_queue_state = BQS}) ->
    BQ:info(Item, BQS).

invoke(?MODULE, Fun, State) ->
    Fun(?MODULE, State);
invoke(Mod, Fun, State = #state{backing_queue       = BQ,
                                backing_queue_state = BQS}) ->
    State#state{backing_queue_state = BQ:invoke(Mod, Fun, BQS)}.

%% Deal with the possibility that we are about to receive, via the
%% channel, a message we already saw while we were a mirror (it came
%% to us via gm). If so, a confirm may be waiting to be issued for it.
%%
%% We will never see {published, ChPid, MsgSeqNo} here.
is_duplicate(Message = #basic_message{id = MsgId},
             State = #state{seen_status         = SS,
                            backing_queue       = BQ,
                            backing_queue_state = BQS,
                            confirmed           = Confirmed}) ->
    case maps:find(MsgId, SS) of
        error ->
            %% Not previously seen: we permit the underlying BQ to have
            %% a peek at it, but only because we ourselves are not
            %% filtering the message out.
            {Result, BQS1} = BQ:is_duplicate(Message, BQS),
            {Result, State#state{backing_queue_state = BQS1}};
        {ok, published} ->
            %% It already got published when we were a mirror and no
            %% confirmation is waiting. amqqueue_process will have, in
            %% its msg_id_to_channel mapping, the entry for dealing
            %% with the confirm when that comes back in (it's added
            %% immediately after calling is_duplicate). The msg is
            %% invalid. We will not see this again, nor will we be
            %% further involved in confirming this message, so erase.
            {{true, drop}, State#state{seen_status = maps:remove(MsgId, SS)}};
        {ok, Disposition} when Disposition =:= confirmed orelse
                               Disposition =:= discarded ->
            %% confirmed: published to us via gm while we were a
            %% mirror and confirmed some time after that (maybe even
            %% after promotion), but before we received the publish
            %% from the channel, so we couldn't previously know the
            %% msg_seq_no (and thus confirm as a mirror). Confirm now;
            %% as above, amqqueue_process will have added the
            %% msg_id_to_channel entry immediately after calling
            %% is_duplicate/2.
            %%
            %% discarded: the message was discarded while we were a
            %% mirror; confirm now, as above.
            {{true, drop}, State#state{seen_status = maps:remove(MsgId, SS),
                                       confirmed   = [MsgId | Confirmed]}}
    end.

%% Switch the queue mode locally and on every mirror.
set_queue_mode(Mode, State = #state{gm                  = GM,
                                    backing_queue       = BQ,
                                    backing_queue_state = BQS}) ->
    ok = gm:broadcast(GM, {set_queue_mode, Mode}),
    State#state{backing_queue_state = BQ:set_queue_mode(Mode, BQS)}.

zip_msgs_and_acks(Msgs, AckTags, Accumulator,
                  #state{backing_queue = BQ, backing_queue_state = BQS}) ->
    BQ:zip_msgs_and_acks(Msgs, AckTags, Accumulator, BQS).

%% ---------------------------------------------------------------------------
%% Other exported functions
%% ---------------------------------------------------------------------------

-spec promote_backing_queue_state
        (rabbit_amqqueue:name(), pid(), atom(), any(), pid(), [any()],
         map(), [pid()]) ->
            master_state().
%% Build the master's #state{} after promotion from mirror. All
%% pending acks are requeued first; afterwards the queue length must
%% equal its depth, i.e. nothing may remain pending.
promote_backing_queue_state(QName, CPid, BQ, BQS, GM, AckTags, Seen, KS) ->
    {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS),
    Len   = BQ:len(BQS1),
    Depth = BQ:depth(BQS1),
    true = Len == Depth, %% ASSERTION: everything must have been requeued
    ok = gm:broadcast(GM, {depth, Depth}),
    WaitTimeout = rabbit_misc:get_env(rabbit, slave_wait_timeout, 15000),
    #state{name                = QName,
           gm                  = GM,
           coordinator         = CPid,
           backing_queue       = BQ,
           backing_queue_state = BQS1,
           seen_status         = Seen,
           confirmed           = [],
           known_senders       = sets:from_list(KS),
           wait_timeout        = WaitTimeout}.

-spec sender_death_fun() -> death_fun().

%% Closure that, run from any process, asks this queue process to
%% broadcast a sender death and forget the dead channel pid.
sender_death_fun() ->
    Self = self(),
    fun (DeadPid) ->
            rabbit_amqqueue:run_backing_queue(
              Self, ?MODULE,
              fun (?MODULE, State = #state{gm = GM, known_senders = KS}) ->
                      ok = gm:broadcast(GM, {sender_death, DeadPid}),
                      State#state{known_senders =
                                      sets:del_element(DeadPid, KS)}
              end)
    end.

-spec depth_fun() -> depth_fun().

%% Closure that makes this queue process re-broadcast its current
%% depth to the mirrors.
depth_fun() ->
    Self = self(),
    fun () ->
            rabbit_amqqueue:run_backing_queue(
              Self, ?MODULE,
              fun (?MODULE, State = #state{gm                  = GM,
                                           backing_queue       = BQ,
                                           backing_queue_state = BQS}) ->
                      ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}),
                      State
              end)
    end.

%% ---------------------------------------------------------------------------
%% Helpers
%% ---------------------------------------------------------------------------

%% Tell the mirrors that a single message was dropped/fetched, at the
%% current (post-removal) length.
drop_one(AckRequired, State = #state{gm                  = GM,
                                     backing_queue       = BQ,
                                     backing_queue_state = BQS}) ->
    ok = gm:broadcast(GM, {drop, BQ:len(BQS), 1, AckRequired}),
    State.

%% Tell the mirrors how many messages disappeared since PrevLen, if
%% any did.
drop(PrevLen, AckRequired, State = #state{gm                  = GM,
                                          backing_queue       = BQ,
                                          backing_queue_state = BQS}) ->
    Len = BQ:len(BQS),
    case PrevLen - Len of
        0       -> State;
        Dropped -> ok = gm:broadcast(GM, {drop, Len, Dropped, AckRequired}),
                   State
    end.
+ +ensure_monitoring(ChPid, State = #state { coordinator = CPid, + known_senders = KS }) -> + case sets:is_element(ChPid, KS) of + true -> State; + false -> ok = rabbit_mirror_queue_coordinator:ensure_monitoring( + CPid, [ChPid]), + State #state { known_senders = sets:add_element(ChPid, KS) } + end. diff --git a/deps/rabbit/src/rabbit_mirror_queue_misc.erl b/deps/rabbit/src/rabbit_mirror_queue_misc.erl new file mode 100644 index 0000000000..02f590e2fb --- /dev/null +++ b/deps/rabbit/src/rabbit_mirror_queue_misc.erl @@ -0,0 +1,680 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_mirror_queue_misc). +-behaviour(rabbit_policy_validator). + +-export([remove_from_queue/3, on_vhost_up/1, add_mirrors/3, + report_deaths/4, store_updated_slaves/1, + initial_queue_node/2, suggested_queue_nodes/1, actual_queue_nodes/1, + is_mirrored/1, is_mirrored_ha_nodes/1, + update_mirrors/2, update_mirrors/1, validate_policy/1, + maybe_auto_sync/1, maybe_drop_master_after_sync/1, + sync_batch_size/1, log_info/3, log_warning/3]). +-export([stop_all_slaves/5]). + +-export([sync_queue/1, cancel_sync_queue/1]). + +-export([transfer_leadership/2, queue_length/1, get_replicas/1]). + +%% for testing only +-export([module/1]). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include("amqqueue.hrl"). + +-define(HA_NODES_MODULE, rabbit_mirror_queue_mode_nodes). 
-rabbit_boot_step(
   {?MODULE,
    [{description, "HA policy validation"},
     {mfa, {rabbit_registry, register,
            [policy_validator, <<"ha-mode">>, ?MODULE]}},
     {mfa, {rabbit_registry, register,
            [policy_validator, <<"ha-params">>, ?MODULE]}},
     {mfa, {rabbit_registry, register,
            [policy_validator, <<"ha-sync-mode">>, ?MODULE]}},
     {mfa, {rabbit_registry, register,
            [policy_validator, <<"ha-sync-batch-size">>, ?MODULE]}},
     {mfa, {rabbit_registry, register,
            [policy_validator, <<"ha-promote-on-shutdown">>, ?MODULE]}},
     {mfa, {rabbit_registry, register,
            [policy_validator, <<"ha-promote-on-failure">>, ?MODULE]}},
     {requires, rabbit_registry},
     {enables, recovery}]}).


%%----------------------------------------------------------------------------

%% Returns {ok, NewMPid, DeadPids, ExtraNodes}

-spec remove_from_queue
        (rabbit_amqqueue:name(), pid(), [pid()]) ->
            {'ok', pid(), [pid()], [node()]} | {'error', 'not_found'} |
            {'error', {'not_synced', [pid()]}}.

%% Remove the dead GM members from the queue's mnesia record,
%% promoting a new master when needed, and report any extra nodes that
%% now require mirrors.
remove_from_queue(QueueName, Self, DeadGMPids) ->
    rabbit_misc:execute_mnesia_transaction(
      fun () ->
              %% Someone else could have deleted the queue before we
              %% get here. Or, the gm group could have altered. See
              %% rabbitmq-server#914.
              case mnesia:read({rabbit_queue, QueueName}) of
                  [] ->
                      {error, not_found};
                  [Q0] when ?is_amqqueue(Q0) ->
                      QPid      = amqqueue:get_pid(Q0),
                      SPids     = amqqueue:get_slave_pids(Q0),
                      SyncSPids = amqqueue:get_sync_slave_pids(Q0),
                      GMPids    = amqqueue:get_gm_pids(Q0),
                      {DeadGM, AliveGM} =
                          lists:partition(
                            fun ({GM, _}) ->
                                    lists:member(GM, DeadGMPids)
                            end, GMPids),
                      DeadPids  = [Pid || {_GM, Pid} <- DeadGM],
                      AlivePids = [Pid || {_GM, Pid} <- AliveGM],
                      Alive     = [Pid || Pid <- [QPid | SPids],
                                          lists:member(Pid, AlivePids)],
                      {QPid1, SPids1} =
                          case Alive of
                              [] ->
                                  %% GM altered, and every pid is
                                  %% perceived as dead: rather do
                                  %% nothing here, and trust the
                                  %% promoted mirror to have updated
                                  %% mnesia during the alteration.
                                  {QPid, SPids};
                              _ ->
                                  promote_slave(Alive)
                          end,
                      DoNotPromote =
                          SyncSPids =:= [] andalso
                          rabbit_policy:get(<<"ha-promote-on-failure">>, Q0)
                              =:= <<"when-synced">>,
                      case {{QPid, SPids}, {QPid1, SPids1}} of
                          {Same, Same} ->
                              {ok, QPid1, DeadPids, []};
                          _ when QPid1 =/= QPid andalso QPid1 =:= Self
                                 andalso DoNotPromote =:= true ->
                              %% We have been promoted to master but
                              %% there are no synchronised mirrors, so
                              %% this node is not synchronised either.
                              %% Bailing out.
                              {error, {not_synced, SPids1}};
                          _ when QPid =:= QPid1 orelse QPid1 =:= Self ->
                              %% Either the master hasn't changed, so
                              %% we're ok to update mnesia; or we have
                              %% become the master. If gm altered, we
                              %% have no choice but to proceed.
                              Q1 = amqqueue:set_pid(Q0, QPid1),
                              Q2 = amqqueue:set_slave_pids(Q1, SPids1),
                              Q3 = amqqueue:set_gm_pids(Q2, AliveGM),
                              store_updated_slaves(Q3),
                              %% If we add and remove nodes at the same
                              %% time we might tell the old master we
                              %% need to sync and then shut it down, so
                              %% check whether the new master needs to
                              %% sync.
                              maybe_auto_sync(Q3),
                              {ok, QPid1, DeadPids,
                               slaves_to_start_on_failure(Q3, DeadGMPids)};
                          _ ->
                              %% Master has changed, and we're not it.
                              %% [1].
                              Q1 = amqqueue:set_slave_pids(Q0, Alive),
                              Q2 = amqqueue:set_gm_pids(Q1, AliveGM),
                              store_updated_slaves(Q2),
                              {ok, QPid1, DeadPids, []}
                      end
              end
      end).
%% [1] We still update mnesia here in case the mirror that is supposed
%% to become master dies before it does so, in which case the dead
%% old master might otherwise never get removed, which in turn might
%% prevent promotion of another mirror (e.g. us).
%%
%% Note however that we do not update the master pid. Otherwise we can
%% have the situation where a mirror updates the mnesia record for a
%% queue, promoting another mirror before that mirror realises it has
%% become the new master, which is bad because it could then mean the
%% mirror (now master) receives messages it's not ready for (for
%% example, new consumers).
%%
%% We set slave_pids to Alive rather than SPids1 since otherwise we'd
%% be removing the pid of the candidate master, which in turn would
%% prevent it from promoting itself.
%%
%% We maintain gm_pids as our source of truth, i.e. it contains the
%% most up-to-date information about which GMs and associated
%% {M,S}Pids are alive. And all pids in slave_pids always have a
%% corresponding entry in gm_pids. By contrast, due to the
%% aforementioned restriction on updating the master pid, that pid may
%% not be present in gm_pids, but only if said master has died.

%% Sometimes a mirror dying means we need to start more on other
%% nodes - "exactly" mode can cause this to happen.
slaves_to_start_on_failure(Q, DeadGMPids) ->
    %% In case Mnesia has not caught up yet, filter out nodes we know
    %% to be dead.
    ClusterNodes = rabbit_nodes:all_running() --
        [node(P) || P <- DeadGMPids],
    {_, OldNodes, _} = actual_queue_nodes(Q),
    {_, NewNodes} = suggested_queue_nodes(Q, ClusterNodes),
    NewNodes -- OldNodes.
%% When a vhost comes up, start a mirror on this node for every
%% mirrored classic queue whose policy says this node should host one.
on_vhost_up(VHost) ->
    QNames =
        rabbit_misc:execute_mnesia_transaction(
          fun () ->
                  mnesia:foldl(
                    fun (Q, Acc) when not ?amqqueue_vhost_equals(Q, VHost) ->
                            Acc;
                        (Q, Acc) when ?amqqueue_is_classic(Q) ->
                            QName = amqqueue:get_name(Q),
                            Pid   = amqqueue:get_pid(Q),
                            SPids = amqqueue:get_slave_pids(Q),
                            %% We don't want to pass in the whole
                            %% cluster - we don't want a situation
                            %% where starting one node causes us to
                            %% decide to start a mirror on another.
                            Candidates0 = [node(P) || P <- [Pid | SPids]],
                            Candidates =
                                case lists:member(node(), Candidates0) of
                                    true  -> Candidates0;
                                    false -> [node() | Candidates0]
                                end,
                            {_MNode, SNodes} =
                                suggested_queue_nodes(Q, Candidates),
                            case lists:member(node(), SNodes) of
                                true  -> [QName | Acc];
                                false -> Acc
                            end;
                        (_Q, Acc) ->
                            Acc
                    end, [], rabbit_queue)
          end),
    [add_mirror(QName, node(), async) || QName <- QNames],
    ok.

drop_mirrors(QName, Nodes) ->
    [drop_mirror(QName, Node) || Node <- Nodes],
    ok.

%% Stop the copy of QName hosted on MirrorNode, unless it is the last
%% copy of the queue.
drop_mirror(QName, MirrorNode) ->
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q} when ?is_amqqueue(Q) ->
            Name  = amqqueue:get_name(Q),
            QPid  = amqqueue:get_pid(Q),
            SPids = amqqueue:get_slave_pids(Q),
            case [Pid || Pid <- [QPid | SPids], node(Pid) =:= MirrorNode] of
                [] ->
                    {error, {queue_not_mirrored_on_node, MirrorNode}};
                [QPid] when SPids =:= [] ->
                    %% Never drop the only remaining copy.
                    {error, cannot_drop_only_mirror};
                [Pid] ->
                    log_info(Name, "Dropping queue mirror on node ~p~n",
                             [MirrorNode]),
                    exit(Pid, {shutdown, dropped}),
                    {ok, dropped}
            end;
        {error, not_found} = E ->
            E
    end.

-spec add_mirrors(rabbit_amqqueue:name(), [node()], 'sync' | 'async') ->
          'ok'.

add_mirrors(QName, Nodes, SyncMode) ->
    [add_mirror(QName, Node, SyncMode) || Node <- Nodes],
    ok.
%% Start a mirror of QName on MirrorNode. Failures (missing vhost
%% supervisor, dead target node, ...) are logged and swallowed:
%% starting a mirror is best-effort and must not take the caller down.
add_mirror(QName, MirrorNode, SyncMode) ->
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q} ->
            rabbit_misc:with_exit_handler(
              rabbit_misc:const(ok),
              fun () ->
                      #resource{virtual_host = VHost} = amqqueue:get_name(Q),
                      case rabbit_vhost_sup_sup:get_vhost_sup(VHost,
                                                              MirrorNode) of
                          {ok, _} ->
                              try
                                  SPid =
                                      rabbit_amqqueue_sup_sup:start_queue_process(
                                        MirrorNode, Q, slave),
                                  log_info(QName,
                                           "Adding mirror on node ~p: ~p~n",
                                           [MirrorNode, SPid]),
                                  rabbit_mirror_queue_slave:go(SPid, SyncMode)
                              of
                                  _ -> ok
                              catch
                                  error:QError ->
                                      log_warning(
                                        QName,
                                        "Unable to start queue mirror on node '~p'. "
                                        "Target queue supervisor is not running: ~p~n",
                                        [MirrorNode, QError])
                              end;
                          {error, Error} ->
                              log_warning(
                                QName,
                                "Unable to start queue mirror on node '~p'. "
                                "Target virtual host is not running: ~p~n",
                                [MirrorNode, Error]),
                              ok
                      end
              end);
        {error, not_found} = E ->
            E
    end.

%% Log which mirrors a master/mirror saw die; a no-op when no pids
%% died.
report_deaths(_MirrorPid, _IsMaster, _QueueName, []) ->
    ok;
report_deaths(MirrorPid, IsMaster, QueueName, DeadPids) ->
    log_info(QueueName, "~s ~s saw deaths of mirrors~s~n",
             [case IsMaster of
                  true  -> "Master";
                  false -> "Slave"
              end,
              rabbit_misc:pid_to_string(MirrorPid),
              [[$ , rabbit_misc:pid_to_string(P)] || P <- DeadPids]]).

-spec log_info(rabbit_amqqueue:name(), string(), [any()]) -> 'ok'.

log_info(QName, Fmt, Args) ->
    rabbit_log_mirroring:info("Mirrored ~s: " ++ Fmt,
                              [rabbit_misc:rs(QName) | Args]).

-spec log_warning(rabbit_amqqueue:name(), string(), [any()]) -> 'ok'.

log_warning(QName, Fmt, Args) ->
    rabbit_log_mirroring:warning("Mirrored ~s: " ++ Fmt,
                                 [rabbit_misc:rs(QName) | Args]).

-spec store_updated_slaves(amqqueue:amqqueue()) ->
          amqqueue:amqqueue().
%% Normalise and persist the queue record after a change to its mirror
%% pids: drop stale sync pids, refresh recoverable nodes, mark the
%% queue live, store it and notify so a stats event is emitted.
store_updated_slaves(Q0) when ?is_amqqueue(Q0) ->
    SPids  = amqqueue:get_slave_pids(Q0),
    SSPids = amqqueue:get_sync_slave_pids(Q0),
    RS0    = amqqueue:get_recoverable_slaves(Q0),
    %% TODO now that we clear sync_slave_pids in rabbit_durable_queue,
    %% do we still need this filtering?
    SSPids1 = [SSPid || SSPid <- SSPids, lists:member(SSPid, SPids)],
    Q1 = amqqueue:set_sync_slave_pids(Q0, SSPids1),
    RS1 = update_recoverable(SPids, RS0),
    Q2 = amqqueue:set_recoverable_slaves(Q1, RS1),
    Q3 = amqqueue:set_state(Q2, live),
    %% amqqueue migration:
    %% The amqqueue was read within this transaction, so no migration
    %% handling is needed here.
    ok = rabbit_amqqueue:store_queue(Q3),
    %% Wake the queue up so that it emits a stats event.
    rabbit_amqqueue:notify_policy_changed(Q3),
    Q3.

%% Recoverable nodes are those which we could promote if the whole
%% cluster were to suddenly stop and we then lose the master; i.e. all
%% nodes with running mirrors, and all stopped nodes which had running
%% mirrors when they were up.
%%
%% Therefore we aim here to add new nodes with mirrors, and remove
%% running nodes without mirrors. We also try to keep the order
%% constant, and similar to the live SPids field (i.e. oldest
%% first). That's not necessarily optimal if nodes spend a long time
%% down, but we don't have a good way to predict what the optimal is
%% in that case anyway, and we assume nodes will not just be down for
%% a long time without being removed.
update_recoverable(SPids, RS) ->
    SNodes       = [node(SPid) || SPid <- SPids],
    RunningNodes = rabbit_nodes:all_running(),
    AddNodes     = SNodes -- RS,
    DelNodes     = RunningNodes -- SNodes, %% i.e. running with no mirror
    (RS -- DelNodes) ++ AddNodes.
%% Tell every mirror (via GM) to delete and terminate, wait (bounded)
%% for them to go down, then manually clean the mnesia record, since
%% with all mirrors gone nobody else will.
stop_all_slaves(Reason, SPids, QName, GM, WaitTimeout) ->
    PidsMRefs = [{Pid, erlang:monitor(process, Pid)} || Pid <- [GM | SPids]],
    ok = gm:broadcast(GM, {delete_and_terminate, Reason}),
    %% It's possible that we could be partitioned from some mirrors
    %% between the lookup and the broadcast, in which case we could
    %% monitor them but they would not have received the GM
    %% message. So only wait for mirrors which are still
    %% not-partitioned.
    PendingSlavePids =
        lists:foldl(
          fun ({Pid, MRef}, Acc) ->
                  case rabbit_mnesia:on_running_node(Pid) of
                      true ->
                          receive
                              {'DOWN', MRef, process, _Pid, _Info} ->
                                  Acc
                          after WaitTimeout ->
                                  rabbit_mirror_queue_misc:log_warning(
                                    QName, "Missing 'DOWN' message from ~p in"
                                    " node ~p~n", [Pid, node(Pid)]),
                                  [Pid | Acc]
                          end;
                      false ->
                          Acc
                  end
          end, [], PidsMRefs),
    %% Normally when we remove a mirror another mirror or master will
    %% notice and update Mnesia. But we just removed them all, and
    %% have stopped listening ourselves. So manually clean up.
    rabbit_misc:execute_mnesia_transaction(
      fun () ->
              [Q0] = mnesia:read({rabbit_queue, QName}),
              Q1 = amqqueue:set_gm_pids(Q0, []),
              Q2 = amqqueue:set_slave_pids(Q1, []),
              %% Restarted mirrors on running nodes can ensure old
              %% incarnations are stopped using the pending mirror
              %% pids.
              Q3 = amqqueue:set_slave_pids_pending_shutdown(
                     Q2, PendingSlavePids),
              rabbit_mirror_queue_misc:store_updated_slaves(Q3)
      end),
    ok = gm:forget_group(QName).

%%----------------------------------------------------------------------------

%% The mirror pids are maintained in descending order of age, so the
%% one to promote is the oldest.
promote_slave([SPid | SPids]) ->
    {SPid, SPids}.

-spec initial_queue_node(amqqueue:amqqueue(), node()) -> node().

initial_queue_node(Q, DefNode) ->
    {MNode, _SNodes} =
        suggested_queue_nodes(Q, DefNode, rabbit_nodes:all_running()),
    MNode.
-spec suggested_queue_nodes(amqqueue:amqqueue()) ->
          {node(), [node()]}.

suggested_queue_nodes(Q) ->
    suggested_queue_nodes(Q, rabbit_nodes:all_running()).

suggested_queue_nodes(Q, All) ->
    suggested_queue_nodes(Q, node(), All).

%% The third argument exists so that callers can hoist a call to
%% rabbit_nodes:all_running() out of a loop or transaction or both.
suggested_queue_nodes(Q, DefNode, All) when ?is_amqqueue(Q) ->
    Owner = amqqueue:get_exclusive_owner(Q),
    {MNode0, SNodes, SSNodes} = actual_queue_nodes(Q),
    MNode = case MNode0 of
                none -> DefNode;
                _    -> MNode0
            end,
    case Owner of
        none ->
            Params = policy(<<"ha-params">>, Q),
            case module(Q) of
                {ok, M} -> M:suggested_queue_nodes(
                             Params, MNode, SNodes, SSNodes, All);
                _       -> {MNode, []}
            end;
        _ ->
            %% Exclusively-owned queues are never mirrored.
            {MNode, []}
    end.

policy(Policy, Q) ->
    case rabbit_policy:get(Policy, Q) of
        undefined -> none;
        P         -> P
    end.

%% Resolve the mirroring-mode module, either from a queue's ha-mode
%% policy or directly from a mode name given as a binary.
module(Q) when ?is_amqqueue(Q) ->
    case rabbit_policy:get(<<"ha-mode">>, Q) of
        undefined -> not_mirrored;
        Mode      -> module(Mode)
    end;

module(Mode) when is_binary(Mode) ->
    case rabbit_registry:binary_to_type(Mode) of
        {error, not_found} ->
            not_mirrored;
        T ->
            case rabbit_registry:lookup_module(ha_mode, T) of
                {ok, Module} -> {ok, Module};
                _            -> not_mirrored
            end
    end.

validate_mode(Mode) ->
    case module(Mode) of
        {ok, _Module} ->
            ok;
        not_mirrored ->
            {error, "~p is not a valid ha-mode value", [Mode]}
    end.

-spec is_mirrored(amqqueue:amqqueue()) -> boolean().

is_mirrored(Q) ->
    case module(Q) of
        {ok, _} -> true;
        _       -> false
    end.

is_mirrored_ha_nodes(Q) ->
    case module(Q) of
        {ok, ?HA_NODES_MODULE} -> true;
        _                      -> false
    end.

%% Current {MasterNode, MirrorNodes, SyncedMirrorNodes}, derived from
%% the pids stored in the queue record.
actual_queue_nodes(Q) when ?is_amqqueue(Q) ->
    MPid   = amqqueue:get_pid(Q),
    SPids  = amqqueue:get_slave_pids(Q),
    SSPids = amqqueue:get_sync_slave_pids(Q),
    Nodes  = fun (L) -> [node(Pid) || Pid <- L] end,
    {case MPid of
         none -> none;
         _    -> node(MPid)
     end, Nodes(SPids), Nodes(SSPids)}.
-spec maybe_auto_sync(amqqueue:amqqueue()) -> 'ok'.

%% Kick off a background mirror sync when the queue's ha-sync-mode
%% policy is "automatic".
maybe_auto_sync(Q) when ?is_amqqueue(Q) ->
    QPid = amqqueue:get_pid(Q),
    case policy(<<"ha-sync-mode">>, Q) of
        <<"automatic">> ->
            spawn(fun () -> rabbit_amqqueue:sync_mirrors(QPid) end);
        _ ->
            ok
    end.

sync_queue(Q0) ->
    F = fun (Q) when ?amqqueue_is_classic(Q) ->
                QPid = amqqueue:get_pid(Q),
                rabbit_amqqueue:sync_mirrors(QPid);
            (Q) when ?amqqueue_is_quorum(Q) ->
                {error, quorum_queue_not_supported}
        end,
    rabbit_amqqueue:with(Q0, F).

cancel_sync_queue(Q0) ->
    F = fun (Q) when ?amqqueue_is_classic(Q) ->
                QPid = amqqueue:get_pid(Q),
                rabbit_amqqueue:cancel_sync_mirrors(QPid);
            (Q) when ?amqqueue_is_quorum(Q) ->
                {error, quorum_queue_not_supported}
        end,
    rabbit_amqqueue:with(Q0, F).

sync_batch_size(Q) when ?is_amqqueue(Q) ->
    case policy(<<"ha-sync-batch-size">>, Q) of
        none ->
            %% Explicit 'none' clause needed: the atom compares greater
            %% than any integer, so it would satisfy "> 1" below.
            default_batch_size();
        BatchSize when BatchSize > 1 ->
            BatchSize;
        _ ->
            default_batch_size()
    end.

-define(DEFAULT_BATCH_SIZE, 4096).

default_batch_size() ->
    rabbit_misc:get_env(rabbit, mirroring_sync_batch_size,
                        ?DEFAULT_BATCH_SIZE).

-spec update_mirrors
        (amqqueue:amqqueue(), amqqueue:amqqueue()) -> 'ok'.

update_mirrors(OldQ, NewQ) when ?amqqueue_pids_are_equal(OldQ, NewQ) ->
    %% Note: we do want to ensure both queues have the same pid
    QPid = amqqueue:get_pid(OldQ),
    QPid = amqqueue:get_pid(NewQ),
    case {is_mirrored(OldQ), is_mirrored(NewQ)} of
        {false, false} -> ok;
        _              -> rabbit_amqqueue:update_mirroring(QPid)
    end.

-spec update_mirrors
        (amqqueue:amqqueue()) -> 'ok'.

update_mirrors(Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    {OldMNode, OldSNodes, _} = actual_queue_nodes(Q),
    {NewMNode, NewSNodes}    = suggested_queue_nodes(Q),
    OldNodes = [OldMNode | OldSNodes],
    NewNodes = [NewMNode | NewSNodes],
    %% Add before dropping: when a mirror dies, remove_from_queue/2
    %% might itself have to add new mirrors (in "exactly" mode) based
    %% on what it sees in mnesia. If drop_mirror/2 ran first, then by
    %% the time we end up in remove_from_queue/2 it would not see the
    %% mirrors that add_mirror/2 is about to add, and would want to
    %% add them too (even though we are not responding to the death of
    %% a mirror). Breakage ensues.
    add_mirrors(QName, NewNodes -- OldNodes, async),
    drop_mirrors(QName, OldNodes -- NewNodes),
    %% This is for the case where no extra nodes were added but we
    %% changed to a policy requiring auto-sync.
    maybe_auto_sync(Q),
    ok.

queue_length(Q) ->
    [{messages, M}] = rabbit_amqqueue:info(Q, [messages]),
    M.

get_replicas(Q) ->
    {MNode, SNodes} = suggested_queue_nodes(Q),
    [MNode] ++ SNodes.

%% Move the queue master to Destination by adding a mirror there,
%% dropping every other copy, and waiting (bounded) for the promotion
%% to be observable in the queue record.
transfer_leadership(Q, Destination) ->
    QName = amqqueue:get_name(Q),
    {OldMNode, OldSNodes, _} = actual_queue_nodes(Q),
    OldNodes = [OldMNode | OldSNodes],
    add_mirrors(QName, [Destination] -- OldNodes, async),
    drop_mirrors(QName, OldNodes -- [Destination]),
    {Result, NewQ} = wait_for_new_master(QName, Destination),
    update_mirrors(NewQ),
    Result.

wait_for_new_master(QName, Destination) ->
    wait_for_new_master(QName, Destination, 100).

wait_for_new_master(QName, _Destination, 0) ->
    {ok, Q} = rabbit_amqqueue:lookup(QName),
    {{not_migrated, ""}, Q};
wait_for_new_master(QName, Destination, N) ->
    {ok, Q} = rabbit_amqqueue:lookup(QName),
    case amqqueue:get_pid(Q) of
        none ->
            timer:sleep(100),
            wait_for_new_master(QName, Destination, N - 1);
        Pid ->
            case node(Pid) of
                Destination ->
                    {{migrated, Destination}, Q};
                _ ->
                    timer:sleep(100),
                    wait_for_new_master(QName, Destination, N - 1)
            end
    end.

%% The arrival of a newly synced mirror may cause the master to die if
%% the policy does not want the master but it has been kept alive
%% because there were no synced mirrors.
%%
%% We don't just call update_mirrors/2 here since that could decide to
%% start a mirror for some other reason, and since we are the mirror ATM
%% that allows complicated deadlocks.

-spec maybe_drop_master_after_sync(amqqueue:amqqueue()) -> 'ok'.

%% Drop the master once a mirror has synchronised, if the policy no
%% longer wants the master's node hosting the queue (the master may
%% have been kept alive only because no synced mirror existed).
maybe_drop_master_after_sync(Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    MPid = amqqueue:get_pid(Q),
    {DesiredMNode, DesiredSNodes} = suggested_queue_nodes(Q),
    case node(MPid) of
        DesiredMNode -> ok;
        OldMNode -> false = lists:member(OldMNode, DesiredSNodes), %% [0]
                    drop_mirror(QName, OldMNode)
    end,
    ok.
%% [0] ASSERTION - if the policy wants the master to change, it has
%% not just shuffled it into the mirrors. All our modes ensure this
%% does not happen, but we should guard against a misbehaving plugin.

%%----------------------------------------------------------------------------

%% rabbit_policy_validator callback: validate the ha-* keys of a
%% policy definition. Any ha-* sub-key requires ha-mode to be present.
validate_policy(KeyList) ->
    Mode = proplists:get_value(<<"ha-mode">>, KeyList, none),
    Params = proplists:get_value(<<"ha-params">>, KeyList, none),
    SyncMode = proplists:get_value(<<"ha-sync-mode">>, KeyList, none),
    SyncBatchSize = proplists:get_value(
                      <<"ha-sync-batch-size">>, KeyList, none),
    PromoteOnShutdown = proplists:get_value(
                          <<"ha-promote-on-shutdown">>, KeyList, none),
    PromoteOnFailure = proplists:get_value(
                         <<"ha-promote-on-failure">>, KeyList, none),
    case {Mode, Params, SyncMode, SyncBatchSize, PromoteOnShutdown, PromoteOnFailure} of
        {none, none, none, none, none, none} ->
            ok;
        {none, _, _, _, _, _} ->
            %% Fix: the previous message omitted ha-sync-batch-size and
            %% ha-promote-on-failure even though their presence also
            %% lands in this clause.
            {error, "ha-mode must be specified to specify ha-params, "
             "ha-sync-mode, ha-sync-batch-size, ha-promote-on-shutdown "
             "or ha-promote-on-failure", []};
        _ ->
            validate_policies(
              [{Mode, fun validate_mode/1},
               {Params, ha_params_validator(Mode)},
               {SyncMode, fun validate_sync_mode/1},
               {SyncBatchSize, fun validate_sync_batch_size/1},
               {PromoteOnShutdown, fun validate_pos/1},
               {PromoteOnFailure, fun validate_pof/1}])
    end.
+ +ha_params_validator(Mode) -> + fun(Val) -> + {ok, M} = module(Mode), + M:validate_policy(Val) + end. + +validate_policies([]) -> + ok; +validate_policies([{Val, Validator} | Rest]) -> + case Validator(Val) of + ok -> validate_policies(Rest); + E -> E + end. + +validate_sync_mode(SyncMode) -> + case SyncMode of + <<"automatic">> -> ok; + <<"manual">> -> ok; + none -> ok; + Mode -> {error, "ha-sync-mode must be \"manual\" " + "or \"automatic\", got ~p", [Mode]} + end. + +validate_sync_batch_size(none) -> + ok; +validate_sync_batch_size(N) when is_integer(N) andalso N > 0 -> + ok; +validate_sync_batch_size(N) -> + {error, "ha-sync-batch-size takes an integer greater than 0, " + "~p given", [N]}. + +validate_pos(PromoteOnShutdown) -> + case PromoteOnShutdown of + <<"always">> -> ok; + <<"when-synced">> -> ok; + none -> ok; + Mode -> {error, "ha-promote-on-shutdown must be " + "\"always\" or \"when-synced\", got ~p", [Mode]} + end. + +validate_pof(PromoteOnShutdown) -> + case PromoteOnShutdown of + <<"always">> -> ok; + <<"when-synced">> -> ok; + none -> ok; + Mode -> {error, "ha-promote-on-failure must be " + "\"always\" or \"when-synced\", got ~p", [Mode]} + end. diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode.erl b/deps/rabbit/src/rabbit_mirror_queue_mode.erl new file mode 100644 index 0000000000..91491efc49 --- /dev/null +++ b/deps/rabbit/src/rabbit_mirror_queue_mode.erl @@ -0,0 +1,42 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_mirror_queue_mode). + +-behaviour(rabbit_registry_class). + +-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]). + +-type master() :: node(). +-type slave() :: node(). +-type params() :: any(). + +-callback description() -> [proplists:property()]. 
+ +%% Called whenever we think we might need to change nodes for a +%% mirrored queue. Note that this is called from a variety of +%% contexts, both inside and outside Mnesia transactions. Ideally it +%% will be pure-functional. +%% +%% Takes: parameters set in the policy, +%% current master, +%% current mirrors, +%% current synchronised mirrors, +%% all nodes to consider +%% +%% Returns: tuple of new master, new mirrors +%% +-callback suggested_queue_nodes( + params(), master(), [slave()], [slave()], [node()]) -> + {master(), [slave()]}. + +%% Are the parameters valid for this mode? +-callback validate_policy(params()) -> + rabbit_policy_validator:validate_results(). + +added_to_rabbit_registry(_Type, _ModuleName) -> ok. +removed_from_rabbit_registry(_Type) -> ok. diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl b/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl new file mode 100644 index 0000000000..2da12a5972 --- /dev/null +++ b/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl @@ -0,0 +1,32 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_mirror_queue_mode_all). + +-include("rabbit.hrl"). + +-behaviour(rabbit_mirror_queue_mode). + +-export([description/0, suggested_queue_nodes/5, validate_policy/1]). + +-rabbit_boot_step({?MODULE, + [{description, "mirror mode all"}, + {mfa, {rabbit_registry, register, + [ha_mode, <<"all">>, ?MODULE]}}, + {requires, rabbit_registry}, + {enables, kernel_ready}]}). + +description() -> + [{description, <<"Mirror queue to all nodes">>}]. + +suggested_queue_nodes(_Params, MNode, _SNodes, _SSNodes, Poss) -> + {MNode, Poss -- [MNode]}. + +validate_policy(none) -> + ok; +validate_policy(_Params) -> + {error, "ha-mode=\"all\" does not take parameters", []}. 
%% ===========================================================================
%% File: deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl
%% ===========================================================================

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates.  All rights reserved.
%%

%% ha-mode "exactly": mirror the queue to a fixed total number of nodes
%% (master included), given as the ha-params integer.
-module(rabbit_mirror_queue_mode_exactly).

-include("rabbit.hrl").

-behaviour(rabbit_mirror_queue_mode).

-export([description/0, suggested_queue_nodes/5, validate_policy/1]).

-rabbit_boot_step({?MODULE,
                   [{description, "mirror mode exactly"},
                    {mfa,         {rabbit_registry, register,
                                   [ha_mode, <<"exactly">>, ?MODULE]}},
                    {requires,    rabbit_registry},
                    {enables,     kernel_ready}]}).

description() ->
    [{description, <<"Mirror queue to a specified number of nodes">>}].

%% When we need to add nodes, we randomise our candidate list as a
%% crude form of load-balancing. TODO it would also be nice to
%% randomise the list of ones to remove when we have too many - we
%% would have to take account of synchronisation though.
suggested_queue_nodes(Count, MasterNode, SlaveNodes, _SyncSlaveNodes,
                      Possible) ->
    WantedSlaves  = Count - 1,
    CurrentSlaves = length(SlaveNodes),
    Slaves =
        if WantedSlaves > CurrentSlaves ->
               %% Grow: pick random extra candidates that are neither the
               %% master nor already mirrors.
               Candidates = shuffle((Possible -- [MasterNode]) -- SlaveNodes),
               SlaveNodes ++
                   lists:sublist(Candidates, WantedSlaves - CurrentSlaves);
           true ->
               %% Shrink (or stay): keep the first WantedSlaves mirrors.
               lists:sublist(SlaveNodes, WantedSlaves)
        end,
    {MasterNode, Slaves}.

%% Fisher-Yates-by-sort: tag each node with a random key and sort on it.
shuffle(Nodes) ->
    Tagged = [{rand:uniform(), Node} || Node <- Nodes],
    [Node || {_Key, Node} <- lists:keysort(1, Tagged)].

%% ha-params must be a positive integer (the total replica count).
validate_policy(N) when is_integer(N), N > 0 ->
    ok;
validate_policy(Params) ->
    {error, "ha-mode=\"exactly\" takes an integer, ~p given", [Params]}.
%% ===========================================================================
%% File: deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl
%% ===========================================================================

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates.  All rights reserved.
%%

%% ha-mode "nodes": mirror the queue to an explicit list of node names
%% supplied in ha-params.
-module(rabbit_mirror_queue_mode_nodes).

-include("rabbit.hrl").

-behaviour(rabbit_mirror_queue_mode).

-export([description/0, suggested_queue_nodes/5, validate_policy/1]).

-rabbit_boot_step({?MODULE,
                   [{description, "mirror mode nodes"},
                    {mfa,         {rabbit_registry, register,
                                   [ha_mode, <<"nodes">>, ?MODULE]}},
                    {requires,    rabbit_registry},
                    {enables,     kernel_ready}]}).

description() ->
    [{description, <<"Mirror queue to specified nodes">>}].

suggested_queue_nodes(PolicyNodes0, CurrentMaster, _SNodes, SSNodes,
                      NodesRunningRabbitMQ) ->
    %% NOTE(review): list_to_atom/1 on policy-supplied names can grow the
    %% atom table; presumably bounded in practice by cluster node names —
    %% confirm whether *_to_existing_atom would be safe here.
    NamedNodes = [list_to_atom(binary_to_list(Node)) || Node <- PolicyNodes0],
    %% If the current master is not in the nodes specified, then what we want
    %% to do depends on whether there are any synchronised mirrors. If there
    %% are then we can just kill the current master - the admin has asked for
    %% a migration and we should give it to them. If there are not however
    %% then we must keep the master around so as not to lose messages.
    PolicyNodes = case SSNodes of
                      [] -> lists:usort([CurrentMaster | NamedNodes]);
                      _  -> NamedNodes
                  end,
    Unavailable = PolicyNodes -- NodesRunningRabbitMQ,
    Available   = PolicyNodes -- Unavailable,
    case Available of
        [] ->
            %% We have never heard of anything? Not much we can do but
            %% keep the master alive.
            {CurrentMaster, []};
        _ ->
            case lists:member(CurrentMaster, Available) of
                true  -> {CurrentMaster, Available -- [CurrentMaster]};
                %% In order to get here SSNodes must not be empty.
                false -> promote_to_synced_node(Available, SSNodes)
            end
    end.

%% Make sure the new master is synced! Prefer a synchronised node from
%% the available policy nodes; otherwise fall back to any synced mirror.
promote_to_synced_node(Available, SSNodes) ->
    NewMaster = case [N || N <- Available, lists:member(N, SSNodes)] of
                    [First | _] -> First;
                    []          -> erlang:hd(SSNodes)
                end,
    {NewMaster, Available -- [NewMaster]}.

%% ha-params must be a non-empty list of node-name binaries.
validate_policy([]) ->
    {error, "ha-mode=\"nodes\" list must be non-empty", []};
validate_policy(Nodes) when is_list(Nodes) ->
    case [I || I <- Nodes, not is_binary(I)] of
        []      -> ok;
        Invalid -> {error, "ha-mode=\"nodes\" takes a list of strings, "
                    "~p was not a string", [Invalid]}
    end;
validate_policy(Params) ->
    {error, "ha-mode=\"nodes\" takes a list, ~p given", [Params]}.

%% ===========================================================================
%% File: deps/rabbit/src/rabbit_mirror_queue_slave.erl
%% ===========================================================================

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates.  All rights reserved.
%%

-module(rabbit_mirror_queue_slave).

%% For general documentation of HA design, see
%% rabbit_mirror_queue_coordinator
%%
%% We receive messages from GM and from publishers, and the gm
%% messages can arrive either before or after the 'actual' message.
%% All instructions from the GM group must be processed in the order
%% in which they're received.

-export([set_maximum_since_use/2, info/1, go/2]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
         code_change/3, handle_pre_hibernate/1, prioritise_call/4,
         prioritise_cast/3, prioritise_info/3, format_message_queue/2]).

-export([joined/2, members_changed/3, handle_msg/3, handle_terminate/2]).

-behaviour(gen_server2).
-behaviour(gm).

-include_lib("rabbit_common/include/rabbit.hrl").

-include("amqqueue.hrl").
-include("gm_specs.hrl").

%%----------------------------------------------------------------------------

%% Keys reported by info/1.
-define(INFO_KEYS,
        [pid,
         name,
         master_pid,
         is_synchronised
        ]).

-define(SYNC_INTERVAL, 25). %% milliseconds
-define(RAM_DURATION_UPDATE_INTERVAL, 5000).
-define(DEATH_TIMEOUT, 20000). %% 20 seconds

-record(state, { q,
                 gm,
                 backing_queue,
                 backing_queue_state,
                 sync_timer_ref,
                 rate_timer_ref,

                 sender_queues, %% :: Pid -> {Q Msg, Set MsgId, ChState}
                 msg_id_ack,    %% :: MsgId -> AckTag

                 msg_id_status,
                 known_senders,

                 %% Master depth - local depth
                 depth_delta
               }).

%%----------------------------------------------------------------------------

%% file_handle_cache callback: ask the mirror to close file handles
%% unused for longer than Age.
set_maximum_since_use(QPid, Age) ->
    gen_server2:cast(QPid, {set_maximum_since_use, Age}).

%% Synchronously fetch the ?INFO_KEYS items from the mirror.
info(QPid) -> gen_server2:call(QPid, info, infinity).

%% Deferred start: return a {not_started, Q} placeholder state; real
%% initialisation happens in handle_go/1 once go/2 is invoked.
init(Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    ?store_proc_name(QName),
    {ok, {not_started, Q}, hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
      ?DESIRED_HIBERNATE}, ?MODULE}.

%% Kick the mirror out of the not_started state, either waiting for
%% completion (sync) or fire-and-forget (async).
go(SPid, sync)  -> gen_server2:call(SPid, go, infinity);
go(SPid, async) -> gen_server2:cast(SPid, go).

%% Full mirror start-up: join the GM group, register in the amqqueue
%% record via Mnesia, and build the initial #state{}. Returns {ok, State}
%% or {error, Reason} when another copy already exists on this node or
%% the master is mid-recovery.
handle_go(Q0) when ?is_amqqueue(Q0) ->
    QName = amqqueue:get_name(Q0),
    %% We join the GM group before we add ourselves to the amqqueue
    %% record. As a result:
    %% 1. We can receive msgs from GM that correspond to messages we will
    %%    never receive from publishers.
    %% 2. When we receive a message from publishers, we must receive a
    %%    message from the GM group for it.
    %% 3. However, that instruction from the GM group can arrive either
    %%    before or after the actual message. We need to be able to
    %%    distinguish between GM instructions arriving early, and case (1)
    %%    above.
    %%
    process_flag(trap_exit, true), %% amqqueue_process traps exits too.
    {ok, GM} = gm:start_link(QName, ?MODULE, [self()],
                             fun rabbit_misc:execute_mnesia_transaction/1),
    MRef = erlang:monitor(process, GM),
    %% We ignore the DOWN message because we are also linked and
    %% trapping exits, we just want to not get stuck and we will exit
    %% later.
    receive
        {joined, GM}            -> erlang:demonitor(MRef, [flush]),
                                   ok;
        {'DOWN', MRef, _, _, _} -> ok
    end,
    Self = self(),
    Node = node(),
    case rabbit_misc:execute_mnesia_transaction(
           fun() -> init_it(Self, GM, Node, QName) end) of
        {new, QPid, GMPids} ->
            ok = file_handle_cache:register_callback(
                   rabbit_amqqueue, set_maximum_since_use, [Self]),
            ok = rabbit_memory_monitor:register(
                   Self, {rabbit_amqqueue, set_ram_duration_target, [Self]}),
            {ok, BQ} = application:get_env(backing_queue_module),
            Q1 = amqqueue:set_pid(Q0, QPid),
            _ = BQ:delete_crashed(Q1), %% For crash recovery
            BQS = bq_init(BQ, Q1, new),
            State = #state { q                   = Q1,
                             gm                  = GM,
                             backing_queue       = BQ,
                             backing_queue_state = BQS,
                             rate_timer_ref      = undefined,
                             sync_timer_ref      = undefined,

                             sender_queues       = #{},
                             msg_id_ack          = #{},

                             msg_id_status       = #{},
                             known_senders       = pmon:new(delegate),

                             depth_delta         = undefined
                           },
            %% Ask the master for its depth so we can compute depth_delta.
            ok = gm:broadcast(GM, request_depth),
            ok = gm:validate_members(GM, [GM | [G || {G, _} <- GMPids]]),
            rabbit_mirror_queue_misc:maybe_auto_sync(Q1),
            {ok, State};
        {stale, StalePid} ->
            rabbit_mirror_queue_misc:log_warning(
              QName, "Detected stale HA master: ~p~n", [StalePid]),
            gm:leave(GM),
            {error, {stale_master_pid, StalePid}};
        duplicate_live_master ->
            gm:leave(GM),
            {error, {duplicate_live_master, Node}};
        existing ->
            gm:leave(GM),
            {error, normal};
        master_in_recovery ->
            gm:leave(GM),
            %% The queue record vanished - we must have a master starting
            %% concurrently with us. In that case we can safely decide to do
            %% nothing here, and the master will start us in
            %% master:init_with_existing_bq/3
            {error, normal}
    end.

%% Runs inside an Mnesia transaction: decide whether this node may host a
%% new mirror of QName, cleaning up records of a dead local copy if needed.
init_it(Self, GM, Node, QName) ->
    case mnesia:read({rabbit_queue, QName}) of
        [Q] when ?is_amqqueue(Q) ->
            QPid = amqqueue:get_pid(Q),
            SPids = amqqueue:get_slave_pids(Q),
            GMPids = amqqueue:get_gm_pids(Q),
            PSPids = amqqueue:get_slave_pids_pending_shutdown(Q),
            case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of
                %% No copy on this node yet: register ourselves.
                []     -> stop_pending_slaves(QName, PSPids),
                          add_slave(Q, Self, GM),
                          {new, QPid, GMPids};
                %% This node already holds the master record.
                [QPid] -> case rabbit_mnesia:is_process_alive(QPid) of
                              true  -> duplicate_live_master;
                              false -> {stale, QPid}
                          end;
                %% This node already holds a mirror record; replace it
                %% if that process is dead.
                [SPid] -> case rabbit_mnesia:is_process_alive(SPid) of
                              true  -> existing;
                              false -> GMPids1 = [T || T = {_, S} <- GMPids, S =/= SPid],
                                       SPids1 = SPids -- [SPid],
                                       Q1 = amqqueue:set_slave_pids(Q, SPids1),
                                       Q2 = amqqueue:set_gm_pids(Q1, GMPids1),
                                       add_slave(Q2, Self, GM),
                                       {new, QPid, GMPids1}
                          end
            end;
        [] ->
            master_in_recovery
    end.

%% Pending mirrors have been asked to stop by the master, but despite the node
%% being up these did not answer on the expected timeout. Stop local mirrors now.
stop_pending_slaves(QName, Pids) ->
    [begin
         rabbit_mirror_queue_misc:log_warning(
           QName, "Detected a non-responsive classic queue mirror, stopping it: ~p~n", [Pid]),
         case erlang:process_info(Pid, dictionary) of
             undefined -> ok;
             {dictionary, Dict} ->
                 Vhost = QName#resource.virtual_host,
                 {ok, AmqQSup} = rabbit_amqqueue_sup_sup:find_for_vhost(Vhost),
                 %% Only kill processes supervised by this vhost's queue
                 %% supervisor; kill the per-queue supervisor too so it
                 %% does not restart the mirror.
                 case proplists:get_value('$ancestors', Dict) of
                     [Sup, AmqQSup | _] ->
                         exit(Sup, kill),
                         exit(Pid, kill);
                     _ ->
                         ok
                 end
         end
     end || Pid <- Pids, node(Pid) =:= node(),
            true =:= erlang:is_process_alive(Pid)].
%% Add to the end, so they are in descending order of age, see
%% rabbit_mirror_queue_misc:promote_slave/1
add_slave(Q0, New, GM) when ?is_amqqueue(Q0) ->
    SPids = amqqueue:get_slave_pids(Q0),
    GMPids = amqqueue:get_gm_pids(Q0),
    SPids1 = SPids ++ [New],
    GMPids1 = [{GM, New} | GMPids],
    Q1 = amqqueue:set_slave_pids(Q0, SPids1),
    Q2 = amqqueue:set_gm_pids(Q1, GMPids1),
    rabbit_mirror_queue_misc:store_updated_slaves(Q2).

%% Synchronous 'go': perform the deferred start-up (see handle_go/1).
handle_call(go, _From, {not_started, Q} = NotStarted) ->
    case handle_go(Q) of
        {ok, State}     -> {reply, ok, State};
        {error, Error}  -> {stop, Error, NotStarted}
    end;

%% GM members died: work out the new master/mirror layout; we may be
%% promoted (returning {become, rabbit_amqqueue_process, ...}).
handle_call({gm_deaths, DeadGMPids}, From,
            State = #state{ gm = GM, q = Q,
                            backing_queue = BQ,
                            backing_queue_state = BQS}) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    MPid = amqqueue:get_pid(Q),
    Self = self(),
    case rabbit_mirror_queue_misc:remove_from_queue(QName, Self, DeadGMPids) of
        {error, not_found} ->
            gen_server2:reply(From, ok),
            {stop, normal, State};
        {error, {not_synced, _SPids}} ->
            BQ:delete_and_terminate({error, not_synced}, BQS),
            {stop, normal, State#state{backing_queue_state = undefined}};
        {ok, Pid, DeadPids, ExtraNodes} ->
            rabbit_mirror_queue_misc:report_deaths(Self, false, QName,
                                                   DeadPids),
            case Pid of
                MPid ->
                    %% master hasn't changed
                    gen_server2:reply(From, ok),
                    rabbit_mirror_queue_misc:add_mirrors(
                      QName, ExtraNodes, async),
                    noreply(State);
                Self ->
                    %% we've become master
                    QueueState = promote_me(From, State),
                    rabbit_mirror_queue_misc:add_mirrors(
                      QName, ExtraNodes, async),
                    {become, rabbit_amqqueue_process, QueueState, hibernate};
                _ ->
                    %% master has changed to not us
                    gen_server2:reply(From, ok),
                    %% see rabbitmq-server#914;
                    %% It's not always guaranteed that we won't have ExtraNodes.
                    %% If gm alters, master can change to not us with extra nodes,
                    %% in which case we attempt to add mirrors on those nodes.
                    case ExtraNodes of
                        [] -> void;
                        _  -> rabbit_mirror_queue_misc:add_mirrors(
                                QName, ExtraNodes, async)
                    end,
                    %% Since GM is by nature lazy we need to make sure
                    %% there is some traffic when a master dies, to
                    %% make sure all mirrors get informed of the
                    %% death. That is all process_death does, create
                    %% some traffic.
                    ok = gm:broadcast(GM, process_death),
                    Q1 = amqqueue:set_pid(Q, Pid),
                    State1 = State#state{q = Q1},
                    noreply(State1)
            end
    end;

handle_call(info, _From, State) ->
    reply(infos(?INFO_KEYS, State), State).

%% Asynchronous 'go': same as the call form, but no reply.
handle_cast(go, {not_started, Q} = NotStarted) ->
    case handle_go(Q) of
        {ok, State}     -> {noreply, State};
        {error, Error}  -> {stop, Error, NotStarted}
    end;

handle_cast({run_backing_queue, Mod, Fun}, State) ->
    noreply(run_backing_queue(Mod, Fun, State));

%% A GM instruction from the master; only process it if we are still
%% listed as a mirror of this queue.
handle_cast({gm, Instruction}, State = #state{q = Q0}) when ?is_amqqueue(Q0) ->
    QName = amqqueue:get_name(Q0),
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q1} when ?is_amqqueue(Q1) ->
            SPids = amqqueue:get_slave_pids(Q1),
            case lists:member(self(), SPids) of
                true ->
                    handle_process_result(process_instruction(Instruction, State));
                false ->
                    %% Potentially a duplicated mirror caused by a partial partition,
                    %% will stop as a new mirror could start unaware of our presence
                    {stop, shutdown, State}
            end;
        {error, not_found} ->
            %% Would not expect this to happen after fixing #953
            {stop, shutdown, State}
    end;

handle_cast({deliver, Delivery = #delivery{sender = Sender, flow = Flow}, true},
            State) ->
    %% Asynchronous, non-"mandatory", deliver mode.
    %% We are acking messages to the channel process that sent us
    %% the message delivery. See
    %% rabbit_amqqueue_process:handle_ch_down for more info.
    %% If message is rejected by the master, the publish will be nacked
    %% even if mirrors confirm it. No need to check for length here.
    maybe_flow_ack(Sender, Flow),
    noreply(maybe_enqueue_message(Delivery, State));

%% Master selected us for a sync; hand control to rabbit_mirror_queue_sync
%% until the sync finishes, fails, or is denied.
handle_cast({sync_start, Ref, Syncer},
            State = #state { depth_delta         = DD,
                             backing_queue       = BQ,
                             backing_queue_state = BQS }) ->
    State1 = #state{rate_timer_ref = TRef} = ensure_rate_timer(State),
    %% S rebuilds our state from what the syncer hands back.
    S = fun({MA, TRefN, BQSN}) ->
                State1#state{depth_delta         = undefined,
                             msg_id_ack          = maps:from_list(MA),
                             rate_timer_ref      = TRefN,
                             backing_queue_state = BQSN}
        end,
    case rabbit_mirror_queue_sync:slave(
           DD, Ref, TRef, Syncer, BQ, BQS,
           fun (BQN, BQSN) ->
                   BQSN1 = update_ram_duration(BQN, BQSN),
                   TRefN = rabbit_misc:send_after(?RAM_DURATION_UPDATE_INTERVAL,
                                                  self(), update_ram_duration),
                   {TRefN, BQSN1}
           end) of
        denied              -> noreply(State1);
        {ok, Res}           -> noreply(set_delta(0, S(Res)));
        {failed, Res}       -> noreply(S(Res));
        {stop, Reason, Res} -> {stop, Reason, S(Res)}
    end;

handle_cast({set_maximum_since_use, Age}, State) ->
    ok = file_handle_cache:set_maximum_since_use(Age),
    noreply(State);

handle_cast({set_ram_duration_target, Duration},
            State = #state { backing_queue       = BQ,
                             backing_queue_state = BQS }) ->
    BQS1 = BQ:set_ram_duration_target(Duration, BQS),
    noreply(State #state { backing_queue_state = BQS1 });

handle_cast(policy_changed, State) ->
    %% During partial partitions, we might end up receiving messages expected by a master
    %% Ignore them
    noreply(State).
handle_info(update_ram_duration, State = #state{backing_queue = BQ,
                                                backing_queue_state = BQS}) ->
    BQS1 = update_ram_duration(BQ, BQS),
    %% Don't call noreply/1, we don't want to set timers
    {State1, Timeout} = next_state(State #state {
                                     rate_timer_ref      = undefined,
                                     backing_queue_state = BQS1 }),
    {noreply, State1, Timeout};

handle_info(sync_timeout, State) ->
    noreply(backing_queue_timeout(
              State #state { sync_timer_ref = undefined }));

handle_info(timeout, State) ->
    noreply(backing_queue_timeout(State));

%% A monitored channel died: notify the master and maybe forget it.
handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) ->
    local_sender_death(ChPid, State),
    noreply(maybe_forget_sender(ChPid, down_from_ch, State));

handle_info({'EXIT', _Pid, Reason}, State) ->
    {stop, Reason, State};

handle_info({bump_credit, Msg}, State) ->
    credit_flow:handle_bump_msg(Msg),
    noreply(State);

handle_info(bump_reduce_memory_use, State) ->
    noreply(State);

%% In the event of a short partition during sync we can detect the
%% master's 'death', drop out of sync, and then receive sync messages
%% which were still in flight. Ignore them.
handle_info({sync_msg, _Ref, _Msg, _Props, _Unacked}, State) ->
    noreply(State);

handle_info({sync_complete, _Ref}, State) ->
    noreply(State);

handle_info(Msg, State) ->
    {stop, {unexpected_info, Msg}, State}.

terminate(_Reason, {not_started, _Q}) ->
    ok;
terminate(_Reason, #state { backing_queue_state = undefined }) ->
    %% We've received a delete_and_terminate from gm, thus nothing to
    %% do here.
    ok;
terminate({shutdown, dropped} = R, State = #state{backing_queue = BQ,
                                                  backing_queue_state = BQS}) ->
    %% See rabbit_mirror_queue_master:terminate/2
    terminate_common(State),
    BQ:delete_and_terminate(R, BQS);
terminate(shutdown, State) ->
    terminate_shutdown(shutdown, State);
terminate({shutdown, _} = R, State) ->
    terminate_shutdown(R, State);
terminate(Reason, State = #state{backing_queue = BQ,
                                 backing_queue_state = BQS}) ->
    terminate_common(State),
    BQ:delete_and_terminate(Reason, BQS).

%% If the Reason is shutdown, or {shutdown, _}, it is not the queue
%% being deleted: it's just the node going down. Even though we're a
%% mirror, we have no idea whether or not we'll be the only copy coming
%% back up. Thus we must assume we will be, and preserve anything we
%% have on disk.
terminate_shutdown(Reason, State = #state{backing_queue = BQ,
                                          backing_queue_state = BQS}) ->
    terminate_common(State),
    BQ:terminate(Reason, BQS).

%% Shared teardown: deregister from the memory monitor, stop our timers.
terminate_common(State) ->
    ok = rabbit_memory_monitor:deregister(self()),
    stop_rate_timer(stop_sync_timer(State)).

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

handle_pre_hibernate({not_started, _Q} = State) ->
    {hibernate, State};

handle_pre_hibernate(State = #state { backing_queue       = BQ,
                                      backing_queue_state = BQS }) ->
    {RamDuration, BQS1} = BQ:ram_duration(BQS),
    DesiredDuration =
        rabbit_memory_monitor:report_ram_duration(self(), RamDuration),
    BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1),
    BQS3 = BQ:handle_pre_hibernate(BQS2),
    {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS3 })}.

%% gen_server2 message priorities: higher number = handled sooner.
prioritise_call(Msg, _From, _Len, _State) ->
    case Msg of
        info                                 -> 9;
        {gm_deaths, _Dead}                   -> 5;
        _                                    -> 0
    end.

prioritise_cast(Msg, _Len, _State) ->
    case Msg of
        {set_ram_duration_target, _Duration} -> 8;
        {set_maximum_since_use, _Age}        -> 8;
        {run_backing_queue, _Mod, _Fun}      -> 6;
        {gm, _Msg}                           -> 5;
        _                                    -> 0
    end.

prioritise_info(Msg, _Len, _State) ->
    case Msg of
        update_ram_duration                  -> 8;
        sync_timeout                         -> 6;
        _                                    -> 0
    end.

format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).

%% ---------------------------------------------------------------------------
%% GM
%% ---------------------------------------------------------------------------

%% gm callback: tell the mirror process its GM member has joined.
joined([SPid], _Members) -> SPid ! {joined, self()}, ok.

members_changed([_SPid], _Births, []) ->
    ok;
members_changed([ SPid], _Births, Deaths) ->
    %% with_exit_handler: treat a dead mirror process as 'ok'.
    case rabbit_misc:with_exit_handler(
           rabbit_misc:const(ok),
           fun() ->
                   gen_server2:call(SPid, {gm_deaths, Deaths}, infinity)
           end) of
        ok              -> ok;
        {promote, CPid} -> {become, rabbit_mirror_queue_coordinator, [CPid]}
    end.

handle_msg([_SPid], _From, hibernate_heartbeat) ->
    %% See rabbit_mirror_queue_coordinator:handle_pre_hibernate/1
    ok;
handle_msg([_SPid], _From, request_depth) ->
    %% This is only of value to the master
    ok;
handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) ->
    %% This is only of value to the master
    ok;
handle_msg([_SPid], _From, process_death) ->
    %% We must not take any notice of the master death here since it
    %% comes without ordering guarantees - there could still be
    %% messages from the master we have yet to receive. When we get
    %% members_changed, then there will be no more messages.
    ok;
handle_msg([CPid], _From, {delete_and_terminate, _Reason} = Msg) ->
    ok = gen_server2:cast(CPid, {gm, Msg}),
    {stop, {shutdown, ring_shutdown}};
handle_msg([SPid], _From, {sync_start, Ref, Syncer, SPids}) ->
    %% Only act if this mirror was selected for the sync.
    case lists:member(SPid, SPids) of
        true  -> gen_server2:cast(SPid, {sync_start, Ref, Syncer});
        false -> ok
    end;
handle_msg([SPid], _From, Msg) ->
    ok = gen_server2:cast(SPid, {gm, Msg}).

handle_terminate([_SPid], _Reason) ->
    ok.
%% ---------------------------------------------------------------------------
%% Others
%% ---------------------------------------------------------------------------

infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items].

%% One clause per ?INFO_KEYS item; unknown keys map to ''.
i(pid, _State) ->
    self();
i(name, #state{q = Q}) when ?is_amqqueue(Q) ->
    amqqueue:get_name(Q);
i(master_pid, #state{q = Q}) when ?is_amqqueue(Q) ->
    amqqueue:get_pid(Q);
i(is_synchronised, #state{depth_delta = DD}) ->
    DD =:= 0;
i(_, _) ->
    ''.

%% Initialise the backing queue, routing async callbacks back through
%% rabbit_amqqueue:run_backing_queue/3 to this process.
bq_init(BQ, Q, Recover) ->
    Self = self(),
    BQ:init(Q, Recover,
            fun (Mod, Fun) ->
                    rabbit_amqqueue:run_backing_queue(Self, Mod, Fun)
            end).

run_backing_queue(rabbit_mirror_queue_master, Fun, State) ->
    %% Yes, this might look a little crazy, but see comments in
    %% confirm_sender_death/1
    Fun(?MODULE, State);
run_backing_queue(Mod, Fun, State = #state { backing_queue       = BQ,
                                             backing_queue_state = BQS }) ->
    State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }.

%% This feature was used by `rabbit_amqqueue_process` and
%% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x. It is
%% unused in 3.8.x and thus deprecated. We keep it to support in-place
%% upgrades to 3.8.x (i.e. mixed-version clusters), but it is a no-op
%% starting with that version.
send_mandatory(#delivery{mandatory = false}) ->
    ok;
send_mandatory(#delivery{mandatory  = true,
                         sender     = SenderPid,
                         msg_seq_no = MsgSeqNo}) ->
    gen_server2:cast(SenderPid, {mandatory_received, MsgSeqNo}).

%% For a persistent message on a durable queue we must wait for the disk
%% write before confirming, so record it; otherwise confirm immediately.
send_or_record_confirm(_, #delivery{ confirm = false }, MS, _State) ->
    MS;
send_or_record_confirm(published, #delivery { sender     = ChPid,
                                              confirm    = true,
                                              msg_seq_no = MsgSeqNo,
                                              message    = #basic_message {
                                                id            = MsgId,
                                                is_persistent = true } },
                       MS, #state{q = Q}) when ?amqqueue_is_durable(Q) ->
    maps:put(MsgId, {published, ChPid, MsgSeqNo} , MS);
send_or_record_confirm(_Status, #delivery { sender     = ChPid,
                                            confirm    = true,
                                            msg_seq_no = MsgSeqNo },
                       MS, #state{q = Q} = _State) ->
    ok = rabbit_classic_queue:confirm_to_sender(ChPid,
                                                amqqueue:get_name(Q), [MsgSeqNo]),
    MS.

%% The backing queue reports MsgIds as confirmed (written to disk);
%% send confirms for those also seen from their channel, and mark the
%% rest so they confirm as soon as the channel publish arrives.
confirm_messages(MsgIds, State = #state{q = Q, msg_id_status = MS}) ->
    QName = amqqueue:get_name(Q),
    {CMs, MS1} =
        lists:foldl(
          fun (MsgId, {CMsN, MSN} = Acc) ->
                  %% We will never see 'discarded' here
                  case maps:find(MsgId, MSN) of
                      error ->
                          %% If it needed confirming, it'll have
                          %% already been done.
                          Acc;
                      {ok, published} ->
                          %% Still not seen it from the channel, just
                          %% record that it's been confirmed.
                          {CMsN, maps:put(MsgId, confirmed, MSN)};
                      {ok, {published, ChPid, MsgSeqNo}} ->
                          %% Seen from both GM and Channel. Can now
                          %% confirm.
                          {rabbit_misc:gb_trees_cons(ChPid, MsgSeqNo, CMsN),
                           maps:remove(MsgId, MSN)};
                      {ok, confirmed} ->
                          %% It's already been confirmed. This is
                          %% probably it's been both sync'd to disk
                          %% and then delivered and ack'd before we've
                          %% seen the publish from the
                          %% channel. Nothing to do here.
                          Acc
                  end
          end, {gb_trees:empty(), MS}, MsgIds),
    Fun = fun (Pid, MsgSeqNos) ->
                  rabbit_classic_queue:confirm_to_sender(Pid, QName, MsgSeqNos)
          end,
    rabbit_misc:gb_trees_foreach(Fun, CMs),
    State #state { msg_id_status = MS1 }.

handle_process_result({ok,   State}) -> noreply(State);
handle_process_result({stop, State}) -> {stop, normal, State}.

-spec promote_me({pid(), term()}, #state{}) -> no_return().
%% Convert this mirror into the queue master: start a coordinator over
%% our GM, hand pending deliveries/acks/confirm-state over to
%% rabbit_amqqueue_process + rabbit_mirror_queue_master, and never
%% return as a mirror (the caller 'become's rabbit_amqqueue_process).
promote_me(From, #state { q                   = Q0,
                          gm                  = GM,
                          backing_queue       = BQ,
                          backing_queue_state = BQS,
                          rate_timer_ref      = RateTRef,
                          sender_queues       = SQ,
                          msg_id_ack          = MA,
                          msg_id_status       = MS,
                          known_senders       = KS}) when ?is_amqqueue(Q0) ->
    QName = amqqueue:get_name(Q0),
    rabbit_mirror_queue_misc:log_info(QName, "Promoting mirror ~s to master~n",
                                      [rabbit_misc:pid_to_string(self())]),
    Q1 = amqqueue:set_pid(Q0, self()),
    DeathFun = rabbit_mirror_queue_master:sender_death_fun(),
    DepthFun = rabbit_mirror_queue_master:depth_fun(),
    {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q1, GM, DeathFun, DepthFun),
    true = unlink(GM),
    gen_server2:reply(From, {promote, CPid}),

    %% Everything that we're monitoring, we need to ensure our new
    %% coordinator is monitoring.
    MPids = pmon:monitored(KS),
    ok = rabbit_mirror_queue_coordinator:ensure_monitoring(CPid, MPids),

    %% We find all the messages that we've received from channels but
    %% not from gm, and pass them to the
    %% queue_process:init_with_backing_queue_state to be enqueued.
    %%
    %% We also have to requeue messages which are pending acks: the
    %% consumers from the master queue have been lost and so these
    %% messages need requeuing. They might also be pending
    %% confirmation, and indeed they might also be pending arrival of
    %% the publication from the channel itself, if we received both
    %% the publication and the fetch via gm first! Requeuing doesn't
    %% affect confirmations: if the message was previously pending a
    %% confirmation then it still will be, under the same msg_id. So
    %% as a master, we need to be prepared to filter out the
    %% publication of said messages from the channel (is_duplicate
    %% (thus such requeued messages must remain in the msg_id_status
    %% (MS) which becomes seen_status (SS) in the master)).
    %%
    %% Then there are messages we already have in the queue, which are
    %% not currently pending acknowledgement:
    %% 1. Messages we've only received via gm:
    %%    Filter out subsequent publication from channel through
    %%    validate_message. Might have to issue confirms then or
    %%    later, thus queue_process state will have to know that
    %%    there's a pending confirm.
    %% 2. Messages received via both gm and channel:
    %%    Queue will have to deal with issuing confirms if necessary.
    %%
    %% MS contains the following three entry types:
    %%
    %% a) published:
    %%    published via gm only; pending arrival of publication from
    %%    channel, maybe pending confirm.
    %%
    %% b) {published, ChPid, MsgSeqNo}:
    %%    published via gm and channel; pending confirm.
    %%
    %% c) confirmed:
    %%    published via gm only, and confirmed; pending publication
    %%    from channel.
    %%
    %% d) discarded:
    %%    seen via gm only as discarded. Pending publication from
    %%    channel
    %%
    %% The forms a, c and d only, need to go to the master state
    %% seen_status (SS).
    %%
    %% The form b only, needs to go through to the queue_process
    %% state to form the msg_id_to_channel mapping (MTC).
    %%
    %% No messages that are enqueued from SQ at this point will have
    %% entries in MS.
    %%
    %% Messages that are extracted from MA may have entries in MS, and
    %% those messages are then requeued. However, as discussed above,
    %% this does not affect MS, nor which bits go through to SS in
    %% Master, or MTC in queue_process.

    St = [published, confirmed, discarded],
    SS = maps:filter(fun (_MsgId, Status) -> lists:member(Status, St) end, MS),
    AckTags = [AckTag || {_MsgId, AckTag} <- maps:to_list(MA)],

    MasterState = rabbit_mirror_queue_master:promote_backing_queue_state(
                    QName, CPid, BQ, BQS, GM, AckTags, SS, MPids),

    %% MTC: only form (b) entries - confirm routing for the new master.
    MTC = maps:fold(fun (MsgId, {published, ChPid, MsgSeqNo}, MTC0) ->
                            maps:put(MsgId, {ChPid, MsgSeqNo}, MTC0);
                        (_Msgid, _Status, MTC0) ->
                            MTC0
                    end, #{}, MS),
    Deliveries = [promote_delivery(Delivery) ||
                     {_ChPid, {PubQ, _PendCh, _ChState}} <- maps:to_list(SQ),
                     Delivery <- queue:to_list(PubQ)],
    AwaitGmDown = [ChPid || {ChPid, {_, _, down_from_ch}} <- maps:to_list(SQ)],
    KS1 = lists:foldl(fun (ChPid0, KS0) ->
                              pmon:demonitor(ChPid0, KS0)
                      end, KS, AwaitGmDown),
    rabbit_misc:store_proc_name(rabbit_amqqueue_process, QName),
    rabbit_amqqueue_process:init_with_backing_queue_state(
      Q1, rabbit_mirror_queue_master, MasterState, RateTRef, Deliveries, KS1,
      MTC).

%% We reset mandatory to false here because we will have sent the
%% mandatory_received already as soon as we got the message. We also
%% need to send an ack for these messages since the channel is waiting
%% for one for the via-GM case and we will not now receive one.
promote_delivery(Delivery = #delivery{sender = Sender, flow = Flow}) ->
    maybe_flow_ack(Sender, Flow),
    Delivery#delivery{mandatory = false}.

%% Standard {noreply, ...} wrapper: drain confirms, keep timers in order.
noreply(State) ->
    {NewState, Timeout} = next_state(State),
    {noreply, ensure_rate_timer(NewState), Timeout}.

%% As noreply/1 but for call replies.
reply(Reply, State) ->
    {NewState, Timeout} = next_state(State),
    {reply, Reply, ensure_rate_timer(NewState), Timeout}.
%% Drain confirms from the backing queue and pick the next gen_server2
%% timeout based on whether the backing queue wants timer callbacks.
next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) ->
    {MsgIds, BQS1} = BQ:drain_confirmed(BQS),
    State1 = confirm_messages(MsgIds,
                              State #state { backing_queue_state = BQS1 }),
    case BQ:needs_timeout(BQS1) of
        false -> {stop_sync_timer(State1),   hibernate     };
        idle  -> {stop_sync_timer(State1),   ?SYNC_INTERVAL};
        timed -> {ensure_sync_timer(State1), 0             }
    end.

backing_queue_timeout(State = #state { backing_queue       = BQ,
                                       backing_queue_state = BQS }) ->
    State#state{backing_queue_state = BQ:timeout(BQS)}.

ensure_sync_timer(State) ->
    rabbit_misc:ensure_timer(State, #state.sync_timer_ref,
                             ?SYNC_INTERVAL, sync_timeout).

stop_sync_timer(State) -> rabbit_misc:stop_timer(State, #state.sync_timer_ref).

ensure_rate_timer(State) ->
    rabbit_misc:ensure_timer(State, #state.rate_timer_ref,
                             ?RAM_DURATION_UPDATE_INTERVAL,
                             update_ram_duration).

stop_rate_timer(State) -> rabbit_misc:stop_timer(State, #state.rate_timer_ref).

%% Start monitoring a channel pid (idempotent via pmon).
ensure_monitoring(ChPid, State = #state { known_senders = KS }) ->
    State #state { known_senders = pmon:monitor(ChPid, KS) }.

local_sender_death(ChPid, #state { known_senders = KS }) ->
    %% The channel will be monitored iff we have received a delivery
    %% from it but not heard about its death from the master. So if it
    %% is monitored we need to point the death out to the master (see
    %% essay).
    ok = case pmon:is_monitored(ChPid, KS) of
             false -> ok;
             true  -> confirm_sender_death(ChPid)
         end.

%% Schedule a delayed check: if after ?DEATH_TIMEOUT the master still
%% hasn't told us this channel died, prod it via GM and re-arm.
confirm_sender_death(Pid) ->
    %% We have to deal with the possibility that we'll be promoted to
    %% master before this thing gets run. Consequently we set the
    %% module to rabbit_mirror_queue_master so that if we do become a
    %% rabbit_amqqueue_process before then, sane things will happen.
    Fun =
        fun (?MODULE, State = #state { known_senders = KS,
                                       gm            = GM }) ->
                %% We're running still as a mirror
                %%
                %% See comment in local_sender_death/2; we might have
                %% received a sender_death in the meanwhile so check
                %% again.
                ok = case pmon:is_monitored(Pid, KS) of
                         false -> ok;
                         true  -> gm:broadcast(GM, {ensure_monitoring, [Pid]}),
                                  confirm_sender_death(Pid)
                     end,
                State;
            (rabbit_mirror_queue_master, State) ->
                %% We've become a master. State is now opaque to
                %% us. When we became master, if Pid was still known
                %% to us then we'd have set up monitoring of it then,
                %% so this is now a noop.
                State
        end,
    %% Note that we do not remove our knowledge of this ChPid until we
    %% get the sender_death from GM as well as a DOWN notification.
    {ok, _TRef} = timer:apply_after(
                    ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue,
                    [self(), rabbit_mirror_queue_master, Fun]),
    ok.

%% True only when we have heard of the channel's death from BOTH sides
%% (one notification recorded, the other just arrived, and they differ).
forget_sender(_, running)                        -> false;
forget_sender(down_from_gm, down_from_gm)        -> false; %% [1]
forget_sender(down_from_ch, down_from_ch)        -> false;
forget_sender(Down1, Down2) when Down1 =/= Down2 -> true.

%% [1] If another mirror goes through confirm_sender_death/1 before we
%% do we can get two GM sender_death messages in a row for the same
%% channel - don't treat that as anything special.

%% Record and process lifetime events from channels. Forget all about a channel
%% only when down notifications are received from both the channel and from gm.
+%% Record a lifecycle event (down_from_gm | down_from_ch) for ChPid and,
+%% once we have heard about the death from both sides (see
+%% forget_sender/2), drop the channel's pending delivery queue, its
+%% message statuses and its monitor.
+maybe_forget_sender(ChPid, ChState, State = #state { sender_queues = SQ,
+                                                     msg_id_status = MS,
+                                                     known_senders = KS }) ->
+    case maps:find(ChPid, SQ) of
+        error ->
+            State;
+        {ok, {MQ, PendCh, ChStateRecord}} ->
+            case forget_sender(ChState, ChStateRecord) of
+                true ->
+                    credit_flow:peer_down(ChPid),
+                    State #state { sender_queues = maps:remove(ChPid, SQ),
+                                   msg_id_status = lists:foldl(
+                                                     fun maps:remove/2,
+                                                     MS, sets:to_list(PendCh)),
+                                   known_senders = pmon:demonitor(ChPid, KS) };
+                false ->
+                    SQ1 = maps:put(ChPid, {MQ, PendCh, ChState}, SQ),
+                    State #state { sender_queues = SQ1 }
+            end
+    end.
+
+%% A delivery arrived directly from a channel. If the matching GM
+%% instruction has not been seen yet, park the delivery in the sender's
+%% queue; otherwise the publish/discard already happened and we can
+%% handle confirms immediately.
+maybe_enqueue_message(
+  Delivery = #delivery { message = #basic_message { id = MsgId },
+                         sender  = ChPid },
+  State = #state { sender_queues = SQ, msg_id_status = MS }) ->
+    send_mandatory(Delivery), %% must do this before confirms
+    State1 = ensure_monitoring(ChPid, State),
+    %% We will never see {published, ChPid, MsgSeqNo} here.
+    case maps:find(MsgId, MS) of
+        error ->
+            {MQ, PendingCh, ChState} = get_sender_queue(ChPid, SQ),
+            MQ1 = queue:in(Delivery, MQ),
+            SQ1 = maps:put(ChPid, {MQ1, PendingCh, ChState}, SQ),
+            State1 #state { sender_queues = SQ1 };
+        {ok, Status} ->
+            MS1 = send_or_record_confirm(
+                    Status, Delivery, maps:remove(MsgId, MS), State1),
+            SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
+            State1 #state { msg_id_status = MS1,
+                            sender_queues = SQ1 }
+    end.
+
+%% Fetch the {PendingDeliveries, PendingMsgIds, ChState} triple for a
+%% channel, defaulting to an empty, 'running' entry.
+get_sender_queue(ChPid, SQ) ->
+    case maps:find(ChPid, SQ) of
+        error     -> {queue:new(), sets:new(), running};
+        {ok, Val} -> Val
+    end.
+
+remove_from_pending_ch(MsgId, ChPid, SQ) ->
+    case maps:find(ChPid, SQ) of
+        error ->
+            SQ;
+        {ok, {MQ, PendingCh, ChState}} ->
+            maps:put(ChPid, {MQ, sets:del_element(MsgId, PendingCh), ChState},
+                     SQ)
+    end.
+
+publish_or_discard(Status, ChPid, MsgId,
+                   State = #state { sender_queues = SQ, msg_id_status = MS }) ->
+    %% We really are going to do the publish/discard right now, even
+    %% though we may not have seen it directly from the channel. But
+    %% we cannot issue confirms until the latter has happened. So we
+    %% need to keep track of the MsgId and its confirmation status in
+    %% the meantime.
+    State1 = ensure_monitoring(ChPid, State),
+    {MQ, PendingCh, ChState} = get_sender_queue(ChPid, SQ),
+    {MQ1, PendingCh1, MS1} =
+        case queue:out(MQ) of
+            {empty, _MQ2} ->
+                {MQ, sets:add_element(MsgId, PendingCh),
+                 maps:put(MsgId, Status, MS)};
+            {{value, Delivery = #delivery {
+                       message = #basic_message { id = MsgId } }}, MQ2} ->
+                {MQ2, PendingCh,
+                 %% We received the msg from the channel first. Thus
+                 %% we need to deal with confirms here.
+                 send_or_record_confirm(Status, Delivery, MS, State1)};
+            {{value, #delivery {}}, _MQ2} ->
+                %% The instruction was sent to us before we were
+                %% within the slave_pids within the #amqqueue{}
+                %% record. We'll never receive the message directly
+                %% from the channel. And the channel will not be
+                %% expecting any confirms from us.
+                {MQ, PendingCh, MS}
+        end,
+    SQ1 = maps:put(ChPid, {MQ1, PendingCh1, ChState}, SQ),
+    State1 #state { sender_queues = SQ1, msg_id_status = MS1 }.
+
+
+%% Apply one replicated instruction received from the master over GM.
+%% Returns {ok, State} for normal instructions and {stop, State} for
+%% delete_and_terminate.
+process_instruction({publish, ChPid, Flow, MsgProps,
+                     Msg = #basic_message { id = MsgId }}, State) ->
+    maybe_flow_ack(ChPid, Flow),
+    State1 = #state { backing_queue = BQ, backing_queue_state = BQS } =
+        publish_or_discard(published, ChPid, MsgId, State),
+    BQS1 = BQ:publish(Msg, MsgProps, true, ChPid, Flow, BQS),
+    {ok, State1 #state { backing_queue_state = BQS1 }};
+process_instruction({batch_publish, ChPid, Flow, Publishes}, State) ->
+    maybe_flow_ack(ChPid, Flow),
+    State1 = #state { backing_queue = BQ, backing_queue_state = BQS } =
+        lists:foldl(fun ({#basic_message { id = MsgId },
+                          _MsgProps, _IsDelivered}, St) ->
+                            publish_or_discard(published, ChPid, MsgId, St)
+                    end, State, Publishes),
+    BQS1 = BQ:batch_publish(Publishes, ChPid, Flow, BQS),
+    {ok, State1 #state { backing_queue_state = BQS1 }};
+process_instruction({publish_delivered, ChPid, Flow, MsgProps,
+                     Msg = #basic_message { id = MsgId }}, State) ->
+    maybe_flow_ack(ChPid, Flow),
+    State1 = #state { backing_queue = BQ, backing_queue_state = BQS } =
+        publish_or_discard(published, ChPid, MsgId, State),
+    %% publish_delivered is only valid on an empty queue.
+    true = BQ:is_empty(BQS),
+    {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, Flow, BQS),
+    {ok, maybe_store_ack(true, MsgId, AckTag,
+                         State1 #state { backing_queue_state = BQS1 })};
+process_instruction({batch_publish_delivered, ChPid, Flow, Publishes}, State) ->
+    maybe_flow_ack(ChPid, Flow),
+    {MsgIds,
+     State1 = #state { backing_queue = BQ, backing_queue_state = BQS }} =
+        lists:foldl(fun ({#basic_message { id = MsgId }, _MsgProps},
+                         {MsgIds, St}) ->
+                            {[MsgId | MsgIds],
+                             publish_or_discard(published, ChPid, MsgId, St)}
+                    end, {[], State}, Publishes),
+    true = BQ:is_empty(BQS),
+    {AckTags, BQS1} = BQ:batch_publish_delivered(Publishes, ChPid, Flow, BQS),
+    %% MsgIds was accumulated in reverse; re-pair each id with its ack tag.
+    MsgIdsAndAcks = lists:zip(lists:reverse(MsgIds), AckTags),
+    State2 = lists:foldl(
+               fun ({MsgId, AckTag}, St) ->
+                       maybe_store_ack(true, MsgId, AckTag, St)
+               end, State1 #state { backing_queue_state = BQS1 },
+               MsgIdsAndAcks),
+    {ok, State2};
+process_instruction({discard, ChPid, Flow, MsgId}, State) ->
+    maybe_flow_ack(ChPid, Flow),
+    State1 = #state { backing_queue = BQ, backing_queue_state = BQS } =
+        publish_or_discard(discarded, ChPid, MsgId, State),
+    BQS1 = BQ:discard(MsgId, ChPid, Flow, BQS),
+    {ok, State1 #state { backing_queue_state = BQS1 }};
+process_instruction({drop, Length, Dropped, AckRequired},
+                    State = #state { backing_queue = BQ,
+                                     backing_queue_state = BQS }) ->
+    QLen = BQ:len(BQS),
+    %% Drop enough messages to reach the master's post-drop length.
+    ToDrop = case QLen - Length of
+                 N when N > 0 -> N;
+                 _            -> 0
+             end,
+    State1 = lists:foldl(
+               fun (const, StateN = #state{backing_queue_state = BQSN}) ->
+                       {{MsgId, AckTag}, BQSN1} = BQ:drop(AckRequired, BQSN),
+                       maybe_store_ack(
+                         AckRequired, MsgId, AckTag,
+                         StateN #state { backing_queue_state = BQSN1 })
+               end, State, lists:duplicate(ToDrop, const)),
+    {ok, case AckRequired of
+             true  -> State1;
+             false -> update_delta(ToDrop - Dropped, State1)
+         end};
+process_instruction({ack, MsgIds},
+                    State = #state { backing_queue       = BQ,
+                                     backing_queue_state = BQS,
+                                     msg_id_ack          = MA }) ->
+    {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA),
+    {MsgIds1, BQS1} = BQ:ack(AckTags, BQS),
+    [] = MsgIds1 -- MsgIds, %% ASSERTION
+    {ok, update_delta(length(MsgIds1) - length(MsgIds),
+                      State #state { msg_id_ack          = MA1,
+                                     backing_queue_state = BQS1 })};
+process_instruction({requeue, MsgIds},
+                    State = #state { backing_queue       = BQ,
+                                     backing_queue_state = BQS,
+                                     msg_id_ack          = MA }) ->
+    {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA),
+    {_MsgIds, BQS1} = BQ:requeue(AckTags, BQS),
+    {ok, State #state { msg_id_ack          = MA1,
+                        backing_queue_state = BQS1 }};
+process_instruction({sender_death, ChPid},
+                    State = #state { known_senders = KS }) ->
+    %% The channel will be monitored iff we have received a message
+    %% from it. In this case we just want to avoid doing work if we
+    %% never got any messages.
+    {ok, case pmon:is_monitored(ChPid, KS) of
+             false -> State;
+             true  -> maybe_forget_sender(ChPid, down_from_gm, State)
+         end};
+process_instruction({depth, Depth},
+                    State = #state { backing_queue = BQ,
+                                     backing_queue_state = BQS }) ->
+    {ok, set_delta(Depth - BQ:depth(BQS), State)};
+
+process_instruction({delete_and_terminate, Reason},
+                    State = #state { backing_queue = BQ,
+                                     backing_queue_state = BQS }) ->
+    BQ:delete_and_terminate(Reason, BQS),
+    {stop, State #state { backing_queue_state = undefined }};
+process_instruction({set_queue_mode, Mode},
+                    State = #state { backing_queue = BQ,
+                                     backing_queue_state = BQS }) ->
+    BQS1 = BQ:set_queue_mode(Mode, BQS),
+    {ok, State #state { backing_queue_state = BQS1 }}.
+
+maybe_flow_ack(Sender, flow)    -> credit_flow:ack(Sender);
+maybe_flow_ack(_Sender, noflow) -> ok.
+
+%% Translate master-assigned message ids into our local ack tags,
+%% removing them from the pending-ack map as we go. Ids we never knew
+%% about are silently skipped.
+msg_ids_to_acktags(MsgIds, MA) ->
+    {AckTags, MA1} =
+        lists:foldl(
+          fun (MsgId, {Acc, MAN}) ->
+                  case maps:find(MsgId, MA) of
+                      error        -> {Acc, MAN};
+                      {ok, AckTag} -> {[AckTag | Acc], maps:remove(MsgId, MAN)}
+                  end
+          end, {[], MA}, MsgIds),
+    {lists:reverse(AckTags), MA1}.
+
+maybe_store_ack(false, _MsgId, _AckTag, State) ->
+    State;
+maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA }) ->
+    State #state { msg_id_ack = maps:put(MsgId, AckTag, MA) }.
+
+%% Record the (re)computed depth delta relative to the master; a delta
+%% reaching 0 for the first time means this mirror has just become
+%% synchronised, which must be recorded in Mnesia.
+set_delta(0,        State = #state { depth_delta = undefined }) ->
+    ok = record_synchronised(State#state.q),
+    State #state { depth_delta = 0 };
+set_delta(NewDelta, State = #state { depth_delta = undefined }) ->
+    true = NewDelta > 0, %% assertion
+    State #state { depth_delta = NewDelta };
+set_delta(NewDelta, State = #state { depth_delta = Delta }) ->
+    update_delta(NewDelta - Delta, State).
+
+%% Adjust the mirror's view of how far behind the master it is.
+%% depth_delta = undefined means we have not yet learnt the master's
+%% depth; 0 means fully synchronised.
+update_delta(_DeltaChange, State = #state { depth_delta = undefined }) ->
+    State;
+update_delta( DeltaChange, State = #state { depth_delta = 0 }) ->
+    0 = DeltaChange, %% assertion: we cannot become unsync'ed
+    State;
+update_delta( DeltaChange, State = #state { depth_delta = Delta }) ->
+    true = DeltaChange =< 0, %% assertion: we cannot become 'less' sync'ed
+    set_delta(Delta + DeltaChange, State #state { depth_delta = undefined }).
+
+%% Report our current RAM duration to the memory monitor and apply the
+%% target duration it hands back to the backing queue.
+update_ram_duration(BQ, BQS) ->
+    {RamDuration, BQS1} = BQ:ram_duration(BQS),
+    DesiredDuration =
+        rabbit_memory_monitor:report_ram_duration(self(), RamDuration),
+    BQ:set_ram_duration_target(DesiredDuration, BQS1).
+
+%% Mark this mirror as synchronised by adding ourselves to the queue's
+%% sync_slave_pids in Mnesia; afterwards possibly allow the master to
+%% be dropped if policy permits.
+record_synchronised(Q0) when ?is_amqqueue(Q0) ->
+    QName = amqqueue:get_name(Q0),
+    Self = self(),
+    F = fun () ->
+                case mnesia:read({rabbit_queue, QName}) of
+                    [] ->
+                        ok;
+                    [Q1] when ?is_amqqueue(Q1) ->
+                        SSPids = amqqueue:get_sync_slave_pids(Q1),
+                        SSPids1 = [Self | SSPids],
+                        Q2 = amqqueue:set_sync_slave_pids(Q1, SSPids1),
+                        rabbit_mirror_queue_misc:store_updated_slaves(Q2),
+                        {ok, Q2}
+                end
+        end,
+    case rabbit_misc:execute_mnesia_transaction(F) of
+        ok       -> ok;
+        {ok, Q2} -> rabbit_mirror_queue_misc:maybe_drop_master_after_sync(Q2)
+    end.
diff --git a/deps/rabbit/src/rabbit_mirror_queue_sync.erl b/deps/rabbit/src/rabbit_mirror_queue_sync.erl
new file mode 100644
index 0000000000..a82ee05599
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mirror_queue_sync.erl
@@ -0,0 +1,420 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_sync).
+
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-export([master_prepare/4, master_go/8, slave/7, conserve_resources/3]).
+
+%% Minimum interval (microseconds) between sync progress log/stats emissions.
+-define(SYNC_PROGRESS_INTERVAL, 1000000).
+ +%% There are three processes around, the master, the syncer and the +%% slave(s). The syncer is an intermediary, linked to the master in +%% order to make sure we do not mess with the master's credit flow or +%% set of monitors. +%% +%% Interactions +%% ------------ +%% +%% '*' indicates repeating messages. All are standard Erlang messages +%% except sync_start which is sent over GM to flush out any other +%% messages that we might have sent that way already. (credit) is the +%% usual credit_flow bump message every so often. +%% +%% Master Syncer Slave(s) +%% sync_mirrors -> || || +%% || -- (spawns) --> || || +%% || --------- sync_start (over GM) -------> || +%% || || <--- sync_ready ---- || +%% || || (or) || +%% || || <--- sync_deny ----- || +%% || <--- ready ---- || || +%% || <--- next* ---- || || } +%% || ---- msg* ----> || || } loop +%% || || ---- sync_msgs* ---> || } +%% || || <--- (credit)* ----- || } +%% || <--- next ---- || || +%% || ---- done ----> || || +%% || || -- sync_complete --> || +%% || (Dies) || + +-type log_fun() :: fun ((string(), [any()]) -> 'ok'). +-type bq() :: atom(). +-type bqs() :: any(). +-type ack() :: any(). +-type slave_sync_state() :: {[{rabbit_types:msg_id(), ack()}], timer:tref(), + bqs()}. + +%% --------------------------------------------------------------------------- +%% Master + +-spec master_prepare(reference(), rabbit_amqqueue:name(), + log_fun(), [pid()]) -> pid(). + +master_prepare(Ref, QName, Log, SPids) -> + MPid = self(), + spawn_link(fun () -> + ?store_proc_name(QName), + syncer(Ref, Log, MPid, SPids) + end). + +-spec master_go(pid(), reference(), log_fun(), + rabbit_mirror_queue_master:stats_fun(), + rabbit_mirror_queue_master:stats_fun(), + non_neg_integer(), + bq(), bqs()) -> + {'already_synced', bqs()} | {'ok', bqs()} | + {'cancelled', bqs()} | + {'shutdown', any(), bqs()} | + {'sync_died', any(), bqs()}. 
+ +master_go(Syncer, Ref, Log, HandleInfo, EmitStats, SyncBatchSize, BQ, BQS) -> + Args = {Syncer, Ref, Log, HandleInfo, EmitStats, rabbit_misc:get_parent()}, + receive + {'EXIT', Syncer, normal} -> {already_synced, BQS}; + {'EXIT', Syncer, Reason} -> {sync_died, Reason, BQS}; + {ready, Syncer} -> EmitStats({syncing, 0}), + master_batch_go0(Args, SyncBatchSize, + BQ, BQS) + end. + +master_batch_go0(Args, BatchSize, BQ, BQS) -> + FoldFun = + fun (Msg, MsgProps, Unacked, Acc) -> + Acc1 = append_to_acc(Msg, MsgProps, Unacked, Acc), + case maybe_master_batch_send(Acc1, BatchSize) of + true -> master_batch_send(Args, Acc1); + false -> {cont, Acc1} + end + end, + FoldAcc = {[], 0, {0, BQ:depth(BQS)}, erlang:monotonic_time()}, + bq_fold(FoldFun, FoldAcc, Args, BQ, BQS). + +master_batch_send({Syncer, Ref, Log, HandleInfo, EmitStats, Parent}, + {Batch, I, {Curr, Len}, Last}) -> + T = maybe_emit_stats(Last, I, EmitStats, Log), + HandleInfo({syncing, I}), + handle_set_maximum_since_use(), + SyncMsg = {msgs, Ref, lists:reverse(Batch)}, + NewAcc = {[], I + length(Batch), {Curr, Len}, T}, + master_send_receive(SyncMsg, NewAcc, Syncer, Ref, Parent). + +%% Either send messages when we reach the last one in the queue or +%% whenever we have accumulated BatchSize messages. +maybe_master_batch_send({_, _, {Len, Len}, _}, _BatchSize) -> + true; +maybe_master_batch_send({_, _, {Curr, _Len}, _}, BatchSize) + when Curr rem BatchSize =:= 0 -> + true; +maybe_master_batch_send(_Acc, _BatchSize) -> + false. + +bq_fold(FoldFun, FoldAcc, Args, BQ, BQS) -> + case BQ:fold(FoldFun, FoldAcc, BQS) of + {{shutdown, Reason}, BQS1} -> {shutdown, Reason, BQS1}; + {{sync_died, Reason}, BQS1} -> {sync_died, Reason, BQS1}; + {_, BQS1} -> master_done(Args, BQS1) + end. + +append_to_acc(Msg, MsgProps, Unacked, {Batch, I, {Curr, Len}, T}) -> + {[{Msg, MsgProps, Unacked} | Batch], I, {Curr + 1, Len}, T}. 
+ +master_send_receive(SyncMsg, NewAcc, Syncer, Ref, Parent) -> + receive + {'$gen_call', From, + cancel_sync_mirrors} -> stop_syncer(Syncer, {cancel, Ref}), + gen_server2:reply(From, ok), + {stop, cancelled}; + {next, Ref} -> Syncer ! SyncMsg, + {cont, NewAcc}; + {'EXIT', Parent, Reason} -> {stop, {shutdown, Reason}}; + {'EXIT', Syncer, Reason} -> {stop, {sync_died, Reason}} + end. + +master_done({Syncer, Ref, _Log, _HandleInfo, _EmitStats, Parent}, BQS) -> + receive + {'$gen_call', From, + cancel_sync_mirrors} -> + stop_syncer(Syncer, {cancel, Ref}), + gen_server2:reply(From, ok), + {cancelled, BQS}; + {cancelled, Ref} -> + {cancelled, BQS}; + {next, Ref} -> + stop_syncer(Syncer, {done, Ref}), + {ok, BQS}; + {'EXIT', Parent, Reason} -> + {shutdown, Reason, BQS}; + {'EXIT', Syncer, Reason} -> + {sync_died, Reason, BQS} + end. + +stop_syncer(Syncer, Msg) -> + unlink(Syncer), + Syncer ! Msg, + receive {'EXIT', Syncer, _} -> ok + after 0 -> ok + end. + +maybe_emit_stats(Last, I, EmitStats, Log) -> + Interval = erlang:convert_time_unit( + erlang:monotonic_time() - Last, native, micro_seconds), + case Interval > ?SYNC_PROGRESS_INTERVAL of + true -> EmitStats({syncing, I}), + Log("~p messages", [I]), + erlang:monotonic_time(); + false -> Last + end. + +handle_set_maximum_since_use() -> + receive + {'$gen_cast', {set_maximum_since_use, Age}} -> + ok = file_handle_cache:set_maximum_since_use(Age) + after 0 -> + ok + end. + +%% Master +%% --------------------------------------------------------------------------- +%% Syncer + +syncer(Ref, Log, MPid, SPids) -> + [erlang:monitor(process, SPid) || SPid <- SPids], + %% We wait for a reply from the mirrors so that we know they are in + %% a receive block and will thus receive messages we send to them + %% *without* those messages ending up in their gen_server2 pqueue. + case await_slaves(Ref, SPids) of + [] -> Log("all mirrors already synced", []); + SPids1 -> MPid ! 
{ready, self()}, + Log("mirrors ~p to sync", [[node(SPid) || SPid <- SPids1]]), + syncer_check_resources(Ref, MPid, SPids1) + end. + +await_slaves(Ref, SPids) -> + [SPid || SPid <- SPids, + rabbit_mnesia:on_running_node(SPid) andalso %% [0] + receive + {sync_ready, Ref, SPid} -> true; + {sync_deny, Ref, SPid} -> false; + {'DOWN', _, process, SPid, _} -> false + end]. +%% [0] This check is in case there's been a partition which has then +%% healed in between the master retrieving the mirror pids from Mnesia +%% and sending 'sync_start' over GM. If so there might be mirrors on the +%% other side of the partition which we can monitor (since they have +%% rejoined the distributed system with us) but which did not get the +%% 'sync_start' and so will not reply. We need to act as though they are +%% down. + +syncer_check_resources(Ref, MPid, SPids) -> + rabbit_alarm:register(self(), {?MODULE, conserve_resources, []}), + %% Before we ask the master node to send the first batch of messages + %% over here, we check if one node is already short on memory. If + %% that's the case, we wait for the alarm to be cleared before + %% starting the syncer loop. + AlarmedNodes = lists:any( + fun + ({{resource_limit, memory, _}, _}) -> true; + ({_, _}) -> false + end, rabbit_alarm:get_alarms()), + if + not AlarmedNodes -> + MPid ! {next, Ref}, + syncer_loop(Ref, MPid, SPids); + true -> + case wait_for_resources(Ref, SPids) of + cancel -> MPid ! {cancelled, Ref}; + SPids1 -> MPid ! {next, Ref}, + syncer_loop(Ref, MPid, SPids1) + end + end. + +syncer_loop(Ref, MPid, SPids) -> + receive + {conserve_resources, memory, true} -> + case wait_for_resources(Ref, SPids) of + cancel -> MPid ! {cancelled, Ref}; + SPids1 -> syncer_loop(Ref, MPid, SPids1) + end; + {conserve_resources, _, _} -> + %% Ignore other alerts. + syncer_loop(Ref, MPid, SPids); + {msgs, Ref, Msgs} -> + SPids1 = wait_for_credit(SPids), + case SPids1 of + [] -> + % Die silently because there are no mirrors left. 
+ ok; + _ -> + broadcast(SPids1, {sync_msgs, Ref, Msgs}), + MPid ! {next, Ref}, + syncer_loop(Ref, MPid, SPids1) + end; + {cancel, Ref} -> + %% We don't tell the mirrors we will die - so when we do + %% they interpret that as a failure, which is what we + %% want. + ok; + {done, Ref} -> + [SPid ! {sync_complete, Ref} || SPid <- SPids] + end. + +broadcast(SPids, Msg) -> + [begin + credit_flow:send(SPid), + SPid ! Msg + end || SPid <- SPids]. + +conserve_resources(Pid, Source, {_, Conserve, _}) -> + Pid ! {conserve_resources, Source, Conserve}, + ok. + +wait_for_credit(SPids) -> + case credit_flow:blocked() of + true -> receive + {bump_credit, Msg} -> + credit_flow:handle_bump_msg(Msg), + wait_for_credit(SPids); + {'DOWN', _, process, SPid, _} -> + credit_flow:peer_down(SPid), + wait_for_credit(lists:delete(SPid, SPids)) + end; + false -> SPids + end. + +wait_for_resources(Ref, SPids) -> + receive + {conserve_resources, memory, false} -> + SPids; + {conserve_resources, _, _} -> + %% Ignore other alerts. + wait_for_resources(Ref, SPids); + {cancel, Ref} -> + %% We don't tell the mirrors we will die - so when we do + %% they interpret that as a failure, which is what we + %% want. + cancel; + {'DOWN', _, process, SPid, _} -> + credit_flow:peer_down(SPid), + SPids1 = wait_for_credit(lists:delete(SPid, SPids)), + wait_for_resources(Ref, SPids1) + end. + +%% Syncer +%% --------------------------------------------------------------------------- +%% Slave + +-spec slave(non_neg_integer(), reference(), timer:tref(), pid(), + bq(), bqs(), fun((bq(), bqs()) -> {timer:tref(), bqs()})) -> + 'denied' | + {'ok' | 'failed', slave_sync_state()} | + {'stop', any(), slave_sync_state()}. + +slave(0, Ref, _TRef, Syncer, _BQ, _BQS, _UpdateRamDuration) -> + Syncer ! {sync_deny, Ref, self()}, + denied; + +slave(_DD, Ref, TRef, Syncer, BQ, BQS, UpdateRamDuration) -> + MRef = erlang:monitor(process, Syncer), + Syncer ! 
{sync_ready, Ref, self()}, + {_MsgCount, BQS1} = BQ:purge(BQ:purge_acks(BQS)), + slave_sync_loop({Ref, MRef, Syncer, BQ, UpdateRamDuration, + rabbit_misc:get_parent()}, {[], TRef, BQS1}). + +slave_sync_loop(Args = {Ref, MRef, Syncer, BQ, UpdateRamDuration, Parent}, + State = {MA, TRef, BQS}) -> + receive + {'DOWN', MRef, process, Syncer, _Reason} -> + %% If the master dies half way we are not in the usual + %% half-synced state (with messages nearer the tail of the + %% queue); instead we have ones nearer the head. If we then + %% sync with a newly promoted master, or even just receive + %% messages from it, we have a hole in the middle. So the + %% only thing to do here is purge. + {_MsgCount, BQS1} = BQ:purge(BQ:purge_acks(BQS)), + credit_flow:peer_down(Syncer), + {failed, {[], TRef, BQS1}}; + {bump_credit, Msg} -> + credit_flow:handle_bump_msg(Msg), + slave_sync_loop(Args, State); + {sync_complete, Ref} -> + erlang:demonitor(MRef, [flush]), + credit_flow:peer_down(Syncer), + {ok, State}; + {'$gen_cast', {set_maximum_since_use, Age}} -> + ok = file_handle_cache:set_maximum_since_use(Age), + slave_sync_loop(Args, State); + {'$gen_cast', {set_ram_duration_target, Duration}} -> + BQS1 = BQ:set_ram_duration_target(Duration, BQS), + slave_sync_loop(Args, {MA, TRef, BQS1}); + {'$gen_cast', {run_backing_queue, Mod, Fun}} -> + BQS1 = BQ:invoke(Mod, Fun, BQS), + slave_sync_loop(Args, {MA, TRef, BQS1}); + update_ram_duration -> + {TRef1, BQS1} = UpdateRamDuration(BQ, BQS), + slave_sync_loop(Args, {MA, TRef1, BQS1}); + {sync_msgs, Ref, Batch} -> + credit_flow:ack(Syncer), + {MA1, BQS1} = process_batch(Batch, MA, BQ, BQS), + slave_sync_loop(Args, {MA1, TRef, BQS1}); + {'EXIT', Parent, Reason} -> + {stop, Reason, State}; + %% If the master throws an exception + {'$gen_cast', {gm, {delete_and_terminate, Reason}}} -> + BQ:delete_and_terminate(Reason, BQS), + {stop, Reason, {[], TRef, undefined}} + end. + +%% We are partitioning messages by the Unacked element in the tuple. 
+%% when unacked = true, then it's a publish_delivered message, +%% otherwise it's a publish message. +%% +%% Note that we can't first partition the batch and then publish each +%% part, since that would result in re-ordering messages, which we +%% don't want to do. +process_batch([], MA, _BQ, BQS) -> + {MA, BQS}; +process_batch(Batch, MA, BQ, BQS) -> + {_Msg, _MsgProps, Unacked} = hd(Batch), + process_batch(Batch, Unacked, [], MA, BQ, BQS). + +process_batch([{Msg, Props, true = Unacked} | Rest], true = Unacked, + Acc, MA, BQ, BQS) -> + %% publish_delivered messages don't need the IsDelivered flag, + %% therefore we just add {Msg, Props} to the accumulator. + process_batch(Rest, Unacked, [{Msg, props(Props)} | Acc], + MA, BQ, BQS); +process_batch([{Msg, Props, false = Unacked} | Rest], false = Unacked, + Acc, MA, BQ, BQS) -> + %% publish messages needs the IsDelivered flag which is set to true + %% here. + process_batch(Rest, Unacked, [{Msg, props(Props), true} | Acc], + MA, BQ, BQS); +process_batch(Batch, Unacked, Acc, MA, BQ, BQS) -> + {MA1, BQS1} = publish_batch(Unacked, lists:reverse(Acc), MA, BQ, BQS), + process_batch(Batch, MA1, BQ, BQS1). + +%% Unacked msgs are published via batch_publish. +publish_batch(false, Batch, MA, BQ, BQS) -> + batch_publish(Batch, MA, BQ, BQS); +%% Acked msgs are published via batch_publish_delivered. +publish_batch(true, Batch, MA, BQ, BQS) -> + batch_publish_delivered(Batch, MA, BQ, BQS). + + +batch_publish(Batch, MA, BQ, BQS) -> + BQS1 = BQ:batch_publish(Batch, none, noflow, BQS), + {MA, BQS1}. + +batch_publish_delivered(Batch, MA, BQ, BQS) -> + {AckTags, BQS1} = BQ:batch_publish_delivered(Batch, none, noflow, BQS), + MA1 = BQ:zip_msgs_and_acks(Batch, AckTags, MA, BQS1), + {MA1, BQS1}. + +props(Props) -> + Props#message_properties{needs_confirming = false}. 
diff --git a/deps/rabbit/src/rabbit_mnesia.erl b/deps/rabbit/src/rabbit_mnesia.erl new file mode 100644 index 0000000000..070c6a8205 --- /dev/null +++ b/deps/rabbit/src/rabbit_mnesia.erl @@ -0,0 +1,1117 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_mnesia). + +-export([%% Main interface + init/0, + join_cluster/2, + reset/0, + force_reset/0, + update_cluster_nodes/1, + change_cluster_node_type/1, + forget_cluster_node/2, + force_load_next_boot/0, + + %% Various queries to get the status of the db + status/0, + is_clustered/0, + on_running_node/1, + is_process_alive/1, + is_registered_process_alive/1, + cluster_nodes/1, + node_type/0, + dir/0, + cluster_status_from_mnesia/0, + + %% Operations on the db and utils, mainly used in `rabbit_upgrade' and `rabbit' + init_db_unchecked/2, + copy_db/1, + check_cluster_consistency/0, + ensure_mnesia_dir/0, + + %% Hooks used in `rabbit_node_monitor' + on_node_up/1, + on_node_down/1, + + %% Helpers for diagnostics commands + schema_info/1 + ]). + +%% Used internally in rpc calls +-export([node_info/0, remove_node_if_mnesia_running/1]). + +-ifdef(TEST). +-compile(export_all). +-export([init_with_lock/3]). +-endif. + +%%---------------------------------------------------------------------------- + +-export_type([node_type/0, cluster_status/0]). + +-type node_type() :: disc | ram. +-type cluster_status() :: {[node()], [node()], [node()]}. + +%%---------------------------------------------------------------------------- +%% Main interface +%%---------------------------------------------------------------------------- + +-spec init() -> 'ok'. 
+
+%% Boot-time entry point. A virgin node (empty database directory) runs
+%% peer discovery - under a lock when the backend supports one - to join
+%% an existing cluster or start from scratch; a non-virgin node just
+%% (re)initialises its database from the known cluster members.
+init() ->
+    ensure_mnesia_running(),
+    ensure_mnesia_dir(),
+    case is_virgin_node() of
+        true ->
+            rabbit_log:info("Node database directory at ~ts is empty. "
+                            "Assuming we need to join an existing cluster or initialise from scratch...~n",
+                            [dir()]),
+            rabbit_peer_discovery:log_configured_backend(),
+            rabbit_peer_discovery:maybe_init(),
+            init_with_lock();
+        false ->
+            NodeType = node_type(),
+            init_db_and_upgrade(cluster_nodes(all), NodeType,
+                                NodeType =:= ram, _Retry = true),
+            rabbit_peer_discovery:maybe_init(),
+            rabbit_peer_discovery:maybe_register()
+    end,
+    %% We intuitively expect the global name server to be synced when
+    %% Mnesia is up. In fact that's not guaranteed to be the case -
+    %% let's make it so.
+    ok = rabbit_node_monitor:global_sync(),
+    ok.
+
+init_with_lock() ->
+    {Retries, Timeout} = rabbit_peer_discovery:locking_retry_timeout(),
+    init_with_lock(Retries, Timeout, fun run_peer_discovery/0).
+
+%% Run peer discovery while holding the discovery backend's startup
+%% lock, retrying acquisition up to Retries times with Timeout ms
+%% between attempts. On exhaustion the behaviour depends on the
+%% configured lock_acquisition_failure_mode (ignore | fail).
+init_with_lock(0, _, RunPeerDiscovery) ->
+    case rabbit_peer_discovery:lock_acquisition_failure_mode() of
+        ignore ->
+            rabbit_log:warning("Could not acquire a peer discovery lock, out of retries", []),
+            RunPeerDiscovery(),
+            rabbit_peer_discovery:maybe_register();
+        fail ->
+            exit(cannot_acquire_startup_lock)
+    end;
+init_with_lock(Retries, Timeout, RunPeerDiscovery) ->
+    LockResult = rabbit_peer_discovery:lock(),
+    rabbit_log:debug("rabbit_peer_discovery:lock returned ~p", [LockResult]),
+    case LockResult of
+        not_supported ->
+            rabbit_log:info("Peer discovery backend does not support locking, falling back to randomized delay"),
+            %% See rabbitmq/rabbitmq-server#1202 for details.
+            rabbit_peer_discovery:maybe_inject_randomized_delay(),
+            RunPeerDiscovery(),
+            rabbit_peer_discovery:maybe_register();
+        {error, _Reason} ->
+            timer:sleep(Timeout),
+            init_with_lock(Retries - 1, Timeout, RunPeerDiscovery);
+        {ok, Data} ->
+            %% Always release the lock, even if discovery crashes.
+            try
+                RunPeerDiscovery(),
+                rabbit_peer_discovery:maybe_register()
+            after
+                rabbit_peer_discovery:unlock(Data)
+            end
+    end.
+ +-spec run_peer_discovery() -> ok | {[node()], node_type()}. +run_peer_discovery() -> + {RetriesLeft, DelayInterval} = rabbit_peer_discovery:discovery_retries(), + run_peer_discovery_with_retries(RetriesLeft, DelayInterval). + +-spec run_peer_discovery_with_retries(non_neg_integer(), non_neg_integer()) -> ok | {[node()], node_type()}. +run_peer_discovery_with_retries(0, _DelayInterval) -> + ok; +run_peer_discovery_with_retries(RetriesLeft, DelayInterval) -> + FindBadNodeNames = fun + (Name, BadNames) when is_atom(Name) -> BadNames; + (Name, BadNames) -> [Name | BadNames] + end, + {DiscoveredNodes0, NodeType} = + case rabbit_peer_discovery:discover_cluster_nodes() of + {error, Reason} -> + RetriesLeft1 = RetriesLeft - 1, + rabbit_log:error("Peer discovery returned an error: ~p. Will retry after a delay of ~b ms, ~b retries left...", + [Reason, DelayInterval, RetriesLeft1]), + timer:sleep(DelayInterval), + run_peer_discovery_with_retries(RetriesLeft1, DelayInterval); + {ok, {Nodes, Type} = Config} + when is_list(Nodes) andalso (Type == disc orelse Type == disk orelse Type == ram) -> + case lists:foldr(FindBadNodeNames, [], Nodes) of + [] -> Config; + BadNames -> e({invalid_cluster_node_names, BadNames}) + end; + {ok, {_, BadType}} when BadType /= disc andalso BadType /= ram -> + e({invalid_cluster_node_type, BadType}); + {ok, _} -> + e(invalid_cluster_nodes_conf) + end, + DiscoveredNodes = lists:usort(DiscoveredNodes0), + rabbit_log:info("All discovered existing cluster peers: ~s~n", + [rabbit_peer_discovery:format_discovered_nodes(DiscoveredNodes)]), + Peers = nodes_excl_me(DiscoveredNodes), + case Peers of + [] -> + rabbit_log:info("Discovered no peer nodes to cluster with. " + "Some discovery backends can filter nodes out based on a readiness criteria. 
" + "Enabling debug logging might help troubleshoot."), + init_db_and_upgrade([node()], disc, false, _Retry = true); + _ -> + rabbit_log:info("Peer nodes we can cluster with: ~s~n", + [rabbit_peer_discovery:format_discovered_nodes(Peers)]), + join_discovered_peers(Peers, NodeType) + end. + +%% Attempts to join discovered, +%% reachable and compatible (in terms of Mnesia internal protocol version and such) +%% cluster peers in order. +join_discovered_peers(TryNodes, NodeType) -> + {RetriesLeft, DelayInterval} = rabbit_peer_discovery:discovery_retries(), + join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft, DelayInterval). + +join_discovered_peers_with_retries(TryNodes, _NodeType, 0, _DelayInterval) -> + rabbit_log:warning( + "Could not successfully contact any node of: ~s (as in Erlang distribution). " + "Starting as a blank standalone node...~n", + [string:join(lists:map(fun atom_to_list/1, TryNodes), ",")]), + init_db_and_upgrade([node()], disc, false, _Retry = true); +join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft, DelayInterval) -> + case find_reachable_peer_to_cluster_with(nodes_excl_me(TryNodes)) of + {ok, Node} -> + rabbit_log:info("Node '~s' selected for auto-clustering~n", [Node]), + {ok, {_, DiscNodes, _}} = discover_cluster0(Node), + init_db_and_upgrade(DiscNodes, NodeType, true, _Retry = true), + rabbit_connection_tracking:boot(), + rabbit_node_monitor:notify_joined_cluster(); + none -> + RetriesLeft1 = RetriesLeft - 1, + rabbit_log:error("Trying to join discovered peers failed. Will retry after a delay of ~b ms, ~b retries left...", + [DelayInterval, RetriesLeft1]), + timer:sleep(DelayInterval), + join_discovered_peers_with_retries(TryNodes, NodeType, RetriesLeft1, DelayInterval) + end. + +%% Make the node join a cluster. The node will be reset automatically +%% before we actually cluster it. The nodes provided will be used to +%% find out about the nodes in the cluster. 
%%
%% This function will fail if:
%%
%% * The node is currently the only disc node of its cluster
%% * We can't connect to any of the nodes provided
%% * The node is currently already clustered with the cluster of the nodes
%%   provided
%%
%% Note that we make no attempt to verify that the nodes provided are
%% all in the same cluster, we simply pick the first online node and
%% we cluster to its cluster.

-spec join_cluster(node(), node_type())
      -> ok | {ok, already_member} | {error, {inconsistent_cluster, string()}}.

join_cluster(DiscoveryNode, NodeType) ->
    %% Mnesia must be stopped: joining rewrites the local schema.
    ensure_mnesia_not_running(),
    ensure_mnesia_dir(),
    case is_only_clustered_disc_node() of
        true  -> e(clustering_only_disc_node);
        false -> ok
    end,
    {ClusterNodes, _, _} = discover_cluster([DiscoveryNode]),
    case me_in_nodes(ClusterNodes) of
        false ->
            case check_cluster_consistency(DiscoveryNode, false) of
                {ok, _} ->
                    %% reset the node. this simplifies things and it
                    %% will be needed in this case - we're joining a new
                    %% cluster with new nodes which are not in synch
                    %% with the current node. It also lifts the burden
                    %% of resetting the node from the user.
                    reset_gracefully(),

                    %% Join the cluster
                    rabbit_log:info("Clustering with ~p as ~p node~n",
                                    [ClusterNodes, NodeType]),
                    ok = init_db_with_mnesia(ClusterNodes, NodeType,
                                             true, true, _Retry = true),
                    rabbit_connection_tracking:boot(),
                    rabbit_node_monitor:notify_joined_cluster(),
                    ok;
                {error, Reason} ->
                    {error, Reason}
            end;
        true ->
            %% DiscoveryNode thinks that we are part of a cluster, but
            %% do we think so ourselves?
            case are_we_clustered_with(DiscoveryNode) of
                true ->
                    rabbit_log:info("Asked to join a cluster but already a member of it: ~p~n", [ClusterNodes]),
                    {ok, already_member};
                false ->
                    Msg = format_inconsistent_cluster_message(DiscoveryNode, node()),
                    rabbit_log:error(Msg),
                    {error, {inconsistent_cluster, Msg}}
            end
    end.
%% return node to its virgin state, where it is not member of any
%% cluster, has no cluster configuration, no local database, and no
%% persisted messages

-spec reset() -> 'ok'.

reset() ->
    ensure_mnesia_not_running(),
    rabbit_log:info("Resetting Rabbit~n", []),
    reset_gracefully().

-spec force_reset() -> 'ok'.

%% Like reset/0 but skips the leave-cluster handshake; used when the
%% rest of the cluster cannot be contacted.
force_reset() ->
    ensure_mnesia_not_running(),
    rabbit_log:info("Resetting Rabbit forcefully~n", []),
    wipe().

reset_gracefully() ->
    AllNodes = cluster_nodes(all),
    %% Reconnecting so that we will get an up to date nodes. We don't
    %% need to check for consistency because we are resetting.
    %% Force=true here so that reset still works when clustered with a
    %% node which is down.
    init_db_with_mnesia(AllNodes, node_type(), false, false, _Retry = false),
    case is_only_clustered_disc_node() of
        true  -> e(resetting_only_disc_node);
        false -> ok
    end,
    leave_cluster(),
    rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), cannot_delete_schema),
    wipe().

%% Deletes the entire local data directory and resets the recorded
%% cluster status. Destructive and irreversible.
wipe() ->
    %% We need to make sure that we don't end up in a distributed
    %% Erlang system with nodes while not being in an Mnesia cluster
    %% with them. We don't handle that well.
    [erlang:disconnect_node(N) || N <- cluster_nodes(all)],
    %% remove persisted messages and any other garbage we find
    ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")),
    ok = rabbit_node_monitor:reset_cluster_status(),
    ok.

-spec change_cluster_node_type(node_type()) -> 'ok'.

%% Converts this (stopped, clustered) node to the given node type by
%% resetting and re-joining through an online cluster peer.
change_cluster_node_type(Type) ->
    ensure_mnesia_not_running(),
    ensure_mnesia_dir(),
    case is_clustered() of
        false -> e(not_clustered);
        true  -> ok
    end,
    {_, _, RunningNodes} = discover_cluster(cluster_nodes(all)),
    %% We might still be marked as running by a remote node since the
    %% information of us going down might not have propagated yet.
    Node = case RunningNodes -- [node()] of
               [] -> e(no_online_cluster_nodes);
               [Node0|_] -> Node0
           end,
    ok = reset(),
    ok = join_cluster(Node, Type).

-spec update_cluster_nodes(node()) -> 'ok'.

%% Refreshes the local view of cluster membership from DiscoveryNode,
%% rebuilding the local schema from the remote copy.
update_cluster_nodes(DiscoveryNode) ->
    ensure_mnesia_not_running(),
    ensure_mnesia_dir(),
    Status = {AllNodes, _, _} = discover_cluster([DiscoveryNode]),
    case me_in_nodes(AllNodes) of
        true ->
            %% As in `check_consistency/0', we can safely delete the
            %% schema here, since it'll be replicated from the other
            %% nodes
            mnesia:delete_schema([node()]),
            rabbit_node_monitor:write_cluster_status(Status),
            rabbit_log:info("Updating cluster nodes from ~p~n",
                            [DiscoveryNode]),
            init_db_with_mnesia(AllNodes, node_type(), true, true, _Retry = false);
        false ->
            e(inconsistent_cluster)
    end,
    ok.

%% We proceed like this: try to remove the node locally. If the node
%% is offline, we remove the node if:
%% * This node is a disc node
%% * All other nodes are offline
%% * This node was, at the best of our knowledge (see comment below)
%%   the last or second to last after the node we're removing to go
%%   down

-spec forget_cluster_node(node(), boolean()) -> 'ok'.

forget_cluster_node(Node, RemoveWhenOffline) ->
    forget_cluster_node(Node, RemoveWhenOffline, true).

%% EmitNodeDeletedEvent is false when called while the rabbit
%% application (and thus rabbit_event) is stopped.
forget_cluster_node(Node, RemoveWhenOffline, EmitNodeDeletedEvent) ->
    case lists:member(Node, cluster_nodes(all)) of
        true  -> ok;
        false -> e(not_a_cluster_node)
    end,
    case {RemoveWhenOffline, is_running()} of
        {true,  false} -> remove_node_offline_node(Node);
        {true,   true} -> e(online_node_offline_flag);
        {false, false} -> e(offline_node_no_offline_flag);
        {false,  true} -> rabbit_log:info(
                            "Removing node ~p from cluster~n", [Node]),
                          case remove_node_if_mnesia_running(Node) of
                              ok when EmitNodeDeletedEvent ->
                                  rabbit_event:notify(node_deleted, [{node, Node}]),
                                  ok;
                              ok               -> ok;
                              {error, _} = Err -> throw(Err)
                          end
    end.
%% Removes Node from the cluster while mnesia is stopped locally; only
%% permitted when this node is a disc node and no other node is running.
remove_node_offline_node(Node) ->
    %% Here `mnesia:system_info(running_db_nodes)' will RPC, but that's what we
    %% want - we need to know the running nodes *now*. If the current node is a
    %% RAM node it will return bogus results, but we don't care since we only do
    %% this operation from disc nodes.
    case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of
        {[], disc} ->
            start_mnesia(),
            try
                %% What we want to do here is replace the last node to
                %% go down with the current node. The way we do this
                %% is by force loading the table, and making sure that
                %% they are loaded.
                rabbit_table:force_load(),
                rabbit_table:wait_for_replicated(_Retry = false),
                %% We skip the 'node_deleted' event because the
                %% application is stopped and thus, rabbit_event is not
                %% enabled.
                forget_cluster_node(Node, false, false),
                force_load_next_boot()
            after
                stop_mnesia()
            end;
        {_, _} ->
            e(removing_node_from_offline_node)
    end.

%%----------------------------------------------------------------------------
%% Queries
%%----------------------------------------------------------------------------

-spec status() -> [{'nodes', [{node_type(), [node()]}]} |
                   {'running_nodes', [node()]} |
                   {'partitions', [{node(), [node()]}]}].

%% Proplist describing cluster membership; running/partition info is
%% only included while mnesia is running locally.
status() ->
    IfNonEmpty = fun (_,    []) -> [];
                     (Type, Nodes) -> [{Type, Nodes}]
                 end,
    [{nodes, (IfNonEmpty(disc, cluster_nodes(disc)) ++
              IfNonEmpty(ram, cluster_nodes(ram)))}] ++
        case is_running() of
            true  -> RunningNodes = cluster_nodes(running),
                     [{running_nodes, RunningNodes},
                      {cluster_name,  rabbit_nodes:cluster_name()},
                      {partitions,    mnesia_partitions(RunningNodes)}];
            false -> []
        end.

%% Keeps only peers that report a non-empty partition list.
mnesia_partitions(Nodes) ->
    Replies = rabbit_node_monitor:partitions(Nodes),
    [Reply || Reply = {_, R} <- Replies, R =/= []].

is_running() -> mnesia:system_info(is_running) =:= yes.

-spec is_clustered() -> boolean().
is_clustered() -> AllNodes = cluster_nodes(all),
                  AllNodes =/= [] andalso AllNodes =/= [node()].

-spec on_running_node(pid()) -> boolean().

on_running_node(Pid) -> lists:member(node(Pid), cluster_nodes(running)).

%% This requires the process be in the same running cluster as us
%% (i.e. not partitioned or some random node).
%%
%% See also rabbit_misc:is_process_alive/1 which does not.

-spec is_process_alive(pid() | {atom(), node()}) -> boolean().

is_process_alive(Pid) when is_pid(Pid) ->
    on_running_node(Pid) andalso
        rpc:call(node(Pid), erlang, is_process_alive, [Pid]) =:= true;
is_process_alive({Name, Node}) ->
    lists:member(Node, cluster_nodes(running)) andalso
        rpc:call(Node, rabbit_mnesia, is_registered_process_alive, [Name]) =:= true.

-spec is_registered_process_alive(atom()) -> boolean().

is_registered_process_alive(Name) ->
    is_pid(whereis(Name)).

-spec cluster_nodes('all' | 'disc' | 'ram' | 'running') -> [node()].

cluster_nodes(WhichNodes) -> cluster_status(WhichNodes).

%% This function is the actual source of information, since it gets
%% the data from mnesia. Obviously it'll work only when mnesia is
%% running.

-spec cluster_status_from_mnesia() -> rabbit_types:ok_or_error2(
                                        cluster_status(), any()).

cluster_status_from_mnesia() ->
    case is_running() of
        false ->
            {error, mnesia_not_running};
        true ->
            %% If the tables are not present, it means that
            %% `init_db/3' hasn't been run yet. In other words, either
            %% we are a virgin node or a restarted RAM node. In both
            %% cases we're not interested in what mnesia has to say.
            NodeType = case mnesia:system_info(use_dir) of
                           true  -> disc;
                           false -> ram
                       end,
            case rabbit_table:is_present() of
                true  -> AllNodes = mnesia:system_info(db_nodes),
                         DiscCopies = mnesia:table_info(schema, disc_copies),
                         DiscNodes = case NodeType of
                                         disc -> nodes_incl_me(DiscCopies);
                                         ram  -> DiscCopies
                                     end,
                         %% `mnesia:system_info(running_db_nodes)' is safe since
                         %% we know that mnesia is running
                         RunningNodes = mnesia:system_info(running_db_nodes),
                         {ok, {AllNodes, DiscNodes, RunningNodes}};
                false -> {error, tables_not_present}
            end
    end.

%% Answers membership queries from mnesia when it is running, falling
%% back to the on-disk cluster status file otherwise.
cluster_status(WhichNodes) ->
    {AllNodes, DiscNodes, RunningNodes} = Nodes =
        case cluster_status_from_mnesia() of
            {ok, Nodes0} ->
                Nodes0;
            {error, _Reason} ->
                {AllNodes0, DiscNodes0, RunningNodes0} =
                    rabbit_node_monitor:read_cluster_status(),
                %% The cluster status file records the status when the node is
                %% online, but we know for sure that the node is offline now, so
                %% we can remove it from the list of running nodes.
                {AllNodes0, DiscNodes0, nodes_excl_me(RunningNodes0)}
        end,
    case WhichNodes of
        status  -> Nodes;
        all     -> AllNodes;
        disc    -> DiscNodes;
        ram     -> AllNodes -- DiscNodes;
        running -> RunningNodes
    end.

%% Version/protocol/status tuple exchanged with peers over RPC; see
%% remote_node_info/1 and check_cluster_consistency/2.
node_info() ->
    {rabbit_misc:otp_release(), rabbit_misc:version(),
     mnesia:system_info(protocol_version),
     cluster_status_from_mnesia()}.

-spec node_type() -> node_type().

node_type() ->
    {_AllNodes, DiscNodes, _RunningNodes} =
        rabbit_node_monitor:read_cluster_status(),
    case DiscNodes =:= [] orelse me_in_nodes(DiscNodes) of
        true  -> disc;
        false -> ram
    end.

-spec dir() -> file:filename().

dir() -> mnesia:system_info(directory).
%%----------------------------------------------------------------------------
%% Operations on the db
%%----------------------------------------------------------------------------

%% Adds the provided nodes to the mnesia cluster, creating a new
%% schema if there is the need to and catching up if there are other
%% nodes in the cluster already. It also updates the cluster status
%% file.
init_db(ClusterNodes, NodeType, CheckOtherNodes) ->
    NodeIsVirgin = is_virgin_node(),
    rabbit_log:debug("Does data directory looks like that of a blank (uninitialised) node? ~p", [NodeIsVirgin]),
    Nodes = change_extra_db_nodes(ClusterNodes, CheckOtherNodes),
    %% Note that we use `system_info' here and not the cluster status
    %% since when we start rabbit for the first time the cluster
    %% status will say we are a disc node but the tables won't be
    %% present yet.
    WasDiscNode = mnesia:system_info(use_dir),
    case {Nodes, WasDiscNode, NodeType} of
        {[], _, ram} ->
            %% Standalone ram node, we don't want that
            throw({error, cannot_create_standalone_ram_node});
        {[], false, disc} ->
            %% RAM -> disc, starting from scratch
            ok = create_schema();
        {[], true, disc} ->
            %% First disc node up
            maybe_force_load(),
            ok;
        {[_ | _], _, _} ->
            %% Subsequent node in cluster, catch up
            maybe_force_load(),
            ok = rabbit_table:wait_for_replicated(_Retry = true),
            ok = rabbit_table:ensure_local_copies(NodeType)
    end,
    ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin),
    ensure_schema_integrity(),
    rabbit_node_monitor:update_cluster_status(),
    ok.

-spec init_db_unchecked([node()], node_type()) -> 'ok'.

%% init_db/3 without verifying that the other nodes are reachable.
init_db_unchecked(ClusterNodes, NodeType) ->
    init_db(ClusterNodes, NodeType, false).
%% init_db/3 followed by any pending local schema upgrades; restores
%% extra_db_nodes for RAM nodes since upgrades restart mnesia.
init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes, Retry) ->
    ok = init_db(ClusterNodes, NodeType, CheckOtherNodes),
    ok = case rabbit_upgrade:maybe_upgrade_local() of
             ok                    -> ok;
             starting_from_scratch -> rabbit_version:record_desired();
             version_not_available -> schema_ok_or_move()
         end,
    %% `maybe_upgrade_local' restarts mnesia, so ram nodes will forget
    %% about the cluster
    case NodeType of
        ram  -> start_mnesia(),
                change_extra_db_nodes(ClusterNodes, false);
        disc -> ok
    end,
    %% ...and all nodes will need to wait for tables
    rabbit_table:wait_for_replicated(Retry),
    ok.

%% Wraps init_db_and_upgrade/4 in a start/stop of mnesia, guaranteeing
%% mnesia is stopped again even if initialisation throws.
init_db_with_mnesia(ClusterNodes, NodeType,
                    CheckOtherNodes, CheckConsistency, Retry) ->
    start_mnesia(CheckConsistency),
    try
        init_db_and_upgrade(ClusterNodes, NodeType, CheckOtherNodes, Retry)
    after
        stop_mnesia()
    end.

-spec ensure_mnesia_dir() -> 'ok'.

ensure_mnesia_dir() ->
    MnesiaDir = dir() ++ "/",
    case filelib:ensure_dir(MnesiaDir) of
        {error, Reason} ->
            throw({error, {cannot_create_mnesia_dir, MnesiaDir, Reason}});
        ok ->
            ok
    end.

%% Blocks (polling) until mnesia is up; throws if it is stopped or
%% stopping.
ensure_mnesia_running() ->
    case mnesia:system_info(is_running) of
        yes ->
            ok;
        starting ->
            wait_for(mnesia_running),
            ensure_mnesia_running();
        Reason when Reason =:= no; Reason =:= stopping ->
            throw({error, mnesia_not_running})
    end.

%% Blocks (polling) until mnesia is down; throws if it is running or
%% starting.
ensure_mnesia_not_running() ->
    case mnesia:system_info(is_running) of
        no ->
            ok;
        stopping ->
            wait_for(mnesia_not_running),
            ensure_mnesia_not_running();
        Reason when Reason =:= yes; Reason =:= starting ->
            throw({error, mnesia_unexpectedly_running})
    end.

%% Synchronises feature flag state with the given nodes; an
%% incompatibility is fatal for clustering.
ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin) ->
    Ret = rabbit_feature_flags:sync_feature_flags_with_cluster(
            Nodes, NodeIsVirgin),
    case Ret of
        ok              -> ok;
        {error, Reason} -> throw({error, {incompatible_feature_flags, Reason}})
    end.
ensure_schema_integrity() ->
    case rabbit_table:check_schema_integrity(_Retry = true) of
        ok ->
            ok;
        {error, Reason} ->
            throw({error, {schema_integrity_check_failed, Reason}})
    end.

-spec copy_db(file:filename()) -> rabbit_types:ok_or_error(any()).

%% Copies the whole mnesia directory; mnesia must be stopped.
copy_db(Destination) ->
    ok = ensure_mnesia_not_running(),
    rabbit_file:recursive_copy(dir(), Destination).

%% Marker file whose presence requests a force load of all tables on
%% the next boot (see maybe_force_load/0).
force_load_filename() ->
    filename:join(dir(), "force_load").

-spec force_load_next_boot() -> 'ok'.

force_load_next_boot() ->
    rabbit_file:write_file(force_load_filename(), <<"">>).

maybe_force_load() ->
    case rabbit_file:is_file(force_load_filename()) of
        true  -> rabbit_table:force_load(),
                 rabbit_file:delete(force_load_filename());
        false -> ok
    end.

%% This does not guarantee us much, but it avoids some situations that
%% will definitely end up badly

-spec check_cluster_consistency() -> 'ok'.

check_cluster_consistency() ->
    %% We want to find 0 or 1 consistent nodes.
    case lists:foldl(
           fun (Node,  {error, _})    -> check_cluster_consistency(Node, true);
               (_Node, {ok, Status})  -> {ok, Status}
           end, {error, not_found}, nodes_excl_me(cluster_nodes(all)))
    of
        {ok, Status = {RemoteAllNodes, _, _}} ->
            case ordsets:is_subset(ordsets:from_list(cluster_nodes(all)),
                                   ordsets:from_list(RemoteAllNodes)) of
                true ->
                    ok;
                false ->
                    %% We delete the schema here since we think we are
                    %% clustered with nodes that are no longer in the
                    %% cluster and there is no other way to remove
                    %% them from our schema. On the other hand, we are
                    %% sure that there is another online node that we
                    %% can use to sync the tables with. There is a
                    %% race here: if between this check and the
                    %% `init_db' invocation the cluster gets
                    %% disbanded, we're left with a node with no
                    %% mnesia data that will try to connect to offline
                    %% nodes.
                    mnesia:delete_schema([node()])
            end,
            rabbit_node_monitor:write_cluster_status(Status);
        {error, not_found} ->
            ok;
        {error, _} = E ->
            throw(E)
    end.

%% Checks OTP/Rabbit/mnesia-protocol compatibility with one remote
%% node; optionally also checks that it considers us a cluster member.
check_cluster_consistency(Node, CheckNodesConsistency) ->
    case remote_node_info(Node) of
        {badrpc, _Reason} ->
            {error, not_found};
        {_OTP, Rabbit, DelegateModuleHash, _Status} when is_binary(DelegateModuleHash) ->
            %% when a delegate module .beam file hash is present
            %% in the tuple, we are dealing with an old version
            rabbit_version:version_error("Rabbit", rabbit_misc:version(), Rabbit);
        {_OTP, _Rabbit, _Protocol, {error, _}} ->
            {error, not_found};
        {OTP, Rabbit, Protocol, {ok, Status}} when CheckNodesConsistency ->
            case check_consistency(Node, OTP, Rabbit, Protocol, Status) of
                {error, _} = E -> E;
                {ok, Res}      -> {ok, Res}
            end;
        {OTP, Rabbit, Protocol, {ok, Status}} ->
            case check_consistency(Node, OTP, Rabbit, Protocol) of
                {error, _} = E -> E;
                ok             -> {ok, Status}
            end
    end.

%% Fetches the remote node's node_info/0, normalising the pre-3.6.2
%% 3-tuple form to the current 4-tuple form.
remote_node_info(Node) ->
    case rpc:call(Node, rabbit_mnesia, node_info, []) of
        {badrpc, _} = Error   -> Error;
        %% RabbitMQ prior to 3.6.2
        {OTP, Rabbit, Status} -> {OTP, Rabbit, unsupported, Status};
        %% RabbitMQ 3.6.2 or later
        {OTP, Rabbit, Protocol, Status} -> {OTP, Rabbit, Protocol, Status}
    end.


%%--------------------------------------------------------------------
%% Hooks for `rabbit_node_monitor'
%%--------------------------------------------------------------------

-spec on_node_up(node()) -> 'ok'.

on_node_up(Node) ->
    case running_disc_nodes() of
        [Node] -> rabbit_log:info("cluster contains disc nodes again~n");
        _      -> ok
    end.

-spec on_node_down(node()) -> 'ok'.

on_node_down(_Node) ->
    case running_disc_nodes() of
        [] -> rabbit_log:info("only running disc node went down~n");
        _  -> ok
    end.
running_disc_nodes() ->
    {_AllNodes, DiscNodes, RunningNodes} = cluster_status(status),
    ordsets:to_list(ordsets:intersection(ordsets:from_list(DiscNodes),
                                         ordsets:from_list(RunningNodes))).

%%--------------------------------------------------------------------
%% Helpers for diagnostics commands
%%--------------------------------------------------------------------

schema_info(Items) ->
    Tables = mnesia:system_info(tables),
    [info(Table, Items) || Table <- Tables].

info(Table, Items) ->
    All = [{name, Table} | mnesia:table_info(Table, all)],
    [{Item, proplists:get_value(Item, All)} || Item <- Items].

%%--------------------------------------------------------------------
%% Internal helpers
%%--------------------------------------------------------------------

%% Queries each candidate in turn until one returns a cluster status;
%% throws if none of them does.
discover_cluster(Nodes) ->
    case lists:foldl(fun (_,    {ok, Res}) -> {ok, Res};
                         (Node, _)         -> discover_cluster0(Node)
                     end, {error, no_nodes_provided}, Nodes) of
        {ok, Res}        -> Res;
        {error, E}       -> throw({error, E});
        {badrpc, Reason} -> throw({badrpc_multi, Reason, Nodes})
    end.

discover_cluster0(Node) when Node == node() ->
    {error, cannot_cluster_node_with_itself};
discover_cluster0(Node) ->
    rpc:call(Node, rabbit_mnesia, cluster_status_from_mnesia, []).

%% If the schema fails its integrity check, move the database aside and
%% rebuild it from scratch.
schema_ok_or_move() ->
    case rabbit_table:check_schema_integrity(_Retry = false) of
        ok ->
            ok;
        {error, Reason} ->
            %% NOTE(review): this comment appears stale — rabbit_log is
            %% used on the very next line and seems to work here.
            %% NB: we cannot use rabbit_log here since it may not have been
            %% started yet
            rabbit_log:warning("schema integrity check failed: ~p~n"
                               "moving database to backup location "
                               "and recreating schema from scratch~n",
                               [Reason]),
            ok = move_db(),
            ok = create_schema()
    end.
%% We only care about disc nodes since ram nodes are supposed to catch
%% up only

%% Bootstraps a brand-new on-disk schema for this node: recreate the
%% mnesia schema, create all RabbitMQ tables, verify integrity and
%% record the desired schema version.
create_schema() ->
    stop_mnesia(),
    rabbit_log:debug("Will bootstrap a schema database..."),
    rabbit_misc:ensure_ok(mnesia:create_schema([node()]), cannot_create_schema),
    %% Fixed typo in the emitted log line: "Bootstraped" -> "Bootstrapped".
    rabbit_log:debug("Bootstrapped a schema database successfully"),
    start_mnesia(),

    rabbit_log:debug("Will create schema database tables"),
    ok = rabbit_table:create(),
    rabbit_log:debug("Created schema database tables successfully"),
    rabbit_log:debug("Will check schema database integrity..."),
    ensure_schema_integrity(),
    rabbit_log:debug("Schema database schema integrity check passed"),
    ok = rabbit_version:record_desired().

%% Renames the mnesia directory to a timestamped backup location and
%% re-creates an empty one; used when the schema failed its integrity
%% check and must be rebuilt.
move_db() ->
    stop_mnesia(),
    MnesiaDir = filename:dirname(dir() ++ "/"),
    {{Year, Month, Day}, {Hour, Minute, Second}} = erlang:universaltime(),
    BackupDir = rabbit_misc:format(
                  "~s_~w~2..0w~2..0w~2..0w~2..0w~2..0w",
                  [MnesiaDir, Year, Month, Day, Hour, Minute, Second]),
    case file:rename(MnesiaDir, BackupDir) of
        ok ->
            %% NOTE(review): the old "we cannot use rabbit_log here"
            %% comment was stale — rabbit_log is used right here.
            rabbit_log:warning("moved database from ~s to ~s~n",
                               [MnesiaDir, BackupDir]),
            ok;
        {error, Reason} -> throw({error, {cannot_backup_mnesia,
                                          MnesiaDir, BackupDir, Reason}})
    end,
    ensure_mnesia_dir(),
    start_mnesia(),
    ok.

%% Removes Node from the cluster by deleting its copy of the schema
%% table; only valid while mnesia is running locally.
remove_node_if_mnesia_running(Node) ->
    case is_running() of
        false ->
            {error, mnesia_not_running};
        true ->
            %% Deleting the schema copy of the node will result in
            %% the node being removed from the cluster, with that
            %% change being propagated to all nodes
            case mnesia:del_table_copy(schema, Node) of
                {atomic, ok} ->
                    rabbit_amqqueue:forget_all_durable(Node),
                    rabbit_node_monitor:notify_left_cluster(Node),
                    ok;
                {aborted, Reason} ->
                    {error, {failed_to_remove_node, Node, Reason}}
            end
    end.
%% Asks each peer (in turn) to remove us from the cluster; succeeds as
%% soon as one peer with running mnesia does so.
leave_cluster() ->
    case nodes_excl_me(cluster_nodes(all)) of
        []       -> ok;
        AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of
                        true  -> ok;
                        false -> e(no_running_cluster_nodes)
                    end
    end.

%% Returns true when Node removed us, false when it could not (mnesia
%% down / node down); throws on any other error.
leave_cluster(Node) ->
    case rpc:call(Node,
                  rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of
        ok                          -> true;
        {error, mnesia_not_running} -> false;
        {error, Reason}             -> throw({error, Reason});
        {badrpc, nodedown}          -> false
    end.

%% Polling helper used by the ensure_mnesia_* loops above.
wait_for(Condition) ->
    rabbit_log:info("Waiting for ~p...~n", [Condition]),
    timer:sleep(1000).

start_mnesia(CheckConsistency) ->
    case CheckConsistency of
        true  -> check_cluster_consistency();
        false -> ok
    end,
    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
    ensure_mnesia_running().

start_mnesia() ->
    start_mnesia(true).

stop_mnesia() ->
    stopped = mnesia:stop(),
    ensure_mnesia_not_running().

%% Connects mnesia to the given peers via extra_db_nodes; returns the
%% nodes actually contacted.
change_extra_db_nodes(ClusterNodes0, CheckOtherNodes) ->
    ClusterNodes = nodes_excl_me(ClusterNodes0),
    case {mnesia:change_config(extra_db_nodes, ClusterNodes), ClusterNodes} of
        {{ok, []}, [_|_]} when CheckOtherNodes ->
            throw({error, {failed_to_cluster_with, ClusterNodes,
                           "Mnesia could not connect to any nodes."}});
        {{ok, Nodes}, _} ->
            Nodes
    end.

check_consistency(Node, OTP, Rabbit, ProtocolVersion) ->
    rabbit_misc:sequence_error(
      [check_mnesia_or_otp_consistency(Node, ProtocolVersion, OTP),
       check_rabbit_consistency(Node, Rabbit)]).

check_consistency(Node, OTP, Rabbit, ProtocolVersion, Status) ->
    rabbit_misc:sequence_error(
      [check_mnesia_or_otp_consistency(Node, ProtocolVersion, OTP),
       check_rabbit_consistency(Node, Rabbit),
       check_nodes_consistency(Node, Status)]).

%% Verifies that the remote node's member list includes us.
check_nodes_consistency(Node, RemoteStatus = {RemoteAllNodes, _, _}) ->
    case me_in_nodes(RemoteAllNodes) of
        true ->
            {ok, RemoteStatus};
        false ->
            {error, {inconsistent_cluster,
                     format_inconsistent_cluster_message(node(), Node)}}
    end.
%% Pre-3.6.2 peers do not report an mnesia protocol version; for them
%% we fall back to comparing OTP releases.
check_mnesia_or_otp_consistency(_Node, unsupported, OTP) ->
    rabbit_version:check_otp_consistency(OTP);
check_mnesia_or_otp_consistency(Node, ProtocolVersion, _) ->
    check_mnesia_consistency(Node, ProtocolVersion).

check_mnesia_consistency(Node, ProtocolVersion) ->
    % If mnesia is running we will just check protocol version
    % If it's not running, we don't want it to join cluster until all checks pass
    % so we start it without `dir` env variable to prevent
    % joining cluster and/or corrupting data
    with_running_or_clean_mnesia(fun() ->
        case negotiate_protocol([Node]) of
            [Node] -> ok;
            [] ->
                LocalVersion = mnesia:system_info(protocol_version),
                {error, {inconsistent_cluster,
                         rabbit_misc:format("Mnesia protocol negotiation failed."
                                            " Local version: ~p."
                                            " Remote version ~p",
                                            [LocalVersion, ProtocolVersion])}}
        end
    end).

%% NOTE(review): mnesia_monitor is an undocumented internal mnesia
%% module; this coupling may break across OTP releases.
negotiate_protocol([Node]) ->
    mnesia_monitor:negotiate_protocol([Node]).

%% Runs Fun with mnesia available: uses the running instance if there
%% is one, otherwise starts a throwaway RAM-schema instance (no `dir`)
%% and restores the application environment afterwards.
with_running_or_clean_mnesia(Fun) ->
    IsMnesiaRunning = case mnesia:system_info(is_running) of
                          yes -> true;
                          no  -> false;
                          stopping ->
                              ensure_mnesia_not_running(),
                              false;
                          starting ->
                              ensure_mnesia_running(),
                              true
                      end,
    case IsMnesiaRunning of
        true  -> Fun();
        false ->
            SavedMnesiaDir = dir(),
            application:unset_env(mnesia, dir),
            SchemaLoc = application:get_env(mnesia, schema_location, opt_disc),
            application:set_env(mnesia, schema_location, ram),
            mnesia:start(),
            Result = Fun(),
            application:stop(mnesia),
            application:set_env(mnesia, dir, SavedMnesiaDir),
            application:set_env(mnesia, schema_location, SchemaLoc),
            Result
    end.

check_rabbit_consistency(RemoteNode, RemoteVersion) ->
    rabbit_misc:sequence_error(
      [rabbit_version:check_version_consistency(
         rabbit_misc:version(), RemoteVersion, "Rabbit",
         fun rabbit_misc:version_minor_equivalent/2),
       rabbit_feature_flags:check_node_compatibility(RemoteNode)]).

%% This is fairly tricky. We want to know if the node is in the state
%% that a `reset' would leave it in. We cannot simply check if the
%% mnesia tables aren't there because restarted RAM nodes won't have
%% tables while still being non-virgin. What we do instead is to
%% check if the mnesia directory is non existent or empty, with the
%% exception of certain files and directories, which can be there very early
%% on node boot.
is_virgin_node() ->
    case rabbit_file:list_dir(dir()) of
        {error, enoent} ->
            true;
        {ok, []} ->
            true;
        {ok, List0} ->
            IgnoredFiles0 =
                [rabbit_node_monitor:cluster_status_filename(),
                 rabbit_node_monitor:running_nodes_filename(),
                 rabbit_node_monitor:default_quorum_filename(),
                 rabbit_node_monitor:quorum_filename(),
                 rabbit_feature_flags:enabled_feature_flags_list_file()],
            IgnoredFiles = [filename:basename(File) || File <- IgnoredFiles0],
            rabbit_log:debug("Files and directories found in node's data directory: ~s, of them to be ignored: ~s",
                             [string:join(lists:usort(List0), ", "), string:join(lists:usort(IgnoredFiles), ", ")]),
            List = List0 -- IgnoredFiles,
            rabbit_log:debug("Files and directories found in node's data directory sans ignored ones: ~s", [string:join(lists:usort(List), ", ")]),
            List =:= []
    end.

%% Walks the candidate list, skipping peers that are unreachable, too
%% old, or inconsistent; returns the first acceptable one.
find_reachable_peer_to_cluster_with([]) ->
    none;
find_reachable_peer_to_cluster_with([Node | Nodes]) ->
    Fail = fun (Fmt, Args) ->
                   rabbit_log:warning(
                     "Could not auto-cluster with node ~s: " ++ Fmt, [Node | Args]),
                   find_reachable_peer_to_cluster_with(Nodes)
           end,
    case remote_node_info(Node) of
        {badrpc, _} = Reason ->
            Fail("~p~n", [Reason]);
        %% old delegate hash check
        {_OTP, RMQ, Hash, _} when is_binary(Hash) ->
            Fail("version ~s~n", [RMQ]);
        {_OTP, _RMQ, _Protocol, {error, _} = E} ->
            Fail("~p~n", [E]);
        {OTP, RMQ, Protocol, _} ->
            case check_consistency(Node, OTP, RMQ, Protocol) of
                {error, _} -> Fail("versions ~p~n",
                                   [{OTP, RMQ}]);
                ok         -> {ok, Node}
            end
    end.
is_only_clustered_disc_node() ->
    node_type() =:= disc andalso is_clustered() andalso
        cluster_nodes(disc) =:= [node()].

%% NOTE(review): uses the internal mnesia_lib module; confirm this
%% stays compatible across OTP releases.
are_we_clustered_with(Node) ->
    lists:member(Node, mnesia_lib:all_nodes()).

me_in_nodes(Nodes) -> lists:member(node(), Nodes).

nodes_incl_me(Nodes) -> lists:usort([node()|Nodes]).

nodes_excl_me(Nodes) -> Nodes -- [node()].

-spec e(any()) -> no_return().

%% Throws a tagged error enriched with a human-readable description.
e(Tag) -> throw({error, {Tag, error_description(Tag)}}).

error_description({invalid_cluster_node_names, BadNames}) ->
    "In the 'cluster_nodes' configuration key, the following node names "
        "are invalid: " ++ lists:flatten(io_lib:format("~p", [BadNames]));
error_description({invalid_cluster_node_type, BadType}) ->
    "In the 'cluster_nodes' configuration key, the node type is invalid "
        "(expected 'disc' or 'ram'): " ++
        lists:flatten(io_lib:format("~p", [BadType]));
error_description(invalid_cluster_nodes_conf) ->
    "The 'cluster_nodes' configuration key is invalid, it must be of the "
        "form {[Nodes], Type}, where Nodes is a list of node names and "
        "Type is either 'disc' or 'ram'";
error_description(clustering_only_disc_node) ->
    "You cannot cluster a node if it is the only disc node in its existing "
        " cluster. If new nodes joined while this node was offline, use "
        "'update_cluster_nodes' to add them manually.";
error_description(resetting_only_disc_node) ->
    "You cannot reset a node when it is the only disc node in a cluster. "
        "Please convert another node of the cluster to a disc node first.";
error_description(not_clustered) ->
    "Non-clustered nodes can only be disc nodes.";
error_description(no_online_cluster_nodes) ->
    "Could not find any online cluster nodes. If the cluster has changed, "
        "you can use the 'update_cluster_nodes' command.";
error_description(inconsistent_cluster) ->
    "The nodes provided do not have this node as part of the cluster.";
error_description(not_a_cluster_node) ->
    "The node selected is not in the cluster.";
error_description(online_node_offline_flag) ->
    "You set the --offline flag, which is used to remove nodes remotely from "
        "offline nodes, but this node is online.";
error_description(offline_node_no_offline_flag) ->
    "You are trying to remove a node from an offline node. That is dangerous, "
        "but can be done with the --offline flag. Please consult the manual "
        "for rabbitmqctl for more information.";
error_description(removing_node_from_offline_node) ->
    "To remove a node remotely from an offline node, the node you are removing "
        "from must be a disc node and all the other nodes must be offline.";
error_description(no_running_cluster_nodes) ->
    "You cannot leave a cluster if no online nodes are present.".

format_inconsistent_cluster_message(Thinker, Dissident) ->
    rabbit_misc:format("Node ~p thinks it's clustered "
                       "with node ~p, but ~p disagrees",
                       [Thinker, Dissident, Dissident]).
diff --git a/deps/rabbit/src/rabbit_mnesia_rename.erl b/deps/rabbit/src/rabbit_mnesia_rename.erl
new file mode 100644
index 0000000000..e0d88c0f5e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_mnesia_rename.erl
@@ -0,0 +1,276 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.
%%

-module(rabbit_mnesia_rename).
-include("rabbit.hrl").

-export([rename/2]).
-export([maybe_finish/1]).

-define(CONVERT_TABLES, [schema, rabbit_durable_queue]).

%% Supports renaming the nodes in the Mnesia database.
%% In order to do
%% this, we take a backup of the database, traverse the backup
%% changing node names and pids as we go, then restore it.
%%
%% That's enough for a standalone node, for clusters the story is more
%% complex. We can take pairs of nodes From and To, but backing up and
%% restoring the database changes schema cookies, so if we just do
%% this on all nodes the cluster will refuse to re-form with
%% "Incompatible schema cookies.". Therefore we do something similar
%% to what we do for upgrades - the first node in the cluster to
%% restart becomes the authority, and other nodes wipe their own
%% Mnesia state and rejoin. They also need to tell Mnesia the old node
%% is not coming back.
%%
%% If we are renaming nodes one at a time then the running cluster
%% might not be aware that a rename has taken place, so after we wipe
%% and rejoin we then update any tables (in practice just
%% rabbit_durable_queue) which should be aware that we have changed.

%%----------------------------------------------------------------------------

-spec rename(node(), [{node(), node()}]) -> 'ok'.

rename(Node, NodeMapList) ->
    try
        %% Check everything is correct and figure out what we are
        %% changing from and to.
        {FromNode, ToNode, NodeMap} = prepare(Node, NodeMapList),

        %% We backup and restore Mnesia even if other nodes are
        %% running at the time, and defer the final decision about
        %% whether to use our mutated copy or rejoin the cluster until
        %% we restart. That means we might be mutating our copy of the
        %% database while the cluster is running. *Do not* contact the
        %% cluster while this is happening, we are likely to get
        %% confused.
        application:set_env(kernel, dist_auto_connect, never),

        %% Take a copy we can restore from if we abandon the
        %% rename. We don't restore from the "backup" since restoring
        %% that changes schema cookies and might stop us rejoining the
        %% cluster.
        ok = rabbit_mnesia:copy_db(mnesia_copy_dir()),

        %% And make the actual changes
        become(FromNode),
        take_backup(before_backup_name()),
        convert_backup(NodeMap, before_backup_name(), after_backup_name()),
        ok = rabbit_file:write_term_file(rename_config_name(),
                                         [{FromNode, ToNode}]),
        convert_config_files(NodeMap),
        become(ToNode),
        restore_backup(after_backup_name()),
        ok
    after
        stop_mnesia()
    end.

%% Validates the rename map and determines this node's old and new
%% names. Returns {FromNode, ToNode, NodeMap} where NodeMap is a dict
%% of From -> To pairs (the dict type is consumed by convert_backup/3
%% and convert_config_files/1).
prepare(Node, NodeMapList) ->
    %% If we have a previous rename and haven't started since, give up.
    case rabbit_file:is_dir(dir()) of
        true  -> exit({rename_in_progress,
                       "Restart node under old name to roll back"});
        false -> ok = rabbit_file:ensure_dir(mnesia_copy_dir())
    end,

    %% Check we don't have two nodes mapped to the same node
    {FromNodes, ToNodes} = lists:unzip(NodeMapList),
    case length(FromNodes) - length(lists:usort(ToNodes)) of
        0 -> ok;
        _ -> exit({duplicate_node, ToNodes})
    end,

    %% Figure out which node we are before and after the change
    FromNode = case [From || {From, To} <- NodeMapList,
                             To =:= Node] of
                   [N] -> N;
                   []  -> Node
               end,
    NodeMap = dict:from_list(NodeMapList),
    ToNode = case dict:find(FromNode, NodeMap) of
                 {ok, N2} -> N2;
                 error    -> FromNode
             end,

    %% Check that we are in the cluster, all old nodes are in the
    %% cluster, and no new nodes are.
    Nodes = rabbit_mnesia:cluster_nodes(all),
    case {FromNodes -- Nodes, ToNodes -- (ToNodes -- Nodes),
          lists:member(Node, Nodes ++ ToNodes)} of
        {[], [], true}  -> ok;
        {[], [], false} -> exit({i_am_not_involved, Node});
        {F,  [], _}     -> exit({nodes_not_in_cluster, F});
        {_,  T,  _}     -> exit({nodes_already_in_cluster, T})
    end,
    {FromNode, ToNode, NodeMap}.

take_backup(Backup) ->
    start_mnesia(),
    %% We backup only local tables: in particular, this excludes the
    %% connection tracking tables which have no local replica.
    LocalTables = mnesia:system_info(local_tables),
    {ok, Name, _Nodes} = mnesia:activate_checkpoint([
        {max, LocalTables}
      ]),
    ok = mnesia:backup_checkpoint(Name, Backup),
    stop_mnesia().

restore_backup(Backup) ->
    ok = mnesia:install_fallback(Backup, [{scope, local}]),
    start_mnesia(),
    stop_mnesia(),
    rabbit_mnesia:force_load_next_boot().

-spec maybe_finish([node()]) -> 'ok'.

%% Called on boot: completes (or abandons) a rename recorded by a
%% previous run of rename/2, if any.
maybe_finish(AllNodes) ->
    case rabbit_file:read_term_file(rename_config_name()) of
        {ok, [{FromNode, ToNode}]} -> finish(FromNode, ToNode, AllNodes);
        _                          -> ok
    end.

finish(FromNode, ToNode, AllNodes) ->
    case node() of
        ToNode ->
            case rabbit_upgrade:nodes_running(AllNodes) of
                [] -> finish_primary(FromNode, ToNode);
                _  -> finish_secondary(FromNode, ToNode, AllNodes)
            end;
        FromNode ->
            %% Booted under the old name: roll the rename back from the
            %% pre-rename copy of the database and config files.
            rabbit_log:info(
              "Abandoning rename from ~s to ~s since we are still ~s~n",
              [FromNode, ToNode, FromNode]),
            [{ok, _} = file:copy(backup_of_conf(F), F) || F <- config_files()],
            ok = rabbit_file:recursive_delete([rabbit_mnesia:dir()]),
            ok = rabbit_file:recursive_copy(
                   mnesia_copy_dir(), rabbit_mnesia:dir()),
            delete_rename_files();
        _ ->
            %% Boot will almost certainly fail but we might as
            %% well just log this
            rabbit_log:info(
              "Rename attempted from ~s to ~s but we are ~s - ignoring.~n",
              [FromNode, ToNode, node()])
    end.

finish_primary(FromNode, ToNode) ->
    rabbit_log:info("Restarting as primary after rename from ~s to ~s~n",
                    [FromNode, ToNode]),
    delete_rename_files(),
    ok.

finish_secondary(FromNode, ToNode, AllNodes) ->
    rabbit_log:info("Restarting as secondary after rename from ~s to ~s~n",
                    [FromNode, ToNode]),
    rabbit_upgrade:secondary_upgrade(AllNodes),
    rename_in_running_mnesia(FromNode, ToNode),
    delete_rename_files(),
    ok.

%% Working directory and file names used while a rename is in flight.
dir()                -> rabbit_mnesia:dir() ++ "-rename".
before_backup_name() -> dir() ++ "/backup-before".
after_backup_name()  -> dir() ++ "/backup-after".
rename_config_name() -> dir() ++ "/pending.config".
+mnesia_copy_dir() -> dir() ++ "/mnesia-copy". + +delete_rename_files() -> ok = rabbit_file:recursive_delete([dir()]). + +start_mnesia() -> rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + rabbit_table:force_load(), + rabbit_table:wait_for_replicated(_Retry = false). +stop_mnesia() -> stopped = mnesia:stop(). + +convert_backup(NodeMap, FromBackup, ToBackup) -> + mnesia:traverse_backup( + FromBackup, ToBackup, + fun + (Row, Acc) -> + case lists:member(element(1, Row), ?CONVERT_TABLES) of + true -> {[update_term(NodeMap, Row)], Acc}; + false -> {[Row], Acc} + end + end, switched). + +config_files() -> + [rabbit_node_monitor:running_nodes_filename(), + rabbit_node_monitor:cluster_status_filename()]. + +backup_of_conf(Path) -> + filename:join([dir(), filename:basename(Path)]). + +convert_config_files(NodeMap) -> + [convert_config_file(NodeMap, Path) || Path <- config_files()]. + +convert_config_file(NodeMap, Path) -> + {ok, Term} = rabbit_file:read_term_file(Path), + {ok, _} = file:copy(Path, backup_of_conf(Path)), + ok = rabbit_file:write_term_file(Path, update_term(NodeMap, Term)). + +lookup_node(OldNode, NodeMap) -> + case dict:find(OldNode, NodeMap) of + {ok, NewNode} -> NewNode; + error -> OldNode + end. + +mini_map(FromNode, ToNode) -> dict:from_list([{FromNode, ToNode}]). + +update_term(NodeMap, L) when is_list(L) -> + [update_term(NodeMap, I) || I <- L]; +update_term(NodeMap, T) when is_tuple(T) -> + list_to_tuple(update_term(NodeMap, tuple_to_list(T))); +update_term(NodeMap, Node) when is_atom(Node) -> + lookup_node(Node, NodeMap); +update_term(NodeMap, Pid) when is_pid(Pid) -> + rabbit_misc:pid_change_node(Pid, lookup_node(node(Pid), NodeMap)); +update_term(_NodeMap, Term) -> + Term. 
+ +rename_in_running_mnesia(FromNode, ToNode) -> + All = rabbit_mnesia:cluster_nodes(all), + Running = rabbit_nodes:all_running(), + case {lists:member(FromNode, Running), lists:member(ToNode, All)} of + {false, true} -> ok; + {true, _} -> exit({old_node_running, FromNode}); + {_, false} -> exit({new_node_not_in_cluster, ToNode}) + end, + {atomic, ok} = mnesia:del_table_copy(schema, FromNode), + Map = mini_map(FromNode, ToNode), + {atomic, _} = transform_table(rabbit_durable_queue, Map), + ok. + +transform_table(Table, Map) -> + mnesia:sync_transaction( + fun () -> + mnesia:lock({table, Table}, write), + transform_table(Table, Map, mnesia:first(Table)) + end). + +transform_table(_Table, _Map, '$end_of_table') -> + ok; +transform_table(Table, Map, Key) -> + [Term] = mnesia:read(Table, Key, write), + ok = mnesia:write(Table, update_term(Map, Term), write), + transform_table(Table, Map, mnesia:next(Table, Key)). + +become(BecomeNode) -> + error_logger:tty(false), + case net_adm:ping(BecomeNode) of + pong -> exit({node_running, BecomeNode}); + pang -> ok = net_kernel:stop(), + io:format(" * Impersonating node: ~s...", [BecomeNode]), + {ok, _} = start_distribution(BecomeNode), + io:format(" done~n", []), + Dir = mnesia:system_info(directory), + io:format(" * Mnesia directory : ~s~n", [Dir]) + end. + +start_distribution(Name) -> + rabbit_nodes:ensure_epmd(), + NameType = rabbit_nodes_common:name_type(Name), + net_kernel:start([Name, NameType]). diff --git a/deps/rabbit/src/rabbit_msg_file.erl b/deps/rabbit/src/rabbit_msg_file.erl new file mode 100644 index 0000000000..1a24f690a0 --- /dev/null +++ b/deps/rabbit/src/rabbit_msg_file.erl @@ -0,0 +1,114 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_msg_file). 
+ +-export([append/3, read/2, scan/4]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK_MARKER, 255). +-define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). +-define(MSG_ID_SIZE_BYTES, 16). +-define(MSG_ID_SIZE_BITS, (8 * ?MSG_ID_SIZE_BYTES)). +-define(SCAN_BLOCK_SIZE, 4194304). %% 4MB + +%%---------------------------------------------------------------------------- + +-type io_device() :: any(). +-type position() :: non_neg_integer(). +-type msg_size() :: non_neg_integer(). +-type file_size() :: non_neg_integer(). +-type message_accumulator(A) :: + fun (({rabbit_types:msg_id(), msg_size(), position(), binary()}, A) -> + A). + +%%---------------------------------------------------------------------------- + +-spec append(io_device(), rabbit_types:msg_id(), msg()) -> + rabbit_types:ok_or_error2(msg_size(), any()). + +append(FileHdl, MsgId, MsgBody) + when is_binary(MsgId) andalso size(MsgId) =:= ?MSG_ID_SIZE_BYTES -> + MsgBodyBin = term_to_binary(MsgBody), + MsgBodyBinSize = size(MsgBodyBin), + Size = MsgBodyBinSize + ?MSG_ID_SIZE_BYTES, + case file_handle_cache:append(FileHdl, + <<Size:?INTEGER_SIZE_BITS, + MsgId:?MSG_ID_SIZE_BYTES/binary, + MsgBodyBin:MsgBodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of + ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; + KO -> KO + end. + +-spec read(io_device(), msg_size()) -> + rabbit_types:ok_or_error2({rabbit_types:msg_id(), msg()}, + any()). 
+ +read(FileHdl, TotalSize) -> + Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, + BodyBinSize = Size - ?MSG_ID_SIZE_BYTES, + case file_handle_cache:read(FileHdl, TotalSize) of + {ok, <<Size:?INTEGER_SIZE_BITS, + MsgId:?MSG_ID_SIZE_BYTES/binary, + MsgBodyBin:BodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} -> + {ok, {MsgId, binary_to_term(MsgBodyBin)}}; + KO -> KO + end. + +-spec scan(io_device(), file_size(), message_accumulator(A), A) -> + {'ok', A, position()}. + +scan(FileHdl, FileSize, Fun, Acc) when FileSize >= 0 -> + scan(FileHdl, FileSize, <<>>, 0, 0, Fun, Acc). + +scan(_FileHdl, FileSize, _Data, FileSize, ScanOffset, _Fun, Acc) -> + {ok, Acc, ScanOffset}; +scan(FileHdl, FileSize, Data, ReadOffset, ScanOffset, Fun, Acc) -> + Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), + case file_handle_cache:read(FileHdl, Read) of + {ok, Data1} -> + {Data2, Acc1, ScanOffset1} = + scanner(<<Data/binary, Data1/binary>>, ScanOffset, Fun, Acc), + ReadOffset1 = ReadOffset + size(Data1), + scan(FileHdl, FileSize, Data2, ReadOffset1, ScanOffset1, Fun, Acc1); + _KO -> + {ok, Acc, ScanOffset} + end. + +scanner(<<>>, Offset, _Fun, Acc) -> + {<<>>, Acc, Offset}; +scanner(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Offset, _Fun, Acc) -> + {<<>>, Acc, Offset}; %% Nothing to do other than stop. +scanner(<<Size:?INTEGER_SIZE_BITS, MsgIdAndMsg:Size/binary, + WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Offset, Fun, Acc) -> + TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, + case WriteMarker of + ?WRITE_OK_MARKER -> + %% Here we take option 5 from + %% https://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in + %% which we read the MsgId as a number, and then convert it + %% back to a binary in order to work around bugs in + %% Erlang's GC. 
+ <<MsgIdNum:?MSG_ID_SIZE_BITS, Msg/binary>> = + <<MsgIdAndMsg:Size/binary>>, + <<MsgId:?MSG_ID_SIZE_BYTES/binary>> = + <<MsgIdNum:?MSG_ID_SIZE_BITS>>, + scanner(Rest, Offset + TotalSize, Fun, + Fun({MsgId, TotalSize, Offset, Msg}, Acc)); + _ -> + scanner(Rest, Offset + TotalSize, Fun, Acc) + end; +scanner(Data, Offset, _Fun, Acc) -> + {Data, Acc, Offset}. diff --git a/deps/rabbit/src/rabbit_msg_record.erl b/deps/rabbit/src/rabbit_msg_record.erl new file mode 100644 index 0000000000..3ebe14cb9f --- /dev/null +++ b/deps/rabbit/src/rabbit_msg_record.erl @@ -0,0 +1,400 @@ +-module(rabbit_msg_record). + +-export([ + init/1, + to_iodata/1, + from_amqp091/2, + to_amqp091/1, + add_message_annotations/2, + message_annotation/2, + message_annotation/3 + ]). + +-include("rabbit.hrl"). +-include("rabbit_framing.hrl"). +-include_lib("amqp10_common/include/amqp10_framing.hrl"). + +-type maybe(T) :: T | undefined. +-type amqp10_data() :: #'v1_0.data'{} | + [#'v1_0.amqp_sequence'{} | #'v1_0.data'{}] | + #'v1_0.amqp_value'{}. +-record(msg, + { + % header :: maybe(#'v1_0.header'{}), + % delivery_annotations :: maybe(#'v1_0.delivery_annotations'{}), + message_annotations :: maybe(#'v1_0.message_annotations'{}), + properties :: maybe(#'v1_0.properties'{}), + application_properties :: maybe(#'v1_0.application_properties'{}), + data :: maybe(amqp10_data()) + % footer :: maybe(#'v1_0.footer'{}) + }). + +%% holds static or rarely changing fields +-record(cfg, {}). +-record(?MODULE, {cfg :: #cfg{}, + msg :: #msg{}, + %% holds a list of modifications to various sections + changes = [] :: list()}). + +-opaque state() :: #?MODULE{}. + +-export_type([ + state/0 + ]). + +%% this module acts as a wrapper / converter for the internal binar storage format +%% (AMQP 1.0) and any format it needs to be converted to / from. +%% Efficiency is key. 
No unnecessary allocations or work should be done until it +%% is absolutely needed + +%% init from an AMQP 1.0 encoded binary +-spec init(binary()) -> state(). +init(Bin) when is_binary(Bin) -> + %% TODO: delay parsing until needed + {MA, P, AP, D} = decode(amqp10_framing:decode_bin(Bin), + {undefined, undefined, undefined, undefined}), + #?MODULE{cfg = #cfg{}, + msg = #msg{properties = P, + application_properties = AP, + message_annotations = MA, + data = D}}. + +decode([], Acc) -> + Acc; +decode([#'v1_0.message_annotations'{} = MA | Rem], {_, P, AP, D}) -> + decode(Rem, {MA, P, AP, D}); +decode([#'v1_0.properties'{} = P | Rem], {MA, _, AP, D}) -> + decode(Rem, {MA, P, AP, D}); +decode([#'v1_0.application_properties'{} = AP | Rem], {MA, P, _, D}) -> + decode(Rem, {MA, P, AP, D}); +decode([#'v1_0.data'{} = D | Rem], {MA, P, AP, _}) -> + decode(Rem, {MA, P, AP, D}). + +amqp10_properties_empty(#'v1_0.properties'{message_id = undefined, + user_id = undefined, + to = undefined, + % subject = wrap(utf8, RKey), + reply_to = undefined, + correlation_id = undefined, + content_type = undefined, + content_encoding = undefined, + creation_time = undefined}) -> + true; +amqp10_properties_empty(_) -> + false. + +%% to realise the final binary data representation +-spec to_iodata(state()) -> iodata(). +to_iodata(#?MODULE{msg = #msg{properties = P, + application_properties = AP, + message_annotations = MA, + data = Data}}) -> + [ + case MA of + #'v1_0.message_annotations'{content = []} -> + <<>>; + _ -> + amqp10_framing:encode_bin(MA) + end, + case amqp10_properties_empty(P) of + true -> <<>>; + false -> + amqp10_framing:encode_bin(P) + end, + case AP of + #'v1_0.application_properties'{content = []} -> + <<>>; + _ -> + amqp10_framing:encode_bin(AP) + end, + amqp10_framing:encode_bin(Data) + ]. + +%% TODO: refine type spec here +-spec add_message_annotations(#{binary() => {atom(), term()}}, state()) -> + state(). 
+add_message_annotations(Anns, + #?MODULE{msg = + #msg{message_annotations = MA0} = Msg} = State) -> + Content = maps:fold( + fun (K, {T, V}, Acc) -> + map_add(symbol, K, T, V, Acc) + end, + case MA0 of + undefined -> []; + #'v1_0.message_annotations'{content = C} -> C + end, + Anns), + + State#?MODULE{msg = + Msg#msg{message_annotations = + #'v1_0.message_annotations'{content = Content}}}. + +%% TODO: refine +-type amqp10_term() :: {atom(), term()}. + +-spec message_annotation(binary(), state()) -> undefined | amqp10_term(). +message_annotation(Key, State) -> + message_annotation(Key, State, undefined). + +-spec message_annotation(binary(), state(), undefined | amqp10_term()) -> + undefined | amqp10_term(). +message_annotation(_Key, #?MODULE{msg = #msg{message_annotations = undefined}}, + Default) -> + Default; +message_annotation(Key, + #?MODULE{msg = + #msg{message_annotations = + #'v1_0.message_annotations'{content = Content}}}, + Default) + when is_binary(Key) -> + case lists:search(fun ({{symbol, K}, _}) -> K == Key end, Content) of + {value, {_K, V}} -> + V; + false -> + Default + end. + + +%% take a binary AMQP 1.0 input function, +%% parses it and returns the current parse state +%% this is the input function from storage and from, e.g. socket input +-spec from_amqp091(#'P_basic'{}, iodata()) -> state(). +from_amqp091(#'P_basic'{message_id = MsgId, + expiration = Expiration, + delivery_mode = DelMode, + headers = Headers, + user_id = UserId, + reply_to = ReplyTo, + type = Type, + priority = Priority, + app_id = AppId, + correlation_id = CorrId, + content_type = ContentType, + content_encoding = ContentEncoding, + timestamp = Timestamp + }, Data) -> + %% TODO: support parsing properties bin directly? 
+ ConvertedTs = case Timestamp of + undefined -> + undefined; + _ -> + Timestamp * 1000 + end, + P = #'v1_0.properties'{message_id = wrap(utf8, MsgId), + user_id = wrap(binary, UserId), + to = undefined, + % subject = wrap(utf8, RKey), + reply_to = wrap(utf8, ReplyTo), + correlation_id = wrap(utf8, CorrId), + content_type = wrap(symbol, ContentType), + content_encoding = wrap(symbol, ContentEncoding), + creation_time = wrap(timestamp, ConvertedTs)}, + + APC0 = [{wrap(utf8, K), from_091(T, V)} || {K, T, V} + <- case Headers of + undefined -> []; + _ -> Headers + end], + %% properties that do not map directly to AMQP 1.0 properties are stored + %% in application properties + APC = map_add(utf8, <<"x-basic-type">>, utf8, Type, + map_add(utf8, <<"x-basic-app-id">>, utf8, AppId, APC0)), + + MAC = map_add(symbol, <<"x-basic-priority">>, ubyte, Priority, + map_add(symbol, <<"x-basic-delivery-mode">>, ubyte, DelMode, + map_add(symbol, <<"x-basic-expiration">>, utf8, Expiration, []))), + + AP = #'v1_0.application_properties'{content = APC}, + MA = #'v1_0.message_annotations'{content = MAC}, + #?MODULE{cfg = #cfg{}, + msg = #msg{properties = P, + application_properties = AP, + message_annotations = MA, + data = #'v1_0.data'{content = Data}}}. + +map_add(_T, _Key, _Type, undefined, Acc) -> + Acc; +map_add(KeyType, Key, Type, Value, Acc) -> + [{wrap(KeyType, Key), wrap(Type, Value)} | Acc]. + +-spec to_amqp091(state()) -> {#'P_basic'{}, iodata()}. 
+to_amqp091(#?MODULE{msg = #msg{properties = P, + application_properties = APR, + message_annotations = MAR, + data = #'v1_0.data'{content = Payload}}}) -> + #'v1_0.properties'{message_id = MsgId, + user_id = UserId, + reply_to = ReplyTo0, + correlation_id = CorrId, + content_type = ContentType, + content_encoding = ContentEncoding, + creation_time = Timestamp} = case P of + undefined -> + #'v1_0.properties'{}; + _ -> + P + end, + + AP0 = case APR of + #'v1_0.application_properties'{content = AC} -> AC; + _ -> [] + end, + MA0 = case MAR of + #'v1_0.message_annotations'{content = MC} -> MC; + _ -> [] + end, + + {Type, AP1} = amqp10_map_get(utf8(<<"x-basic-type">>), AP0), + {AppId, AP} = amqp10_map_get(utf8(<<"x-basic-app-id">>), AP1), + + {Priority, MA1} = amqp10_map_get(symbol(<<"x-basic-priority">>), MA0), + {DelMode, MA2} = amqp10_map_get(symbol(<<"x-basic-delivery-mode">>), MA1), + {Expiration, _MA} = amqp10_map_get(symbol(<<"x-basic-expiration">>), MA2), + + Headers0 = [to_091(unwrap(K), V) || {K, V} <- AP], + {Headers1, MsgId091} = message_id(MsgId, <<"x-message-id-type">>, Headers0), + {Headers, CorrId091} = message_id(CorrId, <<"x-correlation-id-type">>, Headers1), + + BP = #'P_basic'{message_id = MsgId091, + delivery_mode = DelMode, + expiration = Expiration, + user_id = unwrap(UserId), + headers = case Headers of + [] -> undefined; + _ -> Headers + end, + reply_to = unwrap(ReplyTo0), + type = Type, + app_id = AppId, + priority = Priority, + correlation_id = CorrId091, + content_type = unwrap(ContentType), + content_encoding = unwrap(ContentEncoding), + timestamp = case unwrap(Timestamp) of + undefined -> + undefined; + Ts -> + Ts div 1000 + end + }, + {BP, Payload}. + +%%% Internal + +amqp10_map_get(K, AP0) -> + case lists:keytake(K, 1, AP0) of + false -> + {undefined, AP0}; + {value, {_, V}, AP} -> + {unwrap(V), AP} + end. + +wrap(_Type, undefined) -> + undefined; +wrap(Type, Val) -> + {Type, Val}. 
+ +unwrap(undefined) -> + undefined; +unwrap({_Type, V}) -> + V. + +% symbol_for(#'v1_0.properties'{}) -> +% {symbol, <<"amqp:properties:list">>}; + +% number_for(#'v1_0.properties'{}) -> +% {ulong, 115}; +% encode(Frame = #'v1_0.properties'{}) -> +% amqp10_framing:encode_described(list, 115, Frame); + +% encode_described(list, CodeNumber, Frame) -> +% {described, {ulong, CodeNumber}, +% {list, lists:map(fun encode/1, tl(tuple_to_list(Frame)))}}; + +% -spec generate(amqp10_type()) -> iolist(). +% generate({described, Descriptor, Value}) -> +% DescBin = generate(Descriptor), +% ValueBin = generate(Value), +% [ ?DESCRIBED_BIN, DescBin, ValueBin ]. + +to_091(Key, {utf8, V}) when is_binary(V) -> {Key, longstr, V}; +to_091(Key, {long, V}) -> {Key, long, V}; +to_091(Key, {byte, V}) -> {Key, byte, V}; +to_091(Key, {ubyte, V}) -> {Key, unsignedbyte, V}; +to_091(Key, {short, V}) -> {Key, short, V}; +to_091(Key, {ushort, V}) -> {Key, unsignedshort, V}; +to_091(Key, {uint, V}) -> {Key, unsignedint, V}; +to_091(Key, {int, V}) -> {Key, signedint, V}; +to_091(Key, {double, V}) -> {Key, double, V}; +to_091(Key, {float, V}) -> {Key, float, V}; +%% NB: header values can never be shortstr! +to_091(Key, {timestamp, V}) -> {Key, timestamp, V div 1000}; +to_091(Key, {binary, V}) -> {Key, binary, V}; +to_091(Key, {boolean, V}) -> {Key, bool, V}; +to_091(Key, true) -> {Key, bool, true}; +to_091(Key, false) -> {Key, bool, false}. + +from_091(longstr, V) when is_binary(V) -> {utf8, V}; +from_091(long, V) -> {long, V}; +from_091(unsignedbyte, V) -> {ubyte, V}; +from_091(short, V) -> {short, V}; +from_091(unsignedshort, V) -> {ushort, V}; +from_091(unsignedint, V) -> {uint, V}; +from_091(signedint, V) -> {int, V}; +from_091(double, V) -> {double, V}; +from_091(float, V) -> {float, V}; +from_091(bool, V) -> {boolean, V}; +from_091(binary, V) -> {binary, V}; +from_091(timestamp, V) -> {timestamp, V * 1000}; +from_091(byte, V) -> {byte, V}. 
+ +% convert_header(signedint, V) -> [$I, <<V:32/signed>>]; +% convert_header(decimal, V) -> {Before, After} = V, +% [$D, Before, <<After:32>>]; +% convert_header(timestamp, V) -> [$T, <<V:64>>]; +% % convert_header(table, V) -> [$F | table_to_binary(V)]; +% % convert_header(array, V) -> [$A | array_to_binary(V)]; +% convert_header(byte, V) -> [$b, <<V:8/signed>>]; +% convert_header(double, V) -> [$d, <<V:64/float>>]; +% convert_header(float, V) -> [$f, <<V:32/float>>]; +% convert_header(short, V) -> [$s, <<V:16/signed>>]; +% convert_header(binary, V) -> [$x | long_string_to_binary(V)]; +% convert_header(unsignedbyte, V) -> [$B, <<V:8/unsigned>>]; +% convert_header(unsignedshort, V) -> [$u, <<V:16/unsigned>>]; +% convert_header(unsignedint, V) -> [$i, <<V:32/unsigned>>]; +% convert_header(void, _V) -> [$V]. + +utf8(T) -> {utf8, T}. +symbol(T) -> {symbol, T}. + +message_id({uuid, UUID}, HKey, H0) -> + H = [{HKey, longstr, <<"uuid">>} | H0], + {H, rabbit_data_coercion:to_binary(rabbit_guid:to_string(UUID))}; +message_id({ulong, N}, HKey, H0) -> + H = [{HKey, longstr, <<"ulong">>} | H0], + {H, erlang:integer_to_binary(N)}; +message_id({binary, B}, HKey, H0) -> + E = base64:encode(B), + case byte_size(E) > 256 of + true -> + K = binary:replace(HKey, <<"-type">>, <<>>), + {[{K, longstr, B} | H0], undefined}; + false -> + H = [{HKey, longstr, <<"binary">>} | H0], + {H, E} + end; +message_id({utf8, S}, HKey, H0) -> + case byte_size(S) > 256 of + true -> + K = binary:replace(HKey, <<"-type">>, <<>>), + {[{K, longstr, S} | H0], undefined}; + false -> + {H0, S} + end; +message_id(MsgId, _, H) -> + {H, unwrap(MsgId)}. + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. 
diff --git a/deps/rabbit/src/rabbit_msg_store.erl b/deps/rabbit/src/rabbit_msg_store.erl new file mode 100644 index 0000000000..4851e56248 --- /dev/null +++ b/deps/rabbit/src/rabbit_msg_store.erl @@ -0,0 +1,2245 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_msg_store). + +-behaviour(gen_server2). + +-export([start_link/4, start_global_store_link/4, successfully_recovered_state/1, + client_init/4, client_terminate/1, client_delete_and_terminate/1, + client_ref/1, close_all_indicated/1, + write/3, write_flow/3, read/2, contains/2, remove/2]). + +-export([set_maximum_since_use/2, combine_files/3, + delete_file/2]). %% internal + +-export([scan_file_for_valid_messages/1]). %% salvage tool + +-export([transform_dir/3, force_recovery/2]). %% upgrade + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3, prioritise_call/4, prioritise_cast/3, + prioritise_info/3, format_message_queue/2]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(SYNC_INTERVAL, 25). %% milliseconds +-define(CLEAN_FILENAME, "clean.dot"). +-define(FILE_SUMMARY_FILENAME, "file_summary.ets"). +-define(TRANSFORM_TMP, "transform_tmp"). + +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read]). +-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). +-define(WRITE_MODE, [write]). + +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). + +-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + + %% i.e. two pairs, so GC does not go idle when busy +-define(MAXIMUM_SIMULTANEOUS_GC_FILES, 4). 
+ +%%---------------------------------------------------------------------------- + +-record(msstate, + { + %% store directory + dir, + %% the module for index ops, + %% rabbit_msg_store_ets_index by default + index_module, + %% where are messages? + index_state, + %% current file name as number + current_file, + %% current file handle since the last fsync? + current_file_handle, + %% file handle cache + file_handle_cache, + %% TRef for our interval timer + sync_timer_ref, + %% sum of valid data in all files + sum_valid_data, + %% sum of file sizes + sum_file_size, + %% things to do once GC completes + pending_gc_completion, + %% pid of our GC + gc_pid, + %% tid of the shared file handles table + file_handles_ets, + %% tid of the file summary table + file_summary_ets, + %% tid of current file cache table + cur_file_cache_ets, + %% tid of writes/removes in flight + flying_ets, + %% set of dying clients + dying_clients, + %% map of references of all registered clients + %% to callbacks + clients, + %% boolean: did we recover state? + successfully_recovered, + %% how big are our files allowed to get? + file_size_limit, + %% client ref to synced messages mapping + cref_to_msg_ids, + %% See CREDIT_DISC_BOUND in rabbit.hrl + credit_disc_bound + }). + +-record(client_msstate, + { server, + client_ref, + file_handle_cache, + index_state, + index_module, + dir, + gc_pid, + file_handles_ets, + file_summary_ets, + cur_file_cache_ets, + flying_ets, + credit_disc_bound + }). + +-record(file_summary, + {file, valid_total_size, left, right, file_size, locked, readers}). + +-record(gc_state, + { dir, + index_module, + index_state, + file_summary_ets, + file_handles_ets, + msg_store + }). + +-record(dying_client, + { client_ref, + file, + offset + }). + +%%---------------------------------------------------------------------------- + +-export_type([gc_state/0, file_num/0]). 
+ +-type gc_state() :: #gc_state { dir :: file:filename(), + index_module :: atom(), + index_state :: any(), + file_summary_ets :: ets:tid(), + file_handles_ets :: ets:tid(), + msg_store :: server() + }. + +-type server() :: pid() | atom(). +-type client_ref() :: binary(). +-type file_num() :: non_neg_integer(). +-type client_msstate() :: #client_msstate { + server :: server(), + client_ref :: client_ref(), + file_handle_cache :: map(), + index_state :: any(), + index_module :: atom(), + dir :: file:filename(), + gc_pid :: pid(), + file_handles_ets :: ets:tid(), + file_summary_ets :: ets:tid(), + cur_file_cache_ets :: ets:tid(), + flying_ets :: ets:tid(), + credit_disc_bound :: {pos_integer(), pos_integer()}}. +-type msg_ref_delta_gen(A) :: + fun ((A) -> 'finished' | + {rabbit_types:msg_id(), non_neg_integer(), A}). +-type maybe_msg_id_fun() :: + 'undefined' | fun ((gb_sets:set(), 'written' | 'ignored') -> any()). +-type maybe_close_fds_fun() :: 'undefined' | fun (() -> 'ok'). +-type deletion_thunk() :: fun (() -> boolean()). + +%%---------------------------------------------------------------------------- + +%% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION +%% It is not recommended to set this to < 0.5 +-define(GARBAGE_FRACTION, 0.5). + +%% Message store is responsible for storing messages +%% on disk and loading them back. The store handles both +%% persistent messages and transient ones (when a node +%% is under RAM pressure and needs to page messages out +%% to disk). The store is responsible for locating messages +%% on disk and maintaining an index. +%% +%% There are two message stores per node: one for transient +%% and one for persistent messages. +%% +%% Queue processes interact with the stores via clients. +%% +%% The components: +%% +%% Index: this is a mapping from MsgId to #msg_location{}. +%% By default, it's in ETS, but other implementations can +%% be used. 
+%% FileSummary: this maps File to #file_summary{} and is stored +%% in ETS. +%% +%% The basic idea is that messages are appended to the current file up +%% until that file becomes too big (> file_size_limit). At that point, +%% the file is closed and a new file is created on the _right_ of the +%% old file which is used for new messages. Files are named +%% numerically ascending, thus the file with the lowest name is the +%% eldest file. +%% +%% We need to keep track of which messages are in which files (this is +%% the index); how much useful data is in each file and which files +%% are on the left and right of each other. This is the purpose of the +%% file summary ETS table. +%% +%% As messages are removed from files, holes appear in these +%% files. The field ValidTotalSize contains the total amount of useful +%% data left in the file. This is needed for garbage collection. +%% +%% When we discover that a file is now empty, we delete it. When we +%% discover that it can be combined with the useful data in either its +%% left or right neighbour, and overall, across all the files, we have +%% ((the amount of garbage) / (the sum of all file sizes)) > +%% ?GARBAGE_FRACTION, we start a garbage collection run concurrently, +%% which will compact the two files together. This keeps disk +%% utilisation high and aids performance. We deliberately do this +%% lazily in order to prevent doing GC on files which are soon to be +%% emptied (and hence deleted). +%% +%% Given the compaction between two files, the left file (i.e. elder +%% file) is considered the ultimate destination for the good data in +%% the right file. If necessary, the good data in the left file which +%% is fragmented throughout the file is written out to a temporary +%% file, then read back in to form a contiguous chunk of good data at +%% the start of the left file. Thus the left file is garbage collected +%% and compacted. 
Then the good data from the right file is copied +%% onto the end of the left file. Index and file summary tables are +%% updated. +%% +%% On non-clean startup, we scan the files we discover, dealing with +%% the possibilities of a crash having occurred during a compaction +%% (this consists of tidyup - the compaction is deliberately designed +%% such that data is duplicated on disk rather than risking it being +%% lost), and rebuild the file summary and index ETS table. +%% +%% So, with this design, messages move to the left. Eventually, they +%% should end up in a contiguous block on the left and are then never +%% rewritten. But this isn't quite the case. If in a file there is one +%% message that is being ignored, for some reason, and messages in the +%% file to the right and in the current block are being read all the +%% time then it will repeatedly be the case that the good data from +%% both files can be combined and will be written out to a new +%% file. Whenever this happens, our shunned message will be rewritten. +%% +%% So, provided that we combine messages in the right order, +%% (i.e. left file, bottom to top, right file, bottom to top), +%% eventually our shunned message will end up at the bottom of the +%% left file. The compaction/combining algorithm is smart enough to +%% read in good data from the left file that is scattered throughout +%% (i.e. C and D in the below diagram), then truncate the file to just +%% above B (i.e. truncate to the limit of the good contiguous region +%% at the start of the file), then write C and D on top and then write +%% E, F and G from the right file on top. Thus contiguous blocks of +%% good data at the bottom of files are not rewritten. 
+%% +%% +-------+ +-------+ +-------+ +%% | X | | G | | G | +%% +-------+ +-------+ +-------+ +%% | D | | X | | F | +%% +-------+ +-------+ +-------+ +%% | X | | X | | E | +%% +-------+ +-------+ +-------+ +%% | C | | F | ===> | D | +%% +-------+ +-------+ +-------+ +%% | X | | X | | C | +%% +-------+ +-------+ +-------+ +%% | B | | X | | B | +%% +-------+ +-------+ +-------+ +%% | A | | E | | A | +%% +-------+ +-------+ +-------+ +%% left right left +%% +%% From this reasoning, we do have a bound on the number of times the +%% message is rewritten. From when it is inserted, there can be no +%% files inserted between it and the head of the queue, and the worst +%% case is that every time it is rewritten, it moves one position lower +%% in the file (for it to stay at the same position requires that +%% there are no holes beneath it, which means truncate would be used +%% and so it would not be rewritten at all). Thus this seems to +%% suggest the limit is the number of messages ahead of it in the +%% queue, though it's likely that that's pessimistic, given the +%% requirements for compaction/combination of files. +%% +%% The other property that we have is the bound on the lowest +%% utilisation, which should be 50% - worst case is that all files are +%% fractionally over half full and can't be combined (equivalent is +%% alternating full files and files with only one tiny message in +%% them). +%% +%% Messages are reference-counted. When a message with the same msg id +%% is written several times we only store it once, and only remove it +%% from the store when it has been removed the same number of times. +%% +%% The reference counts do not persist. Therefore the initialisation +%% function must be provided with a generator that produces ref count +%% deltas for all recovered messages. This is only used on startup +%% when the shutdown was non-clean. +%% +%% Read messages with a reference count greater than one are entered +%% into a message cache. 
The purpose of the cache is not especially +%% performance, though it can help there too, but prevention of memory +%% explosion. It ensures that as messages with a high reference count +%% are read from several processes they are read back as the same +%% binary object rather than multiples of identical binary +%% objects. +%% +%% Reads can be performed directly by clients without calling to the +%% server. This is safe because multiple file handles can be used to +%% read files. However, locking is used by the concurrent GC to make +%% sure that reads are not attempted from files which are in the +%% process of being garbage collected. +%% +%% When a message is removed, its reference count is decremented. Even +%% if the reference count becomes 0, its entry is not removed. This is +%% because in the event of the same message being sent to several +%% different queues, there is the possibility of one queue writing and +%% removing the message before other queues write it at all. Thus +%% accommodating 0-reference counts allows us to avoid unnecessary +%% writes here. Of course, there are complications: the file to which +%% the message has already been written could be locked pending +%% deletion or GC, which means we have to rewrite the message as the +%% original copy will now be lost. +%% +%% The server automatically defers reads, removes and contains calls +%% that occur which refer to files which are currently being +%% GC'd. Contains calls are only deferred in order to ensure they do +%% not overtake removes. +%% +%% The current file to which messages are being written has a +%% write-back cache. This is written to immediately by clients and can +%% be read from by clients too. This means that there are only ever +%% writes made to the current file, thus eliminating delays due to +%% flushing write buffers in order to be able to safely read from the +%% current file. 
The one exception to this is that on start up, the +%% cache is not populated with msgs found in the current file, and +%% thus in this case only, reads may have to come from the file +%% itself. The effect of this is that even if the msg_store process is +%% heavily overloaded, clients can still write and read messages with +%% very low latency and not block at all. +%% +%% Clients of the msg_store are required to register before using the +%% msg_store. This provides them with the necessary client-side state +%% to allow them to directly access the various caches and files. When +%% they terminate, they should deregister. They can do this by calling +%% either client_terminate/1 or client_delete_and_terminate/1. The +%% differences are: (a) client_terminate is synchronous. As a result, +%% if the msg_store is badly overloaded and has lots of in-flight +%% writes and removes to process, this will take some time to +%% return. However, once it does return, you can be sure that all the +%% actions you've issued to the msg_store have been processed. (b) Not +%% only is client_delete_and_terminate/1 asynchronous, but it also +%% permits writes and subsequent removes from the current +%% (terminating) client which are still in flight to be safely +%% ignored. Thus from the point of view of the msg_store itself, and +%% all from the same client: +%% +%% (T) = termination; (WN) = write of msg N; (RN) = remove of msg N +%% --> W1, W2, W1, R1, T, W3, R2, W2, R1, R2, R3, W4 --> +%% +%% The client obviously sent T after all the other messages (up to +%% W4), but because the msg_store prioritises messages, the T can be +%% promoted and thus received early. +%% +%% Thus at the point of the msg_store receiving T, we have messages 1 +%% and 2 with a refcount of 1. After T, W3 will be ignored because +%% it's an unknown message, as will R3, and W4. W2, R1 and R2 won't be +%% ignored because the messages that they refer to were already known +%% to the msg_store prior to T. 
However, it can be a little more
%% complex: after the first R2, the refcount of msg 2 is 0. At that
%% point, if a GC occurs or file deletion, msg 2 could vanish, which
%% would then mean that the subsequent W2 and R2 are then ignored.
%%
%% The use case then for client_delete_and_terminate/1 is if the
%% client wishes to remove everything it's written to the msg_store:
%% it issues removes for all messages it's written and not removed,
%% and then calls client_delete_and_terminate/1. At that point, any
%% in-flight writes (and subsequent removes) can be ignored, but
%% removes and writes for messages the msg_store already knows about
%% will continue to be processed normally (which will normally just
%% involve modifying the reference count, which is fast). Thus we save
%% disk bandwidth for writes which are going to be immediately removed
%% again by the terminating client.
%%
%% We use a separate set to keep track of the dying clients in order
%% to keep that set, which is inspected on every write and remove, as
%% small as possible. Inspecting the set of all clients would degrade
%% performance with many healthy clients and few, if any, dying
%% clients, which is the typical case.
%%
%% Client termination messages are stored in a separate ets index to
%% avoid filling primary message store index and message files with
%% client termination messages.
%%
%% When the msg_store has a backlog (i.e. it has unprocessed messages
%% in its mailbox / gen_server priority queue), a further optimisation
%% opportunity arises: we can eliminate pairs of 'write' and 'remove'
%% from the same client for the same message. A typical occurrence of
%% these is when an empty durable queue delivers persistent messages
%% to ack'ing consumers. The queue will asynchronously ask the
%% msg_store to 'write' such messages, and when they are acknowledged
%% it will issue a 'remove'.
That 'remove' may be issued before the
%% msg_store has processed the 'write'. There is then no point going
%% ahead with the processing of that 'write'.
%%
%% To detect this situation a 'flying_ets' table is shared between the
%% clients and the server. The table is keyed on the combination of
%% client (reference) and msg id, and the value represents an
%% integration of all the writes and removes currently "in flight" for
%% that message between the client and server - '+1' means all the
%% writes/removes add up to a single 'write', '-1' to a 'remove', and
%% '0' to nothing. (NB: the integration can never add up to more than
%% one 'write' or 'remove' since clients must not write/remove a message
%% more than once without first removing/writing it).
%%
%% Maintaining this table poses two challenges: 1) both the clients
%% and the server access and update the table, which causes
%% concurrency issues, 2) we must ensure that entries do not stay in
%% the table forever, since that would constitute a memory leak. We
%% address the former by carefully modelling all operations as
%% sequences of atomic actions that produce valid results in all
%% possible interleavings. We address the latter by deleting table
%% entries whenever the server finds a 0-valued entry during the
%% processing of a write/remove. 0 is essentially equivalent to "no
%% entry". If, OTOH, the value is non-zero we know there is at least
%% one other 'write' or 'remove' in flight, so we get an opportunity
%% later to delete the table entry when processing these.
%%
%% There are two further complications. We need to ensure that 1)
%% eliminated writes still get confirmed, and 2) the write-back cache
%% doesn't grow unbounded. These are quite straightforward to
%% address. See the comments in the code.
%%
%% For notes on Clean Shutdown and startup, see documentation in
%% rabbit_variable_queue.
%%----------------------------------------------------------------------------
%% public API
%%----------------------------------------------------------------------------

-spec start_link
        (atom(), file:filename(), [binary()] | 'undefined',
         {msg_ref_delta_gen(A), A}) -> rabbit_types:ok_pid_or_error().

%% Start an unregistered message store server process.
start_link(Type, Dir, ClientRefs, StartupFunState) when is_atom(Type) ->
    InitArgs = [Type, Dir, ClientRefs, StartupFunState],
    gen_server2:start_link(?MODULE, InitArgs, [{timeout, infinity}]).

%% Start a message store server registered locally under the name Type.
start_global_store_link(Type, Dir, ClientRefs, StartupFunState) when is_atom(Type) ->
    InitArgs = [Type, Dir, ClientRefs, StartupFunState],
    gen_server2:start_link({local, Type}, ?MODULE, InitArgs,
                           [{timeout, infinity}]).

-spec successfully_recovered_state(server()) -> boolean().

%% True iff the store recovered its index and file summary from a
%% clean shutdown (see init/1, which records this in the server state).
successfully_recovered_state(Server) ->
    gen_server2:call(Server, successfully_recovered_state, infinity).

-spec client_init(server(), client_ref(), maybe_msg_id_fun(),
                  maybe_close_fds_fun()) -> client_msstate().

%% Register with the store and build the client-side state (shared ETS
%% tables, index handle, GC pid) that later allows reads and writes to
%% bypass the server process.
client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) when is_pid(Server); is_atom(Server) ->
    RegisterMsg = {new_client_state, Ref, self(), MsgOnDiskFun, CloseFDsFun},
    {IState, IModule, Dir, GCPid,
     FileHandlesEts, FileSummaryEts, CurFileCacheEts, FlyingEts} =
        gen_server2:call(Server, RegisterMsg, infinity),
    CreditDiscBound = rabbit_misc:get_env(rabbit, msg_store_credit_disc_bound,
                                          ?CREDIT_DISC_BOUND),
    #client_msstate { server             = Server,
                      client_ref         = Ref,
                      file_handle_cache  = #{},
                      index_state        = IState,
                      index_module       = IModule,
                      dir                = Dir,
                      gc_pid             = GCPid,
                      file_handles_ets   = FileHandlesEts,
                      file_summary_ets   = FileSummaryEts,
                      cur_file_cache_ets = CurFileCacheEts,
                      flying_ets         = FlyingEts,
                      credit_disc_bound  = CreditDiscBound }.

-spec client_terminate(client_msstate()) -> 'ok'.

%% Synchronous deregistration: returns only once the server has
%% processed everything this client previously issued.
client_terminate(CState = #client_msstate { client_ref = Ref }) ->
    close_all_handles(CState),
    ok = server_call(CState, {client_terminate, Ref}).

-spec client_delete_and_terminate(client_msstate()) -> 'ok'.

%% Asynchronous deregistration; marking the client as dying first lets
%% the server safely ignore this client's still-in-flight operations.
client_delete_and_terminate(CState = #client_msstate { client_ref = Ref }) ->
    close_all_handles(CState),
    ok = server_cast(CState, {client_dying, Ref}),
    ok = server_cast(CState, {client_delete, Ref}).

-spec client_ref(client_msstate()) -> client_ref().

%% The unique reference this client registered under.
client_ref(#client_msstate { client_ref = Ref }) -> Ref.

-spec write_flow(rabbit_types:msg_id(), msg(), client_msstate()) -> 'ok'.

%% Like write/3, but participates in credit flow towards the store.
write_flow(MsgId, Msg,
           CState = #client_msstate {
                       server = Server,
                       credit_disc_bound = CreditDiscBound }) ->
    %% Here we are tracking messages sent by the
    %% rabbit_amqqueue_process process via the
    %% rabbit_variable_queue. We are accessing the
    %% rabbit_amqqueue_process process dictionary.
    credit_flow:send(Server, CreditDiscBound),
    client_write(MsgId, Msg, flow, CState).

-spec write(rabbit_types:msg_id(), msg(), client_msstate()) -> 'ok'.

%% Uncredited write.
write(MsgId, Msg, CState) -> client_write(MsgId, Msg, noflow, CState).

-spec read(rabbit_types:msg_id(), client_msstate()) ->
          {rabbit_types:ok(msg()) | 'not_found', client_msstate()}.

%% Read a message, preferring the client-side caches and direct file
%% access; falls back to a synchronous server read when that is unsafe.
read(MsgId,
     CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) ->
    file_handle_cache_stats:update(msg_store_read),
    %% Check the cur file cache
    case ets:lookup(CurFileCacheEts, MsgId) of
        [{MsgId, Msg, _CacheRefCount}] ->
            {{ok, Msg}, CState};
        [] ->
            Defer = fun () -> {server_call(CState, {read, MsgId}), CState} end,
            case index_lookup_positive_ref_count(MsgId, CState) of
                not_found -> Defer();
                Location  -> client_read1(Location, Defer, CState)
            end
    end.

-spec contains(rabbit_types:msg_id(), client_msstate()) -> boolean().

%% Ask the server whether it knows MsgId.
contains(MsgId, CState) -> server_call(CState, {contains, MsgId}).

-spec remove([rabbit_types:msg_id()], client_msstate()) -> 'ok'.

%% Decrement the reference count of each message. The empty list is a
%% no-op that never contacts the server.
remove([], _CState) -> ok;
remove(MsgIds, CState = #client_msstate { client_ref = CRef }) ->
    lists:foreach(
      fun (MsgId) -> client_update_flying(-1, MsgId, CState) end,
      MsgIds),
    server_cast(CState, {remove, CRef, MsgIds}).
-spec set_maximum_since_use(server(), non_neg_integer()) -> 'ok'.

%% Ask the store to close file handles unused for longer than Age
%% (handled asynchronously by the set_maximum_since_use cast).
set_maximum_since_use(Server, Age) when is_pid(Server); is_atom(Server) ->
    gen_server2:cast(Server, {set_maximum_since_use, Age}).

%%----------------------------------------------------------------------------
%% Client-side-only helpers
%%----------------------------------------------------------------------------

%% Synchronous (infinite-timeout) call to the store process held in
%% the client state.
server_call(#client_msstate { server = Server }, Msg) ->
    gen_server2:call(Server, Msg, infinity).

%% Asynchronous cast to the store process held in the client state.
server_cast(#client_msstate { server = Server }, Msg) ->
    gen_server2:cast(Server, Msg).

%% Common path for write/3 and write_flow/3: record the write as
%% in-flight (+1 in flying_ets), publish the message into the
%% current-file cache so it is immediately readable client-side, then
%% notify the server. Flow is 'flow' or 'noflow' (credit accounting
%% happens in the caller and in the server's write handler).
client_write(MsgId, Msg, Flow,
             CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts,
                                        client_ref = CRef }) ->
    file_handle_cache_stats:update(msg_store_write),
    ok = client_update_flying(+1, MsgId, CState),
    ok = update_msg_cache(CurFileCacheEts, MsgId, Msg),
    ok = server_cast(CState, {write, CRef, MsgId, Flow}).

%% Client-side read, step 1: look up the summary of the file the index
%% says holds the message. Defer/0 falls back to a synchronous read
%% via the server.
client_read1(#msg_location { msg_id = MsgId, file = File } = MsgLocation, Defer,
             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
    case ets:lookup(FileSummaryEts, File) of
        [] -> %% File has been GC'd and no longer exists. Go around again.
            read(MsgId, CState);
        [#file_summary { locked = Locked, right = Right }] ->
            client_read2(Locked, Right, MsgLocation, Defer, CState)
    end.

%% Client-side read, step 2: from the file's locked flag and right
%% neighbour decide whether a direct read is safe or must be deferred.
client_read2(false, undefined, _MsgLocation, Defer, _CState) ->
    %% Although we've already checked both caches and not found the
    %% message there, the message is apparently in the
    %% current_file. We can only arrive here if we are trying to read
    %% a message which we have not written, which is very odd, so just
    %% defer.
    %%
    %% OR, on startup, the cur_file_cache is not populated with the
    %% contents of the current file, thus reads from the current file
    %% will end up here and will need to be deferred.
    Defer();
client_read2(true, _Right, _MsgLocation, Defer, _CState) ->
    %% Of course, in the mean time, the GC could have run and our msg
    %% is actually in a different file, unlocked. However, deferring is
    %% the safest and simplest thing to do.
    Defer();
client_read2(false, _Right,
             MsgLocation = #msg_location { msg_id = MsgId, file = File },
             Defer,
             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
    %% It's entirely possible that everything we're doing from here on
    %% is for the wrong file, or a non-existent file, as a GC may have
    %% finished.
    safe_ets_update_counter(
      FileSummaryEts, File, {#file_summary.readers, +1},
      fun (_) -> client_read3(MsgLocation, Defer, CState) end,
      fun () -> read(MsgId, CState) end).

%% Client-side read, step 3: we hold a +1 on the file's reader count.
%% Re-check the lock and the index, then read from disk. Release/0
%% undoes our +1 and, when we were the last reader of a locked file,
%% wakes the GC which is waiting for readers to drain.
client_read3(#msg_location { msg_id = MsgId, file = File }, Defer,
             CState = #client_msstate { file_handles_ets = FileHandlesEts,
                                        file_summary_ets = FileSummaryEts,
                                        gc_pid           = GCPid,
                                        client_ref       = Ref }) ->
    Release =
        fun() -> ok = case ets:update_counter(FileSummaryEts, File,
                                              {#file_summary.readers, -1}) of
                          0 -> case ets:lookup(FileSummaryEts, File) of
                                   [#file_summary { locked = true }] ->
                                       rabbit_msg_store_gc:no_readers(
                                         GCPid, File);
                                   _ -> ok
                               end;
                          _ -> ok
                      end
        end,
    %% If a GC involving the file hasn't already started, it won't
    %% start now. Need to check again to see if we've been locked in
    %% the meantime, between lookup and update_counter (thus GC
    %% started before our +1. In fact, it could have finished by now
    %% too).
    case ets:lookup(FileSummaryEts, File) of
        [] -> %% GC has deleted our file, just go round again.
            read(MsgId, CState);
        [#file_summary { locked = true }] ->
            %% If we get a badarg here, then the GC has finished and
            %% deleted our file. Try going around again. Otherwise,
            %% just defer.
            %%
            %% badarg scenario: we lookup, msg_store locks, GC starts,
            %% GC ends, we +1 readers, msg_store ets:deletes (and
            %% unlocks the dest)
            try Release(),
                 Defer()
            catch error:badarg -> read(MsgId, CState)
            end;
        [#file_summary { locked = false }] ->
            %% Ok, we're definitely safe to continue - a GC involving
            %% the file cannot start up now, and isn't running, so
            %% nothing will tell us from now on to close the handle if
            %% it's already open.
            %%
            %% Finally, we need to recheck that the msg is still at
            %% the same place - it's possible an entire GC ran between
            %% us doing the lookup and the +1 on the readers. (Same as
            %% badarg scenario above, but we don't have a missing file
            %% - we just have the /wrong/ file).
            case index_lookup(MsgId, CState) of
                #msg_location { file = File } = MsgLocation ->
                    %% Still the same file.
                    {ok, CState1} = close_all_indicated(CState),
                    %% We are now guaranteed that the mark_handle_open
                    %% call will either insert_new correctly, or will
                    %% fail, but find the value is open, not close.
                    mark_handle_open(FileHandlesEts, File, Ref),
                    %% Could the msg_store now mark the file to be
                    %% closed? No: marks for closing are issued only
                    %% when the msg_store has locked the file.
                    %% This will never be the current file
                    {Msg, CState2} = read_from_disk(MsgLocation, CState1),
                    Release(), %% this MUST NOT fail with badarg
                    {{ok, Msg}, CState2};
                #msg_location {} = MsgLocation -> %% different file!
                    Release(), %% this MUST NOT fail with badarg
                    client_read1(MsgLocation, Defer, CState);
                not_found -> %% it seems not to exist. Defer, just to be sure.
                    try Release() %% this can badarg, same as locked case, above
                    catch error:badarg -> ok
                    end,
                    Defer()
            end
    end.
%% Client-side half of the in-flight ("flying") accounting. Diff is +1
%% for a write and -1 for a remove; the value stored under {MsgId,
%% CRef} is the running sum of operations the server has not yet
%% processed (the server half, update_flying/4, applies the opposite
%% sign). insert_new and update_counter are each atomic, so the client
%% and server can race on the same key safely.
client_update_flying(Diff, MsgId, #client_msstate { flying_ets = FlyingEts,
                                                    client_ref = CRef }) ->
    Key = {MsgId, CRef},
    case ets:insert_new(FlyingEts, {Key, Diff}) of
        true  -> ok;
        false -> try ets:update_counter(FlyingEts, Key, {2, Diff}) of
                     0    -> ok;
                     %% NB: Diff is already bound, so this clause only
                     %% matches when the new counter equals Diff.
                     Diff -> ok;
                     Err  -> throw({bad_flying_ets_update, Diff, Err, Key})
                 catch error:badarg ->
                         %% this is guaranteed to succeed since the
                         %% server only removes and updates flying_ets
                         %% entries; it never inserts them
                         true = ets:insert_new(FlyingEts, {Key, Diff})
                 end,
                 ok
    end.

%% Drop all per-client server-side bookkeeping for CRef: its pending
%% confirms and its dying-client record (if any).
clear_client(CRef, State = #msstate { cref_to_msg_ids = CTM,
                                      dying_clients = DyingClients }) ->
    State #msstate { cref_to_msg_ids = maps:remove(CRef, CTM),
                     dying_clients = maps:remove(CRef, DyingClients) }.


%%----------------------------------------------------------------------------
%% gen_server callbacks
%%----------------------------------------------------------------------------


%% Start-up sequence: recover (or wipe) the on-disk store under
%% BaseDir/Type, recover the index and file summary where possible,
%% create the shared ETS tables handed out to clients, start the GC
%% process, rebuild the index if the shutdown was unclean, and open
%% the current file for appending.
init([Type, BaseDir, ClientRefs, StartupFunState]) ->
    process_flag(trap_exit, true),

    ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use,
                                             [self()]),

    Dir = filename:join(BaseDir, atom_to_list(Type)),
    Name = filename:join(filename:basename(BaseDir), atom_to_list(Type)),

    {ok, IndexModule} = application:get_env(rabbit, msg_store_index_module),
    rabbit_log:info("Message store ~tp: using ~p to provide index~n", [Name, IndexModule]),

    %% ClientRefs = undefined means "start from scratch": delete any
    %% existing store directory and skip all recovery.
    AttemptFileSummaryRecovery =
        case ClientRefs of
            undefined -> ok = rabbit_file:recursive_delete([Dir]),
                         ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
                         false;
            _         -> ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
                         recover_crashed_compactions(Dir)
        end,
    %% if we found crashed compactions we trust neither the
    %% file_summary nor the location index. Note the file_summary is
    %% left empty here if it can't be recovered.
    {FileSummaryRecovered, FileSummaryEts} =
        recover_file_summary(AttemptFileSummaryRecovery, Dir),
    {CleanShutdown, IndexState, ClientRefs1} =
        recover_index_and_client_refs(IndexModule, FileSummaryRecovered,
                                      ClientRefs, Dir, Name),
    Clients = maps:from_list(
                [{CRef, {undefined, undefined, undefined}} ||
                    CRef <- ClientRefs1]),
    %% CleanShutdown => msg location index and file_summary both
    %% recovered correctly.
    true = case {FileSummaryRecovered, CleanShutdown} of
               {true, false} -> ets:delete_all_objects(FileSummaryEts);
               _             -> true
           end,
    %% CleanShutdown <=> msg location index and file_summary both
    %% recovered correctly.

    %% These tables are public: registered clients read and write them
    %% directly (see client_init/4).
    FileHandlesEts  = ets:new(rabbit_msg_store_shared_file_handles,
                              [ordered_set, public]),
    CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]),
    FlyingEts       = ets:new(rabbit_msg_store_flying, [set, public]),

    {ok, FileSizeLimit} = application:get_env(rabbit, msg_store_file_size_limit),

    {ok, GCPid} = rabbit_msg_store_gc:start_link(
                    #gc_state { dir              = Dir,
                                index_module     = IndexModule,
                                index_state      = IndexState,
                                file_summary_ets = FileSummaryEts,
                                file_handles_ets = FileHandlesEts,
                                msg_store        = self()
                              }),

    CreditDiscBound = rabbit_misc:get_env(rabbit, msg_store_credit_disc_bound,
                                          ?CREDIT_DISC_BOUND),

    State = #msstate { dir                    = Dir,
                       index_module           = IndexModule,
                       index_state            = IndexState,
                       current_file           = 0,
                       current_file_handle    = undefined,
                       file_handle_cache      = #{},
                       sync_timer_ref         = undefined,
                       sum_valid_data         = 0,
                       sum_file_size          = 0,
                       pending_gc_completion  = maps:new(),
                       gc_pid                 = GCPid,
                       file_handles_ets       = FileHandlesEts,
                       file_summary_ets       = FileSummaryEts,
                       cur_file_cache_ets     = CurFileCacheEts,
                       flying_ets             = FlyingEts,
                       dying_clients          = #{},
                       clients                = Clients,
                       successfully_recovered = CleanShutdown,
                       file_size_limit        = FileSizeLimit,
                       cref_to_msg_ids        = #{},
                       credit_disc_bound      = CreditDiscBound
                     },
    %% If we didn't recover the msg location index then we need to
    %% rebuild it now.
    %% NOTE(review): the "Rebuilding" debug line below is logged even
    %% on a clean recovery; build_index/3 decides how much work is
    %% actually done -- confirm before relying on this log message.
    Cleanliness = case CleanShutdown of
                      true -> "clean";
                      false -> "unclean"
                  end,
    rabbit_log:debug("Rebuilding message location index after ~s shutdown...~n",
                     [Cleanliness]),
    {Offset, State1 = #msstate { current_file = CurFile }} =
        build_index(CleanShutdown, StartupFunState, State),
    rabbit_log:debug("Finished rebuilding index~n", []),
    %% read is only needed so that we can seek
    {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile),
                             [read | ?WRITE_MODE]),
    {ok, Offset} = file_handle_cache:position(CurHdl, Offset),
    ok = file_handle_cache:truncate(CurHdl),

    {ok, maybe_compact(State1 #msstate { current_file_handle = CurHdl }),
     hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.

%% gen_server2 priority callbacks: recovery/registration queries jump
%% the queue (7), reads rank above plain casts (2).
prioritise_call(Msg, _From, _Len, _State) ->
    case Msg of
        successfully_recovered_state                        -> 7;
        {new_client_state, _Ref, _Pid, _MODC, _CloseFDsFun} -> 7;
        {read, _MsgId}                                      -> 2;
        _                                                   -> 0
    end.

%% GC completion notifications and handle-age requests outrank normal
%% writes/removes; client_dying must overtake that client's in-flight
%% operations.
prioritise_cast(Msg, _Len, _State) ->
    case Msg of
        {combine_files, _Source, _Destination, _Reclaimed} -> 8;
        {delete_file, _File, _Reclaimed}                   -> 8;
        {set_maximum_since_use, _Age}                      -> 8;
        {client_dying, _Pid}                               -> 7;
        _                                                  -> 0
    end.

%% The periodic sync tick is processed ahead of everything else.
prioritise_info(Msg, _Len, _State) ->
    case Msg of
        sync                                               -> 8;
        _                                                  -> 0
    end.
%% gen_server2 call handler: recovery query, client registration and
%% deregistration, plus deferred-capable reads and contains checks.
handle_call(successfully_recovered_state, _From, State) ->
    reply(State #msstate.successfully_recovered, State);

handle_call({new_client_state, CRef, CPid, MsgOnDiskFun, CloseFDsFun}, _From,
            State = #msstate { dir                = Dir,
                               index_state        = IndexState,
                               index_module       = IndexModule,
                               file_handles_ets   = FileHandlesEts,
                               file_summary_ets   = FileSummaryEts,
                               cur_file_cache_ets = CurFileCacheEts,
                               flying_ets         = FlyingEts,
                               clients            = Clients,
                               gc_pid             = GCPid }) ->
    %% Register the client and hand back everything it needs for
    %% direct (server-bypassing) access. We monitor the client pid so
    %% its credit-flow state can be cleaned up on death (see the
    %% 'DOWN' clause of handle_info/2).
    Clients1 = maps:put(CRef, {CPid, MsgOnDiskFun, CloseFDsFun}, Clients),
    erlang:monitor(process, CPid),
    reply({IndexState, IndexModule, Dir, GCPid, FileHandlesEts, FileSummaryEts,
           CurFileCacheEts, FlyingEts},
          State #msstate { clients = Clients1 });

handle_call({client_terminate, CRef}, _From, State) ->
    reply(ok, clear_client(CRef, State));

handle_call({read, MsgId}, From, State) ->
    %% May defer the reply if the relevant file is being GC'd;
    %% read_message/3 replies via gen_server2:reply/2 when done.
    State1 = read_message(MsgId, From, State),
    noreply(State1);

handle_call({contains, MsgId}, From, State) ->
    %% Also potentially deferred, so that contains cannot overtake an
    %% in-progress remove (see module header notes).
    State1 = contains_message(MsgId, From, State),
    noreply(State1).

%% gen_server2 cast handler: client lifecycle, writes/removes, GC
%% completion notifications and file-handle housekeeping.
handle_cast({client_dying, CRef},
            State = #msstate { dying_clients       = DyingClients,
                               current_file_handle = CurHdl,
                               current_file        = CurFile }) ->
    %% Record where the current file ends right now: only writes from
    %% this client at or beyond this point may be dropped.
    {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl),
    DyingClients1 = maps:put(CRef,
                             #dying_client{client_ref = CRef,
                                           file = CurFile,
                                           offset = CurOffset},
                             DyingClients),
    noreply(State #msstate { dying_clients = DyingClients1 });

handle_cast({client_delete, CRef},
            State = #msstate { clients = Clients }) ->
    State1 = State #msstate { clients = maps:remove(CRef, Clients) },
    noreply(clear_client(CRef, State1));

handle_cast({write, CRef, MsgId, Flow},
            State = #msstate { cur_file_cache_ets = CurFileCacheEts,
                               clients            = Clients,
                               credit_disc_bound  = CreditDiscBound }) ->
    case Flow of
        flow   -> {CPid, _, _} = maps:get(CRef, Clients),
                  %% We are going to process a message sent by the
                  %% rabbit_amqqueue_process. Now we are accessing the
                  %% msg_store process dictionary.
                  credit_flow:ack(CPid, CreditDiscBound);
        noflow -> ok
    end,
    %% Drop the "pending write" hold the client placed on the cache
    %% entry; the refcount must never go negative.
    true = 0 =< ets:update_counter(CurFileCacheEts, MsgId, {3, -1}),
    case update_flying(-1, MsgId, CRef, State) of
        process ->
            [{MsgId, Msg, _PWC}] = ets:lookup(CurFileCacheEts, MsgId),
            noreply(write_message(MsgId, Msg, CRef, State));
        ignore ->
            %% A 'remove' has already been issued and eliminated the
            %% 'write'.
            State1 = blind_confirm(CRef, gb_sets:singleton(MsgId),
                                   ignored, State),
            %% If all writes get eliminated, cur_file_cache_ets could
            %% grow unbounded. To prevent that we delete the cache
            %% entry here, but only if the message isn't in the
            %% current file. That way reads of the message can
            %% continue to be done client side, from either the cache
            %% or the non-current files. If the message *is* in the
            %% current file then the cache entry will be removed by
            %% the normal logic for that in write_message/4 and
            %% maybe_roll_to_new_file/2.
            %%
            %% NB: index_lookup/2 returns a #msg_location{} record or
            %% 'not_found', not a list (see client_read3/3). The
            %% previous clause here matched [#msg_location{...}], so
            %% it could never fire and current-file cache entries were
            %% evicted prematurely.
            case index_lookup(MsgId, State1) of
                #msg_location { file = File }
                  when File == State1 #msstate.current_file ->
                    ok;
                _ ->
                    true = ets:match_delete(CurFileCacheEts, {MsgId, '_', 0})
            end,
            noreply(State1)
    end;

handle_cast({remove, CRef, MsgIds}, State) ->
    %% Process each remove unless the flying optimisation says it was
    %% already eliminated against an unprocessed write.
    {RemovedMsgIds, State1} =
        lists:foldl(
          fun (MsgId, {Removed, State2}) ->
                  case update_flying(+1, MsgId, CRef, State2) of
                      process -> {[MsgId | Removed],
                                  remove_message(MsgId, CRef, State2)};
                      ignore  -> {Removed, State2}
                  end
          end, {[], State}, MsgIds),
    noreply(maybe_compact(client_confirm(CRef, gb_sets:from_list(RemovedMsgIds),
                                         ignored, State1)));

handle_cast({combine_files, Source, Destination, Reclaimed},
            State = #msstate { sum_file_size    = SumFileSize,
                               file_handles_ets = FileHandlesEts,
                               file_summary_ets = FileSummaryEts,
                               clients          = Clients }) ->
    %% GC finished merging Source into Destination: Source is gone,
    %% Destination can be unlocked and pending operations released.
    ok = cleanup_after_file_deletion(Source, State),
    %% see comment in cleanup_after_file_deletion, and client_read3
    true = mark_handle_to_close(Clients, FileHandlesEts, Destination, false),
    true = ets:update_element(FileSummaryEts, Destination,
                              {#file_summary.locked, false}),
    State1 = State #msstate { sum_file_size = SumFileSize - Reclaimed },
    noreply(maybe_compact(run_pending([Source, Destination], State1)));

handle_cast({delete_file, File, Reclaimed},
            State = #msstate { sum_file_size = SumFileSize }) ->
    ok = cleanup_after_file_deletion(File, State),
    State1 = State #msstate { sum_file_size = SumFileSize - Reclaimed },
    noreply(maybe_compact(run_pending([File], State1)));

handle_cast({set_maximum_since_use, Age}, State) ->
    ok = file_handle_cache:set_maximum_since_use(Age),
    noreply(State).
%% 'sync' (from the sync timer) and 'timeout' (from the 0 timeout set
%% by next_state/1) both flush pending confirms to disk.
handle_info(sync, State) ->
    noreply(internal_sync(State));

handle_info(timeout, State) ->
    noreply(internal_sync(State));

handle_info({'DOWN', _MRef, process, Pid, _Reason}, State) ->
    %% similar to what happens in
    %% rabbit_amqqueue_process:handle_ch_down but with a relation of
    %% msg_store -> rabbit_amqqueue_process instead of
    %% rabbit_amqqueue_process -> rabbit_channel.
    credit_flow:peer_down(Pid),
    noreply(State);

handle_info({'EXIT', _Pid, Reason}, State) ->
    {stop, Reason, State}.

%% Orderly shutdown: stop the GC, sync and close the current file,
%% persist the file summary and recovery terms (client refs + index
%% module) so the next start can recover cleanly, then tear down the
%% ETS tables and the index.
terminate(_Reason, State = #msstate { index_state         = IndexState,
                                      index_module        = IndexModule,
                                      current_file_handle = CurHdl,
                                      gc_pid              = GCPid,
                                      file_handles_ets    = FileHandlesEts,
                                      file_summary_ets    = FileSummaryEts,
                                      cur_file_cache_ets  = CurFileCacheEts,
                                      flying_ets          = FlyingEts,
                                      clients             = Clients,
                                      dir                 = Dir }) ->
    rabbit_log:info("Stopping message store for directory '~s'", [Dir]),
    %% stop the gc first, otherwise it could be working and we pull
    %% out the ets tables from under it.
    ok = rabbit_msg_store_gc:stop(GCPid),
    State1 = case CurHdl of
                 undefined -> State;
                 _         -> State2 = internal_sync(State),
                              ok = file_handle_cache:close(CurHdl),
                              State2
             end,
    State3 = close_all_handles(State1),
    %% Failures below are logged but do not abort shutdown; a missing
    %% summary/recovery-terms file simply forces a rebuild on restart.
    case store_file_summary(FileSummaryEts, Dir) of
        ok -> ok;
        {error, FSErr} ->
            rabbit_log:error("Unable to store file summary"
                             " for vhost message store for directory ~p~n"
                             "Error: ~p~n",
                             [Dir, FSErr])
    end,
    [true = ets:delete(T) || T <- [FileSummaryEts, FileHandlesEts,
                                   CurFileCacheEts, FlyingEts]],
    IndexModule:terminate(IndexState),
    case store_recovery_terms([{client_refs, maps:keys(Clients)},
                               {index_module, IndexModule}], Dir) of
        ok ->
            rabbit_log:info("Message store for directory '~s' is stopped", [Dir]),
            ok;
        {error, RTErr} ->
            rabbit_log:error("Unable to save message store recovery terms"
                             " for directory ~p~nError: ~p~n",
                             [Dir, RTErr])
    end,
    State3 #msstate { index_state         = undefined,
                      current_file_handle = undefined }.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ).

%%----------------------------------------------------------------------------
%% general helper functions
%%----------------------------------------------------------------------------

noreply(State) ->
    {State1, Timeout} = next_state(State),
    {noreply, State1, Timeout}.

reply(Reply, State) ->
    {State1, Timeout} = next_state(State),
    {reply, Reply, State1, Timeout}.

%% Pick the gen_server timeout: hibernate when there is nothing to
%% confirm, otherwise 0 so an immediate 'timeout' message triggers
%% internal_sync/1; (re)arm or stop the sync timer accordingly.
next_state(State = #msstate { sync_timer_ref  = undefined,
                              cref_to_msg_ids = CTM }) ->
    case maps:size(CTM) of
        0 -> {State, hibernate};
        _ -> {start_sync_timer(State), 0}
    end;
next_state(State = #msstate { cref_to_msg_ids = CTM }) ->
    case maps:size(CTM) of
        0 -> {stop_sync_timer(State), hibernate};
        _ -> {State, 0}
    end.

start_sync_timer(State) ->
    rabbit_misc:ensure_timer(State, #msstate.sync_timer_ref,
                             ?SYNC_INTERVAL, sync).
stop_sync_timer(State) ->
    rabbit_misc:stop_timer(State, #msstate.sync_timer_ref).

%% Sync the current file (only if any client has confirms pending) and
%% then deliver 'written' confirms per client.
internal_sync(State = #msstate { current_file_handle = CurHdl,
                                 cref_to_msg_ids = CTM }) ->
    State1 = stop_sync_timer(State),
    CGs = maps:fold(fun (CRef, MsgIds, NS) ->
                            case gb_sets:is_empty(MsgIds) of
                                true  -> NS;
                                false -> [{CRef, MsgIds} | NS]
                            end
                    end, [], CTM),
    ok = case CGs of
             [] -> ok;
             _  -> file_handle_cache:sync(CurHdl)
         end,
    lists:foldl(fun ({CRef, MsgIds}, StateN) ->
                        client_confirm(CRef, MsgIds, written, StateN)
                end, State1, CGs).

%% Server-side half of the flying optimisation: Diff is -1 when
%% processing a write and +1 when processing a remove (the opposite
%% signs to client_update_flying/3). Returns 'process' when the
%% operation must actually be carried out, 'ignore' when it has been
%% eliminated against the opposite in-flight operation. Note that Diff
%% and NDiff are bound, so the clauses below match the stored counter
%% against those exact values.
update_flying(Diff, MsgId, CRef, #msstate { flying_ets = FlyingEts }) ->
    Key = {MsgId, CRef},
    NDiff = -Diff,
    case ets:lookup(FlyingEts, Key) of
        []           -> ignore;
        [{_, Diff}]  -> ignore; %% [1]
        [{_, NDiff}] -> ets:update_counter(FlyingEts, Key, {2, Diff}),
                        %% delete_object only removes the entry if it
                        %% still reads exactly {Key, 0} - atomic
                        %% w.r.t. concurrent client increments.
                        true = ets:delete_object(FlyingEts, {Key, 0}),
                        process;
        [{_, 0}]     -> true = ets:delete_object(FlyingEts, {Key, 0}),
                        ignore;
        [{_, Err}]   -> throw({bad_flying_ets_record, Diff, Err, Key})
    end.
%% [1] We can get here, for example, in the following scenario: There
%% is a write followed by a remove in flight. The counter will be 0,
%% so on processing the write the server attempts to delete the
%% entry. If at that point the client injects another write it will
%% either insert a new entry, containing +1, or increment the existing
%% entry to +1, thus preventing its removal. Either way therefore when
%% the server processes the remove, the counter will be +1.
+
+%% Decide what a write of MsgId requires, given the masking verdict from
+%% should_mask_action/3 and the current index entry. Returns one of:
+%%   {write, State}          -> a fresh copy must be appended to disk;
+%%   {ignore, File, State}   -> nothing to do (File may be 'undefined');
+%%   {confirm, File, State}  -> message already on disk in File; only the
+%%                              ref count / confirms need updating.
+write_action({true, not_found}, _MsgId, State) ->
+    {ignore, undefined, State};
+write_action({true, #msg_location { file = File }}, _MsgId, State) ->
+    {ignore, File, State};
+write_action({false, not_found}, _MsgId, State) ->
+    {write, State};
+write_action({Mask, #msg_location { ref_count = 0, file = File,
+                                    total_size = TotalSize }},
+             MsgId, State = #msstate { file_summary_ets = FileSummaryEts }) ->
+    case {Mask, ets:lookup(FileSummaryEts, File)} of
+        {false, [#file_summary { locked = true }]} ->
+            %% Zero ref count and the file is being GC'd: drop the stale
+            %% index entry and write a fresh copy.
+            ok = index_delete(MsgId, State),
+            {write, State};
+        {false_if_increment, [#file_summary { locked = true }]} ->
+            %% The msg for MsgId is older than the client death
+            %% message, but as it is being GC'd currently we'll have
+            %% to write a new copy, which will then be younger, so
+            %% ignore this write.
+            {ignore, File, State};
+        {_Mask, [#file_summary {}]} ->
+            %% Resurrect the existing on-disk copy: ref count 0 -> 1 and
+            %% its bytes become "valid" again in the file summary.
+            ok = index_update_ref_count(MsgId, 1, State),
+            State1 = adjust_valid_total_size(File, TotalSize, State),
+            {confirm, File, State1}
+    end;
+write_action({_Mask, #msg_location { ref_count = RefCount, file = File }},
+             MsgId, State) ->
+    ok = index_update_ref_count(MsgId, RefCount + 1, State),
+    %% We already know about it, just update counter. Only update
+    %% field otherwise bad interaction with concurrent GC
+    {confirm, File, State}.
+
+%% Server-side handling of a client write: consult write_action/3 and either
+%% append the message (recording a pending confirm), or — when the message is
+%% already on disk — confirm immediately and drop the now-redundant current
+%% file cache entry (only safe when the copy is NOT in the current file).
+write_message(MsgId, Msg, CRef,
+              State = #msstate { cur_file_cache_ets = CurFileCacheEts }) ->
+    case write_action(should_mask_action(CRef, MsgId, State), MsgId, State) of
+        {write, State1} ->
+            write_message(MsgId, Msg,
+                          record_pending_confirm(CRef, MsgId, State1));
+        {ignore, CurFile, State1 = #msstate { current_file = CurFile }} ->
+            State1;
+        {ignore, _File, State1} ->
+            true = ets:delete_object(CurFileCacheEts, {MsgId, Msg, 0}),
+            State1;
+        {confirm, CurFile, State1 = #msstate { current_file = CurFile }}->
+            record_pending_confirm(CRef, MsgId, State1);
+        {confirm, _File, State1} ->
+            true = ets:delete_object(CurFileCacheEts, {MsgId, Msg, 0}),
+            %% Copy lives in an already-synced file, so the client's
+            %% on-disk callback can be invoked right away.
+            update_pending_confirms(
+              fun (MsgOnDiskFun, CTM) ->
+                      MsgOnDiskFun(gb_sets:singleton(MsgId), written),
+                      CTM
+              end, CRef, State1)
+    end.
+
+%% Server-side handling of a client remove: decrement the ref count, and at
+%% ref count 1->0 either defer (file locked by GC) or account the bytes as
+%% garbage and possibly trigger deletion of a now-empty file.
+remove_message(MsgId, CRef,
+               State = #msstate { file_summary_ets = FileSummaryEts }) ->
+    case should_mask_action(CRef, MsgId, State) of
+        {true, _Location} ->
+            State;
+        {false_if_increment, #msg_location { ref_count = 0 }} ->
+            %% CRef has tried to both write and remove this msg whilst
+            %% it's being GC'd.
+            %%
+            %% ASSERTION: [#file_summary { locked = true }] =
+            %%            ets:lookup(FileSummaryEts, File),
+            State;
+        {_Mask, #msg_location { ref_count = RefCount, file = File,
+                                total_size = TotalSize }}
+          when RefCount > 0 ->
+            %% only update field, otherwise bad interaction with
+            %% concurrent GC
+            Dec = fun () -> index_update_ref_count(
+                              MsgId, RefCount - 1, State) end,
+            case RefCount of
+                %% don't remove from cur_file_cache_ets here because
+                %% there may be further writes in the mailbox for the
+                %% same msg.
+                1 -> case ets:lookup(FileSummaryEts, File) of
+                         [#file_summary { locked = true }] ->
+                             add_to_pending_gc_completion(
+                               {remove, MsgId, CRef}, File, State);
+                         [#file_summary {}] ->
+                             ok = Dec(),
+                             delete_file_if_empty(
+                               File, adjust_valid_total_size(
+                                       File, -TotalSize, State))
+                     end;
+                _ -> ok = Dec(),
+                     State
+            end
+    end.
+
+%% Append Msg to the current file, insert its index entry, bump the current
+%% file's valid/total size counters, and roll to a new file if the size
+%% limit is reached. The ets:lookup match asserts the current file is the
+%% right-most and unlocked (GC never touches the current file).
+write_message(MsgId, Msg,
+              State = #msstate { current_file_handle = CurHdl,
+                                 current_file        = CurFile,
+                                 sum_valid_data      = SumValid,
+                                 sum_file_size       = SumFileSize,
+                                 file_summary_ets    = FileSummaryEts }) ->
+    {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl),
+    {ok, TotalSize} = rabbit_msg_file:append(CurHdl, MsgId, Msg),
+    ok = index_insert(
+           #msg_location { msg_id = MsgId, ref_count = 1, file = CurFile,
+                           offset = CurOffset, total_size = TotalSize }, State),
+    [#file_summary { right = undefined, locked = false }] =
+        ets:lookup(FileSummaryEts, CurFile),
+    [_,_] = ets:update_counter(FileSummaryEts, CurFile,
+                               [{#file_summary.valid_total_size, TotalSize},
+                                {#file_summary.file_size,        TotalSize}]),
+    maybe_roll_to_new_file(CurOffset + TotalSize,
+                           State #msstate {
+                             sum_valid_data = SumValid + TotalSize,
+                             sum_file_size  = SumFileSize + TotalSize }).
+
+%% Serve a read request: reply 'not_found' when the message has no
+%% positive-ref-count index entry, otherwise delegate to read_message1/3.
+%% Always replies to From (possibly later, via the pending-GC queue).
+read_message(MsgId, From, State) ->
+    case index_lookup_positive_ref_count(MsgId, State) of
+        not_found   -> gen_server2:reply(From, not_found),
+                       State;
+        MsgLocation -> read_message1(From, MsgLocation, State)
+    end.
+
+%% Serve a read given a known location. Current file: prefer the write-through
+%% cache; on a cache miss flush the handle first if the message may still sit
+%% in the unflushed write buffer (Offset >= raw offset). Other files: read
+%% from disk unless the file is locked by GC, in which case the read is
+%% queued until GC of that file completes.
+read_message1(From, #msg_location { msg_id = MsgId, file = File,
+                                    offset = Offset } = MsgLoc,
+              State = #msstate { current_file        = CurFile,
+                                 current_file_handle = CurHdl,
+                                 file_summary_ets    = FileSummaryEts,
+                                 cur_file_cache_ets  = CurFileCacheEts }) ->
+    case File =:= CurFile of
+        true  -> {Msg, State1} =
+                     %% can return [] if msg in file existed on startup
+                     case ets:lookup(CurFileCacheEts, MsgId) of
+                         [] ->
+                             {ok, RawOffSet} =
+                                 file_handle_cache:current_raw_offset(CurHdl),
+                             ok = case Offset >= RawOffSet of
+                                      true  -> file_handle_cache:flush(CurHdl);
+                                      false -> ok
+                                  end,
+                             read_from_disk(MsgLoc, State);
+                         [{MsgId, Msg1, _CacheRefCount}] ->
+                             {Msg1, State}
+                     end,
+                 gen_server2:reply(From, {ok, Msg}),
+                 State1;
+        false -> [#file_summary { locked = Locked }] =
+                     ets:lookup(FileSummaryEts, File),
+                 case Locked of
+                     true  -> add_to_pending_gc_completion({read, MsgId, From},
+                                                           File, State);
+                     false -> {Msg, State1} = read_from_disk(MsgLoc, State),
+                              gen_server2:reply(From, {ok, Msg}),
+                              State1
+                 end
+    end.
+
+%% Read one message at its recorded offset. A mismatched read makes the
+%% outer {ok, {MsgId, Msg}} match fail, deliberately crashing with a
+%% diagnostic 'misread' term instead of returning bad data.
+read_from_disk(#msg_location { msg_id = MsgId, file = File, offset = Offset,
+                               total_size = TotalSize }, State) ->
+    {Hdl, State1} = get_read_handle(File, State),
+    {ok, Offset} = file_handle_cache:position(Hdl, Offset),
+    {ok, {MsgId, Msg}} =
+        case rabbit_msg_file:read(Hdl, TotalSize) of
+            {ok, {MsgId, _}} = Obj ->
+                Obj;
+            Rest ->
+                {error, {misread, [{old_state, State},
+                                   {file_num,  File},
+                                   {offset,    Offset},
+                                   {msg_id,    MsgId},
+                                   {read,      Rest},
+                                   {proc_dict, get()}
+                                  ]}}
+        end,
+    {Msg, State1}.
+
+%% Membership check: true iff the message has a positive ref count. If its
+%% file is currently being GC'd the answer is deferred until GC completes.
+contains_message(MsgId, From,
+                 State = #msstate { pending_gc_completion = Pending }) ->
+    case index_lookup_positive_ref_count(MsgId, State) of
+        not_found ->
+            gen_server2:reply(From, false),
+            State;
+        #msg_location { file = File } ->
+            case maps:is_key(File, Pending) of
+                true  -> add_to_pending_gc_completion(
+                           {contains, MsgId, From}, File, State);
+                false -> gen_server2:reply(From, true),
+                         State
+            end
+    end.
+
+%% Queue an operation (read/contains/remove) against File until the GC
+%% currently holding it finishes; ops are consed, so stored newest-first.
+add_to_pending_gc_completion(
+  Op, File, State = #msstate { pending_gc_completion = Pending }) ->
+    State #msstate { pending_gc_completion =
+                         rabbit_misc:maps_cons(File, Op, Pending) }.
+
+%% Replay the deferred operations for each of Files, oldest-first (hence
+%% the lists:reverse of the consed list), removing each file's queue.
+run_pending(Files, State) ->
+    lists:foldl(
+      fun (File, State1 = #msstate { pending_gc_completion = Pending }) ->
+              Pending1 = maps:remove(File, Pending),
+              lists:foldl(
+                fun run_pending_action/2,
+                State1 #msstate { pending_gc_completion = Pending1 },
+                lists:reverse(maps:get(File, Pending)))
+      end, State, Files).
+
+%% Dispatch one deferred operation back into the normal handling path.
+run_pending_action({read, MsgId, From}, State) ->
+    read_message(MsgId, From, State);
+run_pending_action({contains, MsgId, From}, State) ->
+    contains_message(MsgId, From, State);
+run_pending_action({remove, MsgId, CRef}, State) ->
+    remove_message(MsgId, CRef, State).
+
+%% ets:update_counter/3 wrapper: run SuccessFun on the new value, or
+%% FailThunk when the key has vanished (update_counter raises badarg).
+safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) ->
+    try
+        SuccessFun(ets:update_counter(Tab, Key, UpdateOp))
+    catch error:badarg -> FailThunk()
+    end.
+
+%% Insert MsgId into the cache with ref count 1, or bump the existing
+%% count. Retries itself on the insert/update race where the entry is
+%% concurrently removed between insert_new and update_counter.
+update_msg_cache(CacheEts, MsgId, Msg) ->
+    case ets:insert_new(CacheEts, {MsgId, Msg, 1}) of
+        true  -> ok;
+        false -> safe_ets_update_counter(
+                   CacheEts, MsgId, {3, +1}, fun (_) -> ok end,
+                   fun () -> update_msg_cache(CacheEts, MsgId, Msg) end)
+    end.
+
+%% Apply Delta (may be negative) to File's valid_total_size and to the
+%% store-wide sum_valid_data in lock-step.
+adjust_valid_total_size(File, Delta, State = #msstate {
+                                       sum_valid_data   = SumValid,
+                                       file_summary_ets = FileSummaryEts }) ->
+    [_] = ets:update_counter(FileSummaryEts, File,
+                             [{#file_summary.valid_total_size, Delta}]),
+    State #msstate { sum_valid_data = SumValid + Delta }.
+
+%% maps:put/3 that asserts the key is not already present.
+maps_store(Key, Val, Dict) ->
+    false = maps:is_key(Key, Dict),
+    maps:put(Key, Val, Dict).
+
+%% Run Fun(MsgOnDiskFun, CTM) for CRef's registered on-disk callback and
+%% store the resulting confirm map; a no-op for clients registered without
+%% a callback (undefined).
+update_pending_confirms(Fun, CRef,
+                        State = #msstate { clients         = Clients,
+                                           cref_to_msg_ids = CTM }) ->
+    case maps:get(CRef, Clients) of
+        {_CPid, undefined,    _CloseFDsFun} -> State;
+        {_CPid, MsgOnDiskFun, _CloseFDsFun} -> CTM1 = Fun(MsgOnDiskFun, CTM),
+                                               State #msstate {
+                                                 cref_to_msg_ids = CTM1 }
+    end.
+
+%% Add MsgId to CRef's set of writes awaiting a 'written' confirm.
+record_pending_confirm(CRef, MsgId, State) ->
+    update_pending_confirms(
+      fun (_MsgOnDiskFun, CTM) ->
+              NewMsgIds = case maps:find(CRef, CTM) of
+                              error        -> gb_sets:singleton(MsgId);
+                              {ok, MsgIds} -> gb_sets:add(MsgId, MsgIds)
+                          end,
+              maps:put(CRef, NewMsgIds, CTM)
+      end, CRef, State).
+
+%% Confirm the intersection of MsgIds with CRef's pending set via the
+%% client's callback, then shrink (or drop) the pending set accordingly.
+client_confirm(CRef, MsgIds, ActionTaken, State) ->
+    update_pending_confirms(
+      fun (MsgOnDiskFun, CTM) ->
+              case maps:find(CRef, CTM) of
+                  {ok, Gs} -> MsgOnDiskFun(gb_sets:intersection(Gs, MsgIds),
+                                           ActionTaken),
+                              MsgIds1 = rabbit_misc:gb_sets_difference(
+                                          Gs, MsgIds),
+                              case gb_sets:is_empty(MsgIds1) of
+                                  true  -> maps:remove(CRef, CTM);
+                                  false -> maps:put(CRef, MsgIds1, CTM)
+                              end;
+                  error    -> CTM
+              end
+      end, CRef, State).
+
+%% Invoke CRef's callback for MsgIds unconditionally, without consulting or
+%% touching the pending-confirm bookkeeping.
+blind_confirm(CRef, MsgIds, ActionTaken, State) ->
+    update_pending_confirms(
+      fun (MsgOnDiskFun, CTM) -> MsgOnDiskFun(MsgIds, ActionTaken), CTM end,
+      CRef, State).
+
+%% Detect whether the MsgId is older or younger than the client's death
+%% msg (if there is one). If the msg is older than the client death
+%% msg, and it has a 0 ref_count we must only alter the ref_count, not
+%% rewrite the msg - rewriting it would make it younger than the death
+%% msg and thus should be ignored. Note that this (correctly) returns
+%% false when testing to remove the death msg itself.
+%% Age is ordered by {File, Offset}; returns {Verdict, IndexLocation}
+%% where Verdict is true | false | false_if_increment.
+should_mask_action(CRef, MsgId,
+                   State = #msstate{dying_clients = DyingClients}) ->
+    case {maps:find(CRef, DyingClients), index_lookup(MsgId, State)} of
+        {error, Location} ->
+            {false, Location};
+        {{ok, _}, not_found} ->
+            {true, not_found};
+        {{ok, Client}, #msg_location { file = File, offset = Offset,
+                                       ref_count = RefCount } = Location} ->
+            #dying_client{file = DeathFile, offset = DeathOffset} = Client,
+            {case {{DeathFile, DeathOffset} < {File, Offset}, RefCount} of
+                 {true,  _} -> true;
+                 {false, 0} -> false_if_increment;
+                 {false, _} -> false
+             end, Location}
+    end.
+
+%%----------------------------------------------------------------------------
+%% file helper functions
+%%----------------------------------------------------------------------------
+
+%% Open an absolute path through the file_handle_cache with binary mode and
+%% the store's standard read/write buffering.
+open_file(File, Mode) ->
+    file_handle_cache:open_with_absolute_path(
+      File, ?BINARY_MODE ++ Mode,
+      [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE},
+       {read_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]).
+
+open_file(Dir, FileName, Mode) ->
+    open_file(form_filename(Dir, FileName), Mode).
+
+%% Close and evict the cached handle for Key, working on either a client
+%% state, a server state, or a bare handle-cache map.
+close_handle(Key, CState = #client_msstate { file_handle_cache = FHC }) ->
+    CState #client_msstate { file_handle_cache = close_handle(Key, FHC) };
+
+close_handle(Key, State = #msstate { file_handle_cache = FHC }) ->
+    State #msstate { file_handle_cache = close_handle(Key, FHC) };
+
+close_handle(Key, FHC) ->
+    case maps:find(Key, FHC) of
+        {ok, Hdl} -> ok = file_handle_cache:close(Hdl),
+                     maps:remove(Key, FHC);
+        error     -> FHC
+    end.
+
+%% Record in the shared table that client Ref has File open.
+mark_handle_open(FileHandlesEts, File, Ref) ->
+    %% This is fine to fail (already exists). Note it could fail with
+    %% the value being close, and not have it updated to open.
+    ets:insert_new(FileHandlesEts, {{Ref, File}, open}),
+    true.
+
+%% See comment in client_read3 - only call this when the file is locked
+%% Flip every client's 'open' row for File to 'close'; when Invoke is true
+%% also call each affected client's registered close-FDs callback.
+mark_handle_to_close(ClientRefs, FileHandlesEts, File, Invoke) ->
+    [ begin
+          case (ets:update_element(FileHandlesEts, Key, {2, close})
+                andalso Invoke) of
+              true  -> case maps:get(Ref, ClientRefs) of
+                           {_CPid, _MsgOnDiskFun, undefined} ->
+                               ok;
+                           {_CPid, _MsgOnDiskFun, CloseFDsFun} ->
+                               ok = CloseFDsFun()
+                       end;
+              false -> ok
+          end
+      end || {{Ref, _File} = Key, open} <-
+                 ets:match_object(FileHandlesEts, {{'_', File}, open}) ],
+    true.
+
+%% Thunk wrapper so deletion can be retried later by the caller.
+safe_file_delete_fun(File, Dir, FileHandlesEts) ->
+    fun () -> safe_file_delete(File, Dir, FileHandlesEts) end.
+
+%% Delete File from disk only once no client has any handle row for it;
+%% returns true on deletion, false when still referenced.
+safe_file_delete(File, Dir, FileHandlesEts) ->
+    %% do not match on any value - it's the absence of the row that
+    %% indicates the client has really closed the file.
+    case ets:match_object(FileHandlesEts, {{'_', File}, '_'}, 1) of
+        {[_|_], _Cont} -> false;
+        _              -> ok = file:delete(
+                                 form_filename(Dir, filenum_to_name(File))),
+                          true
+    end.
+
+-spec close_all_indicated
+        (client_msstate()) -> rabbit_types:ok(client_msstate()).
+
+%% Client side: close every handle that the server has marked 'close' for
+%% this client ref, deleting the marker rows as we go.
+close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts,
+                                      client_ref       = Ref } =
+                        CState) ->
+    Objs = ets:match_object(FileHandlesEts, {{Ref, '_'}, close}),
+    {ok, lists:foldl(fun ({Key = {_Ref, File}, close}, CStateM) ->
+                             true = ets:delete(FileHandlesEts, Key),
+                             close_handle(File, CStateM)
+                     end, CState, Objs)}.
+
+%% Close every cached handle; the client variant also removes this client's
+%% rows from the shared file-handles table.
+close_all_handles(CState = #client_msstate { file_handles_ets  = FileHandlesEts,
+                                             file_handle_cache = FHC,
+                                             client_ref        = Ref }) ->
+    ok = maps:fold(fun (File, Hdl, ok) ->
+                           true = ets:delete(FileHandlesEts, {Ref, File}),
+                           file_handle_cache:close(Hdl)
+                   end, ok, FHC),
+    CState #client_msstate { file_handle_cache = #{} };
+
+close_all_handles(State = #msstate { file_handle_cache = FHC }) ->
+    ok = maps:fold(fun (_Key, Hdl, ok) -> file_handle_cache:close(Hdl) end,
+                   ok, FHC),
+    State #msstate { file_handle_cache = #{} }.
+
+%% Fetch (opening and caching on miss) a read handle for FileNum.
+get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = FHC,
+                                                    dir = Dir }) ->
+    {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir),
+    {Hdl, CState #client_msstate { file_handle_cache = FHC2 }};
+
+get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC,
+                                            dir = Dir }) ->
+    {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir),
+    {Hdl, State #msstate { file_handle_cache = FHC2 }}.
+
+get_read_handle(FileNum, FHC, Dir) ->
+    case maps:find(FileNum, FHC) of
+        {ok, Hdl} -> {Hdl, FHC};
+        error     -> {ok, Hdl} = open_file(Dir, filenum_to_name(FileNum),
+                                           ?READ_MODE),
+                     {Hdl, maps:put(FileNum, Hdl, FHC)}
+    end.
+
+%% Grow the file to FileSizeLimit (truncate at the limit) and leave the
+%% handle positioned at FinalPos.
+preallocate(Hdl, FileSizeLimit, FinalPos) ->
+    {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit),
+    ok = file_handle_cache:truncate(Hdl),
+    {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos),
+    ok.
+
+%% Cut the file at Lowpoint, then preallocate up to Highpoint, leaving the
+%% handle positioned at Lowpoint ready for appends.
+truncate_and_extend_file(Hdl, Lowpoint, Highpoint) ->
+    {ok, Lowpoint} = file_handle_cache:position(Hdl, Lowpoint),
+    ok = file_handle_cache:truncate(Hdl),
+    ok = preallocate(Hdl, Highpoint, Lowpoint).
+
+form_filename(Dir, Name) -> filename:join(Dir, Name).
+
+%% Segment files are named "<integer>" ++ ?FILE_EXTENSION.
+filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION.
+
+filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)).
+
+%% All files in Dir with extension Ext, sorted by numeric file number
+%% (lexical sort would misorder e.g. "10" before "9").
+list_sorted_filenames(Dir, Ext) ->
+    lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end,
+               filelib:wildcard("*" ++ Ext, Dir)).
+
+%%----------------------------------------------------------------------------
+%% index
+%%----------------------------------------------------------------------------
+
+%% As index_lookup/2, but treats a ref_count of 0 as absent.
+index_lookup_positive_ref_count(Key, State) ->
+    case index_lookup(Key, State) of
+        not_found                       -> not_found;
+        #msg_location { ref_count = 0 } -> not_found;
+        #msg_location {} = MsgLocation  -> MsgLocation
+    end.
+
+index_update_ref_count(Key, RefCount, State) ->
+    index_update_fields(Key, {#msg_location.ref_count, RefCount}, State).
+
+%% The index implementation is pluggable; these wrappers dispatch to the
+%% configured index_module carried in whichever state record we hold.
+index_lookup(Key, #gc_state { index_module = Index,
+                              index_state  = State }) ->
+    Index:lookup(Key, State);
+
+index_lookup(Key, #client_msstate { index_module = Index,
+                                    index_state  = State }) ->
+    Index:lookup(Key, State);
+
+index_lookup(Key, #msstate { index_module = Index, index_state = State }) ->
+    Index:lookup(Key, State).
+
+index_insert(Obj, #msstate { index_module = Index, index_state = State }) ->
+    Index:insert(Obj, State).
+
+index_update(Obj, #msstate { index_module = Index, index_state = State }) ->
+    Index:update(Obj, State).
+
+index_update_fields(Key, Updates, #msstate{ index_module = Index,
+                                            index_state  = State }) ->
+    Index:update_fields(Key, Updates, State);
+index_update_fields(Key, Updates, #gc_state{ index_module = Index,
+                                             index_state  = State }) ->
+    Index:update_fields(Key, Updates, State).
+
+index_delete(Key, #msstate { index_module = Index, index_state = State }) ->
+    Index:delete(Key, State).
+
+index_delete_object(Obj, #gc_state{ index_module = Index,
+                                    index_state  = State }) ->
+    Index:delete_object(Obj, State).
+
+%% Drop index entries created by count_msg_refs that never got matched to
+%% an on-disk location (file still 'undefined') after an index rebuild.
+index_clean_up_temporary_reference_count_entries(
+  #msstate { index_module = Index,
+             index_state  = State }) ->
+    Index:clean_up_temporary_reference_count_entries_without_file(State).
+
+%%----------------------------------------------------------------------------
+%% shutdown and recovery
+%%----------------------------------------------------------------------------
+
+%% Decide between fast recovery (reuse the saved index) and a full rebuild.
+%% Returns {FreshOrRecovered :: boolean(), IndexState, ClientRefs}. A fast
+%% recovery requires: recovery was requested, the saved client refs and
+%% index module match the current ones, and the index itself recovers ok;
+%% anything else falls back to a fresh index (rebuild from files).
+recover_index_and_client_refs(IndexModule, _Recover, undefined, Dir, _Name) ->
+    {false, IndexModule:new(Dir), []};
+recover_index_and_client_refs(IndexModule, false, _ClientRefs, Dir, Name) ->
+    rabbit_log:warning("Message store ~tp: rebuilding indices from scratch~n", [Name]),
+    {false, IndexModule:new(Dir), []};
+recover_index_and_client_refs(IndexModule, true, ClientRefs, Dir, Name) ->
+    Fresh = fun (ErrorMsg, ErrorArgs) ->
+                    rabbit_log:warning("Message store ~tp : " ++ ErrorMsg ++ "~n"
+                                       "rebuilding indices from scratch~n",
+                                       [Name | ErrorArgs]),
+                    {false, IndexModule:new(Dir), []}
+            end,
+    case read_recovery_terms(Dir) of
+        {false, Error} ->
+            Fresh("failed to read recovery terms: ~p", [Error]);
+        {true, Terms} ->
+            RecClientRefs  = proplists:get_value(client_refs, Terms, []),
+            RecIndexModule = proplists:get_value(index_module, Terms),
+            case (lists:sort(ClientRefs) =:= lists:sort(RecClientRefs)
+                  andalso IndexModule =:= RecIndexModule) of
+                true  -> case IndexModule:recover(Dir) of
+                             {ok, IndexState1} ->
+                                 {true, IndexState1, ClientRefs};
+                             {error, Error} ->
+                                 Fresh("failed to recover index: ~p", [Error])
+                         end;
+                false -> Fresh("recovery terms differ from present", [])
+            end
+    end.
+
+%% Persist the clean-shutdown marker file (?CLEAN_FILENAME) with Terms.
+store_recovery_terms(Terms, Dir) ->
+    rabbit_file:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms).
+
+%% Read and then delete the clean-shutdown marker; deleting it ensures a
+%% crash before the next clean shutdown forces a rebuild. Returns
+%% {true, Terms} only when both the read and the delete succeed.
+read_recovery_terms(Dir) ->
+    Path = filename:join(Dir, ?CLEAN_FILENAME),
+    case rabbit_file:read_term_file(Path) of
+        {ok, Terms}    -> case file:delete(Path) of
+                              ok             -> {true,  Terms};
+                              {error, Error} -> {false, Error}
+                          end;
+        {error, Error} -> {false, Error}
+    end.
+
+%% Snapshot the file summary ETS table to disk for fast recovery.
+store_file_summary(Tid, Dir) ->
+    ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME),
+                 [{extended_info, [object_count]}]).
+
+recover_file_summary(false, _Dir) ->
+    %% TODO: the only reason for this to be an *ordered*_set is so
+    %% that a) maybe_compact can start a traversal from the eldest
+    %% file, and b) build_index in fast recovery mode can easily
+    %% identify the current file. It's awkward to have both that
+    %% odering and the left/right pointers in the entries - replacing
+    %% the former with some additional bit of state would be easy, but
+    %% ditching the latter would be neater.
+    {false, ets:new(rabbit_msg_store_file_summary,
+                    [ordered_set, public, {keypos, #file_summary.file}])};
+recover_file_summary(true, Dir) ->
+    Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME),
+    case ets:file2tab(Path) of
+        {ok, Tid}       -> ok = file:delete(Path),
+                           {true, Tid};
+        %% A corrupt/missing snapshot degrades to a fresh table.
+        {error, _Error} -> recover_file_summary(false, Dir)
+    end.
+
+%% Seed the index with reference counts by draining the caller-supplied
+%% generator Gen (returns 'finished' or {MsgId, Delta, NextSeed}). Entries
+%% created here have file = undefined until build_index locates them on
+%% disk; counts that fall to zero are removed again.
+count_msg_refs(Gen, Seed, State) ->
+    case Gen(Seed) of
+        finished ->
+            ok;
+        {_MsgId, 0, Next} ->
+            count_msg_refs(Gen, Next, State);
+        {MsgId, Delta, Next} ->
+            ok = case index_lookup(MsgId, State) of
+                     not_found ->
+                         index_insert(#msg_location { msg_id = MsgId,
+                                                      file = undefined,
+                                                      ref_count = Delta },
+                                      State);
+                     #msg_location { ref_count = RefCount } = StoreEntry ->
+                         NewRefCount = RefCount + Delta,
+                         case NewRefCount of
+                             0 -> index_delete(MsgId, State);
+                             _ -> index_update(StoreEntry #msg_location {
+                                                 ref_count = NewRefCount },
+                                               State)
+                         end
+                 end,
+            count_msg_refs(Gen, Next, State)
+    end.
+
+%% Repair any compaction that crashed mid-way: every leftover .tmp file is
+%% appended back onto its main file. Returns true iff no tmp files existed
+%% (i.e. nothing needed repairing).
+recover_crashed_compactions(Dir) ->
+    FileNames =    list_sorted_filenames(Dir, ?FILE_EXTENSION),
+    TmpFileNames = list_sorted_filenames(Dir, ?FILE_EXTENSION_TMP),
+    lists:foreach(
+      fun (TmpFileName) ->
+              NonTmpRelatedFileName =
+                  filename:rootname(TmpFileName) ++ ?FILE_EXTENSION,
+              true = lists:member(NonTmpRelatedFileName, FileNames),
+              ok = recover_crashed_compaction(
+                     Dir, TmpFileName, NonTmpRelatedFileName)
+      end, TmpFileNames),
+    TmpFileNames == [].
+
+recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) ->
+    %% Because a msg can legitimately appear multiple times in the
+    %% same file, identifying the contents of the tmp file and where
+    %% they came from is non-trivial. If we are recovering a crashed
+    %% compaction then we will be rebuilding the index, which can cope
+    %% with duplicates appearing. Thus the simplest and safest thing
+    %% to do is to append the contents of the tmp file to its main
+    %% file.
+    {ok, TmpHdl}  = open_file(Dir, TmpFileName, ?READ_MODE),
+    {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName,
+                              ?READ_MODE ++ ?WRITE_MODE),
+    {ok, _End} = file_handle_cache:position(MainHdl, eof),
+    Size = filelib:file_size(form_filename(Dir, TmpFileName)),
+    {ok, Size} = file_handle_cache:copy(TmpHdl, MainHdl, Size),
+    ok = file_handle_cache:close(MainHdl),
+    ok = file_handle_cache:delete(TmpHdl),
+    ok.
+
+%% Scan a segment file, collecting {MsgId, TotalSize, Offset} for every
+%% parseable message. A missing file yields an empty result rather than
+%% an error (the file may have been GC'd away).
+scan_file_for_valid_messages(File) ->
+    case open_file(File, ?READ_MODE) of
+        {ok, Hdl}       -> Valid = rabbit_msg_file:scan(
+                                     Hdl, filelib:file_size(File),
+                                     fun scan_fun/2, []),
+                           ok = file_handle_cache:close(Hdl),
+                           Valid;
+        {error, enoent} -> {ok, [], 0};
+        {error, Reason} -> {error, {unable_to_scan_file,
+                                    filename:basename(File),
+                                    Reason}}
+    end.
+
+scan_file_for_valid_messages(Dir, FileName) ->
+    scan_file_for_valid_messages(form_filename(Dir, FileName)).
+
+%% Accumulator for rabbit_msg_file:scan/4: keep id, size and offset only.
+scan_fun({MsgId, TotalSize, Offset, _Msg}, Acc) ->
+    [{MsgId, TotalSize, Offset} | Acc].
+
+%% Takes the list in *ascending* order (i.e. eldest message
+%% first). This is the opposite of what scan_file_for_valid_messages
+%% produces. The list of msgs that is produced is youngest first.
+%% Returns {OffsetPastContiguousPrefix, RemainingMsgsAfterFirstGap}.
+drop_contiguous_block_prefix(L) -> drop_contiguous_block_prefix(L, 0).
+
+drop_contiguous_block_prefix([], ExpectedOffset) ->
+    {ExpectedOffset, []};
+drop_contiguous_block_prefix([#msg_location { offset = ExpectedOffset,
+                                              total_size = TotalSize } | Tail],
+                             ExpectedOffset) ->
+    ExpectedOffset1 = ExpectedOffset + TotalSize,
+    drop_contiguous_block_prefix(Tail, ExpectedOffset1);
+drop_contiguous_block_prefix(MsgsAfterGap, ExpectedOffset) ->
+    {ExpectedOffset, MsgsAfterGap}.
+
+%% Fast path (clean shutdown): totals are read straight from the recovered
+%% file summary; the fold leaves current_file as the last (highest) file.
+build_index(true, _StartupFunState,
+            State = #msstate { file_summary_ets = FileSummaryEts }) ->
+    ets:foldl(
+      fun (#file_summary { valid_total_size = ValidTotalSize,
+                           file_size        = FileSize,
+                           file             = File },
+           {_Offset, State1 = #msstate { sum_valid_data = SumValid,
+                                         sum_file_size  = SumFileSize }}) ->
+              {FileSize, State1 #msstate {
+                           sum_valid_data = SumValid + ValidTotalSize,
+                           sum_file_size  = SumFileSize + FileSize,
+                           current_file   = File }}
+      end, {0, State}, FileSummaryEts);
+%% Slow path: seed ref counts from the generator, then rebuild locations by
+%% scanning all segment files in parallel via a gatherer.
+build_index(false, {MsgRefDeltaGen, MsgRefDeltaGenInit},
+            State = #msstate { dir = Dir }) ->
+    rabbit_log:debug("Rebuilding message refcount...~n", []),
+    ok = count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State),
+    rabbit_log:debug("Done rebuilding message refcount~n", []),
+    {ok, Pid} = gatherer:start_link(),
+    case [filename_to_num(FileName) ||
+             FileName <- list_sorted_filenames(Dir, ?FILE_EXTENSION)] of
+        []    -> rebuild_index(Pid, [State #msstate.current_file],
+                               State);
+        Files -> {Offset, State1} = rebuild_index(Pid, Files, State),
+                 {Offset, lists:foldl(fun delete_file_if_empty/2,
+                                      State1, Files)}
+    end.
+
+%% Worker-pool job: scan one segment file, attach file/offset/total_size to
+%% index entries that count_msg_refs created (file = undefined), and emit a
+%% #file_summary for the file into the gatherer. For the last file, the
+%% reported file_size is cut back to the end of the last valid message
+%% (the tail past it is rubbish to be truncated).
+build_index_worker(Gatherer, State = #msstate { dir = Dir },
+                   Left, File, Files) ->
+    FileName = filenum_to_name(File),
+    rabbit_log:debug("Rebuilding message location index from ~p (~B file(s) remaining)~n",
+                     [form_filename(Dir, FileName), length(Files)]),
+    {ok, Messages, FileSize} =
+        scan_file_for_valid_messages(Dir, FileName),
+    {ValidMessages, ValidTotalSize} =
+        lists:foldl(
+          fun (Obj = {MsgId, TotalSize, Offset}, {VMAcc, VTSAcc}) ->
+                  case index_lookup(MsgId, State) of
+                      #msg_location { file = undefined } = StoreEntry ->
+                          ok = index_update(StoreEntry #msg_location {
+                                              file = File, offset = Offset,
+                                              total_size = TotalSize },
+                                            State),
+                          {[Obj | VMAcc], VTSAcc + TotalSize};
+                      _ ->
+                          %% Not referenced (or already located): not valid
+                          %% data in this file.
+                          {VMAcc, VTSAcc}
+                  end
+          end, {[], 0}, Messages),
+    {Right, FileSize1} =
+        case Files of
+            %% if it's the last file, we'll truncate to remove any
+            %% rubbish above the last valid message. This affects the
+            %% file size.
+            []    -> {undefined, case ValidMessages of
+                                     [] -> 0;
+                                     _  -> {_MsgId, TotalSize, Offset} =
+                                               lists:last(ValidMessages),
+                                           Offset + TotalSize
+                                 end};
+            [F|_] -> {F, FileSize}
+        end,
+    ok = gatherer:in(Gatherer, #file_summary {
+                                  file             = File,
+                                  valid_total_size = ValidTotalSize,
+                                  left             = Left,
+                                  right            = Right,
+                                  file_size        = FileSize1,
+                                  locked           = false,
+                                  readers          = 0 }),
+    ok = gatherer:finish(Gatherer).
+
+%% Dispatch one build_index_worker per file through the worker pool,
+%% threading each file as the 'Left' neighbour of the next; the dispatching
+%% process exits normally when done.
+enqueue_build_index_workers(_Gatherer, _Left, [], _State) ->
+    exit(normal);
+enqueue_build_index_workers(Gatherer, Left, [File|Files], State) ->
+    ok = worker_pool:dispatch_sync(
+           fun () ->
+                   %% Link so a worker crash takes the gatherer down too.
+                   link(Gatherer),
+                   ok = build_index_worker(Gatherer, State,
+                                           Left, File, Files),
+                   unlink(Gatherer),
+                   ok
+           end),
+    enqueue_build_index_workers(Gatherer, File, Files, State).
+
+%% Drain the gatherer: insert each worker-produced #file_summary into the
+%% ETS table and accumulate size totals; when empty, clean up unlocated
+%% index entries and return {OffsetInLastFile, State} with current_file set
+%% to LastFile.
+reduce_index(Gatherer, LastFile,
+             State = #msstate { file_summary_ets = FileSummaryEts,
+                                sum_valid_data   = SumValid,
+                                sum_file_size    = SumFileSize }) ->
+    case gatherer:out(Gatherer) of
+        empty ->
+            ok = gatherer:stop(Gatherer),
+            ok = index_clean_up_temporary_reference_count_entries(State),
+            Offset = case ets:lookup(FileSummaryEts, LastFile) of
+                         [] -> 0;
+                         [#file_summary { file_size = FileSize }] -> FileSize
+                     end,
+            {Offset, State #msstate { current_file = LastFile }};
+        {value, #file_summary { valid_total_size = ValidTotalSize,
+                                file_size = FileSize } = FileSummary} ->
+            true = ets:insert_new(FileSummaryEts, FileSummary),
+            reduce_index(Gatherer, LastFile,
+                         State #msstate {
+                           sum_valid_data = SumValid + ValidTotalSize,
+                           sum_file_size  = SumFileSize + FileSize })
+    end.
+
+%% Fork the gatherer once per file, spawn a dispatcher that feeds the
+%% worker pool, and consume results until done. The dispatcher is monitored
+%% (not linked) so its normal exit is benign.
+rebuild_index(Gatherer, Files, State) ->
+    lists:foreach(fun (_File) ->
+                          ok = gatherer:fork(Gatherer)
+                  end, Files),
+    Pid = spawn(
+            fun () ->
+                    enqueue_build_index_workers(Gatherer, undefined,
+                                                Files, State)
+            end),
+    erlang:monitor(process, Pid),
+    reduce_index(Gatherer, lists:last(Files), State).
+
+%%----------------------------------------------------------------------------
+%% garbage collection / compaction / aggregation -- internal
+%%----------------------------------------------------------------------------
+
+%% When the append offset reaches the file size limit: sync + close the
+%% current file, open the next numbered file, link it into the file-summary
+%% double-linked list, purge zero-ref cache entries, and consider a compact.
+maybe_roll_to_new_file(
+  Offset,
+  State = #msstate { dir                 = Dir,
+                     current_file_handle = CurHdl,
+                     current_file        = CurFile,
+                     file_summary_ets    = FileSummaryEts,
+                     cur_file_cache_ets  = CurFileCacheEts,
+                     file_size_limit     = FileSizeLimit })
+  when Offset >= FileSizeLimit ->
+    State1 = internal_sync(State),
+    ok = file_handle_cache:close(CurHdl),
+    NextFile = CurFile + 1,
+    {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE),
+    true = ets:insert_new(FileSummaryEts, #file_summary {
+                                             file             = NextFile,
+                                             valid_total_size = 0,
+                                             left             = CurFile,
+                                             right            = undefined,
+                                             file_size        = 0,
+                                             locked           = false,
+                                             readers          = 0 }),
+    true = ets:update_element(FileSummaryEts, CurFile,
+                              {#file_summary.right, NextFile}),
+    %% Cache entries with ref count 0 are only needed while the current
+    %% file is still being written; drop them on roll-over.
+    true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}),
+    maybe_compact(State1 #msstate { current_file_handle = NextHdl,
+                                    current_file        = NextFile });
+maybe_roll_to_new_file(_, State) ->
+    State.
+
+%% Kick off a combine of two adjacent files when overall garbage exceeds
+%% ?GARBAGE_FRACTION (and the store spans more than two file-limits worth
+%% of data), bounded by ?MAXIMUM_SIMULTANEOUS_GC_FILES outstanding GCs.
+%% Both chosen files are locked before handing them to the GC process.
+maybe_compact(State = #msstate { sum_valid_data        = SumValid,
+                                 sum_file_size         = SumFileSize,
+                                 gc_pid                = GCPid,
+                                 pending_gc_completion = Pending,
+                                 file_summary_ets      = FileSummaryEts,
+                                 file_size_limit       = FileSizeLimit })
+  when SumFileSize > 2 * FileSizeLimit andalso
+       (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION ->
+    %% TODO: the algorithm here is sub-optimal - it may result in a
+    %% complete traversal of FileSummaryEts.
+    First = ets:first(FileSummaryEts),
+    case First =:= '$end_of_table' orelse
+        maps:size(Pending) >= ?MAXIMUM_SIMULTANEOUS_GC_FILES of
+        true ->
+            State;
+        false ->
+            case find_files_to_combine(FileSummaryEts, FileSizeLimit,
+                                       ets:lookup(FileSummaryEts, First)) of
+                not_found ->
+                    State;
+                {Src, Dst} ->
+                    Pending1 = maps_store(Dst, [],
+                                          maps_store(Src, [], Pending)),
+                    State1 = close_handle(Src, close_handle(Dst, State)),
+                    true = ets:update_element(FileSummaryEts, Src,
+                                              {#file_summary.locked, true}),
+                    true = ets:update_element(FileSummaryEts, Dst,
+                                              {#file_summary.locked, true}),
+                    ok = rabbit_msg_store_gc:combine(GCPid, Src, Dst),
+                    State1 #msstate { pending_gc_completion = Pending1 }
+            end
+    end;
+maybe_compact(State) ->
+    State.
+
+%% Walk the file list left-to-right looking for an adjacent (Dst, Src)
+%% pair whose combined valid data fits in one file, where both hold some
+%% valid data, neither is locked, and Src is not the right-most (current)
+%% file. Returns {Src, Dst} or not_found.
+find_files_to_combine(FileSummaryEts, FileSizeLimit,
+                      [#file_summary { file             = Dst,
+                                       valid_total_size = DstValid,
+                                       right            = Src,
+                                       locked           = DstLocked }]) ->
+    case Src of
+        undefined ->
+            not_found;
+        _ ->
+            [#file_summary { file             = Src,
+                             valid_total_size = SrcValid,
+                             left             = Dst,
+                             right            = SrcRight,
+                             locked           = SrcLocked }] = Next =
+                ets:lookup(FileSummaryEts, Src),
+            case SrcRight of
+                undefined -> not_found;
+                _         -> case (DstValid + SrcValid =< FileSizeLimit) andalso
+                                 (DstValid > 0) andalso (SrcValid > 0) andalso
+                                 not (DstLocked orelse SrcLocked) of
+                                 true  -> {Src, Dst};
+                                 false -> find_files_to_combine(
+                                            FileSummaryEts, FileSizeLimit, Next)
+                             end
+            end
+    end.
+
+%% Hand a file with no valid data over to the GC for deletion; the current
+%% file is never deleted. The file is locked first so readers defer.
+delete_file_if_empty(File, State = #msstate { current_file = File }) ->
+    State;
+delete_file_if_empty(File, State = #msstate {
+                             gc_pid                = GCPid,
+                             file_summary_ets      = FileSummaryEts,
+                             pending_gc_completion = Pending }) ->
+    [#file_summary { valid_total_size = ValidData,
+                     locked           = false }] =
+        ets:lookup(FileSummaryEts, File),
+    case ValidData of
+        %% don't delete the file_summary_ets entry for File here
+        %% because we could have readers which need to be able to
+        %% decrement the readers count.
+        0 -> true = ets:update_element(FileSummaryEts, File,
+                                       {#file_summary.locked, true}),
+             ok = rabbit_msg_store_gc:delete(GCPid, File),
+             Pending1 = maps_store(File, [], Pending),
+             close_handle(File,
+                          State #msstate { pending_gc_completion = Pending1 });
+        _ -> State
+    end.
+
+%% After the GC has deleted File: force clients off their handles for it
+%% and splice File out of the left/right double-linked list, then drop its
+%% summary row.
+cleanup_after_file_deletion(File,
+                            #msstate { file_handles_ets = FileHandlesEts,
+                                       file_summary_ets = FileSummaryEts,
+                                       clients          = Clients }) ->
+    %% Ensure that any clients that have open fhs to the file close
+    %% them before using them again. This has to be done here (given
+    %% it's done in the msg_store, and not the gc), and not when
+    %% starting up the GC, because if done when starting up the GC,
+    %% the client could find the close, and close and reopen the fh,
+    %% whilst the GC is waiting for readers to disappear, before it's
+    %% actually done the GC.
+    true = mark_handle_to_close(Clients, FileHandlesEts, File, true),
+    [#file_summary { left    = Left,
+                     right   = Right,
+                     locked  = true,
+                     readers = 0 }] = ets:lookup(FileSummaryEts, File),
+    %% We'll never delete the current file, so right is never undefined
+    true = Right =/= undefined, %% ASSERTION
+    true = ets:update_element(FileSummaryEts, Right,
+                              {#file_summary.left, Left}),
+    %% ensure the double linked list is maintained
+    true = case Left of
+               undefined -> true; %% File is the eldest file (left-most)
+               _         -> ets:update_element(FileSummaryEts, Left,
+                                               {#file_summary.right, Right})
+           end,
+    true = ets:delete(FileSummaryEts, File),
+    ok.
+
+%%----------------------------------------------------------------------------
+%% garbage collection / compaction / aggregation -- external
+%%----------------------------------------------------------------------------
+
+-spec combine_files(non_neg_integer(), non_neg_integer(), gc_state()) ->
+                        {ok, deletion_thunk()} | {defer, [non_neg_integer()]}.
%% Combine the valid messages of Source into Destination as part of
%% message store compaction. Both #file_summary rows must already be
%% marked locked (asserted by the matches below). If either file still
%% has active readers the combine cannot proceed and is deferred; the
%% caller retries once a no_readers notification arrives.
combine_files(Source, Destination,
              State = #gc_state { file_summary_ets = FileSummaryEts }) ->
    [#file_summary{locked = true} = SourceSummary] =
        ets:lookup(FileSummaryEts, Source),

    [#file_summary{locked = true} = DestinationSummary] =
        ets:lookup(FileSummaryEts, Destination),

    case {SourceSummary, DestinationSummary} of
        {#file_summary{readers = 0}, #file_summary{readers = 0}} ->
            {ok, do_combine_files(SourceSummary, DestinationSummary,
                                  Source, Destination, State)};
        _ ->
            rabbit_log:debug("Asked to combine files ~p and ~p but they have active readers. Deferring.",
                             [Source, Destination]),
            %% Defer only on the file(s) that still have readers.
            DeferredFiles = [FileSummary#file_summary.file
                             || FileSummary <- [SourceSummary, DestinationSummary],
                                FileSummary#file_summary.readers /= 0],
            {defer, DeferredFiles}
    end.

%% Perform the actual combine. The left/right field matches assert that
%% Source sits immediately to the right of Destination in the on-disk
%% file chain. Destination's valid data is first compacted in place
%% (via a tmp file unless it is already a contiguous prefix), then
%% Source's valid data is appended. Returns the deletion thunk for the
%% source file produced by safe_file_delete_fun/3.
do_combine_files(SourceSummary, DestinationSummary,
                 Source, Destination,
                 State = #gc_state { file_summary_ets = FileSummaryEts,
                                     file_handles_ets = FileHandlesEts,
                                     dir              = Dir,
                                     msg_store        = Server }) ->
    #file_summary {
        readers          = 0,
        left             = Destination,
        valid_total_size = SourceValid,
        file_size        = SourceFileSize,
        locked           = true } = SourceSummary,
    #file_summary {
        readers          = 0,
        right            = Source,
        valid_total_size = DestinationValid,
        file_size        = DestinationFileSize,
        locked           = true } = DestinationSummary,

    SourceName           = filenum_to_name(Source),
    DestinationName      = filenum_to_name(Destination),
    {ok, SourceHdl}      = open_file(Dir, SourceName,
                                     ?READ_AHEAD_MODE),
    {ok, DestinationHdl} = open_file(Dir, DestinationName,
                                     ?READ_AHEAD_MODE ++ ?WRITE_MODE),
    TotalValidData = SourceValid + DestinationValid,
    %% if DestinationValid =:= DestinationContiguousTop then we don't
    %% need a tmp file
    %% if they're not equal, then we need to write out everything past
    %%   the DestinationContiguousTop to a tmp file then truncate,
    %%   copy back in, and then copy over from Source
    %% otherwise we just truncate straight away and copy over from Source
    {DestinationWorkList, DestinationValid} =
        load_and_vacuum_message_file(Destination, State),
    {DestinationContiguousTop, DestinationWorkListTail} =
        drop_contiguous_block_prefix(DestinationWorkList),
    case DestinationWorkListTail of
        [] -> ok = truncate_and_extend_file(
                     DestinationHdl, DestinationContiguousTop, TotalValidData);
        _  -> Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP,
              {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE++?WRITE_MODE),
              ok = copy_messages(
                     DestinationWorkListTail, DestinationContiguousTop,
                     DestinationValid, DestinationHdl, TmpHdl, Destination,
                     State),
              TmpSize = DestinationValid - DestinationContiguousTop,
              %% so now Tmp contains everything we need to salvage
              %% from Destination, and index_state has been updated to
              %% reflect the compaction of Destination so truncate
              %% Destination and copy from Tmp back to the end
              {ok, 0} = file_handle_cache:position(TmpHdl, 0),
              ok = truncate_and_extend_file(
                     DestinationHdl, DestinationContiguousTop, TotalValidData),
              {ok, TmpSize} =
                  file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize),
              %% position in DestinationHdl should now be DestinationValid
              ok = file_handle_cache:sync(DestinationHdl),
              ok = file_handle_cache:delete(TmpHdl)
    end,
    {SourceWorkList, SourceValid} = load_and_vacuum_message_file(Source, State),
    ok = copy_messages(SourceWorkList, DestinationValid, TotalValidData,
                       SourceHdl, DestinationHdl, Destination, State),
    %% tidy up
    ok = file_handle_cache:close(DestinationHdl),
    ok = file_handle_cache:close(SourceHdl),

    %% don't update dest.right, because it could be changing at the
    %% same time
    true = ets:update_element(
             FileSummaryEts, Destination,
             [{#file_summary.valid_total_size, TotalValidData},
              {#file_summary.file_size,        TotalValidData}]),

    Reclaimed = SourceFileSize + DestinationFileSize - TotalValidData,
    rabbit_log:debug("Combined segment files number ~p (source) and ~p (destination), reclaimed ~p bytes",
                     [Source, Destination, Reclaimed]),
    gen_server2:cast(Server, {combine_files, Source, Destination, Reclaimed}),
    safe_file_delete_fun(Source, Dir, FileHandlesEts).

-spec delete_file(non_neg_integer(), gc_state()) -> {ok, deletion_thunk()} | {defer, [non_neg_integer()]}.

%% Delete File outright. Only legal when the file is locked, holds no
%% valid data and has no readers; with active readers the deletion is
%% deferred until a no_readers notification re-triggers it.
delete_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
                                      file_handles_ets = FileHandlesEts,
                                      dir              = Dir,
                                      msg_store        = Server }) ->
    case ets:lookup(FileSummaryEts, File) of
        [#file_summary { valid_total_size = 0,
                         locked           = true,
                         file_size        = FileSize,
                         readers          = 0 }] ->
            %% Sanity check: a file with zero valid size must vacuum to
            %% an empty work list.
            {[], 0} = load_and_vacuum_message_file(File, State),
            gen_server2:cast(Server, {delete_file, File, FileSize}),
            {ok, safe_file_delete_fun(File, Dir, FileHandlesEts)};
        [#file_summary{readers = Readers}] when Readers > 0 ->
            rabbit_log:debug("Asked to delete file ~p but it has active readers. Deferring.",
                             [File]),
            {defer, [File]}
    end.

%% Scan File and return {WorkList, ValidSize}: the #msg_location index
%% entries (in ascending offset order) for messages still referenced,
%% plus the total byte size of that valid data. Entries whose
%% ref_count has reached zero are removed from the index as a side
%% effect ("vacuumed"); entries not matching the scan are ignored.
load_and_vacuum_message_file(File, State = #gc_state { dir = Dir }) ->
    %% Messages here will be end-of-file at start-of-list
    {ok, Messages, _FileSize} =
        scan_file_for_valid_messages(Dir, filenum_to_name(File)),
    %% foldl will reverse so will end up with msgs in ascending offset order
    lists:foldl(
      fun ({MsgId, TotalSize, Offset}, Acc = {List, Size}) ->
              case index_lookup(MsgId, State) of
                  #msg_location { file = File, total_size = TotalSize,
                                  offset = Offset, ref_count = 0 } = Entry ->
                      ok = index_delete_object(Entry, State),
                      Acc;
                  #msg_location { file = File, total_size = TotalSize,
                                  offset = Offset } = Entry ->
                      {[ Entry | List ], TotalSize + Size};
                  _ ->
                      Acc
              end
      end, {[], 0}, Messages).
%% Copy the messages in WorkList (ascending source-file offset order)
%% to DestinationHdl, starting at InitOffset. Runs of messages that
%% follow each other without gaps are coalesced into blocks so each
%% block needs only one file_handle_cache:copy/3. The index entry of
%% every message is repointed at Destination/new-offset as the fold
%% proceeds. Returns ok, or {gc_error, ...} if the bytes accounted for
%% do not add up to FinalOffset (no rollback is attempted here).
copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl,
              Destination, State) ->
    Copy = fun ({BlockStart, BlockEnd}) ->
                   BSize = BlockEnd - BlockStart,
                   {ok, BlockStart} =
                       file_handle_cache:position(SourceHdl, BlockStart),
                   {ok, BSize} =
                       file_handle_cache:copy(SourceHdl, DestinationHdl, BSize)
           end,
    case
        lists:foldl(
          fun (#msg_location { msg_id = MsgId, offset = Offset,
                               total_size = TotalSize },
               {CurOffset, Block = {BlockStart, BlockEnd}}) ->
                  %% CurOffset is in the DestinationFile.
                  %% Offset, BlockStart and BlockEnd are in the SourceFile
                  %% update MsgLocation to reflect change of file and offset
                  ok = index_update_fields(MsgId,
                                           [{#msg_location.file, Destination},
                                            {#msg_location.offset, CurOffset}],
                                           State),
                  {CurOffset + TotalSize,
                   case BlockEnd of
                       undefined ->
                           %% base case, called only for the first list elem
                           {Offset, Offset + TotalSize};
                       Offset ->
                           %% extend the current block because the
                           %% next msg follows straight on
                           {BlockStart, BlockEnd + TotalSize};
                       _ ->
                           %% found a gap, so actually do the work for
                           %% the previous block
                           Copy(Block),
                           {Offset, Offset + TotalSize}
                   end}
          end, {InitOffset, {undefined, undefined}}, WorkList) of
        {FinalOffset, Block} ->
            case WorkList of
                [] -> ok;
                _  -> Copy(Block), %% do the last remaining block
                      ok = file_handle_cache:sync(DestinationHdl)
            end;
        {FinalOffsetZ, _Block} ->
            {gc_error, [{expected, FinalOffset},
                        {got, FinalOffsetZ},
                        {destination, Destination}]}
    end.

-spec force_recovery(file:filename(), server()) -> 'ok'.

%% Force a full (non-clean) recovery on next start by removing the
%% store's clean-shutdown marker, then repair any compactions that
%% crashed mid-way.
force_recovery(BaseDir, Store) ->
    Dir = filename:join(BaseDir, atom_to_list(Store)),
    case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of
        ok              -> ok;
        {error, enoent} -> ok
    end,
    %% NOTE(review): this passes BaseDir, not the store-specific Dir
    %% computed above — confirm that is intended.
    recover_crashed_compactions(BaseDir),
    ok.

%% Apply Fun (which must return ok) to each file name joined onto D.
foreach_file(D, Fun, Files) ->
    [ok = Fun(filename:join(D, File)) || File <- Files].

%% As foreach_file/3, but Fun takes the same file name joined onto two
%% directories (source and destination).
foreach_file(D1, D2, Fun, Files) ->
    [ok = Fun(filename:join(D1, File), filename:join(D2, File)) || File <- Files].

-spec transform_dir(file:filename(), server(),
        fun ((any()) -> (rabbit_types:ok_or_error2(msg(), any())))) -> 'ok'.

%% Rewrite every segment file in the store's directory by applying
%% TransformFun to each stored message. Transformed copies are written
%% into a tmp subdirectory first, then moved back over the originals,
%% so a leftover tmp directory indicates an earlier failed transform
%% (in which case we refuse to run).
transform_dir(BaseDir, Store, TransformFun) ->
    Dir = filename:join(BaseDir, atom_to_list(Store)),
    TmpDir = filename:join(Dir, ?TRANSFORM_TMP),
    TransformFile = fun (A, B) -> transform_msg_file(A, B, TransformFun) end,
    CopyFile = fun (Src, Dst) -> {ok, _Bytes} = file:copy(Src, Dst), ok end,
    case filelib:is_dir(TmpDir) of
        true  -> throw({error, transform_failed_previously});
        false -> FileList = list_sorted_filenames(Dir, ?FILE_EXTENSION),
                 foreach_file(Dir, TmpDir, TransformFile, FileList),
                 foreach_file(Dir, fun file:delete/1, FileList),
                 foreach_file(TmpDir, Dir, CopyFile, FileList),
                 foreach_file(TmpDir, fun file:delete/1, FileList),
                 ok = file:del_dir(TmpDir)
    end.

%% Scan FileOld and append each message to FileNew after passing it
%% through TransformFun. The empty binary is a "dying client" marker
%% and is copied through untransformed.
transform_msg_file(FileOld, FileNew, TransformFun) ->
    ok = rabbit_file:ensure_parent_dirs_exist(FileNew),
    {ok, RefOld} = file_handle_cache:open_with_absolute_path(
                     FileOld, [raw, binary, read], []),
    {ok, RefNew} = file_handle_cache:open_with_absolute_path(
                     FileNew, [raw, binary, write],
                     [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]),
    {ok, _Acc, _IgnoreSize} =
        rabbit_msg_file:scan(
          RefOld, filelib:file_size(FileOld),
          fun({MsgId, _Size, _Offset, BinMsg}, ok) ->
                  {ok, MsgNew} = case binary_to_term(BinMsg) of
                                     <<>> -> {ok, <<>>}; %% dying client marker
                                     Msg  -> TransformFun(Msg)
                                 end,
                  {ok, _} = rabbit_msg_file:append(RefNew, MsgId, MsgNew),
                  ok
          end, ok),
    ok = file_handle_cache:close(RefOld),
    ok = file_handle_cache:close(RefNew),
    ok.
diff --git a/deps/rabbit/src/rabbit_msg_store_ets_index.erl b/deps/rabbit/src/rabbit_msg_store_ets_index.erl new file mode 100644 index 0000000000..294417b5ba --- /dev/null +++ b/deps/rabbit/src/rabbit_msg_store_ets_index.erl @@ -0,0 +1,76 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_msg_store_ets_index). + +-include("rabbit_msg_store.hrl"). + +-behaviour(rabbit_msg_store_index). + +-export([new/1, recover/1, + lookup/2, insert/2, update/2, update_fields/3, delete/2, + delete_object/2, clean_up_temporary_reference_count_entries_without_file/1, terminate/1]). + +-define(MSG_LOC_NAME, rabbit_msg_store_ets_index). +-define(FILENAME, "msg_store_index.ets"). + +-record(state, { table, dir }). + +new(Dir) -> + file:delete(filename:join(Dir, ?FILENAME)), + Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.msg_id}]), + #state { table = Tid, dir = Dir }. + +recover(Dir) -> + Path = filename:join(Dir, ?FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {ok, #state { table = Tid, dir = Dir }}; + Error -> Error + end. + +lookup(Key, State) -> + case ets:lookup(State #state.table, Key) of + [] -> not_found; + [Entry] -> Entry + end. + +insert(Obj, State) -> + true = ets:insert_new(State #state.table, Obj), + ok. + +update(Obj, State) -> + true = ets:insert(State #state.table, Obj), + ok. + +update_fields(Key, Updates, State) -> + true = ets:update_element(State #state.table, Key, Updates), + ok. + +delete(Key, State) -> + true = ets:delete(State #state.table, Key), + ok. + +delete_object(Obj, State) -> + true = ets:delete_object(State #state.table, Obj), + ok. 
+ +clean_up_temporary_reference_count_entries_without_file(State) -> + MatchHead = #msg_location { file = undefined, _ = '_' }, + ets:select_delete(State #state.table, [{MatchHead, [], [true]}]), + ok. + +terminate(#state { table = MsgLocations, dir = Dir }) -> + case ets:tab2file(MsgLocations, filename:join(Dir, ?FILENAME), + [{extended_info, [object_count]}]) of + ok -> ok; + {error, Err} -> + rabbit_log:error("Unable to save message store index" + " for directory ~p.~nError: ~p~n", + [Dir, Err]) + end, + ets:delete(MsgLocations). diff --git a/deps/rabbit/src/rabbit_msg_store_gc.erl b/deps/rabbit/src/rabbit_msg_store_gc.erl new file mode 100644 index 0000000000..41addc5fa6 --- /dev/null +++ b/deps/rabbit/src/rabbit_msg_store_gc.erl @@ -0,0 +1,125 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_msg_store_gc). + +-behaviour(gen_server2). + +-export([start_link/1, combine/3, delete/2, no_readers/2, stop/1]). + +-export([set_maximum_since_use/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3, prioritise_cast/3]). + +-record(state, + { pending_no_readers, + on_action, + msg_store_state + }). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-spec start_link(rabbit_msg_store:gc_state()) -> + rabbit_types:ok_pid_or_error(). + +start_link(MsgStoreState) -> + gen_server2:start_link(?MODULE, [MsgStoreState], + [{timeout, infinity}]). + +-spec combine(pid(), rabbit_msg_store:file_num(), + rabbit_msg_store:file_num()) -> 'ok'. + +combine(Server, Source, Destination) -> + gen_server2:cast(Server, {combine, Source, Destination}). + +-spec delete(pid(), rabbit_msg_store:file_num()) -> 'ok'. 
%% Ask the GC process to delete File (asynchronous).
delete(Server, File) ->
    gen_server2:cast(Server, {delete, File}).

-spec no_readers(pid(), rabbit_msg_store:file_num()) -> 'ok'.

%% Notification from the message store that File has no more active
%% readers, so a previously deferred action on it may be retried.
no_readers(Server, File) ->
    gen_server2:cast(Server, {no_readers, File}).

-spec stop(pid()) -> 'ok'.

%% Synchronously stop the GC process.
stop(Server) ->
    gen_server2:call(Server, stop, infinity).

-spec set_maximum_since_use(pid(), non_neg_integer()) -> 'ok'.

%% Callback registered with file_handle_cache in init/1: asks the
%% process to release file handles unused for at least Age.
set_maximum_since_use(Pid, Age) ->
    gen_server2:cast(Pid, {set_maximum_since_use, Age}).

%%----------------------------------------------------------------------------

%% The process idles hibernated (with backoff) since GC work is bursty.
init([MsgStoreState]) ->
    ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use,
                                             [self()]),
    {ok, #state { pending_no_readers = #{},
                  on_action          = [],
                  msg_store_state    = MsgStoreState }, hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.

%% Handle file_handle_cache housekeeping ahead of queued GC work.
prioritise_cast({set_maximum_since_use, _Age}, _Len, _State) -> 8;
prioritise_cast(_Msg,                          _Len, _State) -> 0.

handle_call(stop, _From, State) ->
    {stop, normal, ok, State}.

handle_cast({combine, Source, Destination}, State) ->
    {noreply, attempt_action(combine, [Source, Destination], State), hibernate};

handle_cast({delete, File}, State) ->
    {noreply, attempt_action(delete, [File], State), hibernate};

%% A deferred action becomes runnable once the file it was waiting on
%% has no readers left; unknown files are simply ignored.
handle_cast({no_readers, File},
            State = #state { pending_no_readers = Pending }) ->
    {noreply, case maps:find(File, Pending) of
                  error ->
                      State;
                  {ok, {Action, Files}} ->
                      Pending1 = maps:remove(File, Pending),
                      attempt_action(
                        Action, Files,
                        State #state { pending_no_readers = Pending1 })
              end, hibernate};

handle_cast({set_maximum_since_use, Age}, State) ->
    ok = file_handle_cache:set_maximum_since_use(Age),
    {noreply, State, hibernate}.

%% This server expects no plain messages; stop loudly if one arrives.
handle_info(Info, State) ->
    {stop, {unhandled_info, Info}, State}.

%% NOTE(review): returns State rather than the conventional 'ok'; the
%% return value of terminate/2 is ignored, so this is harmless.
terminate(_Reason, State) ->
    State.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
%% Try to run Action on Files. On success the returned deletion thunk
%% is prepended to the pending list and every pending thunk is run
%% once, dropping those that report completion (return true). If the
%% action is deferred, remember it keyed by the first blocking file so
%% a later no_readers notification can retry it.
attempt_action(Action, Files,
               State = #state { pending_no_readers = Pending,
                                on_action          = Thunks,
                                msg_store_state    = MsgStoreState }) ->
    case do_action(Action, Files, MsgStoreState) of
        {ok, OkThunk} ->
            State#state{on_action = lists:filter(fun (Thunk) -> not Thunk() end,
                                                 [OkThunk | Thunks])};
        {defer, [File | _]} ->
            Pending1 = maps:put(File, {Action, Files}, Pending),
            State #state { pending_no_readers = Pending1 }
    end.

%% Dispatch to the message store's GC entry points.
do_action(combine, [Source, Destination], MsgStoreState) ->
    rabbit_msg_store:combine_files(Source, Destination, MsgStoreState);
do_action(delete, [File], MsgStoreState) ->
    rabbit_msg_store:delete_file(File, MsgStoreState).
diff --git a/deps/rabbit/src/rabbit_networking.erl b/deps/rabbit/src/rabbit_networking.erl
new file mode 100644
index 0000000000..433b1d7540
--- /dev/null
+++ b/deps/rabbit/src/rabbit_networking.erl
@@ -0,0 +1,663 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_networking).

%% This module contains various functions that deal with networking,
%% TCP and TLS listeners, and connection information.
%%
%% It also contains a boot step — boot/0 — that starts networking machinery.
%% This module primarily covers AMQP 0-9-1 but some bits are reused in
%% plugins that provide protocol support, e.g. STOMP or MQTT.
%%
%% Functions in this module take care of normalising TCP listener options,
%% including dual IP stack cases, and starting the AMQP 0-9-1 listener(s).
%%
%% See also tcp_listener_sup and tcp_listener.
+ +-export([boot/0, start_tcp_listener/2, start_ssl_listener/3, + stop_tcp_listener/1, on_node_down/1, active_listeners/0, + node_listeners/1, node_client_listeners/1, + register_connection/1, unregister_connection/1, + register_non_amqp_connection/1, unregister_non_amqp_connection/1, + connections/0, non_amqp_connections/0, connection_info_keys/0, + connection_info/1, connection_info/2, + connection_info_all/0, connection_info_all/1, + emit_connection_info_all/4, emit_connection_info_local/3, + close_connection/2, close_connections/2, close_all_connections/1, + force_connection_event_refresh/1, force_non_amqp_connection_event_refresh/1, + handshake/2, tcp_host/1, + ranch_ref/1, ranch_ref/2, ranch_ref_of_protocol/1, + listener_of_protocol/1, stop_ranch_listener_of_protocol/1]). + +%% Used by TCP-based transports, e.g. STOMP adapter +-export([tcp_listener_addresses/1, tcp_listener_spec/9, + ensure_ssl/0, fix_ssl_options/1, poodle_check/1]). + +-export([tcp_listener_started/4, tcp_listener_stopped/4]). + +-deprecated([{force_connection_event_refresh, 1, eventually}]). + +-export([ + local_connections/0, + local_non_amqp_connections/0, + %% prefer local_connections/0 + connections_local/0 +]). + +-include("rabbit.hrl"). +-include("rabbit_misc.hrl"). + +%% IANA-suggested ephemeral port range is 49152 to 65535 +-define(FIRST_TEST_BIND_PORT, 49152). + +%%---------------------------------------------------------------------------- + +-export_type([ip_port/0, hostname/0]). + +-type hostname() :: rabbit_net:hostname(). +-type ip_port() :: rabbit_net:ip_port(). + +-type family() :: atom(). +-type listener_config() :: ip_port() | + {hostname(), ip_port()} | + {hostname(), ip_port(), family()}. +-type address() :: {inet:ip_address(), ip_port(), family()}. +-type name_prefix() :: atom(). +-type protocol() :: atom(). +-type label() :: string(). + +-spec boot() -> 'ok' | no_return(). 
%% Boot step: record the distribution (clustering) listener, start
%% Ranch, then start the configured AMQP TCP and TLS listeners.
%% Listener start failures are logged and re-thrown by
%% boot_listeners/3 rather than returned.
boot() ->
    ok = record_distribution_listener(),
    _ = application:start(ranch),
    rabbit_log:debug("Started Ranch"),
    %% Failures will throw exceptions
    _ = boot_listeners(fun boot_tcp/1, application:get_env(rabbit, num_tcp_acceptors, 10), "TCP"),
    _ = boot_listeners(fun boot_tls/1, application:get_env(rabbit, num_ssl_acceptors, 10), "TLS"),
    ok.

%% Run one listener-boot function; on error, log with the listener
%% type label and throw the full error tuple.
boot_listeners(Fun, NumAcceptors, Type) ->
    case Fun(NumAcceptors) of
        ok ->
            ok;
        {error, {could_not_start_listener, Address, Port, Details}} = Error ->
            rabbit_log:error("Failed to start ~s listener [~s]:~p, error: ~p",
                             [Type, Address, Port, Details]),
            throw(Error)
    end.

%% Start every configured plain-TCP listener, stopping at (and
%% returning) the first failure.
boot_tcp(NumAcceptors) ->
    {ok, TcpListeners} = application:get_env(tcp_listeners),
    case lists:foldl(fun(Listener, ok) ->
                             start_tcp_listener(Listener, NumAcceptors);
                        (_Listener, Error) ->
                             Error
                     end,
                     ok, TcpListeners) of
        ok                 -> ok;
        {error, _} = Error -> Error
    end.

%% Start the configured TLS listeners, unless none are configured or
%% the installed ssl application is POODLE-vulnerable (see
%% poodle_check/1), in which case TLS listeners are skipped.
boot_tls(NumAcceptors) ->
    case application:get_env(ssl_listeners) of
        {ok, []} ->
            ok;
        {ok, SslListeners} ->
            SslOpts = ensure_ssl(),
            case poodle_check('AMQP') of
                ok     -> [start_ssl_listener(L, SslOpts, NumAcceptors) || L <- SslListeners];
                danger -> ok
            end,
            ok
    end.

-spec ensure_ssl() -> rabbit_types:infos().

%% Start the applications TLS needs and return the normalised
%% ssl_options from the rabbit application environment.
ensure_ssl() ->
    {ok, SslAppsConfig} = application:get_env(rabbit, ssl_apps),
    ok = app_utils:start_applications(SslAppsConfig),
    {ok, SslOptsConfig0} = application:get_env(rabbit, ssl_options),
    rabbit_ssl_options:fix(SslOptsConfig0).

-spec poodle_check(atom()) -> 'ok' | 'danger'.

%% 'ok' when the installed ssl application can disable SSLv3
%% (version >= 5.3, i.e. Erlang R16B01) or when the operator has
%% explicitly set ssl_allow_poodle_attack; 'danger' otherwise, after
%% logging why TLS listeners for Context were disabled.
poodle_check(Context) ->
    {ok, Vsn} = application:get_key(ssl, vsn),
    case rabbit_misc:version_compare(Vsn, "5.3", gte) of %% R16B01
        true  -> ok;
        false -> case application:get_env(rabbit, ssl_allow_poodle_attack) of
                     {ok, true} -> ok;
                     _          -> log_poodle_fail(Context),
                                   danger
                 end
    end.
+ +log_poodle_fail(Context) -> + rabbit_log:error( + "The installed version of Erlang (~s) contains the bug OTP-10905,~n" + "which makes it impossible to disable SSLv3. This makes the system~n" + "vulnerable to the POODLE attack. SSL listeners for ~s have therefore~n" + "been disabled.~n~n" + "You are advised to upgrade to a recent Erlang version; R16B01 is the~n" + "first version in which this bug is fixed, but later is usually~n" + "better.~n~n" + "If you cannot upgrade now and want to re-enable SSL listeners, you can~n" + "set the config item 'ssl_allow_poodle_attack' to 'true' in the~n" + "'rabbit' section of your configuration file.~n", + [rabbit_misc:otp_release(), Context]). + +fix_ssl_options(Config) -> + rabbit_ssl_options:fix(Config). + +-spec tcp_listener_addresses(listener_config()) -> [address()]. + +tcp_listener_addresses(Port) when is_integer(Port) -> + tcp_listener_addresses_auto(Port); +tcp_listener_addresses({"auto", Port}) -> + %% Variant to prevent lots of hacking around in bash and batch files + tcp_listener_addresses_auto(Port); +tcp_listener_addresses({Host, Port}) -> + %% auto: determine family IPv4 / IPv6 after converting to IP address + tcp_listener_addresses({Host, Port, auto}); +tcp_listener_addresses({Host, Port, Family0}) + when is_integer(Port) andalso (Port >= 0) andalso (Port =< 65535) -> + [{IPAddress, Port, Family} || + {IPAddress, Family} <- getaddr(Host, Family0)]; +tcp_listener_addresses({_Host, Port, _Family0}) -> + rabbit_log:error("invalid port ~p - not 0..65535~n", [Port]), + throw({error, {invalid_port, Port}}). + +tcp_listener_addresses_auto(Port) -> + lists:append([tcp_listener_addresses(Listener) || + Listener <- port_to_listeners(Port)]). + +-spec tcp_listener_spec + (name_prefix(), address(), [gen_tcp:listen_option()], module(), module(), + any(), protocol(), non_neg_integer(), label()) -> + supervisor:child_spec(). 
%% Build the supervisor child spec for a tcp_listener_sup handling one
%% listener address. The listener's address family is prepended to the
%% socket options; start/stop of the listener is reported back through
%% tcp_listener_started/4 and tcp_listener_stopped/4.
tcp_listener_spec(NamePrefix, {IPAddress, Port, Family}, SocketOpts,
                  Transport, ProtoSup, ProtoOpts, Protocol, NumAcceptors, Label) ->
    Args = [IPAddress, Port, Transport, [Family | SocketOpts], ProtoSup, ProtoOpts,
            {?MODULE, tcp_listener_started, [Protocol, SocketOpts]},
            {?MODULE, tcp_listener_stopped, [Protocol, SocketOpts]},
            NumAcceptors, Label],
    {rabbit_misc:tcp_name(NamePrefix, IPAddress, Port),
     {tcp_listener_sup, start_link, Args},
     transient, infinity, supervisor, [tcp_listener_sup]}.

-spec ranch_ref(#listener{} | [{atom(), any()}] | 'undefined') -> ranch:ref() | undefined.
%% Compute the Ranch listener reference for a #listener{} record, a
%% proplist carrying a 'port' key, or 'undefined' (passed through).
%% Only the first resolved address of the port is used.
ranch_ref(#listener{port = Port}) ->
    [{IPAddress, Port, _Family} | _] = tcp_listener_addresses(Port),
    {acceptor, IPAddress, Port};
ranch_ref(Listener) when is_list(Listener) ->
    Port = rabbit_misc:pget(port, Listener),
    [{IPAddress, Port, _Family} | _] = tcp_listener_addresses(Port),
    {acceptor, IPAddress, Port};
ranch_ref(undefined) ->
    undefined.

-spec ranch_ref(inet:ip_address(), ip_port()) -> ranch:ref().

%% Returns a reference that identifies a TCP listener in Ranch.
ranch_ref(IPAddress, Port) ->
    {acceptor, IPAddress, Port}.

-spec ranch_ref_of_protocol(atom()) -> ranch:ref() | undefined.
%% Ranch reference of this node's listener for Protocol, or undefined
%% if no such listener is recorded.
ranch_ref_of_protocol(Protocol) ->
    ranch_ref(listener_of_protocol(Protocol)).

%% Fixed spec: the mnesia:match_object miss branch returns 'undefined',
%% which the previous spec (-> #listener{}) omitted; both ranch_ref/1
%% and stop_ranch_listener_of_protocol/1 explicitly handle it.
-spec listener_of_protocol(atom()) -> #listener{} | 'undefined'.
%% Look up this node's recorded listener row for Protocol.
listener_of_protocol(Protocol) ->
    rabbit_misc:execute_mnesia_transaction(
      fun() ->
              MatchSpec = #listener{
                             node     = node(),
                             protocol = Protocol,
                             _        = '_'
                            },
              case mnesia:match_object(rabbit_listener, MatchSpec, read) of
                  []    -> undefined;
                  [Row] -> Row
              end
      end).

-spec stop_ranch_listener_of_protocol(atom()) -> ok | {error, not_found}.
%% Stop the Ranch listener serving Protocol, if one is recorded;
%% a no-op (ok) otherwise.
stop_ranch_listener_of_protocol(Protocol) ->
    case rabbit_networking:ranch_ref_of_protocol(Protocol) of
        undefined -> ok;
        Ref       ->
            rabbit_log:debug("Stopping Ranch listener for protocol ~s", [Protocol]),
            ranch:stop_listener(Ref)
    end.
+ +-spec start_tcp_listener( + listener_config(), integer()) -> 'ok' | {'error', term()}. + +start_tcp_listener(Listener, NumAcceptors) -> + start_listener(Listener, NumAcceptors, amqp, "TCP listener", tcp_opts()). + +-spec start_ssl_listener( + listener_config(), rabbit_types:infos(), integer()) -> 'ok' | {'error', term()}. + +start_ssl_listener(Listener, SslOpts, NumAcceptors) -> + start_listener(Listener, NumAcceptors, 'amqp/ssl', "TLS (SSL) listener", tcp_opts() ++ SslOpts). + + +-spec start_listener( + listener_config(), integer(), protocol(), label(), list()) -> 'ok' | {'error', term()}. +start_listener(Listener, NumAcceptors, Protocol, Label, Opts) -> + lists:foldl(fun (Address, ok) -> + start_listener0(Address, NumAcceptors, Protocol, Label, Opts); + (_Address, {error, _} = Error) -> + Error + end, ok, tcp_listener_addresses(Listener)). + +start_listener0(Address, NumAcceptors, Protocol, Label, Opts) -> + Transport = transport(Protocol), + Spec = tcp_listener_spec(rabbit_tcp_listener_sup, Address, Opts, + Transport, rabbit_connection_sup, [], Protocol, + NumAcceptors, Label), + case supervisor:start_child(rabbit_sup, Spec) of + {ok, _} -> ok; + {error, {{shutdown, {failed_to_start_child, _, + {shutdown, {failed_to_start_child, _, + {listen_error, _, PosixError}}}}}, _}} -> + {IPAddress, Port, _Family} = Address, + {error, {could_not_start_listener, rabbit_misc:ntoa(IPAddress), Port, PosixError}}; + {error, Other} -> + {IPAddress, Port, _Family} = Address, + {error, {could_not_start_listener, rabbit_misc:ntoa(IPAddress), Port, Other}} + end. + +transport(Protocol) -> + case Protocol of + amqp -> ranch_tcp; + 'amqp/ssl' -> ranch_ssl + end. + +-spec stop_tcp_listener(listener_config()) -> 'ok'. + +stop_tcp_listener(Listener) -> + [stop_tcp_listener0(Address) || + Address <- tcp_listener_addresses(Listener)], + ok. 
+ +stop_tcp_listener0({IPAddress, Port, _Family}) -> + Name = rabbit_misc:tcp_name(rabbit_tcp_listener_sup, IPAddress, Port), + ok = supervisor:terminate_child(rabbit_sup, Name), + ok = supervisor:delete_child(rabbit_sup, Name). + +-spec tcp_listener_started + (_, _, + string() | + {byte(),byte(),byte(),byte()} | + {char(),char(),char(),char(),char(),char(),char(),char()}, _) -> + 'ok'. + +tcp_listener_started(Protocol, Opts, IPAddress, Port) -> + %% We need the ip to distinguish e.g. 0.0.0.0 and 127.0.0.1 + %% We need the host so we can distinguish multiple instances of the above + %% in a cluster. + ok = mnesia:dirty_write( + rabbit_listener, + #listener{node = node(), + protocol = Protocol, + host = tcp_host(IPAddress), + ip_address = IPAddress, + port = Port, + opts = Opts}). + +-spec tcp_listener_stopped + (_, _, + string() | + {byte(),byte(),byte(),byte()} | + {char(),char(),char(),char(),char(),char(),char(),char()}, + _) -> + 'ok'. + +tcp_listener_stopped(Protocol, Opts, IPAddress, Port) -> + ok = mnesia:dirty_delete_object( + rabbit_listener, + #listener{node = node(), + protocol = Protocol, + host = tcp_host(IPAddress), + ip_address = IPAddress, + port = Port, + opts = Opts}). + +-spec record_distribution_listener() -> ok | no_return(). + +record_distribution_listener() -> + {Name, Host} = rabbit_nodes:parts(node()), + case erl_epmd:port_please(list_to_atom(Name), Host, infinity) of + {port, Port, _Version} -> + tcp_listener_started(clustering, [], {0,0,0,0,0,0,0,0}, Port); + noport -> + throw({error, no_epmd_port}) + end. + +-spec active_listeners() -> [rabbit_types:listener()]. + +active_listeners() -> + rabbit_misc:dirty_read_all(rabbit_listener). + +-spec node_listeners(node()) -> [rabbit_types:listener()]. + +node_listeners(Node) -> + mnesia:dirty_read(rabbit_listener, Node). + +-spec node_client_listeners(node()) -> [rabbit_types:listener()]. 
+ +node_client_listeners(Node) -> + case node_listeners(Node) of + [] -> []; + Xs -> + lists:filter(fun (#listener{protocol = clustering}) -> false; + (_) -> true + end, Xs) + end. + +-spec on_node_down(node()) -> 'ok'. + +on_node_down(Node) -> + case lists:member(Node, nodes()) of + false -> + rabbit_log:info( + "Node ~s is down, deleting its listeners~n", [Node]), + ok = mnesia:dirty_delete(rabbit_listener, Node); + true -> + rabbit_log:info( + "Keeping ~s listeners: the node is already back~n", [Node]) + end. + +-spec register_connection(pid()) -> ok. + +register_connection(Pid) -> pg_local:join(rabbit_connections, Pid). + +-spec unregister_connection(pid()) -> ok. + +unregister_connection(Pid) -> pg_local:leave(rabbit_connections, Pid). + +-spec connections() -> [rabbit_types:connection()]. + +connections() -> + Nodes = rabbit_nodes:all_running(), + rabbit_misc:append_rpc_all_nodes(Nodes, rabbit_networking, connections_local, [], ?RPC_TIMEOUT). + +-spec local_connections() -> [rabbit_types:connection()]. +%% @doc Returns pids of AMQP 0-9-1 and AMQP 1.0 connections local to this node. +local_connections() -> + connections_local(). + +-spec connections_local() -> [rabbit_types:connection()]. +%% @deprecated Prefer {@link local_connections} +connections_local() -> pg_local:get_members(rabbit_connections). + +-spec register_non_amqp_connection(pid()) -> ok. + +register_non_amqp_connection(Pid) -> pg_local:join(rabbit_non_amqp_connections, Pid). + +-spec unregister_non_amqp_connection(pid()) -> ok. + +unregister_non_amqp_connection(Pid) -> pg_local:leave(rabbit_non_amqp_connections, Pid). + +-spec non_amqp_connections() -> [rabbit_types:connection()]. + +non_amqp_connections() -> + Nodes = rabbit_nodes:all_running(), + rabbit_misc:append_rpc_all_nodes(Nodes, rabbit_networking, local_non_amqp_connections, [], ?RPC_TIMEOUT). + +-spec local_non_amqp_connections() -> [rabbit_types:connection()]. 
+local_non_amqp_connections() -> + pg_local:get_members(rabbit_non_amqp_connections). + +-spec connection_info_keys() -> rabbit_types:info_keys(). + +connection_info_keys() -> rabbit_reader:info_keys(). + +-spec connection_info(rabbit_types:connection()) -> rabbit_types:infos(). + +connection_info(Pid) -> rabbit_reader:info(Pid). + +-spec connection_info(rabbit_types:connection(), rabbit_types:info_keys()) -> + rabbit_types:infos(). + +connection_info(Pid, Items) -> rabbit_reader:info(Pid, Items). + +-spec connection_info_all() -> [rabbit_types:infos()]. + +connection_info_all() -> cmap(fun (Q) -> connection_info(Q) end). + +-spec connection_info_all(rabbit_types:info_keys()) -> + [rabbit_types:infos()]. + +connection_info_all(Items) -> cmap(fun (Q) -> connection_info(Q, Items) end). + +emit_connection_info_all(Nodes, Items, Ref, AggregatorPid) -> + Pids = [ spawn_link(Node, rabbit_networking, emit_connection_info_local, [Items, Ref, AggregatorPid]) || Node <- Nodes ], + rabbit_control_misc:await_emitters_termination(Pids), + ok. + +emit_connection_info_local(Items, Ref, AggregatorPid) -> + rabbit_control_misc:emitting_map_with_exit_handler( + AggregatorPid, Ref, fun(Q) -> connection_info(Q, Items) end, + connections_local()). + +-spec close_connection(pid(), string()) -> 'ok'. + +close_connection(Pid, Explanation) -> + case lists:member(Pid, connections()) of + true -> + Res = rabbit_reader:shutdown(Pid, Explanation), + rabbit_log:info("Closing connection ~p because ~p~n", [Pid, Explanation]), + Res; + false -> + rabbit_log:warning("Asked to close connection ~p (reason: ~p) " + "but no running cluster node reported it as an active connection. Was it already closed? ~n", + [Pid, Explanation]), + ok + end. + +-spec close_connections([pid()], string()) -> 'ok'. +close_connections(Pids, Explanation) -> + [close_connection(Pid, Explanation) || Pid <- Pids], + ok. + +%% Meant to be used by tests only +-spec close_all_connections(string()) -> 'ok'. 
+close_all_connections(Explanation) -> + Pids = connections(), + [close_connection(Pid, Explanation) || Pid <- Pids], + ok. + +-spec force_connection_event_refresh(reference()) -> 'ok'. +force_connection_event_refresh(Ref) -> + [rabbit_reader:force_event_refresh(C, Ref) || C <- connections()], + ok. + +-spec force_non_amqp_connection_event_refresh(reference()) -> 'ok'. +force_non_amqp_connection_event_refresh(Ref) -> + [gen_server:cast(Pid, {force_event_refresh, Ref}) || Pid <- non_amqp_connections()], + ok. + +-spec failed_to_recv_proxy_header(_, _) -> no_return(). +failed_to_recv_proxy_header(Ref, Error) -> + Msg = case Error of + closed -> "error when receiving proxy header: TCP socket was ~p prematurely"; + _Other -> "error when receiving proxy header: ~p" + end, + rabbit_log:debug(Msg, [Error]), + % The following call will clean up resources then exit + _ = ranch:handshake(Ref), + exit({shutdown, failed_to_recv_proxy_header}). + +handshake(Ref, ProxyProtocolEnabled) -> + case ProxyProtocolEnabled of + true -> + case ranch:recv_proxy_header(Ref, 3000) of + {error, Error} -> + failed_to_recv_proxy_header(Ref, Error); + {error, protocol_error, Error} -> + failed_to_recv_proxy_header(Ref, Error); + {ok, ProxyInfo} -> + {ok, Sock} = ranch:handshake(Ref), + setup_socket(Sock), + {ok, {rabbit_proxy_socket, Sock, ProxyInfo}} + end; + false -> + {ok, Sock} = ranch:handshake(Ref), + setup_socket(Sock), + {ok, Sock} + end. + +setup_socket(Sock) -> + ok = tune_buffer_size(Sock), + ok = file_handle_cache:obtain(). + +tune_buffer_size(Sock) -> + case tune_buffer_size1(Sock) of + ok -> ok; + {error, _} -> rabbit_net:fast_close(Sock), + exit(normal) + end. + +tune_buffer_size1(Sock) -> + case rabbit_net:getopts(Sock, [sndbuf, recbuf, buffer]) of + {ok, BufSizes} -> BufSz = lists:max([Sz || {_Opt, Sz} <- BufSizes]), + rabbit_net:setopts(Sock, [{buffer, BufSz}]); + Error -> Error + end. 
+
+%%--------------------------------------------------------------------
+
+tcp_host(IPAddress) ->
+    rabbit_net:tcp_host(IPAddress).
+
+cmap(F) -> rabbit_misc:filter_exit_map(F, connections()).
+
+%% Returns the configured TCP listen options (rabbit app env
+%% 'tcp_listen_options').
+tcp_opts() ->
+    {ok, ConfigOpts} = application:get_env(rabbit, tcp_listen_options),
+    ConfigOpts.
+
+%% inet_parse:address takes care of ip string, like "0.0.0.0"
+%% inet:getaddr returns immediately for ip tuple {0,0,0,0},
+%% and runs 'inet_gethost' port process for dns lookups.
+%% On Windows inet:getaddr runs dns resolver for ip string, which may fail.
+getaddr(Host, Family) ->
+    case inet_parse:address(Host) of
+        {ok, IPAddress} -> [{IPAddress, resolve_family(IPAddress, Family)}];
+        {error, _}      -> gethostaddr(Host, Family)
+    end.
+
+%% With Family 'auto' we try both inet and inet6 and keep whichever
+%% lookups succeed; with an explicit family we require that lookup to work.
+gethostaddr(Host, auto) ->
+    Lookups = [{Family, inet:getaddr(Host, Family)} || Family <- [inet, inet6]],
+    case [{IP, Family} || {Family, {ok, IP}} <- Lookups] of
+        []  -> host_lookup_error(Host, Lookups);
+        IPs -> IPs
+    end;
+gethostaddr(Host, Family) ->
+    case inet:getaddr(Host, Family) of
+        {ok, IPAddress} -> [{IPAddress, Family}];
+        {error, Reason} -> host_lookup_error(Host, Reason)
+    end.
+
+-spec host_lookup_error(_, _) -> no_return().
+host_lookup_error(Host, Reason) ->
+    rabbit_log:error("invalid host ~p - ~p~n", [Host, Reason]),
+    throw({error, {invalid_host, Host, Reason}}).
+
+resolve_family({_,_,_,_},         auto) -> inet;
+resolve_family({_,_,_,_,_,_,_,_}, auto) -> inet6;
+resolve_family(IP,                auto) -> throw({error, {strange_family, IP}});
+resolve_family(_,                 F)    -> F.
+
+%%--------------------------------------------------------------------
+
+%% There are three kinds of machine (for our purposes).
+%%
+%% * Those which treat IPv4 addresses as a special kind of IPv6 address
+%%   ("Single stack")
+%%   - Linux by default, Windows Vista and later
+%%   - We also treat any (hypothetical?) IPv6-only machine the same way
+%% * Those which consider IPv6 and IPv4 to be completely separate things
+%%   ("Dual stack")
+%%   - OpenBSD, Windows XP / 2003, Linux if so configured
+%% * Those which do not support IPv6.
+%%   - Ancient/weird OSes, Linux if so configured
+%%
+%% How to reconfigure Linux to test this:
+%% Single stack (default):
+%%   echo 0 > /proc/sys/net/ipv6/bindv6only
+%% Dual stack:
+%%   echo 1 > /proc/sys/net/ipv6/bindv6only
+%% IPv4 only:
+%%   add ipv6.disable=1 to GRUB_CMDLINE_LINUX_DEFAULT in /etc/default/grub then
+%%   sudo update-grub && sudo reboot
+%%
+%% This matters in (and only in) the case where the sysadmin (or the
+%% app descriptor) has only supplied a port and we wish to bind to
+%% "all addresses". This means different things depending on whether
+%% we're single or dual stack. On single stack binding to "::"
+%% implicitly includes all IPv4 addresses, and subsequently attempting
+%% to bind to "0.0.0.0" will fail. On dual stack, binding to "::" will
+%% only bind to IPv6 addresses, and we need another listener bound to
+%% "0.0.0.0" for IPv4. Finally, on IPv4-only systems we of course only
+%% want to bind to "0.0.0.0".
+%%
+%% Unfortunately it seems there is no way to detect single vs dual stack
+%% apart from attempting to bind to the port.
+port_to_listeners(Port) ->
+    IPv4 = {"0.0.0.0", Port, inet},
+    IPv6 = {"::",      Port, inet6},
+    case ipv6_status(?FIRST_TEST_BIND_PORT) of
+        single_stack -> [IPv6];
+        ipv6_only    -> [IPv6];
+        dual_stack   -> [IPv6, IPv4];
+        ipv4_only    -> [IPv4]
+    end.
+
+%% Probes TestPort with test listens to classify the host's IP stack;
+%% recurses on the next port number if the test port is in use.
+ipv6_status(TestPort) ->
+    IPv4 = [inet,  {ip, {0,0,0,0}}],
+    IPv6 = [inet6, {ip, {0,0,0,0,0,0,0,0}}],
+    case gen_tcp:listen(TestPort, IPv6) of
+        {ok, LSock6} ->
+            case gen_tcp:listen(TestPort, IPv4) of
+                {ok, LSock4} ->
+                    %% Dual stack
+                    gen_tcp:close(LSock6),
+                    gen_tcp:close(LSock4),
+                    dual_stack;
+                %% Checking the error here would only let us
+                %% distinguish single stack IPv6 / IPv4 vs IPv6 only,
+                %% which we figure out below anyway.
+                {error, _} ->
+                    gen_tcp:close(LSock6),
+                    case gen_tcp:listen(TestPort, IPv4) of
+                        %% Single stack
+                        {ok, LSock4} -> gen_tcp:close(LSock4),
+                                        single_stack;
+                        %% IPv6-only machine. Welcome to the future.
+                        {error, eafnosupport}    -> ipv6_only; %% Linux
+                        {error, eprotonosupport} -> ipv6_only; %% FreeBSD
+                        %% Dual stack machine with something already
+                        %% on IPv4.
+                        {error, _}               -> ipv6_status(TestPort + 1)
+                    end
+            end;
+        %% IPv4-only machine. Welcome to the 90s.
+        {error, eafnosupport} -> %% Linux
+            ipv4_only;
+        {error, eprotonosupport} -> %% FreeBSD
+            ipv4_only;
+        %% Port in use
+        {error, _} ->
+            ipv6_status(TestPort + 1)
+    end.
diff --git a/deps/rabbit/src/rabbit_node_monitor.erl b/deps/rabbit/src/rabbit_node_monitor.erl
new file mode 100644
index 0000000000..b56180c54c
--- /dev/null
+++ b/deps/rabbit/src/rabbit_node_monitor.erl
@@ -0,0 +1,926 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.
+%%
+
+-module(rabbit_node_monitor).
+
+%% Transitional step until we can require Erlang/OTP 21 and
+%% use the now recommended try/catch syntax for obtaining the stack trace.
+-compile(nowarn_deprecated_function).
+
+-behaviour(gen_server).
+
+-export([start_link/0]).
+-export([running_nodes_filename/0,
+         cluster_status_filename/0, quorum_filename/0, default_quorum_filename/0,
+         prepare_cluster_status_files/0,
+         write_cluster_status/1, read_cluster_status/0,
+         update_cluster_status/0, reset_cluster_status/0]).
+-export([notify_node_up/0, notify_joined_cluster/0, notify_left_cluster/1]).
+-export([partitions/0, partitions/1, status/1, subscribe/1]).
+-export([pause_partition_guard/0]).
+-export([global_sync/0]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+         code_change/3]).
+
+ %% Utils
+-export([all_rabbit_nodes_up/0, run_outside_applications/2, ping_all/0,
+         alive_nodes/1, alive_rabbit_nodes/1]).
+
+-define(SERVER, ?MODULE).
+-define(NODE_REPLY_TIMEOUT, 5000).
+-define(RABBIT_UP_RPC_TIMEOUT, 2000).
+-define(RABBIT_DOWN_PING_INTERVAL, 1000).
+
+-record(state, {monitors, partitions, subscribers, down_ping_timer,
+                keepalive_timer, autoheal, guid, node_guids}).
+
+%%----------------------------------------------------------------------------
+%% Start
+%%----------------------------------------------------------------------------
+
+-spec start_link() -> rabbit_types:ok_pid_or_error().
+
+start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
+
+%%----------------------------------------------------------------------------
+%% Cluster file operations
+%%----------------------------------------------------------------------------
+
+%% The cluster file information is kept in two files. The "cluster
+%% status file" contains all the clustered nodes and the disc nodes.
+%% The "running nodes file" contains the currently running nodes or
+%% the running nodes at shutdown when the node is down.
+%%
+%% We strive to keep the files up to date and we rely on this
+%% assumption in various situations. Obviously when mnesia is offline
+%% the information we have will be outdated, but it cannot be
+%% otherwise.
+
+-spec running_nodes_filename() -> string().
+
+running_nodes_filename() ->
+    filename:join(rabbit_mnesia:dir(), "nodes_running_at_shutdown").
+
+-spec cluster_status_filename() -> string().
+
+cluster_status_filename() ->
+    filename:join(rabbit_mnesia:dir(), "cluster_nodes.config").
+
+%% Data directory for quorum queue (Ra) state, as configured in ra_env.
+quorum_filename() ->
+    ra_env:data_dir().
+
+default_quorum_filename() ->
+    filename:join(rabbit_mnesia:dir(), "quorum").
+
+-spec prepare_cluster_status_files() -> 'ok' | no_return().
+
+%% Reads (possibly legacy-format) status files, merges in this node,
+%% and rewrites them in the current format. Throws via
+%% corrupt_cluster_status_files/1 on unparseable content.
+prepare_cluster_status_files() ->
+    rabbit_mnesia:ensure_mnesia_dir(),
+    RunningNodes1 = case try_read_file(running_nodes_filename()) of
+                        {ok, [Nodes]} when is_list(Nodes) -> Nodes;
+                        {ok, Other}                       -> corrupt_cluster_status_files(Other);
+                        {error, enoent}                   -> []
+                    end,
+    ThisNode = [node()],
+    %% The running nodes file might contain a set or a list, in case
+    %% of the legacy file
+    RunningNodes2 = lists:usort(ThisNode ++ RunningNodes1),
+    {AllNodes1, DiscNodes} =
+        case try_read_file(cluster_status_filename()) of
+            {ok, [{AllNodes, DiscNodes0}]} ->
+                {AllNodes, DiscNodes0};
+            {ok, [AllNodes0]} when is_list(AllNodes0) ->
+                {legacy_cluster_nodes(AllNodes0), legacy_disc_nodes(AllNodes0)};
+            {ok, Files} ->
+                corrupt_cluster_status_files(Files);
+            {error, enoent} ->
+                LegacyNodes = legacy_cluster_nodes([]),
+                {LegacyNodes, LegacyNodes}
+        end,
+    AllNodes2 = lists:usort(AllNodes1 ++ RunningNodes2),
+    ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}).
+
+-spec corrupt_cluster_status_files(any()) -> no_return().
+
+corrupt_cluster_status_files(F) ->
+    throw({error, corrupt_cluster_status_files, F}).
+
+-spec write_cluster_status(rabbit_mnesia:cluster_status()) -> 'ok'.
+
+%% Writes both status files; throws {could_not_write_file, ...} naming
+%% whichever file failed.
+write_cluster_status({All, Disc, Running}) ->
+    ClusterStatusFN = cluster_status_filename(),
+    Res = case rabbit_file:write_term_file(ClusterStatusFN, [{All, Disc}]) of
+              ok ->
+                  RunningNodesFN = running_nodes_filename(),
+                  {RunningNodesFN,
+                   rabbit_file:write_term_file(RunningNodesFN, [Running])};
+              E1 = {error, _} ->
+                  {ClusterStatusFN, E1}
+          end,
+    case Res of
+        {_, ok}           -> ok;
+        {FN, {error, E2}} -> throw({error, {could_not_write_file, FN, E2}})
+    end.
+
+-spec read_cluster_status() -> rabbit_mnesia:cluster_status().
+
+read_cluster_status() ->
+    case {try_read_file(cluster_status_filename()),
+          try_read_file(running_nodes_filename())} of
+        {{ok, [{All, Disc}]}, {ok, [Running]}} when is_list(Running) ->
+            {All, Disc, Running};
+        {Stat, Run} ->
+            throw({error, {corrupt_or_missing_cluster_files, Stat, Run}})
+    end.
+
+-spec update_cluster_status() -> 'ok'.
+
+update_cluster_status() ->
+    {ok, Status} = rabbit_mnesia:cluster_status_from_mnesia(),
+    write_cluster_status(Status).
+
+-spec reset_cluster_status() -> 'ok'.
+
+reset_cluster_status() ->
+    write_cluster_status({[node()], [node()], [node()]}).
+
+%%----------------------------------------------------------------------------
+%% Cluster notifications
+%%----------------------------------------------------------------------------
+
+-spec notify_node_up() -> 'ok'.
+
+notify_node_up() ->
+    gen_server:cast(?SERVER, notify_node_up).
+
+-spec notify_joined_cluster() -> 'ok'.
+
+notify_joined_cluster() ->
+    Nodes = rabbit_nodes:all_running() -- [node()],
+    gen_server:abcast(Nodes, ?SERVER,
+                      {joined_cluster, node(), rabbit_mnesia:node_type()}),
+    ok.
+
+-spec notify_left_cluster(node()) -> 'ok'.
+
+notify_left_cluster(Node) ->
+    Nodes = rabbit_nodes:all_running(),
+    gen_server:abcast(Nodes, ?SERVER, {left_cluster, Node}),
+    ok.
+
+%%----------------------------------------------------------------------------
+%% Server calls
+%%----------------------------------------------------------------------------
+
+-spec partitions() -> [node()].
+
+partitions() ->
+    gen_server:call(?SERVER, partitions, infinity).
+
+-spec partitions([node()]) -> [{node(), [node()]}].
+
+%% Multi-node variant: nodes that fail to reply within
+%% ?NODE_REPLY_TIMEOUT are silently omitted from the result.
+partitions(Nodes) ->
+    {Replies, _} = gen_server:multi_call(Nodes, ?SERVER, partitions, ?NODE_REPLY_TIMEOUT),
+    Replies.
+
+-spec status([node()]) -> {[{node(), [node()]}], [node()]}.
+
+status(Nodes) ->
+    gen_server:multi_call(Nodes, ?SERVER, status, infinity).
+
+-spec subscribe(pid()) -> 'ok'.
+
+subscribe(Pid) ->
+    gen_server:cast(?SERVER, {subscribe, Pid}).
+
+%%----------------------------------------------------------------------------
+%% pause_minority/pause_if_all_down safety
+%%----------------------------------------------------------------------------
+
+%% If we are in a minority and pause_minority mode then a) we are
+%% going to shut down imminently and b) we should not confirm anything
+%% until then, since anything we confirm is likely to be lost.
+%%
+%% The same principles apply to a node which isn't part of the preferred
+%% partition when we are in pause_if_all_down mode.
+%%
+%% We could confirm something by having an HA queue see the pausing
+%% state (and fail over into it) before the node monitor stops us, or
+%% by using unmirrored queues and just having them vanish (and
+%% confirming messages as thrown away).
+%%
+%% So we have channels call in here before issuing confirms, to do a
+%% lightweight check that we have not entered a pausing state.
+
+-spec pause_partition_guard() -> 'ok' | 'pausing'.
+
+%% NOTE: caches the detected mode in the process dictionary of the
+%% *calling* (channel) process, so the app env is consulted only once
+%% per process.
+pause_partition_guard() ->
+    case get(pause_partition_guard) of
+        not_pause_mode ->
+            ok;
+        undefined ->
+            {ok, M} = application:get_env(rabbit, cluster_partition_handling),
+            case M of
+                pause_minority ->
+                    pause_minority_guard([], ok);
+                {pause_if_all_down, PreferredNodes, _} ->
+                    pause_if_all_down_guard(PreferredNodes, [], ok);
+                _ ->
+                    put(pause_partition_guard, not_pause_mode),
+                    ok
+            end;
+        {minority_mode, Nodes, LastState} ->
+            pause_minority_guard(Nodes, LastState);
+        {pause_if_all_down_mode, PreferredNodes, Nodes, LastState} ->
+            pause_if_all_down_guard(PreferredNodes, Nodes, LastState)
+    end.
+
+%% Re-evaluates majority() only when the set of connected nodes has
+%% changed since the last check; otherwise returns the cached state.
+pause_minority_guard(LastNodes, LastState) ->
+    case nodes() of
+        LastNodes -> LastState;
+        _         -> NewState = case majority() of
+                                    false -> pausing;
+                                    true  -> ok
+                                end,
+                     put(pause_partition_guard,
+                         {minority_mode, nodes(), NewState}),
+                     NewState
+    end.
+
+pause_if_all_down_guard(PreferredNodes, LastNodes, LastState) ->
+    case nodes() of
+        LastNodes -> LastState;
+        _         -> NewState = case in_preferred_partition(PreferredNodes) of
+                                    false -> pausing;
+                                    true  -> ok
+                                end,
+                     put(pause_partition_guard,
+                         {pause_if_all_down_mode, PreferredNodes, nodes(),
+                          NewState}),
+                     NewState
+    end.
+
+%%----------------------------------------------------------------------------
+%% "global" hang workaround.
+%%----------------------------------------------------------------------------
+
+%% This code works around a possible inconsistency in the "global"
+%% state, causing global:sync/0 to never return.
+%%
+%% 1. A process is spawned.
+%% 2. If after 15", global:sync() didn't return, the "global"
+%%    state is parsed.
+%% 3. If it detects that a sync is blocked for more than 10",
+%%    the process sends fake nodedown/nodeup events to the two
+%%    nodes involved (one local, one remote).
+%% 4. Both "global" instances restart their synchronisation.
+%% 5. global:sync() finally returns.
+%%
+%% FIXME: Remove this workaround, once we got rid of the change to
+%% "dist_auto_connect" and fixed the bugs uncovered.
+
+global_sync() ->
+    Pid = spawn(fun workaround_global_hang/0),
+    ok = global:sync(),
+    Pid ! global_sync_done,
+    ok.
+
+%% Watchdog body: if global_sync() has not completed within 10s we
+%% start looking for blocked peers.
+workaround_global_hang() ->
+    receive
+        global_sync_done ->
+            ok
+    after 10000 ->
+            find_blocked_global_peers()
+    end.
+
+%% A sync entry still present in two snapshots taken 10s apart is
+%% considered blocked.
+find_blocked_global_peers() ->
+    Snapshot1 = snapshot_global_dict(),
+    timer:sleep(10000),
+    Snapshot2 = snapshot_global_dict(),
+    find_blocked_global_peers1(Snapshot2, Snapshot1).
+
+%% Extracts the {sync_tag_his, Node} entries from the global name
+%% server's process dictionary via sys:get_status/1.
+snapshot_global_dict() ->
+    {status, _, _, [Dict | _]} = sys:get_status(global_name_server),
+    [E || {{sync_tag_his, _}, _} = E <- Dict].
+
+find_blocked_global_peers1([{{sync_tag_his, Peer}, _} = Item | Rest],
+                           OlderSnapshot) ->
+    case lists:member(Item, OlderSnapshot) of
+        true  -> unblock_global_peer(Peer);
+        false -> ok
+    end,
+    find_blocked_global_peers1(Rest, OlderSnapshot);
+find_blocked_global_peers1([], _) ->
+    ok.
+
+%% Kicks both "global" instances by faking nodedown/nodeup events on
+%% each side, forcing them to restart their synchronisation.
+unblock_global_peer(PeerNode) ->
+    ThisNode = node(),
+    PeerState = rpc:call(PeerNode, sys, get_status, [global_name_server]),
+    error_logger:info_msg(
+      "Global hang workaround: global state on ~s seems broken~n"
+      " * Peer global state: ~p~n"
+      " * Local global state: ~p~n"
+      "Faking nodedown/nodeup between ~s and ~s~n",
+      [PeerNode, PeerState, sys:get_status(global_name_server),
+       PeerNode, ThisNode]),
+    {global_name_server, ThisNode} ! {nodedown, PeerNode},
+    {global_name_server, PeerNode} ! {nodedown, ThisNode},
+    {global_name_server, ThisNode} ! {nodeup, PeerNode},
+    {global_name_server, PeerNode} ! {nodeup, ThisNode},
+    ok.
+
+%%----------------------------------------------------------------------------
+%% gen_server callbacks
+%%----------------------------------------------------------------------------
+
+init([]) ->
+    %% We trap exits so that the supervisor will not just kill us. We
+    %% want to be sure that we are not going to be killed while
+    %% writing out the cluster status files - bad things can then
+    %% happen.
+    process_flag(trap_exit, true),
+    net_kernel:monitor_nodes(true, [nodedown_reason]),
+    {ok, _} = mnesia:subscribe(system),
+    %% If the node has been restarted, Mnesia can trigger a system notification
+    %% before the monitor subscribes to receive them. To avoid autoheal blocking due to
+    %% the inconsistent database event never arriving, we begin monitoring all running
+    %% nodes as early as possible. The rest of the monitoring ops will only be triggered
+    %% when notifications arrive.
+    Nodes = possibly_partitioned_nodes(),
+    startup_log(Nodes),
+    Monitors = lists:foldl(fun(Node, Monitors0) ->
+                                   pmon:monitor({rabbit, Node}, Monitors0)
+                           end, pmon:new(), Nodes),
+    {ok, ensure_keepalive_timer(#state{monitors = Monitors,
+                                       subscribers = pmon:new(),
+                                       partitions = [],
+                                       guid = rabbit_guid:gen(),
+                                       node_guids = maps:new(),
+                                       autoheal = rabbit_autoheal:init()})}.
+
+handle_call(partitions, _From, State = #state{partitions = Partitions}) ->
+    {reply, Partitions, State};
+
+handle_call(status, _From, State = #state{partitions = Partitions}) ->
+    {reply, [{partitions, Partitions},
+             {nodes, [node() | nodes()]}], State};
+
+handle_call(_Request, _From, State) ->
+    {noreply, State}.
+
+handle_cast(notify_node_up, State = #state{guid = GUID}) ->
+    Nodes = rabbit_nodes:all_running() -- [node()],
+    gen_server:abcast(Nodes, ?SERVER,
+                      {node_up, node(), rabbit_mnesia:node_type(), GUID}),
+    %% register other active rabbits with this rabbit
+    DiskNodes = rabbit_mnesia:cluster_nodes(disc),
+    [gen_server:cast(?SERVER, {node_up, N, case lists:member(N, DiskNodes) of
+                                               true  -> disc;
+                                               false -> ram
+                                           end}) || N <- Nodes],
+    {noreply, State};
+
+%%----------------------------------------------------------------------------
+%% Partial partition detection
+%%
+%% Every node generates a GUID each time it starts, and announces that
+%% GUID in 'node_up', with 'announce_guid' sent by return so the new
+%% node knows the GUIDs of the others. These GUIDs are sent in all the
+%% partial partition related messages to ensure that we ignore partial
+%% partition messages from before we restarted (to avoid getting stuck
+%% in a loop).
+%%
+%% When one node gets nodedown from another, it then sends
+%% 'check_partial_partition' to all the nodes it still thinks are
+%% alive. If any of those (intermediate) nodes still see the "down"
+%% node as up, they inform it that this has happened. The original
+%% node (in 'ignore', 'pause_if_all_down' or 'autoheal' mode) will then
+%% disconnect from the intermediate node to "upgrade" to a full
+%% partition.
+%%
+%% In pause_minority mode it will instead immediately pause until all
+%% nodes come back. This is because the contract for pause_minority is
+%% that nodes should never sit in a partitioned state - if it just
+%% disconnected, it would become a minority, pause, realise it's not
+%% in a minority any more, and come back, still partitioned (albeit no
+%% longer partially).
+%% ----------------------------------------------------------------------------
+
+%% GUID-carrying variant: record the peer's GUID, announce ours, then
+%% delegate to the plain {node_up, Node, NodeType} clause below.
+handle_cast({node_up, Node, NodeType, GUID},
+            State = #state{guid = MyGUID,
+                           node_guids = GUIDs}) ->
+    cast(Node, {announce_guid, node(), MyGUID}),
+    GUIDs1 = maps:put(Node, GUID, GUIDs),
+    handle_cast({node_up, Node, NodeType}, State#state{node_guids = GUIDs1});
+
+%% Record the GUID a peer announced in response to our node_up.
+handle_cast({announce_guid, Node, GUID}, State = #state{node_guids = GUIDs}) ->
+    {noreply, State#state{node_guids = maps:put(Node, GUID, GUIDs)}};
+
+handle_cast({check_partial_partition, Node, Rep, NodeGUID, MyGUID, RepGUID},
+            State = #state{guid = MyGUID,
+                           node_guids = GUIDs}) ->
+    case lists:member(Node, rabbit_nodes:all_running()) andalso
+        maps:find(Node, GUIDs) =:= {ok, NodeGUID} of
+        true  -> spawn_link( %%[1]
+                   fun () ->
+                           case rpc:call(Node, rabbit, is_running, []) of
+                               {badrpc, _} -> ok;
+                               _           ->
+                                   rabbit_log:warning("Received a 'DOWN' message"
+                                                      " from ~p but still can"
+                                                      " communicate with it ~n",
+                                                      [Node]),
+                                   cast(Rep, {partial_partition,
+                                              Node, node(), RepGUID})
+                           end
+                   end);
+        false -> ok
+    end,
+    {noreply, State};
+%% [1] We checked that we haven't heard the node go down - but we
+%% really should make sure we can actually communicate with
+%% it. Otherwise there's a race where we falsely detect a partial
+%% partition.
+%%
+%% Now of course the rpc:call/4 may take a long time to return if
+%% connectivity with the node is actually interrupted - but that's OK,
+%% we only really want to do something in a timely manner if
+%% connectivity is OK. However, of course as always we must not block
+%% the node monitor, so we do the check in a separate process.
+
+%% GUID mismatch (stale message from before a restart): ignore.
+handle_cast({check_partial_partition, _Node, _Reporter,
+             _NodeGUID, _GUID, _ReporterGUID}, State) ->
+    {noreply, State};
+
+handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID},
+            State = #state{guid = MyGUID}) ->
+    FmtBase = "Partial partition detected:~n"
+        " * We saw DOWN from ~s~n"
+        " * We can still see ~s which can see ~s~n",
+    ArgsBase = [NotReallyDown, Proxy, NotReallyDown],
+    case application:get_env(rabbit, cluster_partition_handling) of
+        {ok, pause_minority} ->
+            rabbit_log:error(
+              FmtBase ++ " * pause_minority mode enabled~n"
+              "We will therefore pause until the *entire* cluster recovers~n",
+              ArgsBase),
+            await_cluster_recovery(fun all_nodes_up/0),
+            {noreply, State};
+        {ok, {pause_if_all_down, PreferredNodes, _}} ->
+            case in_preferred_partition(PreferredNodes) of
+                true  -> rabbit_log:error(
+                           FmtBase ++ "We will therefore intentionally "
+                           "disconnect from ~s~n", ArgsBase ++ [Proxy]),
+                         upgrade_to_full_partition(Proxy);
+                false -> rabbit_log:info(
+                           FmtBase ++ "We are about to pause, no need "
+                           "for further actions~n", ArgsBase)
+            end,
+            {noreply, State};
+        {ok, _} ->
+            rabbit_log:error(
+              FmtBase ++ "We will therefore intentionally disconnect from ~s~n",
+              ArgsBase ++ [Proxy]),
+            upgrade_to_full_partition(Proxy),
+            {noreply, State}
+    end;
+
+handle_cast({partial_partition, _GUID, _Reporter, _Proxy}, State) ->
+    {noreply, State};
+
+%% Sometimes it appears the Erlang VM does not give us nodedown
+%% messages reliably when another node disconnects from us. Therefore
+%% we are told just before the disconnection so we can reciprocate.
+handle_cast({partial_partition_disconnect, Other}, State) ->
+    rabbit_log:error("Partial partition disconnect from ~s~n", [Other]),
+    disconnect(Other),
+    {noreply, State};
+
+%% Note: when updating the status file, we can't simply write the
+%% mnesia information since the message can (and will) overtake the
+%% mnesia propagation.
+handle_cast({node_up, Node, NodeType},
+            State = #state{monitors = Monitors}) ->
+    rabbit_log:info("rabbit on node ~p up~n", [Node]),
+    {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
+    write_cluster_status({add_node(Node, AllNodes),
+                          case NodeType of
+                              disc -> add_node(Node, DiscNodes);
+                              ram  -> DiscNodes
+                          end,
+                          add_node(Node, RunningNodes)}),
+    ok = handle_live_rabbit(Node),
+    Monitors1 = case pmon:is_monitored({rabbit, Node}, Monitors) of
+                    true ->
+                        Monitors;
+                    false ->
+                        pmon:monitor({rabbit, Node}, Monitors)
+                end,
+    {noreply, maybe_autoheal(State#state{monitors = Monitors1})};
+
+handle_cast({joined_cluster, Node, NodeType}, State) ->
+    {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
+    write_cluster_status({add_node(Node, AllNodes),
+                          case NodeType of
+                              disc -> add_node(Node, DiscNodes);
+                              ram  -> DiscNodes
+                          end,
+                          RunningNodes}),
+    {noreply, State};
+
+handle_cast({left_cluster, Node}, State) ->
+    {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
+    write_cluster_status({del_node(Node, AllNodes), del_node(Node, DiscNodes),
+                          del_node(Node, RunningNodes)}),
+    {noreply, State};
+
+handle_cast({subscribe, Pid}, State = #state{subscribers = Subscribers}) ->
+    {noreply, State#state{subscribers = pmon:monitor(Pid, Subscribers)}};
+
+handle_cast(keepalive, State) ->
+    {noreply, State};
+
+handle_cast(_Msg, State) ->
+    {noreply, State}.
+
+%% The 'rabbit' application on Node stopped (or the node itself died):
+%% update status files, notify subscribers and run the down hooks.
+handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason},
+            State = #state{monitors = Monitors, subscribers = Subscribers}) ->
+    rabbit_log:info("rabbit on node ~p down~n", [Node]),
+    {AllNodes, DiscNodes, RunningNodes} = read_cluster_status(),
+    write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}),
+    [P ! {node_down, Node} || P <- pmon:monitored(Subscribers)],
+    {noreply, handle_dead_rabbit(
+                Node,
+                State#state{monitors = pmon:erase({rabbit, Node}, Monitors)})};
+
+handle_info({'DOWN', _MRef, process, Pid, _Reason},
+            State = #state{subscribers = Subscribers}) ->
+    {noreply, State#state{subscribers = pmon:erase(Pid, Subscribers)}};
+
+%% On nodedown, ask every other node we know the GUID of to check for
+%% a partial partition (see the detection notes above handle_cast).
+handle_info({nodedown, Node, Info}, State = #state{guid = MyGUID,
+                                                   node_guids = GUIDs}) ->
+    rabbit_log:info("node ~p down: ~p~n",
+                    [Node, proplists:get_value(nodedown_reason, Info)]),
+    Check = fun (N, CheckGUID, DownGUID) ->
+                    cast(N, {check_partial_partition,
+                             Node, node(), DownGUID, CheckGUID, MyGUID})
+            end,
+    case maps:find(Node, GUIDs) of
+        {ok, DownGUID} -> Alive = rabbit_nodes:all_running()
+                              -- [node(), Node],
+                          [case maps:find(N, GUIDs) of
+                               {ok, CheckGUID} -> Check(N, CheckGUID, DownGUID);
+                               error           -> ok
+                           end || N <- Alive];
+        error          -> ok
+    end,
+    {noreply, handle_dead_node(Node, State)};
+
+handle_info({nodeup, Node, _Info}, State) ->
+    rabbit_log:info("node ~p up~n", [Node]),
+    {noreply, State};
+
+handle_info({mnesia_system_event,
+             {inconsistent_database, running_partitioned_network, Node}},
+            State = #state{partitions = Partitions,
+                           monitors = Monitors}) ->
+    %% We will not get a node_up from this node - yet we should treat it as
+    %% up (mostly).
+    State1 = case pmon:is_monitored({rabbit, Node}, Monitors) of
+                 true  -> State;
+                 false -> State#state{
+                            monitors = pmon:monitor({rabbit, Node}, Monitors)}
+             end,
+    ok = handle_live_rabbit(Node),
+    Partitions1 = lists:usort([Node | Partitions]),
+    {noreply, maybe_autoheal(State1#state{partitions = Partitions1})};
+
+handle_info({autoheal_msg, Msg}, State = #state{autoheal = AState,
+                                                partitions = Partitions}) ->
+    AState1 = rabbit_autoheal:handle_msg(Msg, AState, Partitions),
+    {noreply, State#state{autoheal = AState1}};
+
+handle_info(ping_down_nodes, State) ->
+    %% We ping nodes when some are down to ensure that we find out
+    %% about healed partitions quickly. We ping all nodes rather than
+    %% just the ones we know are down for simplicity; it's not expensive
+    %% to ping the nodes that are up, after all.
+    State1 = State#state{down_ping_timer = undefined},
+    Self = self(),
+    %% We ping in a separate process since in a partition it might
+    %% take some noticeable length of time and we don't want to block
+    %% the node monitor for that long.
+    spawn_link(fun () ->
+                       ping_all(),
+                       case all_nodes_up() of
+                           true  -> ok;
+                           false -> Self ! ping_down_nodes_again
+                       end
+               end),
+    {noreply, State1};
+
+handle_info(ping_down_nodes_again, State) ->
+    {noreply, ensure_ping_timer(State)};
+
+handle_info(ping_up_nodes, State) ->
+    %% In this case we need to ensure that we ping "quickly" -
+    %% i.e. only nodes that we know to be up.
+    [cast(N, keepalive) || N <- alive_nodes() -- [node()]],
+    {noreply, ensure_keepalive_timer(State#state{keepalive_timer = undefined})};
+
+handle_info({'EXIT', _, _} = Info, State = #state{autoheal = AState0}) ->
+    AState = rabbit_autoheal:process_down(Info, AState0),
+    {noreply, State#state{autoheal = AState}};
+
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+terminate(_Reason, State) ->
+    rabbit_misc:stop_timer(State, #state.down_ping_timer),
+    ok.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+%% Functions that call the module specific hooks when nodes go up/down
+%%----------------------------------------------------------------------------
+
+handle_dead_node(Node, State = #state{autoheal = Autoheal}) ->
+    %% In general in rabbit_node_monitor we care about whether the
+    %% rabbit application is up rather than the node; we do this so
+    %% that we can respond in the same way to "rabbitmqctl stop_app"
+    %% and "rabbitmqctl stop" as much as possible.
+    %%
+    %% However, for pause_minority and pause_if_all_down modes we can't do
+    %% this, since we depend on looking at whether other nodes are up
+    %% to decide whether to come back up ourselves - if we decide that
+    %% based on the rabbit application we would go down and never come
+    %% back.
+    case application:get_env(rabbit, cluster_partition_handling) of
+        {ok, pause_minority} ->
+            case majority([Node]) of
+                true  -> ok;
+                false -> await_cluster_recovery(fun majority/0)
+            end,
+            State;
+        {ok, {pause_if_all_down, PreferredNodes, HowToRecover}} ->
+            case in_preferred_partition(PreferredNodes, [Node]) of
+                true  -> ok;
+                false -> await_cluster_recovery(
+                           fun in_preferred_partition/0)
+            end,
+            case HowToRecover of
+                autoheal -> State#state{autoheal =
+                                rabbit_autoheal:node_down(Node, Autoheal)};
+                _        -> State
+            end;
+        {ok, ignore} ->
+            State;
+        {ok, autoheal} ->
+            State#state{autoheal = rabbit_autoheal:node_down(Node, Autoheal)};
+        {ok, Term} ->
+            rabbit_log:warning("cluster_partition_handling ~p unrecognised, "
+                               "assuming 'ignore'~n", [Term]),
+            State
+    end.
+
+%% Stops the rabbit application (outside the application controller)
+%% and restarts it once Condition() holds again.
+await_cluster_recovery(Condition) ->
+    rabbit_log:warning("Cluster minority/secondary status detected - "
+                       "awaiting recovery~n", []),
+    run_outside_applications(fun () ->
+                                     rabbit:stop(),
+                                     wait_for_cluster_recovery(Condition)
+                             end, false),
+    ok.
+
+run_outside_applications(Fun, WaitForExistingProcess) ->
+    spawn_link(fun () ->
+                       %% Ignore exit messages from the monitor - the link is needed
+                       %% to ensure the monitor detects abnormal exits from this process
+                       %% and can reset the 'restarting' status on the autoheal, avoiding
+                       %% a deadlock. The monitor is restarted when rabbit does, so messages
+                       %% in the other direction should be ignored.
+                       process_flag(trap_exit, true),
+                       %% If our group leader is inside an application we are about
+                       %% to stop, application:stop/1 does not return.
+                       group_leader(whereis(init), self()),
+                       register_outside_app_process(Fun, WaitForExistingProcess)
+               end).
+
+register_outside_app_process(Fun, WaitForExistingProcess) ->
+    %% Ensure only one such process at a time, the exit(badarg) is
+    %% harmless if one is already running.
+    %%
+    %% If WaitForExistingProcess is false, the given fun is simply not
+    %% executed at all and the process exits.
+    %%
+    %% If WaitForExistingProcess is true, we wait for the end of the
+    %% currently running process before executing the given function.
+    try register(rabbit_outside_app_process, self()) of
+        true ->
+            do_run_outside_app_fun(Fun)
+    catch
+        error:badarg when WaitForExistingProcess ->
+            MRef = erlang:monitor(process, rabbit_outside_app_process),
+            receive
+                {'DOWN', MRef, _, _, _} ->
+                    %% The existing process exited, let's try to
+                    %% register again.
+                    register_outside_app_process(Fun, WaitForExistingProcess)
+            end;
+        error:badarg ->
+            ok
+    end.
+
+do_run_outside_app_fun(Fun) ->
+    try
+        Fun()
+    catch _:E:Stacktrace ->
+            rabbit_log:error(
+              "rabbit_outside_app_process:~n~p~n~p~n",
+              [E, Stacktrace])
+    end.
+
+%% Polls (pinging all cluster nodes each round) until Condition()
+%% holds, then restarts the rabbit application.
+wait_for_cluster_recovery(Condition) ->
+    ping_all(),
+    case Condition() of
+        true  -> rabbit:start();
+        false -> timer:sleep(?RABBIT_DOWN_PING_INTERVAL),
+                 wait_for_cluster_recovery(Condition)
+    end.
+
+handle_dead_rabbit(Node, State = #state{partitions = Partitions,
+                                        autoheal = Autoheal}) ->
+    %% TODO: This may turn out to be a performance hog when there are
+    %% lots of nodes. We really only need to execute some of these
+    %% statements on *one* node, rather than all of them.
+    ok = rabbit_networking:on_node_down(Node),
+    ok = rabbit_amqqueue:on_node_down(Node),
+    ok = rabbit_alarm:on_node_down(Node),
+    ok = rabbit_mnesia:on_node_down(Node),
+    %% If we have been partitioned, and we are now in the only remaining
+    %% partition, we no longer care about partitions - forget them. Note
+    %% that we do not attempt to deal with individual (other) partitions
+    %% going away. It's only safe to forget anything about partitions when
+    %% there are no partitions.
+    Down = Partitions -- alive_rabbit_nodes(),
+    NoLongerPartitioned = rabbit_nodes:all_running(),
+    Partitions1 = case Partitions -- Down -- NoLongerPartitioned of
+                      [] -> [];
+                      _  -> Partitions
+                  end,
+    ensure_ping_timer(
+      State#state{partitions = Partitions1,
+                  autoheal = rabbit_autoheal:rabbit_down(Node, Autoheal)}).
+
+ensure_ping_timer(State) ->
+    rabbit_misc:ensure_timer(
+      State, #state.down_ping_timer, ?RABBIT_DOWN_PING_INTERVAL,
+      ping_down_nodes).
+
+ensure_keepalive_timer(State) ->
+    {ok, Interval} = application:get_env(rabbit, cluster_keepalive_interval),
+    rabbit_misc:ensure_timer(
+      State, #state.keepalive_timer, Interval, ping_up_nodes).
+
+handle_live_rabbit(Node) ->
+    ok = rabbit_amqqueue:on_node_up(Node),
+    ok = rabbit_alarm:on_node_up(Node),
+    ok = rabbit_mnesia:on_node_up(Node).
+
+%% Autoheal is only attempted once every partitioned node is reachable
+%% again; with no recorded partitions there is nothing to heal.
+maybe_autoheal(State = #state{partitions = []}) ->
+    State;
+
+maybe_autoheal(State = #state{autoheal = AState}) ->
+    case all_nodes_up() of
+        true  -> State#state{autoheal = rabbit_autoheal:maybe_start(AState)};
+        false -> State
+    end.
+
+%%--------------------------------------------------------------------
+%% Internal utils
+%%--------------------------------------------------------------------
+
+try_read_file(FileName) ->
+    case rabbit_file:read_term_file(FileName) of
+        {ok, Term}      -> {ok, Term};
+        {error, enoent} -> {error, enoent};
+        {error, E}      -> throw({error, {cannot_read_file, FileName, E}})
+    end.
+
+legacy_cluster_nodes(Nodes) ->
+    %% We get all the info that we can, including the nodes from
+    %% mnesia, which will be there if the node is a disc node (empty
+    %% list otherwise)
+    lists:usort(Nodes ++ mnesia:system_info(db_nodes)).
+
+legacy_disc_nodes(AllNodes) ->
+    case AllNodes == [] orelse lists:member(node(), AllNodes) of
+        true  -> [node()];
+        false -> []
+    end.
+
+add_node(Node, Nodes) -> lists:usort([Node | Nodes]).
+
+del_node(Node, Nodes) -> Nodes -- [Node].
+
+cast(Node, Msg) -> gen_server:cast({?SERVER, Node}, Msg).
+
+upgrade_to_full_partition(Proxy) ->
+    cast(Proxy, {partial_partition_disconnect, node()}),
+    disconnect(Proxy).
+
+%% When we call this, it's because we want to force Mnesia to detect a
+%% partition. But if we just disconnect_node/1 then Mnesia won't
+%% detect a very short partition. So we want to force a slightly
+%% longer disconnect. Unfortunately we don't have a way to blacklist
+%% individual nodes; the best we can do is turn off auto-connect
+%% altogether.
+disconnect(Node) ->
+    application:set_env(kernel, dist_auto_connect, never),
+    erlang:disconnect_node(Node),
+    timer:sleep(1000),
+    application:unset_env(kernel, dist_auto_connect),
+    ok.
+
+%%--------------------------------------------------------------------
+
+%% mnesia:system_info(db_nodes) (and hence
+%% rabbit_nodes:all_running()) does not return all nodes
+%% when partitioned, just those that we are sharing Mnesia state
+%% with. So we have a small set of replacement functions
+%% here. "rabbit" in a function's name implies we test if the rabbit
+%% application is up, not just the node.
+
+%% As we use these functions to decide what to do in pause_minority or
+%% pause_if_all_down states, they *must* be fast, even in the case where
+%% TCP connections are timing out. So that means we should be careful
+%% about whether we connect to nodes which are currently disconnected.
+
+majority() ->
+    majority([]).
+
+%% True when strictly more than half of all cluster nodes are alive,
+%% after excluding NodesDown.
+majority(NodesDown) ->
+    Nodes = rabbit_mnesia:cluster_nodes(all),
+    AliveNodes = alive_nodes(Nodes) -- NodesDown,
+    length(AliveNodes) / length(Nodes) > 0.5.
+
+in_preferred_partition() ->
+    {ok, {pause_if_all_down, PreferredNodes, _}} =
+        application:get_env(rabbit, cluster_partition_handling),
+    in_preferred_partition(PreferredNodes).
+
+in_preferred_partition(PreferredNodes) ->
+    in_preferred_partition(PreferredNodes, []).
+
+%% True when at least one preferred node (restricted to actual cluster
+%% members) is alive, or when no preferred node is a cluster member.
+in_preferred_partition(PreferredNodes, NodesDown) ->
+    Nodes = rabbit_mnesia:cluster_nodes(all),
+    RealPreferredNodes = [N || N <- PreferredNodes, lists:member(N, Nodes)],
+    AliveNodes = alive_nodes(RealPreferredNodes) -- NodesDown,
+    RealPreferredNodes =:= [] orelse AliveNodes =/= [].
+
+all_nodes_up() ->
+    Nodes = rabbit_mnesia:cluster_nodes(all),
+    length(alive_nodes(Nodes)) =:= length(Nodes).
+
+-spec all_rabbit_nodes_up() -> boolean().
+
+all_rabbit_nodes_up() ->
+    Nodes = rabbit_mnesia:cluster_nodes(all),
+    length(alive_rabbit_nodes(Nodes)) =:= length(Nodes).
+
+-spec alive_nodes([node()]) -> [node()].
+
+alive_nodes() -> alive_nodes(rabbit_mnesia:cluster_nodes(all)).
+alive_nodes(Nodes) -> [N || N <- Nodes, lists:member(N, [node()|nodes()])].
+
+-spec alive_rabbit_nodes([node()]) -> [node()].
+
+alive_rabbit_nodes() -> alive_rabbit_nodes(rabbit_mnesia:cluster_nodes(all)).
+
+alive_rabbit_nodes(Nodes) ->
+    [N || N <- alive_nodes(Nodes), rabbit:is_running(N)].
+
+%% This one is allowed to connect!
+
+-spec ping_all() -> 'ok'.
+
+ping_all() ->
+    [net_adm:ping(N) || N <- rabbit_mnesia:cluster_nodes(all)],
+    ok.
+
+possibly_partitioned_nodes() ->
+    alive_rabbit_nodes() -- rabbit_nodes:all_running().
+
+startup_log([]) ->
+    rabbit_log:info("Starting rabbit_node_monitor~n", []);
+startup_log(Nodes) ->
+    rabbit_log:info("Starting rabbit_node_monitor, might be partitioned from ~p~n",
+                    [Nodes]).
diff --git a/deps/rabbit/src/rabbit_nodes.erl b/deps/rabbit/src/rabbit_nodes.erl
new file mode 100644
index 0000000000..3034a4d513
--- /dev/null
+++ b/deps/rabbit/src/rabbit_nodes.erl
@@ -0,0 +1,157 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.
+%%
+
+-module(rabbit_nodes).
+
+-export([names/1, diagnostics/1, make/1, make/2, parts/1, cookie_hash/0,
+         is_running/2, is_process_running/2,
+         cluster_name/0, set_cluster_name/1, set_cluster_name/2, ensure_epmd/0,
+         all_running/0, name_type/0, running_count/0, total_count/0,
+         await_running_count/2, is_single_node_cluster/0,
+         boot/0]).
+-export([persistent_cluster_id/0, seed_internal_cluster_id/0, seed_user_provided_cluster_name/0]).
+
+-include_lib("kernel/include/inet.hrl").
+-include_lib("rabbit_common/include/rabbit.hrl").
+
+-define(SAMPLING_INTERVAL, 1000).
+
+-define(INTERNAL_CLUSTER_ID_PARAM_NAME, internal_cluster_id).
+
+%%----------------------------------------------------------------------------
+%% API
+%%----------------------------------------------------------------------------
+
+boot() ->
+    seed_internal_cluster_id(),
+    seed_user_provided_cluster_name().
+
+name_type() ->
+    #{nodename_type := NodeType} = rabbit_prelaunch:get_context(),
+    NodeType.
+
+-spec names(string()) ->
+          rabbit_types:ok_or_error2([{string(), integer()}], term()).
+
+names(Hostname) ->
+    rabbit_nodes_common:names(Hostname).
+
+-spec diagnostics([node()]) -> string().
+
+diagnostics(Nodes) ->
+    rabbit_nodes_common:diagnostics(Nodes).
+
+make(NameOrParts) ->
+    rabbit_nodes_common:make(NameOrParts).
+ +make(ShortName, Hostname) -> + make({ShortName, Hostname}). + +parts(NodeStr) -> + rabbit_nodes_common:parts(NodeStr). + +-spec cookie_hash() -> string(). + +cookie_hash() -> + rabbit_nodes_common:cookie_hash(). + +-spec is_running(node(), atom()) -> boolean(). + +is_running(Node, Application) -> + rabbit_nodes_common:is_running(Node, Application). + +-spec is_process_running(node(), atom()) -> boolean(). + +is_process_running(Node, Process) -> + rabbit_nodes_common:is_process_running(Node, Process). + +-spec cluster_name() -> binary(). + +cluster_name() -> + rabbit_runtime_parameters:value_global( + cluster_name, cluster_name_default()). + +cluster_name_default() -> + {ID, _} = parts(node()), + FQDN = rabbit_net:hostname(), + list_to_binary(atom_to_list(make({ID, FQDN}))). + +-spec persistent_cluster_id() -> binary(). +persistent_cluster_id() -> + case rabbit_runtime_parameters:lookup_global(?INTERNAL_CLUSTER_ID_PARAM_NAME) of + not_found -> + seed_internal_cluster_id(), + persistent_cluster_id(); + Param -> + #{value := Val, name := ?INTERNAL_CLUSTER_ID_PARAM_NAME} = maps:from_list(Param), + Val + end. + +-spec seed_internal_cluster_id() -> binary(). +seed_internal_cluster_id() -> + case rabbit_runtime_parameters:lookup_global(?INTERNAL_CLUSTER_ID_PARAM_NAME) of + not_found -> + Id = rabbit_guid:binary(rabbit_guid:gen(), "rabbitmq-cluster-id"), + rabbit_log:info("Initialising internal cluster ID to '~s'", [Id]), + rabbit_runtime_parameters:set_global(?INTERNAL_CLUSTER_ID_PARAM_NAME, Id, ?INTERNAL_USER), + Id; + Param -> + #{value := Val, name := ?INTERNAL_CLUSTER_ID_PARAM_NAME} = maps:from_list(Param), + Val + end. + +seed_user_provided_cluster_name() -> + case application:get_env(rabbit, cluster_name) of + undefined -> ok; + {ok, Name} -> + rabbit_log:info("Setting cluster name to '~s' as configured", [Name]), + set_cluster_name(rabbit_data_coercion:to_binary(Name)) + end. + +-spec set_cluster_name(binary()) -> 'ok'. 
+ +set_cluster_name(Name) -> + set_cluster_name(Name, ?INTERNAL_USER). + +-spec set_cluster_name(binary(), rabbit_types:username()) -> 'ok'. + +set_cluster_name(Name, Username) -> + %% Cluster name should be binary + BinaryName = rabbit_data_coercion:to_binary(Name), + rabbit_runtime_parameters:set_global(cluster_name, BinaryName, Username). + +ensure_epmd() -> + rabbit_nodes_common:ensure_epmd(). + +-spec all_running() -> [node()]. +all_running() -> rabbit_mnesia:cluster_nodes(running). + +-spec running_count() -> integer(). +running_count() -> length(all_running()). + +-spec total_count() -> integer(). +total_count() -> length(rabbit_mnesia:cluster_nodes(all)). + +-spec is_single_node_cluster() -> boolean(). +is_single_node_cluster() -> + total_count() =:= 1. + +-spec await_running_count(integer(), integer()) -> 'ok' | {'error', atom()}. +await_running_count(TargetCount, Timeout) -> + Retries = round(Timeout/?SAMPLING_INTERVAL), + await_running_count_with_retries(TargetCount, Retries). + +await_running_count_with_retries(1, _Retries) -> ok; +await_running_count_with_retries(_TargetCount, Retries) when Retries =:= 0 -> + {error, timeout}; +await_running_count_with_retries(TargetCount, Retries) -> + case running_count() >= TargetCount of + true -> ok; + false -> + timer:sleep(?SAMPLING_INTERVAL), + await_running_count_with_retries(TargetCount, Retries - 1) + end. diff --git a/deps/rabbit/src/rabbit_osiris_metrics.erl b/deps/rabbit/src/rabbit_osiris_metrics.erl new file mode 100644 index 0000000000..7b2574c7e1 --- /dev/null +++ b/deps/rabbit/src/rabbit_osiris_metrics.erl @@ -0,0 +1,103 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. 
You may obtain a copy of the License +%% at https://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% Copyright (c) 2012-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_osiris_metrics). + +-behaviour(gen_server). + +-export([start_link/0]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-define(TICK_TIMEOUT, 5000). +-define(SERVER, ?MODULE). + +-define(STATISTICS_KEYS, + [policy, + operator_policy, + effective_policy_definition, + state, + leader, + online, + members + ]). + +-record(state, {timeout :: non_neg_integer()}). + +%%---------------------------------------------------------------------------- +%% Starts the raw metrics storage and owns the ETS tables. +%%---------------------------------------------------------------------------- + +-spec start_link() -> rabbit_types:ok_pid_or_error(). + +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +init([]) -> + Timeout = application:get_env(rabbit, stream_tick_interval, + ?TICK_TIMEOUT), + erlang:send_after(Timeout, self(), tick), + {ok, #state{timeout = Timeout}}. + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(tick, #state{timeout = Timeout} = State) -> + Data = osiris_counters:overview(), + maps:map( + fun ({osiris_writer, QName}, #{offset := Offs, + first_offset := FstOffs}) -> + COffs = Offs + 1 - FstOffs, + rabbit_core_metrics:queue_stats(QName, COffs, 0, COffs, 0), + Infos = try + %% TODO complete stats! 
+ case rabbit_amqqueue:lookup(QName) of + {ok, Q} -> + rabbit_stream_queue:info(Q, ?STATISTICS_KEYS); + _ -> + [] + end + catch + _:_ -> + %% It's possible that the writer has died but + %% it's still on the amqqueue record, so the + %% `erlang:process_info/2` calls will return + %% `undefined` and crash with a badmatch. + %% At least for now, skipping the metrics might + %% be the best option. Otherwise this brings + %% down `rabbit_sup` and the whole `rabbit` app. + [] + end, + rabbit_core_metrics:queue_stats(QName, Infos), + rabbit_event:notify(queue_stats, Infos ++ [{name, QName}, + {messages, COffs}, + {messages_ready, COffs}, + {messages_unacknowledged, 0}]), + ok; + (_, _V) -> + ok + end, Data), + erlang:send_after(Timeout, self(), tick), + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/deps/rabbit/src/rabbit_parameter_validation.erl b/deps/rabbit/src/rabbit_parameter_validation.erl new file mode 100644 index 0000000000..66287ec799 --- /dev/null +++ b/deps/rabbit/src/rabbit_parameter_validation.erl @@ -0,0 +1,88 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_parameter_validation). + +-export([number/2, integer/2, binary/2, boolean/2, list/2, regex/2, proplist/3, enum/1]). + +number(_Name, Term) when is_number(Term) -> + ok; + +number(Name, Term) -> + {error, "~s should be a number, actually was ~p", [Name, Term]}. + +integer(_Name, Term) when is_integer(Term) -> + ok; + +integer(Name, Term) -> + {error, "~s should be a number, actually was ~p", [Name, Term]}. + +binary(_Name, Term) when is_binary(Term) -> + ok; + +binary(Name, Term) -> + {error, "~s should be binary, actually was ~p", [Name, Term]}. 
+ +boolean(_Name, Term) when is_boolean(Term) -> + ok; +boolean(Name, Term) -> + {error, "~s should be boolean, actually was ~p", [Name, Term]}. + +list(_Name, Term) when is_list(Term) -> + ok; + +list(Name, Term) -> + {error, "~s should be list, actually was ~p", [Name, Term]}. + +regex(Name, Term) when is_binary(Term) -> + case re:compile(Term) of + {ok, _} -> ok; + {error, Reason} -> {error, "~s should be regular expression " + "but is invalid: ~p", [Name, Reason]} + end; +regex(Name, Term) -> + {error, "~s should be a binary but was ~p", [Name, Term]}. + +proplist(Name, Constraints, Term) when is_list(Term) -> + {Results, Remainder} + = lists:foldl( + fun ({Key, Fun, Needed}, {Results0, Term0}) -> + case {lists:keytake(Key, 1, Term0), Needed} of + {{value, {Key, Value}, Term1}, _} -> + {[Fun(Key, Value) | Results0], + Term1}; + {false, mandatory} -> + {[{error, "Key \"~s\" not found in ~s", + [Key, Name]} | Results0], Term0}; + {false, optional} -> + {Results0, Term0} + end + end, {[], Term}, Constraints), + case Remainder of + [] -> Results; + _ -> [{error, "Unrecognised terms ~p in ~s", [Remainder, Name]} + | Results] + end; + +proplist(Name, Constraints, Term0) when is_map(Term0) -> + Term = maps:to_list(Term0), + proplist(Name, Constraints, Term); + +proplist(Name, _Constraints, Term) -> + {error, "~s not a list ~p", [Name, Term]}. + +enum(OptionsA) -> + Options = [list_to_binary(atom_to_list(O)) || O <- OptionsA], + fun (Name, Term) when is_binary(Term) -> + case lists:member(Term, Options) of + true -> ok; + false -> {error, "~s should be one of ~p, actually was ~p", + [Name, Options, Term]} + end; + (Name, Term) -> + {error, "~s should be binary, actually was ~p", [Name, Term]} + end. 
diff --git a/deps/rabbit/src/rabbit_password.erl b/deps/rabbit/src/rabbit_password.erl new file mode 100644 index 0000000000..6a5254b707 --- /dev/null +++ b/deps/rabbit/src/rabbit_password.erl @@ -0,0 +1,52 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_password). +-include("rabbit.hrl"). + +-define(DEFAULT_HASHING_MODULE, rabbit_password_hashing_sha256). + +%% +%% API +%% + +-export([hash/1, hash/2, generate_salt/0, salted_hash/2, salted_hash/3, + hashing_mod/0, hashing_mod/1]). + +hash(Cleartext) -> + hash(hashing_mod(), Cleartext). + +hash(HashingMod, Cleartext) -> + SaltBin = generate_salt(), + Hash = salted_hash(HashingMod, SaltBin, Cleartext), + <<SaltBin/binary, Hash/binary>>. + +generate_salt() -> + Salt = rand:uniform(16#ffffffff), + <<Salt:32>>. + +salted_hash(Salt, Cleartext) -> + salted_hash(hashing_mod(), Salt, Cleartext). + +salted_hash(Mod, Salt, Cleartext) -> + Fun = fun Mod:hash/1, + Fun(<<Salt/binary, Cleartext/binary>>). + +hashing_mod() -> + rabbit_misc:get_env(rabbit, password_hashing_module, + ?DEFAULT_HASHING_MODULE). + +hashing_mod(rabbit_password_hashing_sha256) -> + rabbit_password_hashing_sha256; +hashing_mod(rabbit_password_hashing_md5) -> + rabbit_password_hashing_md5; +%% fall back to the hashing function that's been used prior to 3.6.0 +hashing_mod(undefined) -> + rabbit_password_hashing_md5; +%% if a custom module is configured, simply use it +hashing_mod(CustomMod) when is_atom(CustomMod) -> + CustomMod. 
diff --git a/deps/rabbit/src/rabbit_password_hashing_md5.erl b/deps/rabbit/src/rabbit_password_hashing_md5.erl new file mode 100644 index 0000000000..1e306673ca --- /dev/null +++ b/deps/rabbit/src/rabbit_password_hashing_md5.erl @@ -0,0 +1,19 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +%% Legacy hashing implementation, only used as a last resort when +%% #internal_user.hashing_algorithm is md5 or undefined (the case in +%% pre-3.6.0 user records). + +-module(rabbit_password_hashing_md5). + +-behaviour(rabbit_password_hashing). + +-export([hash/1]). + +hash(Binary) -> + erlang:md5(Binary). diff --git a/deps/rabbit/src/rabbit_password_hashing_sha256.erl b/deps/rabbit/src/rabbit_password_hashing_sha256.erl new file mode 100644 index 0000000000..3ccc298efd --- /dev/null +++ b/deps/rabbit/src/rabbit_password_hashing_sha256.erl @@ -0,0 +1,15 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_password_hashing_sha256). + +-behaviour(rabbit_password_hashing). + +-export([hash/1]). + +hash(Binary) -> + crypto:hash(sha256, Binary). diff --git a/deps/rabbit/src/rabbit_password_hashing_sha512.erl b/deps/rabbit/src/rabbit_password_hashing_sha512.erl new file mode 100644 index 0000000000..c5edf8888a --- /dev/null +++ b/deps/rabbit/src/rabbit_password_hashing_sha512.erl @@ -0,0 +1,15 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. 
If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_password_hashing_sha512). + +-behaviour(rabbit_password_hashing). + +-export([hash/1]). + +hash(Binary) -> + crypto:hash(sha512, Binary). diff --git a/deps/rabbit/src/rabbit_peer_discovery.erl b/deps/rabbit/src/rabbit_peer_discovery.erl new file mode 100644 index 0000000000..1688579450 --- /dev/null +++ b/deps/rabbit/src/rabbit_peer_discovery.erl @@ -0,0 +1,326 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_peer_discovery). + +%% +%% API +%% + +-export([maybe_init/0, discover_cluster_nodes/0, backend/0, node_type/0, + normalize/1, format_discovered_nodes/1, log_configured_backend/0, + register/0, unregister/0, maybe_register/0, maybe_unregister/0, + maybe_inject_randomized_delay/0, lock/0, unlock/1, + discovery_retries/0]). +-export([append_node_prefix/1, node_prefix/0, locking_retry_timeout/0, + lock_acquisition_failure_mode/0]). + +-define(DEFAULT_BACKEND, rabbit_peer_discovery_classic_config). + +%% what node type is used by default for this node when joining +%% a new cluster as a virgin node +-define(DEFAULT_NODE_TYPE, disc). + +%% default node prefix to attach to discovered hostnames +-define(DEFAULT_PREFIX, "rabbit"). + +%% default randomized delay range, in seconds +-define(DEFAULT_STARTUP_RANDOMIZED_DELAY, {5, 60}). + +%% default discovery retries and interval. +-define(DEFAULT_DISCOVERY_RETRY_COUNT, 10). +-define(DEFAULT_DISCOVERY_RETRY_INTERVAL_MS, 500). + +-define(NODENAME_PART_SEPARATOR, "@"). + +-spec backend() -> atom(). 
+
+backend() ->
+    case application:get_env(rabbit, cluster_formation) of
+        {ok, Proplist} ->
+            proplists:get_value(peer_discovery_backend, Proplist, ?DEFAULT_BACKEND);
+        undefined ->
+            ?DEFAULT_BACKEND
+    end.
+
+
+
+-spec node_type() -> rabbit_types:node_type().
+
+node_type() ->
+    case application:get_env(rabbit, cluster_formation) of
+        {ok, Proplist} ->
+            proplists:get_value(node_type, Proplist, ?DEFAULT_NODE_TYPE);
+        undefined ->
+            ?DEFAULT_NODE_TYPE
+    end.
+
+-spec locking_retry_timeout() -> {Retries :: integer(), Timeout :: integer()}.
+
+locking_retry_timeout() ->
+    case application:get_env(rabbit, cluster_formation) of
+        {ok, Proplist} ->
+            Retries = proplists:get_value(lock_retry_limit, Proplist, 10),
+            Timeout = proplists:get_value(lock_retry_timeout, Proplist, 30000),
+            {Retries, Timeout};
+        undefined ->
+            {10, 30000}
+    end.
+
+-spec lock_acquisition_failure_mode() -> ignore | fail.
+
+lock_acquisition_failure_mode() ->
+    case application:get_env(rabbit, cluster_formation) of
+        {ok, Proplist} ->
+            proplists:get_value(lock_acquisition_failure_mode, Proplist, fail);
+        undefined ->
+            fail
+    end.
+
+-spec log_configured_backend() -> ok.
+
+log_configured_backend() ->
+    rabbit_log:info("Configured peer discovery backend: ~s~n", [backend()]).
+
+maybe_init() ->
+    Backend = backend(),
+    code:ensure_loaded(Backend),
+    case erlang:function_exported(Backend, init, 0) of
+        true  ->
+            rabbit_log:debug("Peer discovery backend supports initialisation"),
+            case Backend:init() of
+                ok ->
+                    rabbit_log:debug("Peer discovery backend initialisation succeeded"),
+                    ok;
+                {error, Error} ->
+                    rabbit_log:warning("Peer discovery backend initialisation failed: ~p.", [Error]),
+                    ok
+            end;
+        false ->
+            rabbit_log:debug("Peer discovery backend does not support initialisation"),
+            ok
+    end.
+
+
+%% This module doesn't currently sanity-check the return value of
+%% `Backend:list_nodes()`. Therefore, it could return something invalid:
+%% thus the `{ok, any()}` in the spec.
+%% +%% `rabbit_mnesia:init_from_config()` does some verifications. + +-spec discover_cluster_nodes() -> + {ok, {Nodes :: [node()], NodeType :: rabbit_types:node_type()} | any()} | + {error, Reason :: string()}. + +discover_cluster_nodes() -> + Backend = backend(), + normalize(Backend:list_nodes()). + + +-spec maybe_register() -> ok. + +maybe_register() -> + Backend = backend(), + case Backend:supports_registration() of + true -> + register(), + Backend:post_registration(); + false -> + rabbit_log:info("Peer discovery backend ~s does not support registration, skipping registration.", [Backend]), + ok + end. + + +-spec maybe_unregister() -> ok. + +maybe_unregister() -> + Backend = backend(), + case Backend:supports_registration() of + true -> + unregister(); + false -> + rabbit_log:info("Peer discovery backend ~s does not support registration, skipping unregistration.", [Backend]), + ok + end. + +-spec discovery_retries() -> {Retries :: integer(), Interval :: integer()}. + +discovery_retries() -> + case application:get_env(rabbit, cluster_formation) of + {ok, Proplist} -> + Retries = proplists:get_value(discovery_retry_limit, Proplist, ?DEFAULT_DISCOVERY_RETRY_COUNT), + Interval = proplists:get_value(discovery_retry_interval, Proplist, ?DEFAULT_DISCOVERY_RETRY_INTERVAL_MS), + {Retries, Interval}; + undefined -> + {?DEFAULT_DISCOVERY_RETRY_COUNT, ?DEFAULT_DISCOVERY_RETRY_INTERVAL_MS} + end. + + +-spec maybe_inject_randomized_delay() -> ok. +maybe_inject_randomized_delay() -> + Backend = backend(), + case Backend:supports_registration() of + true -> + rabbit_log:info("Peer discovery backend ~s supports registration.", [Backend]), + inject_randomized_delay(); + false -> + rabbit_log:info("Peer discovery backend ~s does not support registration, skipping randomized startup delay.", [Backend]), + ok + end. + +-spec inject_randomized_delay() -> ok. 
+ +inject_randomized_delay() -> + {Min, Max} = randomized_delay_range_in_ms(), + case {Min, Max} of + %% When the max value is set to 0, consider the delay to be disabled. + %% In addition, `rand:uniform/1` will fail with a "no function clause" + %% when the argument is 0. + {_, 0} -> + rabbit_log:info("Randomized delay range's upper bound is set to 0. Considering it disabled."), + ok; + {_, N} when is_number(N) -> + rand:seed(exsplus), + RandomVal = rand:uniform(round(N)), + rabbit_log:debug("Randomized startup delay: configured range is from ~p to ~p milliseconds, PRNG pick: ~p...", + [Min, Max, RandomVal]), + Effective = case RandomVal < Min of + true -> Min; + false -> RandomVal + end, + rabbit_log:info("Will wait for ~p milliseconds before proceeding with registration...", [Effective]), + timer:sleep(Effective), + ok + end. + +-spec randomized_delay_range_in_ms() -> {integer(), integer()}. + +randomized_delay_range_in_ms() -> + Backend = backend(), + Default = case erlang:function_exported(Backend, randomized_startup_delay_range, 0) of + true -> Backend:randomized_startup_delay_range(); + false -> ?DEFAULT_STARTUP_RANDOMIZED_DELAY + end, + {Min, Max} = case application:get_env(rabbit, cluster_formation) of + {ok, Proplist} -> + proplists:get_value(randomized_startup_delay_range, Proplist, Default); + undefined -> + Default + end, + {Min * 1000, Max * 1000}. + + +-spec register() -> ok. + +register() -> + Backend = backend(), + rabbit_log:info("Will register with peer discovery backend ~s", [Backend]), + case Backend:register() of + ok -> ok; + {error, Error} -> + rabbit_log:error("Failed to register with peer discovery backend ~s: ~p", + [Backend, Error]), + ok + end. + + +-spec unregister() -> ok. 
+ +unregister() -> + Backend = backend(), + rabbit_log:info("Will unregister with peer discovery backend ~s", [Backend]), + case Backend:unregister() of + ok -> ok; + {error, Error} -> + rabbit_log:error("Failed to unregister with peer discovery backend ~s: ~p", + [Backend, Error]), + ok + end. + +-spec lock() -> {ok, Data :: term()} | not_supported | {error, Reason :: string()}. + +lock() -> + Backend = backend(), + rabbit_log:info("Will try to lock with peer discovery backend ~s", [Backend]), + case Backend:lock(node()) of + {error, Reason} = Error -> + rabbit_log:error("Failed to lock with peer discovery backend ~s: ~p", + [Backend, Reason]), + Error; + Any -> + Any + end. + +-spec unlock(Data :: term()) -> ok | {error, Reason :: string()}. + +unlock(Data) -> + Backend = backend(), + rabbit_log:info("Will try to unlock with peer discovery backend ~s", [Backend]), + case Backend:unlock(Data) of + {error, Reason} = Error -> + rabbit_log:error("Failed to unlock with peer discovery backend ~s: ~p, " + "lock data: ~p", + [Backend, Reason, Data]), + Error; + Any -> + Any + end. + +%% +%% Implementation +%% + +-spec normalize(Nodes :: [node()] | + {Nodes :: [node()], + NodeType :: rabbit_types:node_type()} | + {ok, Nodes :: [node()]} | + {ok, {Nodes :: [node()], + NodeType :: rabbit_types:node_type()}} | + {error, Reason :: string()}) -> + {ok, {Nodes :: [node()], NodeType :: rabbit_types:node_type()}} | + {error, Reason :: string()}. + +normalize(Nodes) when is_list(Nodes) -> + {ok, {Nodes, disc}}; +normalize({Nodes, NodeType}) when is_list(Nodes) andalso is_atom(NodeType) -> + {ok, {Nodes, NodeType}}; +normalize({ok, Nodes}) when is_list(Nodes) -> + {ok, {Nodes, disc}}; +normalize({ok, {Nodes, NodeType}}) when is_list(Nodes) andalso is_atom(NodeType) -> + {ok, {Nodes, NodeType}}; +normalize({error, Reason}) -> + {error, Reason}. + +-spec format_discovered_nodes(Nodes :: list()) -> string(). 
+ +format_discovered_nodes(Nodes) -> + %% NOTE: in OTP 21 string:join/2 is deprecated but still available. + %% Its recommended replacement is not a drop-in one, though, so + %% we will not be switching just yet. + string:join(lists:map(fun rabbit_data_coercion:to_list/1, Nodes), ", "). + + + +-spec node_prefix() -> string(). + +node_prefix() -> + case string:tokens(atom_to_list(node()), ?NODENAME_PART_SEPARATOR) of + [Prefix, _] -> Prefix; + [_] -> ?DEFAULT_PREFIX + end. + + + +-spec append_node_prefix(Value :: binary() | string()) -> string(). + +append_node_prefix(Value) when is_binary(Value) orelse is_list(Value) -> + Val = rabbit_data_coercion:to_list(Value), + Hostname = case string:tokens(Val, ?NODENAME_PART_SEPARATOR) of + [_ExistingPrefix, HN] -> HN; + [HN] -> HN + end, + string:join([node_prefix(), Hostname], ?NODENAME_PART_SEPARATOR). diff --git a/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl b/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl new file mode 100644 index 0000000000..8bc7382a75 --- /dev/null +++ b/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl @@ -0,0 +1,75 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_peer_discovery_classic_config). +-behaviour(rabbit_peer_discovery_backend). + +-include("rabbit.hrl"). + +-export([list_nodes/0, supports_registration/0, register/0, unregister/0, + post_registration/0, lock/1, unlock/1]). + +%% +%% API +%% + +-spec list_nodes() -> {ok, {Nodes :: [node()], rabbit_types:node_type()}} | + {error, Reason :: string()}. + +list_nodes() -> + case application:get_env(rabbit, cluster_nodes, {[], disc}) of + {_Nodes, _NodeType} = Pair -> {ok, Pair}; + Nodes when is_list(Nodes) -> {ok, {Nodes, disc}} + end. 
+ +-spec supports_registration() -> boolean(). + +supports_registration() -> + %% If we don't have any nodes configured, skip randomized delay and similar operations + %% as we don't want to delay startup for no reason. MK. + has_any_peer_nodes_configured(). + +-spec register() -> ok. + +register() -> + ok. + +-spec unregister() -> ok. + +unregister() -> + ok. + +-spec post_registration() -> ok. + +post_registration() -> + ok. + +-spec lock(Node :: atom()) -> not_supported. + +lock(_Node) -> + not_supported. + +-spec unlock(Data :: term()) -> ok. + +unlock(_Data) -> + ok. + +%% +%% Helpers +%% + +has_any_peer_nodes_configured() -> + case application:get_env(rabbit, cluster_nodes, []) of + {[], _NodeType} -> + false; + {Nodes, _NodeType} when is_list(Nodes) -> + true; + [] -> + false; + Nodes when is_list(Nodes) -> + true + end. diff --git a/deps/rabbit/src/rabbit_peer_discovery_dns.erl b/deps/rabbit/src/rabbit_peer_discovery_dns.erl new file mode 100644 index 0000000000..6e343a6e2d --- /dev/null +++ b/deps/rabbit/src/rabbit_peer_discovery_dns.erl @@ -0,0 +1,113 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_peer_discovery_dns). +-behaviour(rabbit_peer_discovery_backend). + +-include("rabbit.hrl"). + +-export([list_nodes/0, supports_registration/0, register/0, unregister/0, + post_registration/0, lock/1, unlock/1]). +%% for tests +-export([discover_nodes/2, discover_hostnames/2]). + +%% +%% API +%% + +-spec list_nodes() -> + {ok, {Nodes :: [node()], rabbit_types:node_type()}}. 
+ +list_nodes() -> + case application:get_env(rabbit, cluster_formation) of + undefined -> + {ok, {[], disc}}; + {ok, ClusterFormation} -> + case proplists:get_value(peer_discovery_dns, ClusterFormation) of + undefined -> + rabbit_log:warning("Peer discovery backend is set to ~s " + "but final config does not contain rabbit.cluster_formation.peer_discovery_dns. " + "Cannot discover any nodes because seed hostname is not configured!", + [?MODULE]), + {ok, {[], disc}}; + Proplist -> + Hostname = rabbit_data_coercion:to_list(proplists:get_value(hostname, Proplist)), + + {ok, {discover_nodes(Hostname, net_kernel:longnames()), rabbit_peer_discovery:node_type()}} + end + end. + + +-spec supports_registration() -> boolean(). + +supports_registration() -> + false. + + +-spec register() -> ok. + +register() -> + ok. + +-spec unregister() -> ok. + +unregister() -> + ok. + +-spec post_registration() -> ok. + +post_registration() -> + ok. + +-spec lock(Node :: atom()) -> not_supported. + +lock(_Node) -> + not_supported. + +-spec unlock(Data :: term()) -> ok. + +unlock(_Data) -> + ok. + +%% +%% Implementation +%% + +discover_nodes(SeedHostname, LongNamesUsed) -> + [list_to_atom(rabbit_peer_discovery:append_node_prefix(H)) || + H <- discover_hostnames(SeedHostname, LongNamesUsed)]. + +discover_hostnames(SeedHostname, LongNamesUsed) -> + lookup(SeedHostname, LongNamesUsed, ipv4) ++ + lookup(SeedHostname, LongNamesUsed, ipv6). + +decode_record(ipv4) -> + a; +decode_record(ipv6) -> + aaaa. + +lookup(SeedHostname, LongNamesUsed, IPv) -> + IPs = inet_res:lookup(SeedHostname, in, decode_record(IPv)), + rabbit_log:info("Addresses discovered via ~s records of ~s: ~s", + [string:to_upper(atom_to_list(decode_record(IPv))), + SeedHostname, + string:join([inet_parse:ntoa(IP) || IP <- IPs], ", ")]), + Hosts = [extract_host(inet:gethostbyaddr(A), LongNamesUsed, A) || + A <- IPs], + lists:filter(fun(E) -> E =/= error end, Hosts). 
+ + +%% long node names are used +extract_host({ok, {hostent, FQDN, _, _, _, _}}, true, _Address) -> + FQDN; +%% short node names are used +extract_host({ok, {hostent, FQDN, _, _, _, _}}, false, _Address) -> + lists:nth(1, string:tokens(FQDN, ".")); +extract_host({error, Error}, _, Address) -> + rabbit_log:error("Reverse DNS lookup for address ~s failed: ~p", + [inet_parse:ntoa(Address), Error]), + error. diff --git a/deps/rabbit/src/rabbit_plugins.erl b/deps/rabbit/src/rabbit_plugins.erl new file mode 100644 index 0000000000..5697ffc29a --- /dev/null +++ b/deps/rabbit/src/rabbit_plugins.erl @@ -0,0 +1,699 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2011-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_plugins). +-include_lib("rabbit_common/include/rabbit.hrl"). +-include_lib("stdlib/include/zip.hrl"). + +-export([setup/0, active/0, read_enabled/1, list/1, list/2, dependencies/3, running_plugins/0]). +-export([ensure/1]). +-export([validate_plugins/1, format_invalid_plugins/1]). +-export([is_strictly_plugin/1, strictly_plugins/2, strictly_plugins/1]). +-export([plugins_dir/0, plugin_names/1, plugins_expand_dir/0, enabled_plugins_file/0]). + +% Export for testing purpose. +-export([is_version_supported/2, validate_plugins/2]). +%%---------------------------------------------------------------------------- + +-type plugin_name() :: atom(). + +%%---------------------------------------------------------------------------- + +-spec ensure(string()) -> {'ok', [atom()], [atom()]} | {error, any()}. + +ensure(FileJustChanged) -> + case rabbit:is_running() of + true -> ensure1(FileJustChanged); + false -> {error, rabbit_not_running} + end. 
+ +ensure1(FileJustChanged0) -> + {ok, OurFile0} = application:get_env(rabbit, enabled_plugins_file), + FileJustChanged = filename:nativename(FileJustChanged0), + OurFile = filename:nativename(OurFile0), + case OurFile of + FileJustChanged -> + Enabled = read_enabled(OurFile), + Wanted = prepare_plugins(Enabled), + Current = active(), + Start = Wanted -- Current, + Stop = Current -- Wanted, + rabbit:start_apps(Start), + %% We need sync_notify here since mgmt will attempt to look at all + %% the modules for the disabled plugins - if they are unloaded + %% that won't work. + ok = rabbit_event:sync_notify(plugins_changed, [{enabled, Start}, + {disabled, Stop}]), + %% The app_utils module stops the apps in reverse order, so we should + %% pass them here in dependency order. + rabbit:stop_apps(lists:reverse(Stop)), + clean_plugins(Stop), + case {Start, Stop} of + {[], []} -> + ok; + {[], _} -> + rabbit_log:info("Plugins changed; disabled ~p~n", + [Stop]); + {_, []} -> + rabbit_log:info("Plugins changed; enabled ~p~n", + [Start]); + {_, _} -> + rabbit_log:info("Plugins changed; enabled ~p, disabled ~p~n", + [Start, Stop]) + end, + {ok, Start, Stop}; + _ -> + {error, {enabled_plugins_mismatch, FileJustChanged, OurFile}} + end. + +-spec plugins_expand_dir() -> file:filename(). +plugins_expand_dir() -> + case application:get_env(rabbit, plugins_expand_dir) of + {ok, ExpandDir} -> + ExpandDir; + _ -> + filename:join([rabbit_mnesia:dir(), "plugins_expand_dir"]) + end. + +-spec plugins_dir() -> file:filename(). +plugins_dir() -> + case application:get_env(rabbit, plugins_dir) of + {ok, PluginsDistDir} -> + PluginsDistDir; + _ -> + filename:join([rabbit_mnesia:dir(), "plugins_dir_stub"]) + end. + +-spec enabled_plugins_file() -> file:filename(). +enabled_plugins_file() -> + case application:get_env(rabbit, enabled_plugins_file) of + {ok, Val} -> + Val; + _ -> + filename:join([rabbit_mnesia:dir(), "enabled_plugins"]) + end. + +-spec enabled_plugins() -> [atom()]. 
+enabled_plugins() -> + case application:get_env(rabbit, enabled_plugins_file) of + {ok, EnabledFile} -> + read_enabled(EnabledFile); + _ -> + [] + end. + +%% @doc Prepares the file system and installs all enabled plugins. + +-spec setup() -> [plugin_name()]. + +setup() -> + ExpandDir = plugins_expand_dir(), + %% Eliminate the contents of the destination directory + case delete_recursively(ExpandDir) of + ok -> ok; + {error, E1} -> throw({error, {cannot_delete_plugins_expand_dir, + [ExpandDir, E1]}}) + end, + Enabled = enabled_plugins(), + prepare_plugins(Enabled). + +%% @doc Lists the plugins which are currently running. + +-spec active() -> [plugin_name()]. + +active() -> + InstalledPlugins = plugin_names(list(plugins_dir())), + [App || {App, _, _} <- rabbit_misc:which_applications(), + lists:member(App, InstalledPlugins)]. + +%% @doc Get the list of plugins which are ready to be enabled. + +-spec list(string()) -> [#plugin{}]. + +list(PluginsPath) -> + list(PluginsPath, false). + +-spec list(string(), boolean()) -> [#plugin{}]. + +list(PluginsPath, IncludeRequiredDeps) -> + {AllPlugins, LoadingProblems} = discover_plugins(split_path(PluginsPath)), + {UniquePlugins, DuplicateProblems} = remove_duplicate_plugins(AllPlugins), + Plugins1 = maybe_keep_required_deps(IncludeRequiredDeps, UniquePlugins), + Plugins2 = remove_plugins(Plugins1), + maybe_report_plugin_loading_problems(LoadingProblems ++ DuplicateProblems), + ensure_dependencies(Plugins2). + +%% @doc Read the list of enabled plugins from the supplied term file. + +-spec read_enabled(file:filename()) -> [plugin_name()]. + +read_enabled(PluginsFile) -> + case rabbit_file:read_term_file(PluginsFile) of + {ok, [Plugins]} -> Plugins; + {ok, []} -> []; + {ok, [_|_]} -> throw({error, {malformed_enabled_plugins_file, + PluginsFile}}); + {error, enoent} -> []; + {error, Reason} -> throw({error, {cannot_read_enabled_plugins_file, + PluginsFile, Reason}}) + end. 
+ +%% @doc Calculate the dependency graph from <i>Sources</i>. +%% When Reverse =:= true the bottom/leaf level applications are returned in +%% the resulting list, otherwise they're skipped. + +-spec dependencies(boolean(), [plugin_name()], [#plugin{}]) -> + [plugin_name()]. + +dependencies(Reverse, Sources, AllPlugins) -> + {ok, G} = rabbit_misc:build_acyclic_graph( + fun ({App, _Deps}) -> [{App, App}] end, + fun ({App, Deps}) -> [{App, Dep} || Dep <- Deps] end, + [{Name, Deps} || #plugin{name = Name, + dependencies = Deps} <- AllPlugins]), + Dests = case Reverse of + false -> digraph_utils:reachable(Sources, G); + true -> digraph_utils:reaching(Sources, G) + end, + OrderedDests = digraph_utils:postorder(digraph_utils:subgraph(G, Dests)), + true = digraph:delete(G), + OrderedDests. + +%% Filter real plugins from application dependencies + +-spec is_strictly_plugin(#plugin{}) -> boolean(). + +is_strictly_plugin(#plugin{extra_dependencies = ExtraDeps}) -> + lists:member(rabbit, ExtraDeps). + +-spec strictly_plugins([plugin_name()], [#plugin{}]) -> [plugin_name()]. + +strictly_plugins(Plugins, AllPlugins) -> + lists:filter( + fun(Name) -> + is_strictly_plugin(lists:keyfind(Name, #plugin.name, AllPlugins)) + end, Plugins). + +-spec strictly_plugins([plugin_name()]) -> [plugin_name()]. + +strictly_plugins(Plugins) -> + AllPlugins = list(plugins_dir()), + lists:filter( + fun(Name) -> + is_strictly_plugin(lists:keyfind(Name, #plugin.name, AllPlugins)) + end, Plugins). + +%% For a few known cases, an externally provided plugin can be trusted. +%% In this special case, it overrides the plugin. +is_plugin_provided_by_otp(#plugin{name = eldap}) -> + %% eldap was added to Erlang/OTP R15B01 (ERTS 5.9.1). In this case, + %% we prefer this version to the plugin. + rabbit_misc:version_compare(erlang:system_info(version), "5.9.1", gte); +is_plugin_provided_by_otp(_) -> + false. + +%% Make sure we don't list OTP apps in here, and also that we detect +%% missing dependencies. 
%% Resolves every declared dependency of the given plugins. Dependencies
%% that are not plugins themselves must either be loadable OTP
%% applications (moved to extra_dependencies) or the call throws
%% {error, {missing_dependencies, Missing, Blame}}.
ensure_dependencies(Plugins) ->
    Known = plugin_names(Plugins),
    Unresolved = lists:usort([Dep || #plugin{dependencies = Deps} <- Plugins,
                                     Dep <- Deps,
                                     not lists:member(Dep, Known)]),
    {OTPApps, Missing} = lists:partition(fun is_loadable/1, Unresolved),
    case Missing of
        [] ->
            ok;
        _ ->
            %% Name the plugins that declared the unresolvable deps.
            Blame = [Name || #plugin{name = Name,
                                     dependencies = Deps} <- Plugins,
                             lists:any(fun(Dep) ->
                                               lists:member(Dep, Missing)
                                       end, Deps)],
            throw({error, {missing_dependencies, Missing, Blame}})
    end,
    [P#plugin{dependencies = Deps -- OTPApps,
              extra_dependencies = Deps -- (Deps -- OTPApps)}
     || P = #plugin{dependencies = Deps} <- Plugins].

%% An application counts as loadable if application:load/1 succeeds (it
%% is unloaded again immediately) or it is already loaded.
is_loadable(App) ->
    case application:load(App) of
        ok ->
            application:unload(App),
            true;
        {error, {already_loaded, _}} ->
            true;
        _ ->
            false
    end.


%% List running plugins along with their version.
-spec running_plugins() -> {ok, [{atom(), Vsn :: string()}]}.
running_plugins() ->
    Active = active(),
    Versions = [{App, Vsn}
                || {App, _, Vsn} <- rabbit_misc:which_applications(),
                   lists:member(App, Active)],
    {ok, Versions}.

%%----------------------------------------------------------------------------

%% Expands the wanted plugin set to include dependencies, validates it,
%% unpacks/activates every valid plugin under the expand directory and
%% returns the wanted plugin names.
prepare_plugins(Enabled) ->
    ExpandDir = plugins_expand_dir(),
    AllPlugins = list(plugins_dir()),
    Wanted = dependencies(false, Enabled, AllPlugins),
    WantedPlugins = lookup_plugins(Wanted, AllPlugins),
    {ValidPlugins, Problems} = validate_plugins(WantedPlugins),
    maybe_warn_about_invalid_plugins(Problems),
    case filelib:ensure_dir(ExpandDir ++ "/") of
        ok ->
            ok;
        {error, E2} ->
            throw({error, {cannot_create_plugins_expand_dir,
                           [ExpandDir, E2]}})
    end,
    lists:foreach(fun(Plugin) -> prepare_plugin(Plugin, ExpandDir) end,
                  ValidPlugins),
    Wanted.

%% Logs a single warning covering all plugins that failed validation.
maybe_warn_about_invalid_plugins([]) ->
    ok;
maybe_warn_about_invalid_plugins(InvalidPlugins) ->
    rabbit_log:warning(format_invalid_plugins(InvalidPlugins)).
%% Renders a flattened, human-readable report for a list of
%% {PluginName, Errors} pairs produced by plugin validation.
format_invalid_plugins(InvalidPlugins) ->
    lists:flatten(["Failed to enable some plugins: \r\n"
                   | [format_invalid_plugin(Plugin)
                      || Plugin <- InvalidPlugins]]).

%% One report section per plugin: the plugin name followed by one line
%% per validation error.
format_invalid_plugin({Name, Errors}) ->
    [io_lib:format(" ~p:~n", [Name])
     | [format_invalid_plugin_error(Err) || Err <- Errors]].

%% A dependency of the plugin could not be found or failed validation.
format_invalid_plugin_error({missing_dependency, Dep}) ->
    io_lib:format(" Dependency is missing or invalid: ~p~n", [Dep]);
%% a plugin doesn't support the effective broker version
format_invalid_plugin_error({broker_version_mismatch, Version, Required}) ->
    io_lib:format(" Plugin doesn't support current server version."
                  " Actual broker version: ~p, supported by the plugin: ~p~n",
                  [Version, format_required_versions(Required)]);
%% one of dependencies of a plugin doesn't match its version requirements
format_invalid_plugin_error({{dependency_version_mismatch, Version, Required}, Name}) ->
    io_lib:format(" Version '~p' of dependency '~p' is unsupported."
                  " Version ranges supported by the plugin: ~p~n",
                  [Version, Name, Required]);
%% catch-all so a single unknown error does not break report rendering
format_invalid_plugin_error(Err) ->
    io_lib:format(" Unknown error ~p~n", [Err]).

%% Formats each requirement as "X.Y.Z-X.Y.x" so users can see the whole
%% supported minor-release range.
%% FIX: the dots must be escaped as "\\." in the Erlang string literal;
%% the original "\." is not a recognized string escape and reaches re as
%% a bare ".", i.e. "match any character".
format_required_versions(Versions) ->
    lists:map(fun(V) ->
                      case re:run(V, "^[0-9]*\\.[0-9]*\\.", [{capture, all, list}]) of
                          {match, [Sub]} ->
                              lists:flatten(io_lib:format("~s-~sx", [V, Sub]));
                          _ ->
                              V
                      end
              end, Versions).

%% Validates plugins against the version of the rabbit application;
%% "0.0.0" is used for development builds where no version is set.
%% FIX: the original bound the variable twice in a row
%% ("RabbitVersion = RabbitVersion = case ..."), a redundant duplicated
%% match.
validate_plugins(Plugins) ->
    %% may return {error, {already_loaded, _}}; that is fine here
    _ = application:load(rabbit),
    RabbitVersion = case application:get_key(rabbit, vsn) of
                        undefined -> "0.0.0";
                        {ok, Val} -> Val
                    end,
    validate_plugins(Plugins, RabbitVersion).
%% Validates each plugin against the broker version and against the
%% versions of the plugins accepted so far. Returns {ValidPlugins,
%% Errors} where Errors is a list of {PluginName, ErrorList} pairs.
validate_plugins(Plugins, BrokerVersion) ->
    lists:foldl(
      fun(#plugin{name = Name,
                  broker_version_requirements = BrokerVersionReqs,
                  dependency_version_requirements = DepsVersions} = Plugin,
          {Plugins0, Errors}) ->
              case is_version_supported(BrokerVersion, BrokerVersionReqs) of
                  true ->
                      %% Development builds report "0.0.0", in which case
                      %% version requirements cannot be checked; warn.
                      case BrokerVersion of
                          "0.0.0" ->
                              rabbit_log:warning(
                                "Running development version of the broker."
                                " Requirement ~p for plugin ~p is ignored.",
                                [BrokerVersionReqs, Name]);
                          _ -> ok
                      end,
                      case check_plugins_versions(Name, Plugins0, DepsVersions) of
                          ok           -> {[Plugin | Plugins0], Errors};
                          {error, Err} -> {Plugins0, [{Name, Err} | Errors]}
                      end;
                  false ->
                      Error = [{broker_version_mismatch, BrokerVersion, BrokerVersionReqs}],
                      {Plugins0, [{Name, Error} | Errors]}
              end
      end,
      {[],[]},
      Plugins).

%% Checks that every dependency listed in RequiredVersions is present
%% among AllPlugins with a supported version. Returns ok or
%% {error, Problems}.
check_plugins_versions(PluginName, AllPlugins, RequiredVersions) ->
    ExistingVersions = [{Name, Vsn}
                        || #plugin{name = Name, version = Vsn} <- AllPlugins],
    Problems = lists:foldl(
                 fun({Name, Versions}, Acc) ->
                         case proplists:get_value(Name, ExistingVersions) of
                             undefined -> [{missing_dependency, Name} | Acc];
                             Version ->
                                 case is_version_supported(Version, Versions) of
                                     true ->
                                         case Version of
                                             "" ->
                                                 %% FIX: the format string has three ~p
                                                 %% placeholders but only two arguments
                                                 %% ([Versions, PluginName]) were supplied,
                                                 %% making this warning badarg at runtime
                                                 %% instead of logging.
                                                 rabbit_log:warning(
                                                   "~p plugin version is not defined."
                                                   " Requirement ~p for plugin ~p is ignored",
                                                   [Name, Versions, PluginName]);
                                             _ -> ok
                                         end,
                                         Acc;
                                     false ->
                                         [{{dependency_version_mismatch, Version, Versions}, Name} | Acc]
                                 end
                         end
                 end,
                 [],
                 RequiredVersions),
    case Problems of
        [] -> ok;
        _  -> {error, Problems}
    end.
%% Reports whether Version satisfies any of the expected version
%% requirements. An empty or development broker version ("" / "0.0.0")
%% and an empty requirement list always pass.
is_version_supported("", _) -> true;
is_version_supported("0.0.0", _) -> true;
is_version_supported(_Version, []) -> true;
is_version_supported(VersionFull, ExpectedVersions) ->
    %% Pre-release versions should be supported by plugins, therefore
    %% the preview (pre-release) part is stripped before comparison.
    Version = remove_version_preview_part(VersionFull),
    %% FIX: dropped the redundant
    %% "case ... of true -> true; false -> false end" around this
    %% already-boolean expression.
    lists:any(fun(ExpectedVersion) ->
                      rabbit_misc:strict_version_minor_equivalent(ExpectedVersion,
                                                                  Version)
                          andalso
                          rabbit_misc:version_compare(ExpectedVersion, Version, lte)
              end,
              ExpectedVersions).

%% Strips the pre-release component from a semver string,
%% e.g. "3.8.0-beta.1" becomes <<"3.8.0">>.
remove_version_preview_part(Version) ->
    {Ver, _Preview} = rabbit_semver:parse(Version),
    iolist_to_binary(rabbit_semver:format({Ver, {[], []}})).

%% Purges the compiled modules and on-disk expansion of each plugin.
clean_plugins(Plugins) ->
    ExpandDir = plugins_expand_dir(),
    [clean_plugin(Plugin, ExpandDir) || Plugin <- Plugins].

clean_plugin(Plugin, ExpandDir) ->
    {ok, Mods} = application:get_key(Plugin, modules),
    %% unload may legitimately fail (e.g. app not loaded); best effort
    _ = application:unload(Plugin),
    [begin
         code:soft_purge(Mod),
         code:delete(Mod),
         %% assert the module really is gone
         false = code:is_loaded(Mod)
     end || Mod <- Mods],
    delete_recursively(rabbit_misc:format("~s/~s", [ExpandDir, Plugin])).

%% Adds a plugin's ebin directory to the code path and eagerly loads one
%% of its modules to detect BEAM files built with an incompatible
%% Erlang/OTP release. Throws on unloadable modules.
prepare_dir_plugin(PluginAppDescPath) ->
    PluginEbinDir = filename:dirname(PluginAppDescPath),
    Plugin = filename:basename(PluginAppDescPath, ".app"),
    code:add_patha(PluginEbinDir),
    case filelib:wildcard(PluginEbinDir++ "/*.beam") of
        [] ->
            ok;
        [BeamPath | _] ->
            Module = list_to_atom(filename:basename(BeamPath, ".beam")),
            case code:ensure_loaded(Module) of
                {module, _} ->
                    ok;
                {error, badfile} ->
                    rabbit_log:error("Failed to enable plugin \"~s\": "
                                     "it may have been built with an "
                                     "incompatible (more recent?) "
                                     "version of Erlang~n", [Plugin]),
                    throw({plugin_built_with_incompatible_erlang, Plugin});
                Error ->
                    throw({plugin_module_unloadable, Plugin, Error})
            end
    end.
+ +%%---------------------------------------------------------------------------- + +delete_recursively(Fn) -> + case rabbit_file:recursive_delete([Fn]) of + ok -> ok; + {error, {Path, E}} -> {error, {cannot_delete, Path, E}} + end. + +find_unzipped_app_file(ExpandDir, Files) -> + StripComponents = length(filename:split(ExpandDir)), + [ X || X <- Files, + [_AppName, "ebin", MaybeAppFile] <- + [lists:nthtail(StripComponents, filename:split(X))], + lists:suffix(".app", MaybeAppFile) + ]. + +prepare_plugin(#plugin{type = ez, name = Name, location = Location}, ExpandDir) -> + case zip:unzip(Location, [{cwd, ExpandDir}]) of + {ok, Files} -> + case find_unzipped_app_file(ExpandDir, Files) of + [PluginAppDescPath|_] -> + prepare_dir_plugin(PluginAppDescPath); + _ -> + rabbit_log:error("Plugin archive '~s' doesn't contain an .app file~n", [Location]), + throw({app_file_missing, Name, Location}) + end; + {error, Reason} -> + rabbit_log:error("Could not unzip plugin archive '~s': ~p~n", [Location, Reason]), + throw({failed_to_unzip_plugin, Name, Location, Reason}) + end; +prepare_plugin(#plugin{type = dir, location = Location, name = Name}, + _ExpandDir) -> + case filelib:wildcard(Location ++ "/ebin/*.app") of + [PluginAppDescPath|_] -> + prepare_dir_plugin(PluginAppDescPath); + _ -> + rabbit_log:error("Plugin directory '~s' doesn't contain an .app file~n", [Location]), + throw({app_file_missing, Name, Location}) + end. + +plugin_info({ez, EZ}) -> + case read_app_file(EZ) of + {application, Name, Props} -> mkplugin(Name, Props, ez, EZ); + {error, Reason} -> {error, EZ, Reason} + end; +plugin_info({app, App}) -> + case rabbit_file:read_term_file(App) of + {ok, [{application, Name, Props}]} -> + mkplugin(Name, Props, dir, + filename:absname( + filename:dirname(filename:dirname(App)))); + {error, Reason} -> + {error, App, {invalid_app, Reason}} + end. 
+ +mkplugin(Name, Props, Type, Location) -> + Version = proplists:get_value(vsn, Props, "0"), + Description = proplists:get_value(description, Props, ""), + Dependencies = proplists:get_value(applications, Props, []), + BrokerVersions = proplists:get_value(broker_version_requirements, Props, []), + DepsVersions = proplists:get_value(dependency_version_requirements, Props, []), + #plugin{name = Name, version = Version, description = Description, + dependencies = Dependencies, location = Location, type = Type, + broker_version_requirements = BrokerVersions, + dependency_version_requirements = DepsVersions}. + +read_app_file(EZ) -> + case zip:list_dir(EZ) of + {ok, [_|ZippedFiles]} -> + case find_app_files(ZippedFiles) of + [AppPath|_] -> + {ok, [{AppPath, AppFile}]} = + zip:extract(EZ, [{file_list, [AppPath]}, memory]), + parse_binary(AppFile); + [] -> + {error, no_app_file} + end; + {error, Reason} -> + {error, {invalid_ez, Reason}} + end. + +find_app_files(ZippedFiles) -> + {ok, RE} = re:compile("^.*/ebin/.*.app$"), + [Path || {zip_file, Path, _, _, _, _} <- ZippedFiles, + re:run(Path, RE, [{capture, none}]) =:= match]. + +parse_binary(Bin) -> + try + {ok, Ts, _} = erl_scan:string(binary_to_list(Bin)), + {ok, Term} = erl_parse:parse_term(Ts), + Term + catch + Err -> {error, {invalid_app, Err}} + end. + +plugin_names(Plugins) -> + [Name || #plugin{name = Name} <- Plugins]. + +lookup_plugins(Names, AllPlugins) -> + %% Preserve order of Names + lists:map( + fun(Name) -> + lists:keyfind(Name, #plugin.name, AllPlugins) + end, + Names). + +%% Split PATH-like value into its components. +split_path(PathString) -> + Delimiters = case os:type() of + {unix, _} -> ":"; + {win32, _} -> ";" + end, + string:tokens(PathString, Delimiters). + +%% Search for files using glob in a given dir. Returns full filenames of those files. +full_path_wildcard(Glob, Dir) -> + [filename:join([Dir, File]) || File <- filelib:wildcard(Glob, Dir)]. 
%% Returns {ez, Path} entries for every .ez archive found in the given
%% directories.
list_ezs(Dirs) ->
    lists:flatmap(fun(Dir) ->
                          [{ez, EZ} || EZ <- full_path_wildcard("*.ez", Dir)]
                  end, Dirs).

%% Returns {app, Path} entries for everything that looks like an
%% unpacked OTP application (has an ebin/*.app file) in the given
%% directories.
list_free_apps(Dirs) ->
    lists:flatmap(fun(Dir) ->
                          [{app, App} || App <- full_path_wildcard("*/ebin/*.app", Dir)]
                  end, Dirs).

%% Ordering used to sort plugins: by name first, then by version, so
%% several versions of the same plugin end up adjacent, newest last.
compare_by_name_and_version(#plugin{name = Name, version = VsnA},
                            #plugin{name = Name, version = VsnB}) ->
    rabbit_semver:lte(VsnA, VsnB);
compare_by_name_and_version(#plugin{name = NameA},
                            #plugin{name = NameB}) ->
    NameA =< NameB.

-spec discover_plugins([Directory]) -> {[#plugin{}], [Problem]} when
      Directory :: file:name(),
      Problem :: {file:name(), term()}.
discover_plugins(PluginsDirs) ->
    Candidates = list_ezs(PluginsDirs) ++ list_free_apps(PluginsDirs),
    read_plugins_info(Candidates, {[], []}).

%% Reads plugin metadata for each candidate path, accumulating parsed
%% plugins and {Location, Reason} problems.
read_plugins_info(Paths, Acc0) ->
    lists:foldl(fun(Path, {Plugins, Problems}) ->
                        case plugin_info(Path) of
                            #plugin{} = Plugin ->
                                {[Plugin | Plugins], Problems};
                            {error, Location, Reason} ->
                                {Plugins, [{Location, Reason} | Problems]}
                        end
                end, Acc0, Paths).

%% Keeps only the most recent version of each plugin; older duplicates
%% are reported as problems.
remove_duplicate_plugins(Plugins) ->
    %% Reverse order ensures that if there are several versions of the
    %% same plugin, the most recent one comes first.
    Sorted = lists:reverse(
               lists:sort(fun compare_by_name_and_version/2, Plugins)),
    remove_duplicate_plugins(Sorted, {[], []}).
+ +remove_duplicate_plugins([], Acc) -> + Acc; +remove_duplicate_plugins([Best = #plugin{name = Name}, Offender = #plugin{name = Name} | Rest], + {Plugins0, Problems0}) -> + Problems1 = [{Offender#plugin.location, duplicate_plugin}|Problems0], + remove_duplicate_plugins([Best|Rest], {Plugins0, Problems1}); +remove_duplicate_plugins([Plugin|Rest], {Plugins0, Problems0}) -> + Plugins1 = [Plugin|Plugins0], + remove_duplicate_plugins(Rest, {Plugins1, Problems0}). + +maybe_keep_required_deps(true, Plugins) -> + Plugins; +maybe_keep_required_deps(false, Plugins) -> + RabbitDeps = list_all_deps([rabbit]), + lists:filter(fun + (#plugin{name = Name}) -> + not lists:member(Name, RabbitDeps); + (Name) when is_atom(Name) -> + not lists:member(Name, RabbitDeps) + end, + Plugins). + +list_all_deps(Applications) -> + list_all_deps(Applications, []). + +list_all_deps([Application | Applications], Deps) -> + %% We load the application to be sure we can get the "applications" key. + %% This is required for rabbitmq-plugins for instance. + application:load(Application), + NewDeps = [Application | Deps], + case application:get_key(Application, applications) of + {ok, ApplicationDeps} -> + RemainingApplications0 = ApplicationDeps ++ Applications, + RemainingApplications = RemainingApplications0 -- NewDeps, + list_all_deps(RemainingApplications, NewDeps); + undefined -> + list_all_deps(Applications, NewDeps) + end; +list_all_deps([], Deps) -> + Deps. + +remove_plugins(Plugins) -> + %% We want to filter out all Erlang applications in the plugins + %% directories which are not actual RabbitMQ plugin. + %% + %% A RabbitMQ plugin must depend on `rabbit`. We also want to keep + %% all applications they depend on, except Erlang/OTP applications. + %% In the end, we will skip: + %% * Erlang/OTP applications + %% * All applications which do not depend on `rabbit` and which + %% are not direct or indirect dependencies of plugins. 
+ ActualPlugins = [Plugin + || #plugin{dependencies = Deps} = Plugin <- Plugins, + lists:member(rabbit, Deps)], + %% As said above, we want to keep all non-plugins which are + %% dependencies of plugins. + PluginDeps = lists:usort( + lists:flatten( + [resolve_deps(Plugins, Plugin) + || Plugin <- ActualPlugins])), + lists:filter( + fun(#plugin{name = Name} = Plugin) -> + IsOTPApp = is_plugin_provided_by_otp(Plugin), + IsAPlugin = + lists:member(Plugin, ActualPlugins) orelse + lists:member(Name, PluginDeps), + if + IsOTPApp -> + rabbit_log:debug( + "Plugins discovery: " + "ignoring ~s, Erlang/OTP application", + [Name]); + not IsAPlugin -> + rabbit_log:debug( + "Plugins discovery: " + "ignoring ~s, not a RabbitMQ plugin", + [Name]); + true -> + ok + end, + not (IsOTPApp orelse not IsAPlugin) + end, Plugins). + +resolve_deps(Plugins, #plugin{dependencies = Deps}) -> + IndirectDeps = [case lists:keyfind(Dep, #plugin.name, Plugins) of + false -> []; + DepPlugin -> resolve_deps(Plugins, DepPlugin) + end + || Dep <- Deps], + Deps ++ IndirectDeps. + +maybe_report_plugin_loading_problems([]) -> + ok; +maybe_report_plugin_loading_problems(Problems) -> + io:format(standard_error, + "Problem reading some plugins: ~p~n", + [Problems]). diff --git a/deps/rabbit/src/rabbit_policies.erl b/deps/rabbit/src/rabbit_policies.erl new file mode 100644 index 0000000000..54e4d2c03e --- /dev/null +++ b/deps/rabbit/src/rabbit_policies.erl @@ -0,0 +1,179 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_policies). + +%% Provides built-in policy parameter +%% validation functions. + +-behaviour(rabbit_policy_validator). +-behaviour(rabbit_policy_merge_strategy). + +-include("rabbit.hrl"). 
+ +-export([register/0, validate_policy/1, merge_policy_value/3]). + +-rabbit_boot_step({?MODULE, + [{description, "internal policies"}, + {mfa, {rabbit_policies, register, []}}, + {requires, rabbit_registry}, + {enables, recovery}]}). + +register() -> + %% Note: there are more validators registered from other modules, + %% such as rabbit_mirror_queue_misc + [rabbit_registry:register(Class, Name, ?MODULE) || + {Class, Name} <- [{policy_validator, <<"alternate-exchange">>}, + {policy_validator, <<"dead-letter-exchange">>}, + {policy_validator, <<"dead-letter-routing-key">>}, + {policy_validator, <<"message-ttl">>}, + {policy_validator, <<"expires">>}, + {policy_validator, <<"max-length">>}, + {policy_validator, <<"max-length-bytes">>}, + {policy_validator, <<"max-in-memory-length">>}, + {policy_validator, <<"max-in-memory-bytes">>}, + {policy_validator, <<"queue-mode">>}, + {policy_validator, <<"overflow">>}, + {policy_validator, <<"delivery-limit">>}, + {policy_validator, <<"max-age">>}, + {policy_validator, <<"max-segment-size">>}, + {policy_validator, <<"queue-leader-locator">>}, + {policy_validator, <<"initial-cluster-size">>}, + {operator_policy_validator, <<"expires">>}, + {operator_policy_validator, <<"message-ttl">>}, + {operator_policy_validator, <<"max-length">>}, + {operator_policy_validator, <<"max-length-bytes">>}, + {operator_policy_validator, <<"max-in-memory-length">>}, + {operator_policy_validator, <<"max-in-memory-bytes">>}, + {operator_policy_validator, <<"delivery-limit">>}, + {policy_merge_strategy, <<"expires">>}, + {policy_merge_strategy, <<"message-ttl">>}, + {policy_merge_strategy, <<"max-length">>}, + {policy_merge_strategy, <<"max-length-bytes">>}, + {policy_merge_strategy, <<"max-in-memory-length">>}, + {policy_merge_strategy, <<"max-in-memory-bytes">>}, + {policy_merge_strategy, <<"delivery-limit">>}]], + ok. + +-spec validate_policy([{binary(), term()}]) -> rabbit_policy_validator:validate_results(). 
%% Folds validate_policy0/2 over the policy definition, stopping at the
%% first error encountered.
validate_policy(Terms) ->
    lists:foldl(fun ({Key, Value}, ok) -> validate_policy0(Key, Value);
                    (_, Error)         -> Error
                end, ok, Terms).

validate_policy0(<<"alternate-exchange">>, Value)
  when is_binary(Value) ->
    ok;
validate_policy0(<<"alternate-exchange">>, Value) ->
    {error, "~p is not a valid alternate exchange name", [Value]};

validate_policy0(<<"dead-letter-exchange">>, Value)
  when is_binary(Value) ->
    ok;
validate_policy0(<<"dead-letter-exchange">>, Value) ->
    {error, "~p is not a valid dead letter exchange name", [Value]};

validate_policy0(<<"dead-letter-routing-key">>, Value)
  when is_binary(Value) ->
    ok;
validate_policy0(<<"dead-letter-routing-key">>, Value) ->
    {error, "~p is not a valid dead letter routing key", [Value]};

validate_policy0(<<"message-ttl">>, Value)
  when is_integer(Value), Value >= 0 ->
    ok;
validate_policy0(<<"message-ttl">>, Value) ->
    {error, "~p is not a valid message TTL", [Value]};

%% queue expiry must be strictly positive
validate_policy0(<<"expires">>, Value)
  when is_integer(Value), Value >= 1 ->
    ok;
validate_policy0(<<"expires">>, Value) ->
    {error, "~p is not a valid queue expiry", [Value]};

validate_policy0(<<"max-length">>, Value)
  when is_integer(Value), Value >= 0 ->
    ok;
validate_policy0(<<"max-length">>, Value) ->
    {error, "~p is not a valid maximum length", [Value]};

validate_policy0(<<"max-length-bytes">>, Value)
  when is_integer(Value), Value >= 0 ->
    ok;
validate_policy0(<<"max-length-bytes">>, Value) ->
    {error, "~p is not a valid maximum length in bytes", [Value]};

validate_policy0(<<"max-in-memory-length">>, Value)
  when is_integer(Value), Value >= 0 ->
    ok;
validate_policy0(<<"max-in-memory-length">>, Value) ->
    %% FIX: this error previously read "maximum memory in bytes" —
    %% copy-pasted from the max-in-memory-bytes validator below — even
    %% though this key is a message count, not a byte size.
    {error, "~p is not a valid maximum in-memory length", [Value]};

validate_policy0(<<"max-in-memory-bytes">>, Value)
  when is_integer(Value), Value >= 0 ->
    ok;
validate_policy0(<<"max-in-memory-bytes">>, Value) ->
    {error, "~p is not a valid maximum memory in bytes", [Value]};

validate_policy0(<<"queue-mode">>, <<"default">>) ->
    ok;
validate_policy0(<<"queue-mode">>, <<"lazy">>) ->
    ok;
validate_policy0(<<"queue-mode">>, Value) ->
    {error, "~p is not a valid queue-mode value", [Value]};
validate_policy0(<<"overflow">>, <<"drop-head">>) ->
    ok;
validate_policy0(<<"overflow">>, <<"reject-publish">>) ->
    ok;
validate_policy0(<<"overflow">>, <<"reject-publish-dlx">>) ->
    ok;
validate_policy0(<<"overflow">>, Value) ->
    {error, "~p is not a valid overflow value", [Value]};

validate_policy0(<<"delivery-limit">>, Value)
  when is_integer(Value), Value >= 0 ->
    ok;
validate_policy0(<<"delivery-limit">>, Value) ->
    {error, "~p is not a valid delivery limit", [Value]};

%% max-age is a duration string; delegate parsing to rabbit_amqqueue
validate_policy0(<<"max-age">>, Value) ->
    case rabbit_amqqueue:check_max_age(Value) of
        {error, _} ->
            {error, "~p is not a valid max age", [Value]};
        _ ->
            ok
    end;

validate_policy0(<<"queue-leader-locator">>, <<"client-local">>) ->
    ok;
validate_policy0(<<"queue-leader-locator">>, <<"random">>) ->
    ok;
validate_policy0(<<"queue-leader-locator">>, <<"least-leaders">>) ->
    ok;
validate_policy0(<<"queue-leader-locator">>, Value) ->
    {error, "~p is not a valid queue leader locator value", [Value]};

validate_policy0(<<"initial-cluster-size">>, Value)
  when is_integer(Value), Value >= 0 ->
    ok;
validate_policy0(<<"initial-cluster-size">>, Value) ->
    {error, "~p is not a valid cluster size", [Value]};

validate_policy0(<<"max-segment-size">>, Value)
  when is_integer(Value), Value >= 0 ->
    ok;
validate_policy0(<<"max-segment-size">>, Value) ->
    {error, "~p is not a valid segment size", [Value]}.
+ +merge_policy_value(<<"message-ttl">>, Val, OpVal) -> min(Val, OpVal); +merge_policy_value(<<"max-length">>, Val, OpVal) -> min(Val, OpVal); +merge_policy_value(<<"max-length-bytes">>, Val, OpVal) -> min(Val, OpVal); +merge_policy_value(<<"max-in-memory-length">>, Val, OpVal) -> min(Val, OpVal); +merge_policy_value(<<"max-in-memory-bytes">>, Val, OpVal) -> min(Val, OpVal); +merge_policy_value(<<"expires">>, Val, OpVal) -> min(Val, OpVal); +merge_policy_value(<<"delivery-limit">>, Val, OpVal) -> min(Val, OpVal). diff --git a/deps/rabbit/src/rabbit_policy.erl b/deps/rabbit/src/rabbit_policy.erl new file mode 100644 index 0000000000..44807de97d --- /dev/null +++ b/deps/rabbit/src/rabbit_policy.erl @@ -0,0 +1,557 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_policy). + +%% Policies is a way to apply optional arguments ("x-args") +%% to exchanges and queues in bulk, using name matching. +%% +%% Only one policy can apply to a given queue or exchange +%% at a time. Priorities help determine what policy should +%% take precedence. +%% +%% Policies build on runtime parameters. Policy-driven parameters +%% are well known and therefore validated. +%% +%% See also: +%% +%% * rabbit_runtime_parameters +%% * rabbit_policies +%% * rabbit_registry + +%% TODO specs + +-behaviour(rabbit_runtime_parameter). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include("amqqueue.hrl"). + +-import(rabbit_misc, [pget/2, pget/3]). + +-export([register/0]). +-export([invalidate/0, recover/0]). +-export([name/1, name_op/1, effective_definition/1, merge_operator_definitions/2, get/2, get_arg/3, set/1]). +-export([validate/5, notify/5, notify_clear/4]). 
+-export([parse_set/7, set/7, delete/3, lookup/2, list/0, list/1, + list_formatted/1, list_formatted/3, info_keys/0]). +-export([parse_set_op/7, set_op/7, delete_op/3, lookup_op/2, list_op/0, list_op/1, + list_formatted_op/1, list_formatted_op/3]). + +-rabbit_boot_step({?MODULE, + [{description, "policy parameters"}, + {mfa, {rabbit_policy, register, []}}, + {requires, rabbit_registry}, + {enables, recovery}]}). + +register() -> + rabbit_registry:register(runtime_parameter, <<"policy">>, ?MODULE), + rabbit_registry:register(runtime_parameter, <<"operator_policy">>, ?MODULE). + +name(Q) when ?is_amqqueue(Q) -> + Policy = amqqueue:get_policy(Q), + name0(Policy); +name(#exchange{policy = Policy}) -> name0(Policy). + +name_op(Q) when ?is_amqqueue(Q) -> + OpPolicy = amqqueue:get_operator_policy(Q), + name0(OpPolicy); +name_op(#exchange{operator_policy = Policy}) -> name0(Policy). + +name0(undefined) -> none; +name0(Policy) -> pget(name, Policy). + +effective_definition(Q) when ?is_amqqueue(Q) -> + Policy = amqqueue:get_policy(Q), + OpPolicy = amqqueue:get_operator_policy(Q), + merge_operator_definitions(Policy, OpPolicy); +effective_definition(#exchange{policy = Policy, operator_policy = OpPolicy}) -> + merge_operator_definitions(Policy, OpPolicy). 
+
+%% Merges a regular policy definition with an operator policy definition
+%% into one effective definition. Keys present on only one side are taken
+%% as-is; keys present on both are combined via merge_policy_value/3.
+%% Returns `undefined' when neither policy is set.
+merge_operator_definitions(undefined, undefined) -> undefined;
+merge_operator_definitions(Policy, undefined)    -> pget(definition, Policy);
+merge_operator_definitions(undefined, OpPolicy)  -> pget(definition, OpPolicy);
+merge_operator_definitions(Policy, OpPolicy) ->
+    OpDefinition = rabbit_data_coercion:to_map(pget(definition, OpPolicy, [])),
+    Definition   = rabbit_data_coercion:to_map(pget(definition, Policy, [])),
+    Keys   = maps:keys(Definition),
+    OpKeys = maps:keys(OpDefinition),
+    %% lists:umerge/2 gives the sorted union of both key sets, so every
+    %% key from either definition appears exactly once in the result.
+    lists:map(fun(Key) ->
+                      case {maps:get(Key, Definition, undefined), maps:get(Key, OpDefinition, undefined)} of
+                          {Val, undefined}   -> {Key, Val};
+                          {undefined, OpVal} -> {Key, OpVal};
+                          {Val, OpVal}       -> {Key, merge_policy_value(Key, Val, OpVal)}
+                      end
+              end,
+              lists:umerge(Keys, OpKeys)).
+
+%% Stamps a queue or exchange record with the policy and operator policy
+%% that currently match its name; either may be `undefined'.
+set(Q0) when ?is_amqqueue(Q0) ->
+    Name = amqqueue:get_name(Q0),
+    Policy = match(Name),
+    OpPolicy = match_op(Name),
+    Q1 = amqqueue:set_policy(Q0, Policy),
+    Q2 = amqqueue:set_operator_policy(Q1, OpPolicy),
+    Q2;
+set(X = #exchange{name = Name}) ->
+    X#exchange{policy = match(Name), operator_policy = match_op(Name)}.
+
+%% Highest-priority regular policy matching this resource name, or `undefined'.
+match(Name = #resource{virtual_host = VHost}) ->
+    match(Name, list(VHost)).
+
+%% Highest-priority operator policy matching this resource name, or `undefined'.
+match_op(Name = #resource{virtual_host = VHost}) ->
+    match(Name, list_op(VHost)).
+
+%% Looks up a single key in the effective (merged) definition of a queue,
+%% an exchange, or — slowly — a bare resource name.
+get(Name, Q) when ?is_amqqueue(Q) ->
+    Policy = amqqueue:get_policy(Q),
+    OpPolicy = amqqueue:get_operator_policy(Q),
+    get0(Name, Policy, OpPolicy);
+get(Name, #exchange{policy = Policy, operator_policy = OpPolicy}) ->
+    get0(Name, Policy, OpPolicy);
+
+%% Caution - SLOW.
+%% (Re-matches all policies of the vhost on every call instead of reading
+%% the policy already stamped on the record.)
+get(Name, EntityName = #resource{virtual_host = VHost}) ->
+    get0(Name,
+         match(EntityName, list(VHost)),
+         match(EntityName, list_op(VHost))).
+
+%% Resolves one key from a (policy, operator-policy) pair of definitions.
+%% When both define the key, the per-key merge strategy decides the winner.
+get0(_Name, undefined, undefined) -> undefined;
+get0(Name, undefined, OpPolicy) -> pget(Name, pget(definition, OpPolicy, []));
+get0(Name, Policy, undefined) -> pget(Name, pget(definition, Policy, []));
+get0(Name, Policy, OpPolicy) ->
+    OpDefinition = pget(definition, OpPolicy, []),
+    Definition = pget(definition, Policy, []),
+    case {pget(Name, Definition), pget(Name, OpDefinition)} of
+        {undefined, undefined} -> undefined;
+        {Val, undefined} -> Val;
+        {undefined, Val} -> Val;
+        {Val, OpVal} -> merge_policy_value(Name, Val, OpVal)
+    end.
+
+%% Dispatches the merge of a doubly-defined key to the module registered
+%% under `policy_merge_strategy' for that key, falling back to the default
+%% strategies in rabbit_policies when none is registered.
+merge_policy_value(Name, PolicyVal, OpVal) ->
+    case policy_merge_strategy(Name) of
+        {ok, Module} -> Module:merge_policy_value(Name, PolicyVal, OpVal);
+        {error, not_found} -> rabbit_policies:merge_policy_value(Name, PolicyVal, OpVal)
+    end.
+
+%% Looks up the registered merge-strategy module for a policy key, if any.
+policy_merge_strategy(Name) ->
+    case rabbit_registry:binary_to_type(rabbit_data_coercion:to_binary(Name)) of
+        {error, not_found} ->
+            {error, not_found};
+        T ->
+            rabbit_registry:lookup_module(policy_merge_strategy, T)
+    end.
+
+%% Many heads for optimisation
+%% Returns the exchange argument AName when present; otherwise falls back
+%% to policy key PName via get/2. The first two heads short-circuit the
+%% empty-arguments cases.
+get_arg(_AName, _PName, #exchange{arguments = [], policy = undefined}) ->
+    undefined;
+get_arg(_AName, PName, X = #exchange{arguments = []}) ->
+    get(PName, X);
+get_arg(AName, PName, X = #exchange{arguments = Args}) ->
+    case rabbit_misc:table_lookup(Args, AName) of
+        undefined -> get(PName, X);
+        {_Type, Arg} -> Arg
+    end.
+
+%%----------------------------------------------------------------------------
+
+%% Gets called during upgrades - therefore must not assume anything about the
+%% state of Mnesia
+%% Drops a marker file; recover/0 later notices it and re-applies policies.
+invalidate() ->
+    rabbit_file:write_file(invalid_file(), <<"">>).
+
+%% Re-applies all policies if the invalidation marker is present, then
+%% removes the marker.
+recover() ->
+    case rabbit_file:is_file(invalid_file()) of
+        true  -> recover0(),
+                 rabbit_file:delete(invalid_file());
+        false -> ok
+    end.
+
+%% To get here we have to have just completed an Mnesia upgrade - i.e. we are
+%% the first node starting. So we can rewrite the whole database.
Note that +%% recovery has not yet happened; we must work with the rabbit_durable_<thing> +%% variants. +recover0() -> + Xs = mnesia:dirty_match_object(rabbit_durable_exchange, #exchange{_ = '_'}), + Qs = rabbit_amqqueue:list_with_possible_retry( + fun() -> + mnesia:dirty_match_object( + rabbit_durable_queue, amqqueue:pattern_match_all()) + end), + Policies = list(), + OpPolicies = list_op(), + [rabbit_misc:execute_mnesia_transaction( + fun () -> + mnesia:write( + rabbit_durable_exchange, + rabbit_exchange_decorator:set( + X#exchange{policy = match(Name, Policies), + operator_policy = match(Name, OpPolicies)}), + write) + end) || X = #exchange{name = Name} <- Xs], + [begin + QName = amqqueue:get_name(Q0), + Policy1 = match(QName, Policies), + Q1 = amqqueue:set_policy(Q0, Policy1), + OpPolicy1 = match(QName, OpPolicies), + Q2 = amqqueue:set_operator_policy(Q1, OpPolicy1), + Q3 = rabbit_queue_decorator:set(Q2), + ?try_mnesia_tx_or_upgrade_amqqueue_and_retry( + rabbit_misc:execute_mnesia_transaction( + fun () -> + mnesia:write(rabbit_durable_queue, Q3, write) + end), + begin + Q4 = amqqueue:upgrade(Q3), + rabbit_misc:execute_mnesia_transaction( + fun () -> + mnesia:write(rabbit_durable_queue, Q4, write) + end) + end) + end || Q0 <- Qs], + ok. + +invalid_file() -> + filename:join(rabbit_mnesia:dir(), "policies_are_invalid"). + +%%---------------------------------------------------------------------------- + +parse_set_op(VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) -> + parse_set(<<"operator_policy">>, VHost, Name, Pattern, Definition, Priority, + ApplyTo, ActingUser). + +parse_set(VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) -> + parse_set(<<"policy">>, VHost, Name, Pattern, Definition, Priority, ApplyTo, + ActingUser). 
+
+%% Coerces Priority to an integer, then decodes and stores the policy.
+%% Returns an {error, Fmt, Args} triple when the priority is not numeric.
+parse_set(Type, VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
+    try rabbit_data_coercion:to_integer(Priority) of
+        Num -> parse_set0(Type, VHost, Name, Pattern, Definition, Num, ApplyTo,
+                          ActingUser)
+    catch
+        error:badarg -> {error, "~p priority must be a number", [Priority]}
+    end.
+
+%% Decodes the JSON definition and persists the policy as a runtime
+%% parameter; logs on success, returns {error_string, _} on bad JSON.
+parse_set0(Type, VHost, Name, Pattern, Defn, Priority, ApplyTo, ActingUser) ->
+    case rabbit_json:try_decode(Defn) of
+        {ok, Term} ->
+            R = set0(Type, VHost, Name,
+                     [{<<"pattern">>, Pattern},
+                      {<<"definition">>, maps:to_list(Term)},
+                      {<<"priority">>, Priority},
+                      {<<"apply-to">>, ApplyTo}],
+                     ActingUser),
+            rabbit_log:info("Successfully set policy '~s' matching ~s names in virtual host '~s' using pattern '~s'",
+                            [Name, ApplyTo, VHost, Pattern]),
+            R;
+        {error, Reason} ->
+            {error_string,
+             rabbit_misc:format("JSON decoding error. Reason: ~ts", [Reason])}
+    end.
+
+%% As set/7 but for operator policies.
+set_op(VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
+    set(<<"operator_policy">>, VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser).
+
+set(VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
+    set(<<"policy">>, VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser).
+
+%% Stores an already-decoded definition, defaulting priority to 0 and
+%% apply-to to <<"all">> when unset.
+set(Type, VHost, Name, Pattern, Definition, Priority, ApplyTo, ActingUser) ->
+    PolicyProps = [{<<"pattern">>, Pattern},
+                   {<<"definition">>, Definition},
+                   {<<"priority">>, case Priority of
+                                        undefined -> 0;
+                                        _ -> Priority
+                                    end},
+                   {<<"apply-to">>, case ApplyTo of
+                                        undefined -> <<"all">>;
+                                        _ -> ApplyTo
+                                    end}],
+    set0(Type, VHost, Name, PolicyProps, ActingUser).
+
+%% Policies are persisted as runtime parameters of component Type.
+set0(Type, VHost, Name, Term, ActingUser) ->
+    rabbit_runtime_parameters:set_any(VHost, Type, Name, Term, ActingUser).
+
+%% Removes an operator policy (clears the runtime parameter).
+delete_op(VHost, Name, ActingUser) ->
+    rabbit_runtime_parameters:clear_any(VHost, <<"operator_policy">>, Name, ActingUser).
+
+%% Removes a regular policy (clears the runtime parameter).
+delete(VHost, Name, ActingUser) ->
+    rabbit_runtime_parameters:clear_any(VHost, <<"policy">>, Name, ActingUser).
+
+%% Fetches one operator policy by name; `not_found' when absent.
+lookup_op(VHost, Name) ->
+    case rabbit_runtime_parameters:lookup(VHost, <<"operator_policy">>, Name) of
+        not_found -> not_found;
+        P -> p(P, fun ident/1)
+    end.
+
+%% Fetches one regular policy by name; `not_found' when absent.
+lookup(VHost, Name) ->
+    case rabbit_runtime_parameters:lookup(VHost, <<"policy">>, Name) of
+        not_found -> not_found;
+        P -> p(P, fun ident/1)
+    end.
+
+%% All operator policies across all vhosts ('_' is the mnesia wildcard).
+list_op() ->
+    list_op('_').
+
+list_op(VHost) ->
+    list0_op(VHost, fun ident/1).
+
+%% Operator policies with JSON-encoded definitions, ordered by priority
+%% (for CLI/management display).
+list_formatted_op(VHost) ->
+    order_policies(list0_op(VHost, fun rabbit_json:encode/1)).
+
+list_formatted_op(VHost, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map(AggregatorPid, Ref,
+                                     fun(P) -> P end, list_formatted_op(VHost)).
+
+list0_op(VHost, DefnFun) ->
+    [p(P, DefnFun)
+     || P <- rabbit_runtime_parameters:list(VHost, <<"operator_policy">>)].
+
+
+%% All regular policies across all vhosts.
+list() ->
+    list('_').
+
+list(VHost) ->
+    list0(VHost, fun ident/1).
+
+%% Regular policies with JSON-encoded definitions, ordered by priority.
+list_formatted(VHost) ->
+    order_policies(list0(VHost, fun rabbit_json:encode/1)).
+
+list_formatted(VHost, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map(AggregatorPid, Ref,
+                                     fun(P) -> P end, list_formatted(VHost)).
+
+list0(VHost, DefnFun) ->
+    [p(P, DefnFun) || P <- rabbit_runtime_parameters:list(VHost, <<"policy">>)].
+
+%% sort_pred/2 orders by descending priority; negating it here yields
+%% ascending priority for display purposes.
+order_policies(PropList) ->
+    lists:sort(fun (A, B) -> not sort_pred(A, B) end, PropList).
+
+%% Converts a raw runtime-parameter proplist into the policy info proplist;
+%% DefnFun renders the definition (identity, or JSON encoding for display).
+p(Parameter, DefnFun) ->
+    Value = pget(value, Parameter),
+    [{vhost, pget(vhost, Parameter)},
+     {name, pget(name, Parameter)},
+     {pattern, pget(<<"pattern">>, Value)},
+     {'apply-to', pget(<<"apply-to">>, Value)},
+     {definition, DefnFun(pget(<<"definition">>, Value))},
+     {priority, pget(<<"priority">>, Value)}].
+
+%% Identity; used as the no-op DefnFun.
+ident(X) -> X.
+
+%% Keys emitted by p/2, in display order.
+info_keys() -> [vhost, name, 'apply-to', pattern, definition, priority].
+
+%%----------------------------------------------------------------------------
+
+%% rabbit_runtime_parameter callback: validates the policy proplist against
+%% the appropriate validation spec before it is stored.
+validate(_VHost, <<"policy">>, Name, Term, _User) ->
+    rabbit_parameter_validation:proplist(
+      Name, policy_validation(), Term);
+validate(_VHost, <<"operator_policy">>, Name, Term, _User) ->
+    rabbit_parameter_validation:proplist(
+      Name, operator_policy_validation(), Term).
+
+%% rabbit_runtime_parameter callback: fires an event and re-applies all
+%% policies of the vhost after a policy is set.
+%% NOTE(review): both clauses emit `policy_set', while notify_clear/4 below
+%% distinguishes `operator_policy_cleared' — looks asymmetric; confirm this
+%% is intended before changing the event name.
+notify(VHost, <<"policy">>, Name, Term, ActingUser) ->
+    rabbit_event:notify(policy_set, [{name, Name}, {vhost, VHost},
+                                     {user_who_performed_action, ActingUser} | Term]),
+    update_policies(VHost);
+notify(VHost, <<"operator_policy">>, Name, Term, ActingUser) ->
+    rabbit_event:notify(policy_set, [{name, Name}, {vhost, VHost},
+                                     {user_who_performed_action, ActingUser} | Term]),
+    update_policies(VHost).
+
+%% rabbit_runtime_parameter callback: fires an event and re-applies all
+%% policies of the vhost after a policy is cleared.
+notify_clear(VHost, <<"policy">>, Name, ActingUser) ->
+    rabbit_event:notify(policy_cleared, [{name, Name}, {vhost, VHost},
+                                         {user_who_performed_action, ActingUser}]),
+    update_policies(VHost);
+notify_clear(VHost, <<"operator_policy">>, Name, ActingUser) ->
+    rabbit_event:notify(operator_policy_cleared,
+                        [{name, Name}, {vhost, VHost},
+                         {user_who_performed_action, ActingUser}]),
+    update_policies(VHost).
+
+%%----------------------------------------------------------------------------
+
+%% [1] We need to prevent this from becoming O(n^2) in a similar
+%% manner to rabbit_binding:remove_for_{source,destination}. So see
+%% the comment in rabbit_binding:lock_route_tables/0 for more rationale.
+%% [2] We could be here in a post-tx fun after the vhost has been
+%% deleted; in which case it's fine to do nothing.
+%% Recomputes the matching policies for every exchange and queue in the
+%% vhost inside one mnesia transaction, then fires policy_changed
+%% notifications (outside the transaction, each wrapped in `catch') for
+%% the entities whose policy actually changed.
+update_policies(VHost) ->
+    Tabs = [rabbit_queue, rabbit_durable_queue,
+            rabbit_exchange, rabbit_durable_exchange],
+    {Xs, Qs} = rabbit_misc:execute_mnesia_transaction(
+        fun() ->
+            [mnesia:lock({table, T}, write) || T <- Tabs], %% [1]
+            case catch {list(VHost), list_op(VHost)} of
+                {'EXIT', {throw, {error, {no_such_vhost, _}}}} ->
+                    {[], []}; %% [2]
+                {'EXIT', Exit} ->
+                    exit(Exit);
+                {Policies, OpPolicies} ->
+                    {[update_exchange(X, Policies, OpPolicies) ||
+                        X <- rabbit_exchange:list(VHost)],
+                     [update_queue(Q, Policies, OpPolicies) ||
+                        Q <- rabbit_amqqueue:list(VHost)]}
+                end
+        end),
+    [catch notify(X) || X <- Xs],
+    [catch notify(Q) || Q <- Qs],
+    ok.
+
+%% Returns `no_change' when the exchange's matching policies are unchanged,
+%% otherwise {OldExchange, NewExchange} for notify/1. When the exchange
+%% disappeared during the update, the old record is paired with itself.
+update_exchange(X = #exchange{name = XName,
+                              policy = OldPolicy,
+                              operator_policy = OldOpPolicy},
+                Policies, OpPolicies) ->
+    case {match(XName, Policies), match(XName, OpPolicies)} of
+        {OldPolicy, OldOpPolicy} -> no_change;
+        {NewPolicy, NewOpPolicy} ->
+            NewExchange = rabbit_exchange:update(
+                XName,
+                fun(X0) ->
+                    rabbit_exchange_decorator:set(
+                        X0 #exchange{policy = NewPolicy,
+                                     operator_policy = NewOpPolicy})
+                end),
+            case NewExchange of
+                #exchange{} = X1 -> {X, X1};
+                not_found -> {X, X }
+            end
+    end.
+
+%% Queue analogue of update_exchange/3; additionally increments the
+%% queue's policy_version so queue processes can detect the change.
+update_queue(Q0, Policies, OpPolicies) when ?is_amqqueue(Q0) ->
+    QName = amqqueue:get_name(Q0),
+    OldPolicy = amqqueue:get_policy(Q0),
+    OldOpPolicy = amqqueue:get_operator_policy(Q0),
+    case {match(QName, Policies), match(QName, OpPolicies)} of
+        {OldPolicy, OldOpPolicy} -> no_change;
+        {NewPolicy, NewOpPolicy} ->
+            F = fun (QFun0) ->
+                    QFun1 = amqqueue:set_policy(QFun0, NewPolicy),
+                    QFun2 = amqqueue:set_operator_policy(QFun1, NewOpPolicy),
+                    NewPolicyVersion = amqqueue:get_policy_version(QFun2) + 1,
+                    QFun3 = amqqueue:set_policy_version(QFun2, NewPolicyVersion),
+                    rabbit_queue_decorator:set(QFun3)
+                end,
+            NewQueue = rabbit_amqqueue:update(QName, F),
+            case NewQueue of
+                 Q1 when ?is_amqqueue(Q1) ->
+                    {Q0, Q1};
+                 not_found ->
+                    {Q0, Q0}
+            end
+    end.
+
+%% Fires the appropriate policy_changed callback for an updated entity;
+%% a `no_change' result from update_exchange/update_queue is a no-op.
+notify(no_change)->
+    ok;
+notify({X1 = #exchange{}, X2 = #exchange{}}) ->
+    rabbit_exchange:policy_changed(X1, X2);
+notify({Q1, Q2}) when ?is_amqqueue(Q1), ?is_amqqueue(Q2) ->
+    rabbit_amqqueue:policy_changed(Q1, Q2).
+
+%% Picks the highest-priority matching policy, or `undefined'.
+match(Name, Policies) ->
+    case match_all(Name, Policies) of
+        [] -> undefined;
+        [Policy | _] -> Policy
+    end.
+
+%% All policies matching the resource, sorted by descending priority.
+match_all(Name, Policies) ->
+   lists:sort(fun sort_pred/2, [P || P <- Policies, matches(Name, P)]).
+
+%% A policy matches when: its apply-to covers the resource kind, its
+%% definition is applicable to the resource, its regex matches the name,
+%% and it lives in the same vhost.
+matches(#resource{name = Name, kind = Kind, virtual_host = VHost} = Resource, Policy) ->
+    matches_type(Kind, pget('apply-to', Policy)) andalso
+        is_applicable(Resource, pget(definition, Policy)) andalso
+        match =:= re:run(Name, pget(pattern, Policy), [{capture, none}]) andalso
+        VHost =:= pget(vhost, Policy).
+
+matches_type(exchange, <<"exchanges">>) -> true;
+matches_type(queue,    <<"queues">>)    -> true;
+matches_type(exchange, <<"all">>)       -> true;
+matches_type(queue,    <<"all">>)       -> true;
+matches_type(_,        _)               -> false.
+
+%% Descending-priority ordering predicate for lists:sort/2.
+sort_pred(A, B) -> pget(priority, A) >= pget(priority, B).
+
+%% Queues may reject a policy (e.g. keys their type does not support);
+%% exchanges accept anything.
+is_applicable(#resource{kind = queue} = Resource, Policy) ->
+    rabbit_amqqueue:is_policy_applicable(Resource, to_list(Policy));
+is_applicable(_, _) ->
+    true.
+
+%% Normalises a definition (map or proplist) to a proplist.
+to_list(L) when is_list(L) ->
+    L;
+to_list(M) when is_map(M) ->
+    maps:to_list(M).
+
+%%----------------------------------------------------------------------------
+
+%% Validation spec for operator policies, consumed by
+%% rabbit_parameter_validation:proplist/3.
+operator_policy_validation() ->
+    [{<<"priority">>,   fun rabbit_parameter_validation:number/2, mandatory},
+     {<<"pattern">>,    fun rabbit_parameter_validation:regex/2,  mandatory},
+     {<<"apply-to">>,   fun apply_to_validation/2,                optional},
+     {<<"definition">>, fun validation_op/2,                      mandatory}].
+
+%% Validation spec for regular policies.
+policy_validation() ->
+    [{<<"priority">>,   fun rabbit_parameter_validation:number/2, mandatory},
+     {<<"pattern">>,    fun rabbit_parameter_validation:regex/2,  mandatory},
+     {<<"apply-to">>,   fun apply_to_validation/2,                optional},
+     {<<"definition">>, fun validation/2,                         mandatory}].
+
+%% Validates a definition against the registered operator-policy validators.
+validation_op(Name, Terms) ->
+    validation(Name, Terms, operator_policy_validator).
+
+%% Validates a definition against the registered (regular) policy validators.
+validation(Name, Terms) ->
+    validation(Name, Terms, policy_validator).
+
+%% A policy definition must be a non-empty dictionary (map or proplist)
+%% without duplicate keys; each key is checked by the validator module
+%% registered for it. Returns ok | {error, Fmt, Args}.
+validation(_Name, [], _Validator) ->
+    {error, "no policy provided", []};
+validation(Name, Terms0, Validator) when is_map(Terms0) ->
+    Terms = maps:to_list(Terms0),
+    validation(Name, Terms, Validator);
+validation(_Name, Terms, Validator) when is_list(Terms) ->
+    {Keys, Modules} = lists:unzip(
+                        rabbit_registry:lookup_all(Validator)),
+    [] = dups(Keys), %% ASSERTION
+    Validators = lists:zipwith(fun (M, K) -> {M, a2b(K)} end, Modules, Keys),
+    case is_proplist(Terms) of
+        true  -> {TermKeys, _} = lists:unzip(Terms),
+                 case dups(TermKeys) of
+                     []  -> validation0(Validators, Terms);
+                     Dup -> {error, "~p duplicate keys not allowed", [Dup]}
+                 end;
+        false -> {error, "definition must be a dictionary: ~p", [Terms]}
+    end;
+validation(Name, Term, Validator) ->
+    {error, "parse error while reading policy ~s: ~p. Validator: ~p.",
+     [Name, Term, Validator]}.
+
+%% Folds over every registered validator module, handing each the subset of
+%% terms whose keys it owns. Succeeds only if all terms were claimed and
+%% every module's validate_policy/1 returned ok.
+validation0(Validators, Terms) ->
+    case lists:foldl(
+           fun (Mod, {ok, TermsLeft}) ->
+                   ModKeys = proplists:get_all_values(Mod, Validators),
+                   case [T || {Key, _} = T <- TermsLeft,
+                              lists:member(Key, ModKeys)] of
+                       []    -> {ok, TermsLeft};
+                       Scope -> {Mod:validate_policy(Scope), TermsLeft -- Scope}
+                   end;
+               (_, Acc) ->
+                   Acc
+           end, {ok, Terms}, proplists:get_keys(Validators)) of
+        {ok, []}          ->
+            ok;
+        {ok, Unvalidated} ->
+            {error, "~p are not recognised policy settings", [Unvalidated]};
+        {Error, _}        ->
+            Error
+    end.
+
+%% Atom -> UTF-8 binary. Uses the atom_to_binary/2 BIF directly rather
+%% than list_to_binary(atom_to_list(A)).
+a2b(A) -> atom_to_binary(A, utf8).
+
+%% The elements of L that occur more than once ([] when duplicate-free).
+dups(L) -> L -- lists:usort(L).
+
+%% True when every element is a 2-tuple. Single pass with lists:all/2
+%% instead of building a filtered copy and comparing two length/1 calls.
+is_proplist(L) -> lists:all(fun ({_, _}) -> true; (_) -> false end, L).
+
+%% apply-to accepts exactly these three values.
+apply_to_validation(_Name, <<"all">>) -> ok;
+apply_to_validation(_Name, <<"exchanges">>) -> ok;
+apply_to_validation(_Name, <<"queues">>) -> ok;
+apply_to_validation(_Name, Term) ->
+    {error, "apply-to '~s' unrecognised; should be 'queues', 'exchanges' "
+     "or 'all'", [Term]}.
diff --git a/deps/rabbit/src/rabbit_policy_merge_strategy.erl b/deps/rabbit/src/rabbit_policy_merge_strategy.erl new file mode 100644 index 0000000000..f2b79e5862 --- /dev/null +++ b/deps/rabbit/src/rabbit_policy_merge_strategy.erl @@ -0,0 +1,19 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.
+%%
+
+%% Behaviour for plugins that decide how a policy key's value is merged
+%% when both a regular policy and an operator policy define it (see
+%% rabbit_policy:merge_policy_value/3). Implementations register under
+%% the `policy_merge_strategy' type in rabbit_registry.
+-module(rabbit_policy_merge_strategy).
+
+-behaviour(rabbit_registry_class).
+
+-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).
+
+%% Given the key and both values, return the effective merged value.
+-callback merge_policy_value(binary(), Value, Value) ->
+    Value
+      when Value :: term().
+
+%% rabbit_registry_class callbacks: nothing to do on (de)registration.
+added_to_rabbit_registry(_Type, _ModuleName) -> ok.
+removed_from_rabbit_registry(_Type) -> ok.
diff --git a/deps/rabbit/src/rabbit_prelaunch_cluster.erl b/deps/rabbit/src/rabbit_prelaunch_cluster.erl new file mode 100644 index 0000000000..9d3cda99e3 --- /dev/null +++ b/deps/rabbit/src/rabbit_prelaunch_cluster.erl @@ -0,0 +1,22 @@
+%% Prelaunch step: prepares cluster status files and, on the first boot
+%% pass, runs the Mnesia schema upgrade before checking cluster
+%% consistency.
+-module(rabbit_prelaunch_cluster).
+
+-export([setup/1]).
+
+setup(Context) ->
+    rabbit_log_prelaunch:debug(""),
+    rabbit_log_prelaunch:debug("== Clustering =="),
+    rabbit_log_prelaunch:debug("Preparing cluster status files"),
+    rabbit_node_monitor:prepare_cluster_status_files(),
+    case Context of
+        #{initial_pass := true} ->
+            rabbit_log_prelaunch:debug("Upgrading Mnesia schema"),
+            ok = rabbit_upgrade:maybe_upgrade_mnesia();
+        _ ->
+            ok
+    end,
+    %% It's important that the consistency check happens after
+    %% the upgrade, since if we are a secondary node the
+    %% primary node will have forgotten us
+    rabbit_log_prelaunch:debug("Checking cluster consistency"),
+    rabbit_mnesia:check_cluster_consistency(),
+    ok.
diff --git a/deps/rabbit/src/rabbit_prelaunch_enabled_plugins_file.erl b/deps/rabbit/src/rabbit_prelaunch_enabled_plugins_file.erl new file mode 100644 index 0000000000..57fe32f8e6 --- /dev/null +++ b/deps/rabbit/src/rabbit_prelaunch_enabled_plugins_file.erl @@ -0,0 +1,53 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_prelaunch_enabled_plugins_file). + +-include_lib("rabbit_common/include/rabbit.hrl"). + +-export([setup/1]). + +setup(Context) -> + rabbit_log_prelaunch:debug(""), + rabbit_log_prelaunch:debug("== Enabled plugins file =="), + update_enabled_plugins_file(Context). + +%% ------------------------------------------------------------------- +%% `enabled_plugins` file content initialization. +%% ------------------------------------------------------------------- + +update_enabled_plugins_file(#{enabled_plugins := undefined}) -> + ok; +update_enabled_plugins_file(#{enabled_plugins := all, + plugins_path := Path} = Context) -> + List = [P#plugin.name || P <- rabbit_plugins:list(Path)], + do_update_enabled_plugins_file(Context, List); +update_enabled_plugins_file(#{enabled_plugins := List} = Context) -> + do_update_enabled_plugins_file(Context, List). 
+ +do_update_enabled_plugins_file(#{enabled_plugins_file := File}, List) -> + SortedList = lists:usort(List), + case SortedList of + [] -> + rabbit_log_prelaunch:debug("Marking all plugins as disabled"); + _ -> + rabbit_log_prelaunch:debug( + "Marking the following plugins as enabled:"), + [rabbit_log_prelaunch:debug(" - ~s", [P]) || P <- SortedList] + end, + Content = io_lib:format("~p.~n", [SortedList]), + case file:write_file(File, Content) of + ok -> + rabbit_log_prelaunch:debug("Wrote plugins file: ~ts", [File]), + ok; + {error, Reason} -> + rabbit_log_prelaunch:error( + "Failed to update enabled plugins file \"~ts\" " + "from $RABBITMQ_ENABLED_PLUGINS: ~ts", + [File, file:format_error(Reason)]), + throw({error, failed_to_update_enabled_plugins_file}) + end. diff --git a/deps/rabbit/src/rabbit_prelaunch_feature_flags.erl b/deps/rabbit/src/rabbit_prelaunch_feature_flags.erl new file mode 100644 index 0000000000..cd7b276f4c --- /dev/null +++ b/deps/rabbit/src/rabbit_prelaunch_feature_flags.erl @@ -0,0 +1,32 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_prelaunch_feature_flags). + +-export([setup/1]). 
+ +setup(#{feature_flags_file := FFFile}) -> + rabbit_log_prelaunch:debug(""), + rabbit_log_prelaunch:debug("== Feature flags =="), + case filelib:ensure_dir(FFFile) of + ok -> + rabbit_log_prelaunch:debug("Initializing feature flags registry"), + case rabbit_feature_flags:initialize_registry() of + ok -> + ok; + {error, Reason} -> + rabbit_log_prelaunch:error( + "Failed to initialize feature flags registry: ~p", + [Reason]), + throw({error, failed_to_initialize_feature_flags_registry}) + end; + {error, Reason} -> + rabbit_log_prelaunch:error( + "Failed to create feature flags file \"~ts\" directory: ~ts", + [FFFile, file:format_error(Reason)]), + throw({error, failed_to_create_feature_flags_file_directory}) + end. diff --git a/deps/rabbit/src/rabbit_prelaunch_logging.erl b/deps/rabbit/src/rabbit_prelaunch_logging.erl new file mode 100644 index 0000000000..6e3f040ec5 --- /dev/null +++ b/deps/rabbit/src/rabbit_prelaunch_logging.erl @@ -0,0 +1,75 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_prelaunch_logging). + +-export([setup/1]). + +setup(Context) -> + rabbit_log_prelaunch:debug(""), + rabbit_log_prelaunch:debug("== Logging =="), + ok = set_ERL_CRASH_DUMP_envvar(Context), + ok = configure_lager(Context). + +set_ERL_CRASH_DUMP_envvar(#{log_base_dir := LogBaseDir}) -> + case os:getenv("ERL_CRASH_DUMP") of + false -> + ErlCrashDump = filename:join(LogBaseDir, "erl_crash.dump"), + rabbit_log_prelaunch:debug( + "Setting $ERL_CRASH_DUMP environment variable to \"~ts\"", + [ErlCrashDump]), + os:putenv("ERL_CRASH_DUMP", ErlCrashDump), + ok; + ErlCrashDump -> + rabbit_log_prelaunch:debug( + "$ERL_CRASH_DUMP environment variable already set to \"~ts\"", + [ErlCrashDump]), + ok + end. 
+ +configure_lager(#{log_base_dir := LogBaseDir, + main_log_file := MainLog, + upgrade_log_file := UpgradeLog} = Context) -> + {SaslErrorLogger, + MainLagerHandler, + UpgradeLagerHandler} = case MainLog of + "-" -> + %% Log to STDOUT. + rabbit_log_prelaunch:debug( + "Logging to stdout"), + {tty, + tty, + tty}; + _ -> + rabbit_log_prelaunch:debug( + "Logging to:"), + [rabbit_log_prelaunch:debug( + " - ~ts", [Log]) + || Log <- [MainLog, UpgradeLog]], + %% Log to file. + {false, + MainLog, + UpgradeLog} + end, + + ok = application:set_env(lager, crash_log, "log/crash.log"), + + Fun = fun({App, Var, Value}) -> + case application:get_env(App, Var) of + undefined -> ok = application:set_env(App, Var, Value); + _ -> ok + end + end, + Vars = [{sasl, sasl_error_logger, SaslErrorLogger}, + {rabbit, lager_log_root, LogBaseDir}, + {rabbit, lager_default_file, MainLagerHandler}, + {rabbit, lager_upgrade_file, UpgradeLagerHandler}], + lists:foreach(Fun, Vars), + + ok = rabbit_lager:start_logger(), + + ok = rabbit_prelaunch_early_logging:setup_early_logging(Context, false). diff --git a/deps/rabbit/src/rabbit_prequeue.erl b/deps/rabbit/src/rabbit_prequeue.erl new file mode 100644 index 0000000000..b5af8927c7 --- /dev/null +++ b/deps/rabbit/src/rabbit_prequeue.erl @@ -0,0 +1,100 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2010-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_prequeue). + +%% This is the initial gen_server that all queue processes start off +%% as. It handles the decision as to whether we need to start a new +%% mirror, a new master/unmirrored, or whether we are restarting (and +%% if so, as what). Thus a crashing queue process can restart from here +%% and always do the right thing. + +-export([start_link/3]). 
+ +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-behaviour(gen_server2). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include("amqqueue.hrl"). + +%%---------------------------------------------------------------------------- + +-export_type([start_mode/0]). + +-type start_mode() :: 'declare' | 'recovery' | 'slave'. + +%%---------------------------------------------------------------------------- + +-spec start_link(amqqueue:amqqueue(), start_mode(), pid()) + -> rabbit_types:ok_pid_or_error(). + +start_link(Q, StartMode, Marker) -> + gen_server2:start_link(?MODULE, {Q, StartMode, Marker}, []). + +%%---------------------------------------------------------------------------- + +init({Q, StartMode, Marker}) -> + init(Q, case {is_process_alive(Marker), StartMode} of + {true, slave} -> slave; + {true, _} -> master; + {false, _} -> restart + end). + +init(Q, master) -> rabbit_amqqueue_process:init(Q); +init(Q, slave) -> rabbit_mirror_queue_slave:init(Q); + +init(Q0, restart) when ?is_amqqueue(Q0) -> + QueueName = amqqueue:get_name(Q0), + {ok, Q1} = rabbit_amqqueue:lookup(QueueName), + QPid = amqqueue:get_pid(Q1), + SPids = amqqueue:get_slave_pids(Q1), + LocalOrMasterDown = node(QPid) =:= node() + orelse not rabbit_mnesia:on_running_node(QPid), + Slaves = [SPid || SPid <- SPids, rabbit_mnesia:is_process_alive(SPid)], + case rabbit_mnesia:is_process_alive(QPid) of + true -> false = LocalOrMasterDown, %% assertion + rabbit_mirror_queue_slave:go(self(), async), + rabbit_mirror_queue_slave:init(Q1); %% [1] + false -> case LocalOrMasterDown andalso Slaves =:= [] of + true -> crash_restart(Q1); %% [2] + false -> timer:sleep(25), + init(Q1, restart) %% [3] + end + end. +%% [1] There is a master on another node. Regardless of whether we +%% were originally a master or a mirror, we are now a new slave. +%% +%% [2] Nothing is alive. We are the last best hope. Try to restart as a master. 
+%% +%% [3] The current master is dead but either there are alive mirrors to +%% take over or it's all happening on a different node anyway. This is +%% not a stable situation. Sleep and wait for somebody else to make a +%% move. + +crash_restart(Q0) when ?is_amqqueue(Q0) -> + QueueName = amqqueue:get_name(Q0), + rabbit_log:error("Restarting crashed ~s.~n", [rabbit_misc:rs(QueueName)]), + gen_server2:cast(self(), init), + Q1 = amqqueue:set_pid(Q0, self()), + rabbit_amqqueue_process:init(Q1). + +%%---------------------------------------------------------------------------- + +%% This gen_server2 always hands over to some other module at the end +%% of init/1. +-spec handle_call(_, _, _) -> no_return(). +handle_call(_Msg, _From, _State) -> exit(unreachable). +-spec handle_cast(_, _) -> no_return(). +handle_cast(_Msg, _State) -> exit(unreachable). +-spec handle_info(_, _) -> no_return(). +handle_info(_Msg, _State) -> exit(unreachable). +-spec terminate(_, _) -> no_return(). +terminate(_Reason, _State) -> exit(unreachable). +-spec code_change(_, _, _) -> no_return(). +code_change(_OldVsn, _State, _Extra) -> exit(unreachable). diff --git a/deps/rabbit/src/rabbit_priority_queue.erl b/deps/rabbit/src/rabbit_priority_queue.erl new file mode 100644 index 0000000000..4b41b8dfbd --- /dev/null +++ b/deps/rabbit/src/rabbit_priority_queue.erl @@ -0,0 +1,688 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2015-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_priority_queue). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include_lib("rabbit_common/include/rabbit_framing.hrl"). +-include("amqqueue.hrl"). + +-behaviour(rabbit_backing_queue). + +%% enabled unconditionally. Disabling priority queuing after +%% it has been enabled is dangerous. 
+-rabbit_boot_step({?MODULE, + [{description, "enable priority queue"}, + {mfa, {?MODULE, enable, []}}, + {requires, pre_boot}, + {enables, kernel_ready}]}). + +-export([enable/0]). + +-export([start/2, stop/1]). + +-export([init/3, terminate/2, delete_and_terminate/2, delete_crashed/1, + purge/1, purge_acks/1, + publish/6, publish_delivered/5, discard/4, drain_confirmed/1, + batch_publish/4, batch_publish_delivered/4, + dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, + ackfold/4, fold/3, len/1, is_empty/1, depth/1, + set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1, + handle_pre_hibernate/1, resume/1, msg_rates/1, + info/2, invoke/3, is_duplicate/2, set_queue_mode/2, + zip_msgs_and_acks/4, handle_info/2]). + +-record(state, {bq, bqss, max_priority}). +-record(passthrough, {bq, bqs}). + +%% See 'note on suffixes' below +-define(passthrough1(F), State#passthrough{bqs = BQ:F}). +-define(passthrough2(F), + {Res, BQS1} = BQ:F, {Res, State#passthrough{bqs = BQS1}}). +-define(passthrough3(F), + {Res1, Res2, BQS1} = BQ:F, {Res1, Res2, State#passthrough{bqs = BQS1}}). + +%% This module adds support for priority queues. +%% +%% Priority queues have one backing queue per priority. Backing queue functions +%% then produce a list of results for each BQ and fold over them, sorting +%% by priority. +%% +%%For queues that do not +%% have priorities enabled, the functions in this module delegate to +%% their "regular" backing queue module counterparts. See the `passthrough` +%% record and passthrough{1,2,3} macros. +%% +%% Delivery to consumers happens by first "running" the queue with +%% the highest priority until there are no more messages to deliver, +%% then the next one, and so on. This offers good prioritisation +%% but may result in lower priority messages not being delivered +%% when there's a high ingress rate of messages with higher priority. 
+ +enable() -> + {ok, RealBQ} = application:get_env(rabbit, backing_queue_module), + case RealBQ of + ?MODULE -> ok; + _ -> rabbit_log:info("Priority queues enabled, real BQ is ~s~n", + [RealBQ]), + application:set_env( + rabbitmq_priority_queue, backing_queue_module, RealBQ), + application:set_env(rabbit, backing_queue_module, ?MODULE) + end. + +%%---------------------------------------------------------------------------- + +start(VHost, QNames) -> + BQ = bq(), + %% TODO this expand-collapse dance is a bit ridiculous but it's what + %% rabbit_amqqueue:recover/0 expects. We could probably simplify + %% this if we rejigged recovery a bit. + {DupNames, ExpNames} = expand_queues(QNames), + case BQ:start(VHost, ExpNames) of + {ok, ExpRecovery} -> + {ok, collapse_recovery(QNames, DupNames, ExpRecovery)}; + Else -> + Else + end. + +stop(VHost) -> + BQ = bq(), + BQ:stop(VHost). + +%%---------------------------------------------------------------------------- + +mutate_name(P, Q) when ?is_amqqueue(Q) -> + Res0 = #resource{name = QNameBin0} = amqqueue:get_name(Q), + QNameBin1 = mutate_name_bin(P, QNameBin0), + Res1 = Res0#resource{name = QNameBin1}, + amqqueue:set_name(Q, Res1). + +mutate_name_bin(P, NameBin) -> + <<NameBin/binary, 0, P:8>>. + +expand_queues(QNames) -> + lists:unzip( + lists:append([expand_queue(QName) || QName <- QNames])). + +expand_queue(QName = #resource{name = QNameBin}) -> + {ok, Q} = rabbit_misc:dirty_read({rabbit_durable_queue, QName}), + case priorities(Q) of + none -> [{QName, QName}]; + Ps -> [{QName, QName#resource{name = mutate_name_bin(P, QNameBin)}} + || P <- Ps] + end. + +collapse_recovery(QNames, DupNames, Recovery) -> + NameToTerms = lists:foldl(fun({Name, RecTerm}, Dict) -> + dict:append(Name, RecTerm, Dict) + end, dict:new(), lists:zip(DupNames, Recovery)), + [dict:fetch(Name, NameToTerms) || Name <- QNames]. 
+ +priorities(Q) when ?is_amqqueue(Q) -> + Args = amqqueue:get_arguments(Q), + Ints = [long, short, signedint, byte, unsignedbyte, unsignedshort, unsignedint], + case rabbit_misc:table_lookup(Args, <<"x-max-priority">>) of + {Type, RequestedMax} -> + case lists:member(Type, Ints) of + false -> none; + true -> + Max = min(RequestedMax, ?MAX_SUPPORTED_PRIORITY), + lists:reverse(lists:seq(0, Max)) + end; + _ -> none + end. + +%%---------------------------------------------------------------------------- + +init(Q, Recover, AsyncCallback) -> + BQ = bq(), + case priorities(Q) of + none -> RealRecover = case Recover of + [R] -> R; %% [0] + R -> R + end, + #passthrough{bq = BQ, + bqs = BQ:init(Q, RealRecover, AsyncCallback)}; + Ps -> Init = fun (P, Term) -> + BQ:init( + mutate_name(P, Q), Term, + fun (M, F) -> AsyncCallback(M, {P, F}) end) + end, + BQSs = case have_recovery_terms(Recover) of + false -> [{P, Init(P, Recover)} || P <- Ps]; + _ -> PsTerms = lists:zip(Ps, Recover), + [{P, Init(P, Term)} || {P, Term} <- PsTerms] + end, + #state{bq = BQ, + bqss = BQSs, + max_priority = hd(Ps)} + end. +%% [0] collapse_recovery has the effect of making a list of recovery +%% terms in priority order, even for non priority queues. It's easier +%% to do that and "unwrap" in init/3 than to have collapse_recovery be +%% aware of non-priority queues. + +have_recovery_terms(new) -> false; +have_recovery_terms(non_clean_shutdown) -> false; +have_recovery_terms(_) -> true. + +terminate(Reason, State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> BQ:terminate(Reason, BQSN) end, State); +terminate(Reason, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(terminate(Reason, BQS)). + +delete_and_terminate(Reason, State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> + BQ:delete_and_terminate(Reason, BQSN) + end, State); +delete_and_terminate(Reason, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(delete_and_terminate(Reason, BQS)). 
+ +delete_crashed(Q) -> + BQ = bq(), + case priorities(Q) of + none -> BQ:delete_crashed(Q); + Ps -> [BQ:delete_crashed(mutate_name(P, Q)) || P <- Ps] + end. + +purge(State = #state{bq = BQ}) -> + fold_add2(fun (_P, BQSN) -> BQ:purge(BQSN) end, State); +purge(State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(purge(BQS)). + +purge_acks(State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> BQ:purge_acks(BQSN) end, State); +purge_acks(State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(purge_acks(BQS)). + +publish(Msg, MsgProps, IsDelivered, ChPid, Flow, State = #state{bq = BQ}) -> + pick1(fun (_P, BQSN) -> + BQ:publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQSN) + end, Msg, State); +publish(Msg, MsgProps, IsDelivered, ChPid, Flow, + State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQS)). + +batch_publish(Publishes, ChPid, Flow, State = #state{bq = BQ, bqss = [{MaxP, _} |_]}) -> + PubMap = partition_publish_batch(Publishes, MaxP), + lists:foldl( + fun ({Priority, Pubs}, St) -> + pick1(fun (_P, BQSN) -> + BQ:batch_publish(Pubs, ChPid, Flow, BQSN) + end, Priority, St) + end, State, maps:to_list(PubMap)); +batch_publish(Publishes, ChPid, Flow, + State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(batch_publish(Publishes, ChPid, Flow, BQS)). + +publish_delivered(Msg, MsgProps, ChPid, Flow, State = #state{bq = BQ}) -> + pick2(fun (P, BQSN) -> + {AckTag, BQSN1} = BQ:publish_delivered( + Msg, MsgProps, ChPid, Flow, BQSN), + {{P, AckTag}, BQSN1} + end, Msg, State); +publish_delivered(Msg, MsgProps, ChPid, Flow, + State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(publish_delivered(Msg, MsgProps, ChPid, Flow, BQS)). 
+ +batch_publish_delivered(Publishes, ChPid, Flow, State = #state{bq = BQ, bqss = [{MaxP, _} |_]}) -> + PubMap = partition_publish_delivered_batch(Publishes, MaxP), + {PrioritiesAndAcks, State1} = + lists:foldl( + fun ({Priority, Pubs}, {PriosAndAcks, St}) -> + {PriosAndAcks1, St1} = + pick2(fun (P, BQSN) -> + {AckTags, BQSN1} = + BQ:batch_publish_delivered( + Pubs, ChPid, Flow, BQSN), + {priority_on_acktags(P, AckTags), BQSN1} + end, Priority, St), + {[PriosAndAcks1 | PriosAndAcks], St1} + end, {[], State}, maps:to_list(PubMap)), + {lists:reverse(PrioritiesAndAcks), State1}; +batch_publish_delivered(Publishes, ChPid, Flow, + State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(batch_publish_delivered(Publishes, ChPid, Flow, BQS)). + +%% TODO this is a hack. The BQ api does not give us enough information +%% here - if we had the Msg we could look at its priority and forward +%% to the appropriate sub-BQ. But we don't so we are stuck. +%% +%% But fortunately VQ ignores discard/4, so we can too, *assuming we +%% are talking to VQ*. discard/4 is used by HA, but that's "above" us +%% (if in use) so we don't break that either, just some hypothetical +%% alternate BQ implementation. +discard(_MsgId, _ChPid, _Flow, State = #state{}) -> + State; + %% We should have something a bit like this here: + %% pick1(fun (_P, BQSN) -> + %% BQ:discard(MsgId, ChPid, Flow, BQSN) + %% end, Msg, State); +discard(MsgId, ChPid, Flow, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(discard(MsgId, ChPid, Flow, BQS)). + +drain_confirmed(State = #state{bq = BQ}) -> + fold_append2(fun (_P, BQSN) -> BQ:drain_confirmed(BQSN) end, State); +drain_confirmed(State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(drain_confirmed(BQS)). + +dropwhile(Pred, State = #state{bq = BQ}) -> + find2(fun (_P, BQSN) -> BQ:dropwhile(Pred, BQSN) end, undefined, State); +dropwhile(Pred, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(dropwhile(Pred, BQS)). 
+ +%% TODO this is a bit nasty. In the one place where fetchwhile/4 is +%% actually used the accumulator is a list of acktags, which of course +%% we need to mutate - so we do that although we are encoding an +%% assumption here. +fetchwhile(Pred, Fun, Acc, State = #state{bq = BQ}) -> + findfold3( + fun (P, BQSN, AccN) -> + {Res, AccN1, BQSN1} = BQ:fetchwhile(Pred, Fun, AccN, BQSN), + {Res, priority_on_acktags(P, AccN1), BQSN1} + end, Acc, undefined, State); +fetchwhile(Pred, Fun, Acc, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough3(fetchwhile(Pred, Fun, Acc, BQS)). + +fetch(AckRequired, State = #state{bq = BQ}) -> + find2( + fun (P, BQSN) -> + case BQ:fetch(AckRequired, BQSN) of + {empty, BQSN1} -> {empty, BQSN1}; + {{Msg, Del, ATag}, BQSN1} -> {{Msg, Del, {P, ATag}}, BQSN1} + end + end, empty, State); +fetch(AckRequired, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(fetch(AckRequired, BQS)). + +drop(AckRequired, State = #state{bq = BQ}) -> + find2(fun (P, BQSN) -> + case BQ:drop(AckRequired, BQSN) of + {empty, BQSN1} -> {empty, BQSN1}; + {{MsgId, AckTag}, BQSN1} -> {{MsgId, {P, AckTag}}, BQSN1} + end + end, empty, State); +drop(AckRequired, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(drop(AckRequired, BQS)). + +ack(AckTags, State = #state{bq = BQ}) -> + fold_by_acktags2(fun (AckTagsN, BQSN) -> + BQ:ack(AckTagsN, BQSN) + end, AckTags, State); +ack(AckTags, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(ack(AckTags, BQS)). + +requeue(AckTags, State = #state{bq = BQ}) -> + fold_by_acktags2(fun (AckTagsN, BQSN) -> + BQ:requeue(AckTagsN, BQSN) + end, AckTags, State); +requeue(AckTags, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(requeue(AckTags, BQS)). 
+ +%% Similar problem to fetchwhile/4 +ackfold(MsgFun, Acc, State = #state{bq = BQ}, AckTags) -> + AckTagsByPriority = partition_acktags(AckTags), + fold2( + fun (P, BQSN, AccN) -> + case maps:find(P, AckTagsByPriority) of + {ok, ATagsN} -> {AccN1, BQSN1} = + BQ:ackfold(MsgFun, AccN, BQSN, ATagsN), + {priority_on_acktags(P, AccN1), BQSN1}; + error -> {AccN, BQSN} + end + end, Acc, State); +ackfold(MsgFun, Acc, State = #passthrough{bq = BQ, bqs = BQS}, AckTags) -> + ?passthrough2(ackfold(MsgFun, Acc, BQS, AckTags)). + +fold(Fun, Acc, State = #state{bq = BQ}) -> + fold2(fun (_P, BQSN, AccN) -> BQ:fold(Fun, AccN, BQSN) end, Acc, State); +fold(Fun, Acc, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(fold(Fun, Acc, BQS)). + +len(#state{bq = BQ, bqss = BQSs}) -> + add0(fun (_P, BQSN) -> BQ:len(BQSN) end, BQSs); +len(#passthrough{bq = BQ, bqs = BQS}) -> + BQ:len(BQS). + +is_empty(#state{bq = BQ, bqss = BQSs}) -> + all0(fun (_P, BQSN) -> BQ:is_empty(BQSN) end, BQSs); +is_empty(#passthrough{bq = BQ, bqs = BQS}) -> + BQ:is_empty(BQS). + +depth(#state{bq = BQ, bqss = BQSs}) -> + add0(fun (_P, BQSN) -> BQ:depth(BQSN) end, BQSs); +depth(#passthrough{bq = BQ, bqs = BQS}) -> + BQ:depth(BQS). + +set_ram_duration_target(DurationTarget, State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> + BQ:set_ram_duration_target(DurationTarget, BQSN) + end, State); +set_ram_duration_target(DurationTarget, + State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(set_ram_duration_target(DurationTarget, BQS)). + +ram_duration(State = #state{bq = BQ}) -> + fold_min2(fun (_P, BQSN) -> BQ:ram_duration(BQSN) end, State); +ram_duration(State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(ram_duration(BQS)). 
+ +needs_timeout(#state{bq = BQ, bqss = BQSs}) -> + fold0(fun (_P, _BQSN, timed) -> timed; + (_P, BQSN, idle) -> case BQ:needs_timeout(BQSN) of + timed -> timed; + _ -> idle + end; + (_P, BQSN, false) -> BQ:needs_timeout(BQSN) + end, false, BQSs); +needs_timeout(#passthrough{bq = BQ, bqs = BQS}) -> + BQ:needs_timeout(BQS). + +timeout(State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> BQ:timeout(BQSN) end, State); +timeout(State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(timeout(BQS)). + +handle_pre_hibernate(State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> + BQ:handle_pre_hibernate(BQSN) + end, State); +handle_pre_hibernate(State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(handle_pre_hibernate(BQS)). + +handle_info(Msg, State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> BQ:handle_info(Msg, BQSN) end, State); +handle_info(Msg, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(handle_info(Msg, BQS)). + +resume(State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> BQ:resume(BQSN) end, State); +resume(State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(resume(BQS)). + +msg_rates(#state{bq = BQ, bqss = BQSs}) -> + fold0(fun(_P, BQSN, {InN, OutN}) -> + {In, Out} = BQ:msg_rates(BQSN), + {InN + In, OutN + Out} + end, {0.0, 0.0}, BQSs); +msg_rates(#passthrough{bq = BQ, bqs = BQS}) -> + BQ:msg_rates(BQS). + +info(backing_queue_status, #state{bq = BQ, bqss = BQSs}) -> + fold0(fun (P, BQSN, Acc) -> + combine_status(P, BQ:info(backing_queue_status, BQSN), Acc) + end, nothing, BQSs); +info(head_message_timestamp, #state{bq = BQ, bqss = BQSs}) -> + find_head_message_timestamp(BQ, BQSs, ''); +info(Item, #state{bq = BQ, bqss = BQSs}) -> + fold0(fun (_P, BQSN, Acc) -> + Acc + BQ:info(Item, BQSN) + end, 0, BQSs); +info(Item, #passthrough{bq = BQ, bqs = BQS}) -> + BQ:info(Item, BQS). 
+ +invoke(Mod, {P, Fun}, State = #state{bq = BQ}) -> + pick1(fun (_P, BQSN) -> BQ:invoke(Mod, Fun, BQSN) end, P, State); +invoke(Mod, Fun, State = #state{bq = BQ, max_priority = P}) -> + pick1(fun (_P, BQSN) -> BQ:invoke(Mod, Fun, BQSN) end, P, State); +invoke(Mod, Fun, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(invoke(Mod, Fun, BQS)). + +is_duplicate(Msg, State = #state{bq = BQ}) -> + pick2(fun (_P, BQSN) -> BQ:is_duplicate(Msg, BQSN) end, Msg, State); +is_duplicate(Msg, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough2(is_duplicate(Msg, BQS)). + +set_queue_mode(Mode, State = #state{bq = BQ}) -> + foreach1(fun (_P, BQSN) -> BQ:set_queue_mode(Mode, BQSN) end, State); +set_queue_mode(Mode, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(set_queue_mode(Mode, BQS)). + +zip_msgs_and_acks(Msgs, AckTags, Accumulator, #state{bqss = [{MaxP, _} |_]}) -> + MsgsByPriority = partition_publish_delivered_batch(Msgs, MaxP), + lists:foldl(fun (Acks, MAs) -> + {P, _AckTag} = hd(Acks), + Pubs = maps:get(P, MsgsByPriority), + MAs0 = zip_msgs_and_acks(Pubs, Acks), + MAs ++ MAs0 + end, Accumulator, AckTags); +zip_msgs_and_acks(Msgs, AckTags, Accumulator, + #passthrough{bq = BQ, bqs = BQS}) -> + BQ:zip_msgs_and_acks(Msgs, AckTags, Accumulator, BQS). + +%%---------------------------------------------------------------------------- + +bq() -> + {ok, RealBQ} = application:get_env( + rabbitmq_priority_queue, backing_queue_module), + RealBQ. + +%% Note on suffixes: Many utility functions here have suffixes telling +%% you the arity of the return type of the BQ function they are +%% designed to work with. 
+%% +%% 0 - BQ function returns a value and does not modify state +%% 1 - BQ function just returns a new state +%% 2 - BQ function returns a 2-tuple of {Result, NewState} +%% 3 - BQ function returns a 3-tuple of {Result1, Result2, NewState} + +%% Fold over results +fold0(Fun, Acc, [{P, BQSN} | Rest]) -> fold0(Fun, Fun(P, BQSN, Acc), Rest); +fold0(_Fun, Acc, []) -> Acc. + +%% Do all BQs match? +all0(Pred, BQSs) -> fold0(fun (_P, _BQSN, false) -> false; + (P, BQSN, true) -> Pred(P, BQSN) + end, true, BQSs). + +%% Sum results +add0(Fun, BQSs) -> fold0(fun (P, BQSN, Acc) -> Acc + Fun(P, BQSN) end, 0, BQSs). + +%% Apply for all states +foreach1(Fun, State = #state{bqss = BQSs}) -> + a(State#state{bqss = foreach1(Fun, BQSs, [])}). +foreach1(Fun, [{Priority, BQSN} | Rest], BQSAcc) -> + BQSN1 = Fun(Priority, BQSN), + foreach1(Fun, Rest, [{Priority, BQSN1} | BQSAcc]); +foreach1(_Fun, [], BQSAcc) -> + lists:reverse(BQSAcc). + +%% For a given thing, just go to its BQ +pick1(Fun, Prioritisable, #state{bqss = BQSs} = State) -> + {P, BQSN} = priority_bq(Prioritisable, BQSs), + a(State#state{bqss = bq_store(P, Fun(P, BQSN), BQSs)}). + +%% Fold over results +fold2(Fun, Acc, State = #state{bqss = BQSs}) -> + {Res, BQSs1} = fold2(Fun, Acc, BQSs, []), + {Res, a(State#state{bqss = BQSs1})}. + +fold2(Fun, Acc, [{P, BQSN} | Rest], BQSAcc) -> + {Acc1, BQSN1} = Fun(P, BQSN, Acc), + fold2(Fun, Acc1, Rest, [{P, BQSN1} | BQSAcc]); +fold2(_Fun, Acc, [], BQSAcc) -> + {Acc, lists:reverse(BQSAcc)}. + +%% Fold over results assuming results are lists and we want to append them +fold_append2(Fun, State) -> + fold2(fun (P, BQSN, Acc) -> + {Res, BQSN1} = Fun(P, BQSN), + {Res ++ Acc, BQSN1} + end, [], State). + +%% Fold over results assuming results are numbers and we want to sum them +fold_add2(Fun, State) -> + fold2(fun (P, BQSN, Acc) -> + {Res, BQSN1} = Fun(P, BQSN), + {add_maybe_infinity(Res, Acc), BQSN1} + end, 0, State). 
+ +%% Fold over results assuming results are numbers and we want the minimum +fold_min2(Fun, State) -> + fold2(fun (P, BQSN, Acc) -> + {Res, BQSN1} = Fun(P, BQSN), + {erlang:min(Res, Acc), BQSN1} + end, infinity, State). + +%% Fold over results assuming results are lists and we want to append +%% them, and also that we have some AckTags we want to pass in to each +%% invocation. +fold_by_acktags2(Fun, AckTags, State) -> + AckTagsByPriority = partition_acktags(AckTags), + fold_append2(fun (P, BQSN) -> + case maps:find(P, AckTagsByPriority) of + {ok, AckTagsN} -> Fun(AckTagsN, BQSN); + error -> {[], BQSN} + end + end, State). + +%% For a given thing, just go to its BQ +pick2(Fun, Prioritisable, #state{bqss = BQSs} = State) -> + {P, BQSN} = priority_bq(Prioritisable, BQSs), + {Res, BQSN1} = Fun(P, BQSN), + {Res, a(State#state{bqss = bq_store(P, BQSN1, BQSs)})}. + +%% Run through BQs in priority order until one does not return +%% {NotFound, NewState} or we have gone through them all. +find2(Fun, NotFound, State = #state{bqss = BQSs}) -> + {Res, BQSs1} = find2(Fun, NotFound, BQSs, []), + {Res, a(State#state{bqss = BQSs1})}. +find2(Fun, NotFound, [{P, BQSN} | Rest], BQSAcc) -> + case Fun(P, BQSN) of + {NotFound, BQSN1} -> find2(Fun, NotFound, Rest, [{P, BQSN1} | BQSAcc]); + {Res, BQSN1} -> {Res, lists:reverse([{P, BQSN1} | BQSAcc]) ++ Rest} + end; +find2(_Fun, NotFound, [], BQSAcc) -> + {NotFound, lists:reverse(BQSAcc)}. + +%% Run through BQs in priority order like find2 but also folding as we go. +findfold3(Fun, Acc, NotFound, State = #state{bqss = BQSs}) -> + {Res, Acc1, BQSs1} = findfold3(Fun, Acc, NotFound, BQSs, []), + {Res, Acc1, a(State#state{bqss = BQSs1})}. 
+findfold3(Fun, Acc, NotFound, [{P, BQSN} | Rest], BQSAcc) -> + case Fun(P, BQSN, Acc) of + {NotFound, Acc1, BQSN1} -> + findfold3(Fun, Acc1, NotFound, Rest, [{P, BQSN1} | BQSAcc]); + {Res, Acc1, BQSN1} -> + {Res, Acc1, lists:reverse([{P, BQSN1} | BQSAcc]) ++ Rest} + end; +findfold3(_Fun, Acc, NotFound, [], BQSAcc) -> + {NotFound, Acc, lists:reverse(BQSAcc)}. + +bq_fetch(P, []) -> exit({not_found, P}); +bq_fetch(P, [{P, BQSN} | _]) -> {P, BQSN}; +bq_fetch(P, [{_, _BQSN} | T]) -> bq_fetch(P, T). + +bq_store(P, BQS, BQSs) -> + [{PN, case PN of + P -> BQS; + _ -> BQSN + end} || {PN, BQSN} <- BQSs]. + +%% +a(State = #state{bqss = BQSs}) -> + Ps = [P || {P, _} <- BQSs], + case lists:reverse(lists:usort(Ps)) of + Ps -> State; + _ -> exit({bad_order, Ps}) + end. + +%%---------------------------------------------------------------------------- +partition_publish_batch(Publishes, MaxP) -> + partition_publishes( + Publishes, fun ({Msg, _, _}) -> Msg end, MaxP). + +partition_publish_delivered_batch(Publishes, MaxP) -> + partition_publishes( + Publishes, fun ({Msg, _}) -> Msg end, MaxP). + +partition_publishes(Publishes, ExtractMsg, MaxP) -> + Partitioned = + lists:foldl(fun (Pub, Dict) -> + Msg = ExtractMsg(Pub), + rabbit_misc:maps_cons(priority(Msg, MaxP), Pub, Dict) + end, maps:new(), Publishes), + maps:map(fun (_P, RevPubs) -> + lists:reverse(RevPubs) + end, Partitioned). + + +priority_bq(Priority, [{MaxP, _} | _] = BQSs) -> + bq_fetch(priority(Priority, MaxP), BQSs). + +%% Messages with a priority which is higher than the queue's maximum are treated +%% as if they were published with the maximum priority. 
+priority(undefined, _MaxP) -> + 0; +priority(Priority, MaxP) when is_integer(Priority), Priority =< MaxP -> + Priority; +priority(Priority, MaxP) when is_integer(Priority), Priority > MaxP -> + MaxP; +priority(#basic_message{content = Content}, MaxP) -> + priority(rabbit_binary_parser:ensure_content_decoded(Content), MaxP); +priority(#content{properties = Props}, MaxP) -> + #'P_basic'{priority = Priority0} = Props, + priority(Priority0, MaxP). + +add_maybe_infinity(infinity, _) -> infinity; +add_maybe_infinity(_, infinity) -> infinity; +add_maybe_infinity(A, B) -> A + B. + +partition_acktags(AckTags) -> partition_acktags(AckTags, maps:new()). + +partition_acktags([], Partitioned) -> + maps:map(fun (_P, RevAckTags) -> + lists:reverse(RevAckTags) + end, Partitioned); +partition_acktags([{P, AckTag} | Rest], Partitioned) -> + partition_acktags(Rest, rabbit_misc:maps_cons(P, AckTag, Partitioned)). + +priority_on_acktags(P, AckTags) -> + [case Tag of + _ when is_integer(Tag) -> {P, Tag}; + _ -> Tag + end || Tag <- AckTags]. + +combine_status(P, New, nothing) -> + [{priority_lengths, [{P, proplists:get_value(len, New)}]} | New]; +combine_status(P, New, Old) -> + Combined = [{K, cse(V, proplists:get_value(K, Old))} || {K, V} <- New], + Lens = [{P, proplists:get_value(len, New)} | + proplists:get_value(priority_lengths, Old)], + [{priority_lengths, Lens} | Combined]. + +cse(infinity, _) -> infinity; +cse(_, infinity) -> infinity; +%% queue modes +cse(_, default) -> default; +cse(default, _) -> default; +cse(_, lazy) -> lazy; +cse(lazy, _) -> lazy; +%% numerical stats +cse(A, B) when is_number(A) -> A + B; +cse({delta, _, _, _, _}, _) -> {delta, todo, todo, todo, todo}; +cse(_, _) -> undefined. + +%% When asked about 'head_message_timestamp' for this priority queue, we +%% walk all the backing queues, starting from the highest priority. 
Once a +%% backing queue having messages (ready or unacknowledged) is found, its +%% 'head_message_timestamp' is returned even if it is null. + +find_head_message_timestamp(BQ, [{_, BQSN} | Rest], Timestamp) -> + MsgCount = BQ:len(BQSN) + BQ:info(messages_unacknowledged_ram, BQSN), + if + MsgCount =/= 0 -> BQ:info(head_message_timestamp, BQSN); + true -> find_head_message_timestamp(BQ, Rest, Timestamp) + end; +find_head_message_timestamp(_, [], Timestamp) -> + Timestamp. + +zip_msgs_and_acks(Pubs, AckTags) -> + lists:zipwith( + fun ({#basic_message{ id = Id }, _Props}, AckTag) -> + {Id, AckTag} + end, Pubs, AckTags). diff --git a/deps/rabbit/src/rabbit_queue_consumers.erl b/deps/rabbit/src/rabbit_queue_consumers.erl new file mode 100644 index 0000000000..4f826f72e8 --- /dev/null +++ b/deps/rabbit/src/rabbit_queue_consumers.erl @@ -0,0 +1,568 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_queue_consumers). + +-export([new/0, max_active_priority/1, inactive/1, all/1, all/3, count/0, + unacknowledged_message_count/0, add/10, remove/3, erase_ch/2, + send_drained/0, deliver/5, record_ack/3, subtract_acks/3, + possibly_unblock/3, + resume_fun/0, notify_sent_fun/1, activate_limit_fun/0, + credit/6, utilisation/1, is_same/3, get_consumer/1, get/3, + consumer_tag/1, get_infos/1]). + +%%---------------------------------------------------------------------------- + +-define(QUEUE, lqueue). + +-define(UNSENT_MESSAGE_LIMIT, 200). + +%% Utilisation average calculations are all in μs. +-define(USE_AVG_HALF_LIFE, 1000000.0). + +-record(state, {consumers, use}). + +-record(consumer, {tag, ack_required, prefetch, args, user}). 
+ +%% These are held in our process dictionary +-record(cr, {ch_pid, + monitor_ref, + acktags, + consumer_count, + %% Queue of {ChPid, #consumer{}} for consumers which have + %% been blocked (rate/prefetch limited) for any reason + blocked_consumers, + %% The limiter itself + limiter, + %% Internal flow control for queue -> writer + unsent_message_count}). + +%%---------------------------------------------------------------------------- + +-type time_micros() :: non_neg_integer(). +-type ratio() :: float(). +-type state() :: #state{consumers ::priority_queue:q(), + use :: {'inactive', + time_micros(), time_micros(), ratio()} | + {'active', time_micros(), ratio()}}. +-type consumer() :: #consumer{tag::rabbit_types:ctag(), ack_required::boolean(), + prefetch::non_neg_integer(), args::rabbit_framing:amqp_table(), + user::rabbit_types:username()}. +-type ch() :: pid(). +-type ack() :: non_neg_integer(). +-type cr_fun() :: fun ((#cr{}) -> #cr{}). +-type fetch_result() :: {rabbit_types:basic_message(), boolean(), ack()}. + +%%---------------------------------------------------------------------------- + +-spec new() -> state(). + +new() -> #state{consumers = priority_queue:new(), + use = {active, + erlang:monotonic_time(micro_seconds), + 1.0}}. + +-spec max_active_priority(state()) -> integer() | 'infinity' | 'empty'. + +max_active_priority(#state{consumers = Consumers}) -> + priority_queue:highest(Consumers). + +-spec inactive(state()) -> boolean(). + +inactive(#state{consumers = Consumers}) -> + priority_queue:is_empty(Consumers). + +-spec all(state()) -> [{ch(), rabbit_types:ctag(), boolean(), + non_neg_integer(), boolean(), atom(), + rabbit_framing:amqp_table(), rabbit_types:username()}]. + +all(State) -> + all(State, none, false). 
+ +all(#state{consumers = Consumers}, SingleActiveConsumer, SingleActiveConsumerOn) -> + lists:foldl(fun (C, Acc) -> consumers(C#cr.blocked_consumers, SingleActiveConsumer, SingleActiveConsumerOn, Acc) end, + consumers(Consumers, SingleActiveConsumer, SingleActiveConsumerOn, []), all_ch_record()). + +consumers(Consumers, SingleActiveConsumer, SingleActiveConsumerOn, Acc) -> + ActiveActivityStatusFun = case SingleActiveConsumerOn of + true -> + fun({ChPid, Consumer}) -> + case SingleActiveConsumer of + {ChPid, Consumer} -> + {true, single_active}; + _ -> + {false, waiting} + end + end; + false -> + fun(_) -> {true, up} end + end, + priority_queue:fold( + fun ({ChPid, Consumer}, _P, Acc1) -> + #consumer{tag = CTag, ack_required = Ack, prefetch = Prefetch, + args = Args, user = Username} = Consumer, + {Active, ActivityStatus} = ActiveActivityStatusFun({ChPid, Consumer}), + [{ChPid, CTag, Ack, Prefetch, Active, ActivityStatus, Args, Username} | Acc1] + end, Acc, Consumers). + +-spec count() -> non_neg_integer(). + +count() -> lists:sum([Count || #cr{consumer_count = Count} <- all_ch_record()]). + +-spec unacknowledged_message_count() -> non_neg_integer(). + +unacknowledged_message_count() -> + lists:sum([?QUEUE:len(C#cr.acktags) || C <- all_ch_record()]). + +-spec add(ch(), rabbit_types:ctag(), boolean(), pid(), boolean(), + non_neg_integer(), rabbit_framing:amqp_table(), boolean(), + rabbit_types:username(), state()) + -> state(). 
+ +add(ChPid, CTag, NoAck, LimiterPid, LimiterActive, Prefetch, Args, IsEmpty, + Username, State = #state{consumers = Consumers, + use = CUInfo}) -> + C = #cr{consumer_count = Count, + limiter = Limiter} = ch_record(ChPid, LimiterPid), + Limiter1 = case LimiterActive of + true -> rabbit_limiter:activate(Limiter); + false -> Limiter + end, + C1 = C#cr{consumer_count = Count + 1, limiter = Limiter1}, + update_ch_record( + case parse_credit_args(Prefetch, Args) of + {0, auto} -> C1; + {_Credit, auto} when NoAck -> C1; + {Credit, Mode} -> credit_and_drain( + C1, CTag, Credit, Mode, IsEmpty) + end), + Consumer = #consumer{tag = CTag, + ack_required = not NoAck, + prefetch = Prefetch, + args = Args, + user = Username}, + State#state{consumers = add_consumer({ChPid, Consumer}, Consumers), + use = update_use(CUInfo, active)}. + +-spec remove(ch(), rabbit_types:ctag(), state()) -> + 'not_found' | state(). + +remove(ChPid, CTag, State = #state{consumers = Consumers}) -> + case lookup_ch(ChPid) of + not_found -> + not_found; + C = #cr{consumer_count = Count, + limiter = Limiter, + blocked_consumers = Blocked} -> + Blocked1 = remove_consumer(ChPid, CTag, Blocked), + Limiter1 = case Count of + 1 -> rabbit_limiter:deactivate(Limiter); + _ -> Limiter + end, + Limiter2 = rabbit_limiter:forget_consumer(Limiter1, CTag), + update_ch_record(C#cr{consumer_count = Count - 1, + limiter = Limiter2, + blocked_consumers = Blocked1}), + State#state{consumers = + remove_consumer(ChPid, CTag, Consumers)} + end. + +-spec erase_ch(ch(), state()) -> + 'not_found' | {[ack()], [rabbit_types:ctag()], + state()}. 
+ +erase_ch(ChPid, State = #state{consumers = Consumers}) -> + case lookup_ch(ChPid) of + not_found -> + not_found; + C = #cr{ch_pid = ChPid, + acktags = ChAckTags, + blocked_consumers = BlockedQ} -> + All = priority_queue:join(Consumers, BlockedQ), + ok = erase_ch_record(C), + Filtered = priority_queue:filter(chan_pred(ChPid, true), All), + {[AckTag || {AckTag, _CTag} <- ?QUEUE:to_list(ChAckTags)], + tags(priority_queue:to_list(Filtered)), + State#state{consumers = remove_consumers(ChPid, Consumers)}} + end. + +-spec send_drained() -> 'ok'. + +send_drained() -> [update_ch_record(send_drained(C)) || C <- all_ch_record()], + ok. + +-spec deliver(fun ((boolean()) -> {fetch_result(), T}), + rabbit_amqqueue:name(), state(), boolean(), + none | {ch(), rabbit_types:ctag()} | {ch(), consumer()}) -> + {'delivered', boolean(), T, state()} | + {'undelivered', boolean(), state()}. + +deliver(FetchFun, QName, State, SingleActiveConsumerIsOn, ActiveConsumer) -> + deliver(FetchFun, QName, false, State, SingleActiveConsumerIsOn, ActiveConsumer). 
+ +deliver(_FetchFun, _QName, false, State, true, none) -> + {undelivered, false, + State#state{use = update_use(State#state.use, inactive)}}; +deliver(FetchFun, QName, false, State = #state{consumers = Consumers}, true, SingleActiveConsumer) -> + {ChPid, Consumer} = SingleActiveConsumer, + %% blocked (rate/prefetch limited) consumers are removed from the queue state, but not the exclusive_consumer field, + %% so we need to do this check to avoid adding the exclusive consumer to the channel record + %% over and over + case is_blocked(SingleActiveConsumer) of + true -> + {undelivered, false, + State#state{use = update_use(State#state.use, inactive)}}; + false -> + case deliver_to_consumer(FetchFun, SingleActiveConsumer, QName) of + {delivered, R} -> + {delivered, false, R, State}; + undelivered -> + {ChPid, Consumer} = SingleActiveConsumer, + Consumers1 = remove_consumer(ChPid, Consumer#consumer.tag, Consumers), + {undelivered, true, + State#state{consumers = Consumers1, use = update_use(State#state.use, inactive)}} + end + end; +deliver(FetchFun, QName, ConsumersChanged, + State = #state{consumers = Consumers}, false, _SingleActiveConsumer) -> + case priority_queue:out_p(Consumers) of + {empty, _} -> + {undelivered, ConsumersChanged, + State#state{use = update_use(State#state.use, inactive)}}; + {{value, QEntry, Priority}, Tail} -> + case deliver_to_consumer(FetchFun, QEntry, QName) of + {delivered, R} -> + {delivered, ConsumersChanged, R, + State#state{consumers = priority_queue:in(QEntry, Priority, + Tail)}}; + undelivered -> + deliver(FetchFun, QName, true, + State#state{consumers = Tail}, false, _SingleActiveConsumer) + end + end. 
+ +deliver_to_consumer(FetchFun, E = {ChPid, Consumer}, QName) -> + C = lookup_ch(ChPid), + case is_ch_blocked(C) of + true -> + block_consumer(C, E), + undelivered; + false -> case rabbit_limiter:can_send(C#cr.limiter, + Consumer#consumer.ack_required, + Consumer#consumer.tag) of + {suspend, Limiter} -> + block_consumer(C#cr{limiter = Limiter}, E), + undelivered; + {continue, Limiter} -> + {delivered, deliver_to_consumer( + FetchFun, Consumer, + C#cr{limiter = Limiter}, QName)} + end + end. + +deliver_to_consumer(FetchFun, + #consumer{tag = CTag, + ack_required = AckRequired}, + C = #cr{ch_pid = ChPid, + acktags = ChAckTags, + unsent_message_count = Count}, + QName) -> + {{Message, IsDelivered, AckTag}, R} = FetchFun(AckRequired), + rabbit_channel:deliver(ChPid, CTag, AckRequired, + {QName, self(), AckTag, IsDelivered, Message}), + ChAckTags1 = case AckRequired of + true -> ?QUEUE:in({AckTag, CTag}, ChAckTags); + false -> ChAckTags + end, + update_ch_record(C#cr{acktags = ChAckTags1, + unsent_message_count = Count + 1}), + R. + +is_blocked(Consumer = {ChPid, _C}) -> + #cr{blocked_consumers = BlockedConsumers} = lookup_ch(ChPid), + priority_queue:member(Consumer, BlockedConsumers). + +-spec record_ack(ch(), pid(), ack()) -> 'ok'. + +record_ack(ChPid, LimiterPid, AckTag) -> + C = #cr{acktags = ChAckTags} = ch_record(ChPid, LimiterPid), + update_ch_record(C#cr{acktags = ?QUEUE:in({AckTag, none}, ChAckTags)}), + ok. + +-spec subtract_acks(ch(), [ack()], state()) -> + 'not_found' | 'unchanged' | {'unblocked', state()}. 
+ +subtract_acks(ChPid, AckTags, State) -> + case lookup_ch(ChPid) of + not_found -> + not_found; + C = #cr{acktags = ChAckTags, limiter = Lim} -> + {CTagCounts, AckTags2} = subtract_acks( + AckTags, [], maps:new(), ChAckTags), + {Unblocked, Lim2} = + maps:fold( + fun (CTag, Count, {UnblockedN, LimN}) -> + {Unblocked1, LimN1} = + rabbit_limiter:ack_from_queue(LimN, CTag, Count), + {UnblockedN orelse Unblocked1, LimN1} + end, {false, Lim}, CTagCounts), + C2 = C#cr{acktags = AckTags2, limiter = Lim2}, + case Unblocked of + true -> unblock(C2, State); + false -> update_ch_record(C2), + unchanged + end + end. + +subtract_acks([], [], CTagCounts, AckQ) -> + {CTagCounts, AckQ}; +subtract_acks([], Prefix, CTagCounts, AckQ) -> + {CTagCounts, ?QUEUE:join(?QUEUE:from_list(lists:reverse(Prefix)), AckQ)}; +subtract_acks([T | TL] = AckTags, Prefix, CTagCounts, AckQ) -> + case ?QUEUE:out(AckQ) of + {{value, {T, CTag}}, QTail} -> + subtract_acks(TL, Prefix, + maps:update_with(CTag, fun (Old) -> Old + 1 end, 1, CTagCounts), QTail); + {{value, V}, QTail} -> + subtract_acks(AckTags, [V | Prefix], CTagCounts, QTail); + {empty, _} -> + subtract_acks([], Prefix, CTagCounts, AckQ) + end. + +-spec possibly_unblock(cr_fun(), ch(), state()) -> + 'unchanged' | {'unblocked', state()}. + +possibly_unblock(Update, ChPid, State) -> + case lookup_ch(ChPid) of + not_found -> unchanged; + C -> C1 = Update(C), + case is_ch_blocked(C) andalso not is_ch_blocked(C1) of + false -> update_ch_record(C1), + unchanged; + true -> unblock(C1, State) + end + end. 
%% Move the channel's blocked consumers that the limiter no longer
%% blocks back onto the active consumer queue; consumers whose tag is
%% still limiter-blocked stay parked. Returns 'unchanged' when nothing
%% could be released.
unblock(C = #cr{blocked_consumers = BlockedQ, limiter = Limiter},
        State = #state{consumers = Consumers, use = Use}) ->
    case lists:partition(
           fun({_P, {_ChPid, #consumer{tag = CTag}}}) ->
                   rabbit_limiter:is_consumer_blocked(Limiter, CTag)
           end, priority_queue:to_list(BlockedQ)) of
        {_, []} ->
            update_ch_record(C),
            unchanged;
        {Blocked, Unblocked} ->
            BlockedQ1  = priority_queue:from_list(Blocked),
            UnblockedQ = priority_queue:from_list(Unblocked),
            update_ch_record(C#cr{blocked_consumers = BlockedQ1}),
            {unblocked,
             State#state{consumers = priority_queue:join(Consumers, UnblockedQ),
                         use       = update_use(Use, active)}}
    end.

-spec resume_fun() -> cr_fun().

%% Channel-record update fun (for possibly_unblock/3): resume a
%% limiter-suspended channel.
resume_fun() ->
    fun (C = #cr{limiter = Limiter}) ->
            C#cr{limiter = rabbit_limiter:resume(Limiter)}
    end.

-spec notify_sent_fun(non_neg_integer()) -> cr_fun().

%% Channel-record update fun: the channel confirmed sending Credit
%% messages, so lower the unsent message count by that amount.
notify_sent_fun(Credit) ->
    fun (C = #cr{unsent_message_count = Count}) ->
            C#cr{unsent_message_count = Count - Credit}
    end.

-spec activate_limit_fun() -> cr_fun().

%% Channel-record update fun: activate the channel's limiter.
activate_limit_fun() ->
    fun (C = #cr{limiter = Limiter}) ->
            C#cr{limiter = rabbit_limiter:activate(Limiter)}
    end.

-spec credit(boolean(), integer(), boolean(), ch(), rabbit_types:ctag(),
             state()) -> 'unchanged' | {'unblocked', state()}.

%% Grant Credit units of consumer credit to the consumer identified by
%% CTag. Consumers are unblocked only when the channel itself is not
%% blocked and this grant moved the consumer from blocked to unblocked.
credit(IsEmpty, Credit, Drain, ChPid, CTag, State) ->
    case lookup_ch(ChPid) of
        not_found ->
            unchanged;
        #cr{limiter = Limiter} = C ->
            C1 = #cr{limiter = Limiter1} =
                credit_and_drain(C, CTag, Credit, drain_mode(Drain), IsEmpty),
            case is_ch_blocked(C1) orelse
                (not rabbit_limiter:is_consumer_blocked(Limiter, CTag)) orelse
                rabbit_limiter:is_consumer_blocked(Limiter1, CTag) of
                true  -> update_ch_record(C1),
                         unchanged;
                false -> unblock(C1, State)
            end
    end.

%% Map the boolean 'drain' flag onto the limiter's credit mode.
drain_mode(true)  -> drain;
drain_mode(false) -> manual.

-spec utilisation(state()) -> ratio().
%% Consumer utilisation: the moving-average fraction of recent time the
%% queue had active consumers. 'use' is either {active, Since, Avg} or
%% {inactive, Since, ActiveDuration, Avg}; the currently-open period is
%% folded into the average before reporting.
%% Note: 'microsecond' is the modern name for the deprecated
%% 'micro_seconds' symbolic time unit; both denote the same unit.
utilisation(#state{use = {active, Since, Avg}}) ->
    use_avg(erlang:monotonic_time(microsecond) - Since, 0, Avg);
utilisation(#state{use = {inactive, Since, Active, Avg}}) ->
    use_avg(Active, erlang:monotonic_time(microsecond) - Since, Avg).

%% True if the queue entry belongs to the given channel pid AND carries
%% the given consumer tag.
is_same(ChPid, ConsumerTag, {ChPid, #consumer{tag = ConsumerTag}}) ->
    true;
is_same(_ChPid, _ConsumerTag, _Consumer) ->
    false.

%% Peek at the highest-priority consumer, or 'undefined' when there are
%% no consumers at all.
get_consumer(#state{consumers = Consumers}) ->
    case priority_queue:out_p(Consumers) of
        {{value, Consumer, _Priority}, _Tail} -> Consumer;
        {empty, _}                            -> undefined
    end.

-spec get(ch(), rabbit_types:ctag(), state()) -> undefined | consumer().

%% Find the consumer registered under the given channel pid and tag,
%% or 'undefined' if no such consumer exists.
get(ChPid, ConsumerTag, #state{consumers = Consumers}) ->
    Consumers1 = priority_queue:filter(fun ({CP, #consumer{tag = CT}}) ->
                                               (CP == ChPid) and
                                                   (CT == ConsumerTag)
                                       end, Consumers),
    case priority_queue:out_p(Consumers1) of
        {empty, _}                            -> undefined;
        {{value, Consumer, _Priority}, _Tail} -> Consumer
    end.

-spec get_infos(consumer()) -> term().

%% Flatten a #consumer{} into the {Tag, AckRequired, Prefetch, Args}
%% tuple used by the info/stats machinery.
get_infos(Consumer) ->
    {Consumer#consumer.tag, Consumer#consumer.ack_required,
     Consumer#consumer.prefetch, Consumer#consumer.args}.

-spec consumer_tag(consumer()) -> rabbit_types:ctag().

%% Accessor for a consumer's tag.
consumer_tag(#consumer{tag = CTag}) ->
    CTag.



%%----------------------------------------------------------------------------

%% Extract {Credit, Mode} from the consumer's "x-credit" table argument,
%% falling back to {Default, auto} when it is absent or malformed.
parse_credit_args(Default, Args) ->
    case rabbit_misc:table_lookup(Args, <<"x-credit">>) of
        {table, T} -> case {rabbit_misc:table_lookup(T, <<"credit">>),
                            rabbit_misc:table_lookup(T, <<"drain">>)} of
                          {{long, C}, {bool, D}} -> {C, drain_mode(D)};
                          _                      -> {Default, auto}
                      end;
        undefined  -> {Default, auto}
    end.

%% Fetch the per-channel record from the process dictionary, or
%% 'not_found' if the channel is unknown.
lookup_ch(ChPid) ->
    case get({ch, ChPid}) of
        undefined -> not_found;
        C         -> C
    end.
%% Fetch the channel record for ChPid from the process dictionary,
%% creating it (and starting a monitor on the channel) on first use.
ch_record(ChPid, LimiterPid) ->
    Key = {ch, ChPid},
    case get(Key) of
        undefined -> MonitorRef = erlang:monitor(process, ChPid),
                     Limiter = rabbit_limiter:client(LimiterPid),
                     C = #cr{ch_pid               = ChPid,
                             monitor_ref          = MonitorRef,
                             acktags              = ?QUEUE:new(),
                             consumer_count       = 0,
                             blocked_consumers    = priority_queue:new(),
                             limiter              = Limiter,
                             unsent_message_count = 0},
                     put(Key, C),
                     C;
        C = #cr{} -> C
    end.

%% Store the channel record back, or erase it entirely once it carries
%% no state worth keeping (no pending acks, no consumers, no unsent
%% messages). Returns the record either way.
update_ch_record(C = #cr{consumer_count       = ConsumerCount,
                         acktags              = ChAckTags,
                         unsent_message_count = UnsentMessageCount}) ->
    case {?QUEUE:is_empty(ChAckTags), ConsumerCount, UnsentMessageCount} of
        {true, 0, 0} -> ok = erase_ch_record(C);
        _            -> ok = store_ch_record(C)
    end,
    C.

store_ch_record(C = #cr{ch_pid = ChPid}) ->
    put({ch, ChPid}, C),
    ok.

%% Drop the channel record and stop monitoring the channel process.
erase_ch_record(#cr{ch_pid = ChPid, monitor_ref = MonitorRef}) ->
    erlang:demonitor(MonitorRef),
    erase({ch, ChPid}),
    ok.

%% All channel records currently held in the process dictionary.
all_ch_record() -> [C || {{ch, _}, C} <- get()].

%% Park a consumer entry on the channel's blocked-consumer queue.
block_consumer(C = #cr{blocked_consumers = Blocked}, QEntry) ->
    update_ch_record(C#cr{blocked_consumers = add_consumer(QEntry, Blocked)}).

%% A channel is blocked when it has too many unsent messages outstanding
%% or its limiter has suspended it.
is_ch_blocked(#cr{unsent_message_count = Count, limiter = Limiter}) ->
    Count >= ?UNSENT_MESSAGE_LIMIT orelse rabbit_limiter:is_suspended(Limiter).

%% Tell the channel about any consumers whose credit has drained.
send_drained(C = #cr{ch_pid = ChPid, limiter = Limiter}) ->
    case rabbit_limiter:drained(Limiter) of
        {[], Limiter}          -> C;
        {CTagCredit, Limiter2} -> rabbit_channel:send_drained(
                                    ChPid, CTagCredit),
                                  C#cr{limiter = Limiter2}
    end.

%% Apply a credit grant to the limiter; if the limiter reports the
%% consumer as drained (first tuple element 'true'), notify the channel
%% immediately.
credit_and_drain(C = #cr{ch_pid = ChPid, limiter = Limiter},
                 CTag, Credit, Mode, IsEmpty) ->
    case rabbit_limiter:credit(Limiter, CTag, Credit, Mode, IsEmpty) of
        {true,  Limiter1} -> rabbit_channel:send_drained(ChPid,
                                                         [{CTag, Credit}]),
                             C#cr{limiter = Limiter1};
        {false, Limiter1} -> C#cr{limiter = Limiter1}
    end.

%% Consumer tags of all entries in a priority-queue listing.
tags(CList) -> [CTag || {_P, {_ChPid, #consumer{tag = CTag}}} <- CList].
%% Insert a consumer entry into a priority queue, using its x-priority
%% argument (default 0) as the priority.
add_consumer({ChPid, Consumer = #consumer{args = Args}}, Queue) ->
    Priority = case rabbit_misc:table_lookup(Args, <<"x-priority">>) of
                   {_, P} -> P;
                   _      -> 0
               end,
    priority_queue:in({ChPid, Consumer}, Priority, Queue).

%% Remove the consumer matching both channel pid and consumer tag.
remove_consumer(ChPid, CTag, Queue) ->
    priority_queue:filter(fun ({CP, #consumer{tag = CT}}) ->
                                  (CP /= ChPid) or (CT /= CTag)
                          end, Queue).

%% Remove every consumer belonging to the given channel.
remove_consumers(ChPid, Queue) ->
    priority_queue:filter(chan_pred(ChPid, false), Queue).

%% Predicate returning Want for entries of ChPid, 'not Want' otherwise.
chan_pred(ChPid, Want) ->
    fun ({CP, _Consumer}) when CP =:= ChPid -> Want;
        (_)                                 -> not Want
    end.

%% Maintain the active/inactive usage bookkeeping consumed by
%% utilisation/1; transitions fold the elapsed period into the running
%% moving average.
%% Note: 'microsecond' is the modern name for the deprecated
%% 'micro_seconds' symbolic time unit; both denote the same unit.
update_use({inactive, _, _, _} = CUInfo, inactive) ->
    CUInfo;
update_use({active, _, _} = CUInfo, active) ->
    CUInfo;
update_use({active, Since, Avg}, inactive) ->
    Now = erlang:monotonic_time(microsecond),
    {inactive, Now, Now - Since, Avg};
update_use({inactive, Since, Active, Avg}, active) ->
    Now = erlang:monotonic_time(microsecond),
    {active, Now, use_avg(Active, Now - Since, Avg)}.

%% Exponentially-weighted moving average of the active fraction over the
%% period Active + Inactive.
use_avg(0, 0, Avg) ->
    Avg;
use_avg(Active, Inactive, Avg) ->
    Time = Inactive + Active,
    rabbit_misc:moving_average(Time, ?USE_AVG_HALF_LIFE, Active / Time, Avg).
diff --git a/deps/rabbit/src/rabbit_queue_decorator.erl b/deps/rabbit/src/rabbit_queue_decorator.erl
new file mode 100644
index 0000000000..cbb50456c1
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_decorator.erl
@@ -0,0 +1,72 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_queue_decorator).

-include_lib("rabbit_common/include/rabbit.hrl").
-include("amqqueue.hrl").

-export([select/1, set/1, register/2, unregister/1]).

-behaviour(rabbit_registry_class).

-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).
%%----------------------------------------------------------------------------

-callback startup(amqqueue:amqqueue()) -> 'ok'.

-callback shutdown(amqqueue:amqqueue()) -> 'ok'.

-callback policy_changed(amqqueue:amqqueue(), amqqueue:amqqueue()) ->
    'ok'.

-callback active_for(amqqueue:amqqueue()) -> boolean().

%% called with Queue, MaxActivePriority, IsEmpty
-callback consumer_state_changed(
            amqqueue:amqqueue(), integer(), boolean()) -> 'ok'.

%%----------------------------------------------------------------------------

%% rabbit_registry_class callbacks: nothing to do on (un)registration.
added_to_rabbit_registry(_Type, _ModuleName) -> ok.
removed_from_rabbit_registry(_Type) -> ok.

%% Keep only those decorator modules that are actually loadable.
select(Modules) ->
    lists:filter(fun (Module) -> code:which(Module) =/= non_existing end,
                 Modules).

%% Recompute the set of decorators applicable to Q and store it on the
%% queue record.
set(Q) when ?is_amqqueue(Q) ->
    Applicable = lists:filter(fun (D) -> D:active_for(Q) end, list()),
    amqqueue:set_decorators(Q, Applicable).

%% All registered queue decorator modules.
list() ->
    lists:map(fun ({_Name, Module}) -> Module end,
              rabbit_registry:lookup_all(queue_decorator)).

%% Register a decorator and re-evaluate it against every queue.
register(TypeName, ModuleName) ->
    rabbit_registry:register(queue_decorator, TypeName, ModuleName),
    lists:foreach(fun maybe_recover/1, rabbit_amqqueue:list()),
    ok.

%% Unregister a decorator and re-evaluate every queue without it.
unregister(TypeName) ->
    rabbit_registry:unregister(queue_decorator, TypeName),
    lists:foreach(fun maybe_recover/1, rabbit_amqqueue:list()),
    ok.

%% If the set of applicable decorators for Q0 has changed, run startup
%% on the newly applicable ones and persist the updated decorator list.
maybe_recover(Q0) when ?is_amqqueue(Q0) ->
    QName = amqqueue:get_name(Q0),
    OldDecorators = amqqueue:get_decorators(Q0),
    NewDecorators = amqqueue:get_decorators(set(Q0)),
    Old = lists:sort(select(OldDecorators)),
    New = lists:sort(select(NewDecorators)),
    case New =:= Old of
        true ->
            ok;
        false ->
            %% TODO LRB JSP 160169569 should startup be passed the
            %% updated queue (the result of set/1) here?
            lists:foreach(fun (M) -> M:startup(Q0) end, New -- Old),
            rabbit_amqqueue:update_decorators(QName)
    end.
diff --git a/deps/rabbit/src/rabbit_queue_index.erl b/deps/rabbit/src/rabbit_queue_index.erl
new file mode 100644
index 0000000000..faab4380b5
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_index.erl
@@ -0,0 +1,1521 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0.
If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_queue_index).

-export([erase/1, init/3, reset_state/1, recover/6,
         terminate/3, delete_and_terminate/1,
         pre_publish/7, flush_pre_publish_cache/2,
         publish/6, deliver/2, ack/2, sync/1, needs_sync/1, flush/1,
         read/3, next_segment_boundary/1, bounds/1, start/2, stop/1]).

-export([add_queue_ttl/0, avoid_zeroes/0, store_msg_size/0, store_msg/0]).
-export([scan_queue_segments/3, scan_queue_segments/4]).

%% Migrates from global to per-vhost message stores
-export([move_to_per_vhost_stores/1,
         update_recovery_term/2,
         read_global_recovery_terms/1,
         cleanup_global_recovery_terms/0]).

-define(CLEAN_FILENAME, "clean.dot").

%%----------------------------------------------------------------------------

%% The queue index is responsible for recording the order of messages
%% within a queue on disk. As such it contains records of messages
%% being published, delivered and acknowledged. The publish record
%% includes the sequence ID, message ID and a small quantity of
%% metadata about the message; the delivery and acknowledgement
%% records just contain the sequence ID. A publish record may also
%% contain the complete message if provided to publish/6; this allows
%% the message store to be avoided altogether for small messages. In
%% either case the publish record is stored in memory in the same
%% serialised format it will take on disk.
%%
%% Because of the fact that the queue can decide at any point to send
%% a queue entry to disk, you can not rely on publishes appearing in
%% order. The only thing you can rely on is a message being published,
%% then delivered, then ack'd.
%%
%% In order to be able to clean up ack'd messages, we write to segment
%% files.
These files have a fixed number of entries: ?SEGMENT_ENTRY_COUNT
%% publishes, delivers and acknowledgements. They are numbered, and so
%% it is known that the 0th segment contains messages 0 ->
%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages
%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. As
%% such, in the segment files, we only refer to message sequence ids
%% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a
%% fixed size.
%%
%% However, transient messages which are not sent to disk at any point
%% will cause gaps to appear in segment files. Therefore, we delete a
%% segment file whenever the number of publishes == number of acks
%% (note that although it is not fully enforced, it is assumed that a
%% message will never be ack'd before it is delivered, thus this test
%% also implies == number of delivers). In practice, this does not
%% cause disk churn in the pathological case because of the journal
%% and caching (see below).
%%
%% Because of the fact that publishes, delivers and acks can occur all
%% over, we wish to avoid lots of seeking. Therefore we have a fixed
%% sized journal to which all actions are appended. When the number of
%% entries in this journal reaches max_journal_entries, the journal
%% entries are scattered out to their relevant files, and the journal
%% is truncated to zero size. Note that entries in the journal must
%% carry the full sequence id, thus the format of entries in the
%% journal is different to that in the segments.
%%
%% The journal is also kept fully in memory, pre-segmented: the state
%% contains a mapping from segment numbers to state-per-segment (this
%% state is held for all segments which have been "seen": thus a
%% segment which has been read but has no pending entries in the
%% journal is still held in this mapping. Also note that a map is
%% used for this mapping, not an array because with an array, you will
%% always have entries from 0).
Actions are stored directly in this
%% state. Thus at the point of flushing the journal, firstly no
%% reading from disk is necessary, but secondly if the known number of
%% acks and publishes in a segment are equal, given the known state of
%% the segment file combined with the journal, no writing needs to be
%% done to the segment file either (in fact it is deleted if it exists
%% at all). This is safe given that the set of acks is a subset of the
%% set of publishes. When it is necessary to sync messages, it is
%% sufficient to fsync on the journal: when entries are distributed
%% from the journal to segment files, those segments appended to are
%% fsync'd prior to the journal being truncated.
%%
%% This module is also responsible for scanning the queue index files
%% and seeding the message store on start up.
%%
%% Note that in general, the representation of a message's state as
%% the tuple: {('no_pub'|{IsPersistent, Bin, MsgBin}),
%% ('del'|'no_del'), ('ack'|'no_ack')} is richer than strictly
%% necessary for most operations. However, for startup, and to ensure
%% the safe and correct combination of journal entries with entries
%% read from the segment on disk, this richer representation vastly
%% simplifies and clarifies the code.
%%
%% For notes on Clean Shutdown and startup, see documentation in
%% rabbit_variable_queue.
%%
%%----------------------------------------------------------------------------

%% ---- Journal details ----

-define(JOURNAL_FILENAME, "journal.jif").
-define(QUEUE_NAME_STUB_FILE, ".queue_name").

%% Every journal entry starts with a 2-bit prefix identifying the
%% record kind; the remaining bits carry the full sequence id.
-define(PUB_PERSIST_JPREFIX, 2#00).
-define(PUB_TRANS_JPREFIX,   2#01).
-define(DEL_JPREFIX,         2#10).
-define(ACK_JPREFIX,         2#11).
-define(JPREFIX_BITS, 2).
-define(SEQ_BYTES, 8).
-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)).

%% ---- Segment details ----

-define(SEGMENT_EXTENSION, ".idx").

%% TODO: The segment size would be configurable, but deriving all the
%% other values is quite hairy and quite possibly noticeably less
%% efficient, depending on how clever the compiler is when it comes to
%% binary generation/matching with constant vs variable lengths.

-define(REL_SEQ_BITS, 14).
%% calculated as trunc(math:pow(2, ?REL_SEQ_BITS))
-define(SEGMENT_ENTRY_COUNT, 16384).

%% seq only is binary 01 followed by 14 bits of rel seq id
%% (range: 0 - 16383)
-define(REL_SEQ_ONLY_PREFIX, 01).
-define(REL_SEQ_ONLY_PREFIX_BITS, 2).
-define(REL_SEQ_ONLY_RECORD_BYTES, 2).

%% publish record is binary 1 followed by a bit for is_persistent,
%% then 14 bits of rel seq id, 64 bits for message expiry, 32 bits of
%% size and then 128 bits of md5sum msg id.
-define(PUB_PREFIX, 1).
-define(PUB_PREFIX_BITS, 1).

-define(EXPIRY_BYTES, 8).
-define(EXPIRY_BITS, (?EXPIRY_BYTES * 8)).
-define(NO_EXPIRY, 0).

-define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes
-define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)).

%% This is the size of the message body content, for stats
-define(SIZE_BYTES, 4).
-define(SIZE_BITS, (?SIZE_BYTES * 8)).

%% This is the size of the message record embedded in the queue
%% index. If 0, the message can be found in the message store.
-define(EMBEDDED_SIZE_BYTES, 4).
-define(EMBEDDED_SIZE_BITS, (?EMBEDDED_SIZE_BYTES * 8)).

%% 16 bytes for md5sum + 8 for expiry + 4 for size
-define(PUB_RECORD_BODY_BYTES, (?MSG_ID_BYTES + ?EXPIRY_BYTES + ?SIZE_BYTES)).
%% + 4 for the embedded-message size field
-define(PUB_RECORD_SIZE_BYTES, (?PUB_RECORD_BODY_BYTES + ?EMBEDDED_SIZE_BYTES)).

%% + 2 bytes for the prefix and rel seq id
-define(PUB_RECORD_PREFIX_BYTES, 2).

%% ---- misc ----

-define(PUB, {_, _, _}). %% {IsPersistent, Bin, MsgBin}

-define(READ_MODE, [binary, raw, read]).
-define(WRITE_MODE, [write | ?READ_MODE]).

%%----------------------------------------------------------------------------

%% Mutable state of one queue index instance.
-record(qistate, {
          %% queue directory where segment and journal files are stored
          dir,
          %% map of #segment records
          segments,
          %% journal file handle obtained from/used by file_handle_cache
          journal_handle,
          %% how many not yet flushed entries are there
          dirty_count,
          %% this many not yet flushed journal entries will force a flush
          max_journal_entries,
          %% callback function invoked when a message is "handled"
          %% by the index and potentially can be confirmed to the publisher
          on_sync,
          on_sync_msg,
          %% set of IDs of unconfirmed [to publishers] messages
          unconfirmed,
          unconfirmed_msg,
          %% optimisation
          pre_publish_cache,
          %% optimisation
          delivered_cache,
          %% queue name resource record
          queue_name}).

%% Per-segment bookkeeping; one segment corresponds to one .idx file.
-record(segment, {
          %% segment ID (an integer)
          num,
          %% segment file path (see also ?SEGMENT_EXTENSION)
          path,
          %% index operation log entries in this segment
          journal_entries,
          entries_to_segment,
          %% counter of unacknowledged messages
          unacked
         }).

-include("rabbit.hrl").

%%----------------------------------------------------------------------------

-rabbit_upgrade({add_queue_ttl,  local, []}).
-rabbit_upgrade({avoid_zeroes,   local, [add_queue_ttl]}).
-rabbit_upgrade({store_msg_size, local, [avoid_zeroes]}).
-rabbit_upgrade({store_msg,      local, [store_msg_size]}).

-type hdl() :: ('undefined' | any()).
-type segment() :: ('undefined' |
                    #segment { num                :: non_neg_integer(),
                               path               :: file:filename(),
                               journal_entries    :: array:array(),
                               entries_to_segment :: array:array(),
                               unacked            :: non_neg_integer()
                             }).
-type seq_id() :: integer().
-type seg_map() :: {map(), [segment()]}.
-type on_sync_fun() :: fun ((gb_sets:set()) -> ok).

-type qistate() :: #qistate { dir                 :: file:filename(),
                              segments            :: 'undefined' | seg_map(),
                              journal_handle      :: hdl(),
                              dirty_count         :: integer(),
                              max_journal_entries :: non_neg_integer(),
                              on_sync             :: on_sync_fun(),
                              on_sync_msg         :: on_sync_fun(),
                              unconfirmed         :: gb_sets:set(),
                              unconfirmed_msg     :: gb_sets:set(),
                              pre_publish_cache   :: list(),
                              delivered_cache     :: list()
                            }.
-type contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean()).
-type walker(A) :: fun ((A) -> 'finished' |
                               {rabbit_types:msg_id(), non_neg_integer(), A}).
-type shutdown_terms() :: [term()] | 'non_clean_shutdown'.

%%----------------------------------------------------------------------------
%% public API
%%----------------------------------------------------------------------------

-spec erase(rabbit_amqqueue:name()) -> 'ok'.

%% Delete the on-disk index directory of the given queue, if present.
erase(#resource{ virtual_host = VHost } = Name) ->
    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
    #qistate { dir = Dir } = blank_state(VHostDir, Name),
    erase_index_dir(Dir).

%% used during variable queue purge when there are no pending acks

-spec reset_state(qistate()) -> qistate().

%% Close the journal (if open), wipe the index directory, and return a
%% blank state that keeps the original callbacks and queue name.
reset_state(#qistate{ queue_name     = Name,
                      dir            = Dir,
                      on_sync        = OnSyncFun,
                      on_sync_msg    = OnSyncMsgFun,
                      journal_handle = JournalHdl }) ->
    ok = case JournalHdl of
             undefined -> ok;
             _         -> file_handle_cache:close(JournalHdl)
         end,
    ok = erase_index_dir(Dir),
    blank_state_name_dir_funs(Name, Dir, OnSyncFun, OnSyncMsgFun).

-spec init(rabbit_amqqueue:name(),
           on_sync_fun(), on_sync_fun()) -> qistate().

%% Initialise a brand-new index; the queue's directory must not exist.
init(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) ->
    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
    State = #qistate { dir = Dir } = blank_state(VHostDir, Name),
    false = rabbit_file:is_file(Dir), %% is_file == is file or dir
    State#qistate{on_sync     = OnSyncFun,
                  on_sync_msg = OnSyncMsgFun}.
-spec recover(rabbit_amqqueue:name(), shutdown_terms(), boolean(),
              contains_predicate(),
              on_sync_fun(), on_sync_fun()) ->
                  {'undefined' | non_neg_integer(),
                   'undefined' | non_neg_integer(), qistate()}.

%% Recover the index at startup. Takes the clean path (init_clean/2,
%% counts come back 'undefined') only when both this queue shut down
%% cleanly and the message store recovered; otherwise rebuilds counts
%% from disk via init_dirty/3.
recover(#resource{ virtual_host = VHost } = Name, Terms, MsgStoreRecovered,
        ContainsCheckFun, OnSyncFun, OnSyncMsgFun) ->
    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
    State = blank_state(VHostDir, Name),
    State1 = State #qistate{on_sync     = OnSyncFun,
                            on_sync_msg = OnSyncMsgFun},
    CleanShutdown = Terms /= non_clean_shutdown,
    case CleanShutdown andalso MsgStoreRecovered of
        true  -> RecoveredCounts = proplists:get_value(segments, Terms, []),
                 init_clean(RecoveredCounts, State1);
        false -> init_dirty(CleanShutdown, ContainsCheckFun, State1)
    end.

-spec terminate(rabbit_types:vhost(), [any()], qistate()) -> qistate().

%% Shut down cleanly, persisting recovery terms (per-segment unacked
%% counts plus the caller-supplied Terms) for the next startup.
terminate(VHost, Terms, State = #qistate { dir = Dir }) ->
    {SegmentCounts, State1} = terminate(State),
    rabbit_recovery_terms:store(VHost, filename:basename(Dir),
                                [{segments, SegmentCounts} | Terms]),
    State1.

-spec delete_and_terminate(qistate()) -> qistate().

%% Shut down and delete the queue's index directory entirely.
delete_and_terminate(State) ->
    {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State),
    ok = rabbit_file:recursive_delete([Dir]),
    State1.
%% Batched variant of publish/6: accumulate the serialised journal entry
%% in pre_publish_cache (and the seq id in delivered_cache when the
%% message is already delivered), flushing once the cache is full.
pre_publish(MsgOrId, SeqId, MsgProps, IsPersistent, IsDelivered, JournalSizeHint,
            State = #qistate{pre_publish_cache = PPC,
                             delivered_cache   = DC}) ->
    State1 = maybe_needs_confirming(MsgProps, MsgOrId, State),

    {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps),

    %% Serialised journal entry: 2-bit prefix (persistent/transient),
    %% full seq id, record body, then the embedded message (if any)
    %% preceded by its size.
    PPC1 =
        [[<<(case IsPersistent of
                 true  -> ?PUB_PERSIST_JPREFIX;
                 false -> ?PUB_TRANS_JPREFIX
             end):?JPREFIX_BITS,
            SeqId:?SEQ_BITS, Bin/binary,
            (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin] | PPC],

    DC1 =
        case IsDelivered of
            true ->
                [SeqId | DC];
            false ->
                DC
        end,

    State2 = add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1),
    maybe_flush_pre_publish_cache(
      JournalSizeHint,
      State2#qistate{pre_publish_cache = PPC1,
                     delivered_cache   = DC1}).

%% pre_publish_cache is the entry with most elements when compared to
%% delivered_cache so we only check the former in the guard.
maybe_flush_pre_publish_cache(JournalSizeHint,
                              #qistate{pre_publish_cache = PPC} = State)
  when length(PPC) >= ?SEGMENT_ENTRY_COUNT ->
    flush_pre_publish_cache(JournalSizeHint, State);
maybe_flush_pre_publish_cache(_JournalSizeHint, State) ->
    State.

%% Write out both caches, then flush the journal if it has grown past
%% the hinted size.
flush_pre_publish_cache(JournalSizeHint, State) ->
    State1 = flush_pre_publish_cache(State),
    State2 = flush_delivered_cache(State1),
    maybe_flush_journal(JournalSizeHint, State2).

%% Append all cached publish records to the journal file in one write.
flush_pre_publish_cache(#qistate{pre_publish_cache = []} = State) ->
    State;
flush_pre_publish_cache(State = #qistate{pre_publish_cache = PPC}) ->
    {JournalHdl, State1} = get_journal_handle(State),
    file_handle_cache_stats:update(queue_index_journal_write),
    ok = file_handle_cache:append(JournalHdl, lists:reverse(PPC)),
    State1#qistate{pre_publish_cache = []}.

%% Record 'deliver' entries for all cached already-delivered seq ids.
flush_delivered_cache(#qistate{delivered_cache = []} = State) ->
    State;
flush_delivered_cache(State = #qistate{delivered_cache = DC}) ->
    State1 = deliver(lists:reverse(DC), State),
    State1#qistate{delivered_cache = []}.
-spec publish(rabbit_types:msg_id(), seq_id(),
              rabbit_types:message_properties(), boolean(),
              non_neg_integer(), qistate()) -> qistate().

%% Append a publish record for SeqId to the journal, both on disk and in
%% the in-memory journal, flushing the journal if it exceeds the hint.
publish(MsgOrId, SeqId, MsgProps, IsPersistent, JournalSizeHint, State) ->
    {JournalHdl, State1} =
        get_journal_handle(
          maybe_needs_confirming(MsgProps, MsgOrId, State)),
    file_handle_cache_stats:update(queue_index_journal_write),
    {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps),
    ok = file_handle_cache:append(
           JournalHdl, [<<(case IsPersistent of
                               true  -> ?PUB_PERSIST_JPREFIX;
                               false -> ?PUB_TRANS_JPREFIX
                           end):?JPREFIX_BITS,
                          SeqId:?SEQ_BITS, Bin/binary,
                          (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin]),
    maybe_flush_journal(
      JournalSizeHint,
      add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1)).

%% If the message needs confirming, remember its id: in 'unconfirmed'
%% when only the id was passed (message lives in the message store), or
%% in 'unconfirmed_msg' when the full message is embedded in the index.
maybe_needs_confirming(MsgProps, MsgOrId,
                       State = #qistate{unconfirmed     = UC,
                                        unconfirmed_msg = UCM}) ->
    MsgId = case MsgOrId of
                #basic_message{id = Id}  -> Id;
                Id when is_binary(Id)    -> Id
            end,
    ?MSG_ID_BYTES = size(MsgId),
    case {MsgProps#message_properties.needs_confirming, MsgOrId} of
        {true,  MsgId} -> UC1  = gb_sets:add_element(MsgId, UC),
                          State#qistate{unconfirmed     = UC1};
        {true,  _}     -> UCM1 = gb_sets:add_element(MsgId, UCM),
                          State#qistate{unconfirmed_msg = UCM1};
        {false, _}     -> State
    end.

-spec deliver([seq_id()], qistate()) -> qistate().

%% Record delivery of the given seq ids in the journal.
deliver(SeqIds, State) ->
    deliver_or_ack(del, SeqIds, State).

-spec ack([seq_id()], qistate()) -> qistate().

%% Record acknowledgement of the given seq ids in the journal.
ack(SeqIds, State) ->
    deliver_or_ack(ack, SeqIds, State).

%% This is called when there are outstanding confirms or when the
%% queue is idle and the journal needs syncing (see needs_sync/1).

-spec sync(qistate()) -> qistate().

%% fsync the journal (if open) and fire the on_sync callbacks for any
%% now-confirmable messages.
sync(State = #qistate { journal_handle = undefined }) ->
    State;
sync(State = #qistate { journal_handle = JournalHdl }) ->
    ok = file_handle_cache:sync(JournalHdl),
    notify_sync(State).

-spec needs_sync(qistate()) -> 'confirms' | 'other' | 'false'.
%% 'confirms' when unconfirmed messages are waiting on a sync, 'other'
%% when only the journal itself needs syncing, 'false' otherwise.
needs_sync(#qistate{journal_handle = undefined}) ->
    false;
needs_sync(#qistate{journal_handle  = JournalHdl,
                    unconfirmed     = UC,
                    unconfirmed_msg = UCM}) ->
    case gb_sets:is_empty(UC) andalso gb_sets:is_empty(UCM) of
        true  -> case file_handle_cache:needs_sync(JournalHdl) of
                     true  -> other;
                     false -> false
                 end;
        false -> confirms
    end.

-spec flush(qistate()) -> qistate().

%% Flush the in-memory journal to the segment files, unless it is clean.
flush(State = #qistate { dirty_count = 0 }) -> State;
flush(State)                                -> flush_journal(State).

-spec read(seq_id(), seq_id(), qistate()) ->
              {[{rabbit_types:msg_id(), seq_id(),
                 rabbit_types:message_properties(),
                 boolean(), boolean()}], qistate()}.

%% Read the messages in [Start, End) from every segment the range
%% touches. An empty range (Start == End) reads nothing.
read(StartEnd, StartEnd, State) ->
    {[], State};
read(Start, End, State = #qistate { segments = Segments,
                                    dir = Dir }) when Start =< End ->
    %% Start is inclusive, End is exclusive.
    LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start),
    UpperB = {EndSeg,   _EndRelSeq}   = seq_id_to_seg_and_rel_seq_id(End - 1),
    {Messages, Segments1} =
        lists:foldr(fun (Seg, Acc) ->
                            read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir)
                    end, {[], Segments}, lists:seq(StartSeg, EndSeg)),
    {Messages, State #qistate { segments = Segments1 }}.

-spec next_segment_boundary(seq_id()) -> seq_id().

%% First seq id of the segment after the one containing SeqId.
next_segment_boundary(SeqId) ->
    {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
    reconstruct_seq_id(Seg + 1, 0).

-spec bounds(qistate()) ->
                 {non_neg_integer(), non_neg_integer(), qistate()}.

%% Lower and upper seq-id bounds of the index, both rounded to segment
%% boundaries.
bounds(State = #qistate { segments = Segments }) ->
    %% This is not particularly efficient, but only gets invoked on
    %% queue initialisation.
    SegNums = lists:sort(segment_nums(Segments)),
    %% Don't bother trying to figure out the lowest seq_id, merely the
    %% seq_id of the start of the lowest segment. That seq_id may not
    %% actually exist, but that's fine. The important thing is that
    %% the segment exists and the seq_id reported is on a segment
    %% boundary.
    %%
    %% We also don't really care about the max seq_id. Just start the
    %% next segment: it makes life much easier.
    %%
    %% SegNums is sorted, ascending.
    {LowSeqId, NextSeqId} =
        case SegNums of
            []         -> {0, 0};
            [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0),
                           reconstruct_seq_id(1 + lists:last(SegNums), 0)}
        end,
    {LowSeqId, NextSeqId, State}.

-spec start(rabbit_types:vhost(), [rabbit_amqqueue:name()]) -> {[[any()]], {walker(A), A}}.

%% Vhost-wide startup: read recovery terms for all durable queues,
%% delete the directories of queues we were not asked to recover, and
%% return the terms (in DurableQueueNames order) together with a walker
%% fun used to seed the message store.
start(VHost, DurableQueueNames) ->
    ok = rabbit_recovery_terms:start(VHost),
    {DurableTerms, DurableDirectories} =
        lists:foldl(
          fun(QName, {RecoveryTerms, ValidDirectories}) ->
                  DirName = queue_name_to_dir_name(QName),
                  RecoveryInfo = case rabbit_recovery_terms:read(VHost, DirName) of
                                     {error, _}  -> non_clean_shutdown;
                                     {ok, Terms} -> Terms
                                 end,
                  {[RecoveryInfo | RecoveryTerms],
                   sets:add_element(DirName, ValidDirectories)}
          end, {[], sets:new()}, DurableQueueNames),
    %% Any queue directory we've not been asked to recover is considered garbage
    rabbit_file:recursive_delete(
      [DirName ||
          DirName <- all_queue_directory_names(VHost),
          not sets:is_element(filename:basename(DirName), DurableDirectories)]),
    rabbit_recovery_terms:clear(VHost),

    %% The backing queue interface requires that the queue recovery terms
    %% which come back from start/1 are in the same order as DurableQueueNames
    OrderedTerms = lists:reverse(DurableTerms),
    {OrderedTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}.


stop(VHost) -> rabbit_recovery_terms:stop(VHost).

%% All queue directories under the given vhost's message store dir.
all_queue_directory_names(VHost) ->
    filelib:wildcard(filename:join([rabbit_vhost:msg_store_dir_path(VHost),
                                    "queues", "*"])).

%% All queue directories across every vhost.
all_queue_directory_names() ->
    filelib:wildcard(filename:join([rabbit_vhost:msg_store_dir_wildcard(),
                                    "queues", "*"])).
%%----------------------------------------------------------------------------
%% startup and shutdown
%%----------------------------------------------------------------------------

erase_index_dir(Dir) ->
    case rabbit_file:is_dir(Dir) of
        true  -> rabbit_file:recursive_delete([Dir]);
        false -> ok
    end.

%% Fresh state with no-op sync callbacks.
blank_state(VHostDir, QueueName) ->
    Dir = queue_dir(VHostDir, QueueName),
    blank_state_name_dir_funs(QueueName,
                              Dir,
                              fun (_) -> ok end,
                              fun (_) -> ok end).

queue_dir(VHostDir, QueueName) ->
    %% Queue directory is
    %% {node_database_dir}/msg_stores/vhosts/{vhost}/queues/{queue}
    QueueDir = queue_name_to_dir_name(QueueName),
    filename:join([VHostDir, "queues", QueueDir]).

%% Directory name is a base-36 rendering of the md5 of "queue" ++ vhost
%% ++ queue name.
queue_name_to_dir_name(#resource { kind = queue,
                                   virtual_host = VHost,
                                   name = QName }) ->
    <<Num:128>> = erlang:md5(<<"queue", VHost/binary, QName/binary>>),
    rabbit_misc:format("~.36B", [Num]).

%% Old naming scheme (hash of the whole #resource term); kept for the
%% per-vhost store migration.
queue_name_to_dir_name_legacy(Name = #resource { kind = queue }) ->
    <<Num:128>> = erlang:md5(term_to_binary_compat:term_to_binary_1(Name)),
    rabbit_misc:format("~.36B", [Num]).

queues_base_dir() ->
    rabbit_mnesia:dir().

%% Construct an empty #qistate{} for the given queue name/directory.
blank_state_name_dir_funs(Name, Dir, OnSyncFun, OnSyncMsgFun) ->
    {ok, MaxJournal} =
        application:get_env(rabbit, queue_index_max_journal_entries),
    #qistate { dir                 = Dir,
               segments            = segments_new(),
               journal_handle      = undefined,
               dirty_count         = 0,
               max_journal_entries = MaxJournal,
               on_sync             = OnSyncFun,
               on_sync_msg         = OnSyncMsgFun,
               unconfirmed         = gb_sets:new(),
               unconfirmed_msg     = gb_sets:new(),
               pre_publish_cache   = [],
               delivered_cache     = [],
               queue_name          = Name }.

init_clean(RecoveredCounts, State) ->
    %% Load the journal. Since this is a clean recovery this (almost)
    %% gets us back to where we were on shutdown.
    State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State),
    %% The journal loading only creates records for segments touched
    %% by the journal, and the counts are based on the journal entries
    %% only. We need *complete* counts for *all* segments. By an
    %% amazing coincidence we stored that information on shutdown.
    Segments1 =
        lists:foldl(
          fun ({Seg, UnackedCount}, SegmentsN) ->
                  Segment = segment_find_or_new(Seg, Dir, SegmentsN),
                  segment_store(Segment #segment { unacked = UnackedCount },
                                SegmentsN)
          end, Segments, RecoveredCounts),
    %% the counts above include transient messages, which would be the
    %% wrong thing to return
    {undefined, undefined, State1 # qistate { segments = Segments1 }}.

init_dirty(CleanShutdown, ContainsCheckFun, State) ->
    %% Recover the journal completely. This will also load segments
    %% which have entries in the journal and remove duplicates. The
    %% counts will correctly reflect the combination of the segment
    %% and the journal.
    State1 = #qistate { dir = Dir, segments = Segments } =
        recover_journal(State),
    {Segments1, Count, Bytes, DirtyCount} =
        %% Load each segment in turn and filter out messages that are
        %% not in the msg_store, by adding acks to the journal. These
        %% acks only go to the RAM journal as it doesn't matter if we
        %% lose them. Also mark delivered if not clean shutdown. Also
        %% find the number of unacked messages. Also accumulate the
        %% dirty count here, so we can call maybe_flush_journal below
        %% and avoid unnecessary file system operations.
        lists:foldl(
          fun (Seg, {Segments2, CountAcc, BytesAcc, DirtyCount}) ->
                  {{Segment = #segment { unacked = UnackedCount }, Dirty},
                   UnackedBytes} =
                      recover_segment(ContainsCheckFun, CleanShutdown,
                                      segment_find_or_new(Seg, Dir, Segments2),
                                      State1#qistate.max_journal_entries),
                  {segment_store(Segment, Segments2),
                   CountAcc + UnackedCount,
                   BytesAcc + UnackedBytes, DirtyCount + Dirty}
          end, {Segments, 0, 0, 0}, all_segment_nums(State1)),
    State2 = maybe_flush_journal(State1 #qistate { segments    = Segments1,
                                                   dirty_count = DirtyCount }),
    {Count, Bytes, State2}.

%% Close the journal and collect per-segment unacked counts (these are
%% persisted as recovery terms by terminate/3).
terminate(State = #qistate { journal_handle = JournalHdl,
                             segments = Segments }) ->
    ok = case JournalHdl of
             undefined -> ok;
             _         -> file_handle_cache:close(JournalHdl)
         end,
    SegmentCounts =
        segment_fold(
          fun (#segment { num = Seg, unacked = UnackedCount }, Acc) ->
                  [{Seg, UnackedCount} | Acc]
          end, [], Segments),
    {SegmentCounts, State #qistate { journal_handle = undefined,
                                     segments = undefined }}.

%% Combine one segment's on-disk entries with its journal entries and
%% run recover_message/6 over every unacked publish, accumulating the
%% unacked-bytes total for persistent messages.
recover_segment(ContainsCheckFun, CleanShutdown,
                Segment = #segment { journal_entries = JEntries }, MaxJournal) ->
    {SegEntries, UnackedCount} = load_segment(false, Segment),
    {SegEntries1, UnackedCountDelta} =
        segment_plus_journal(SegEntries, JEntries),
    array:sparse_foldl(
      fun (RelSeq, {{IsPersistent, Bin, MsgBin}, Del, no_ack},
           {SegmentAndDirtyCount, Bytes}) ->
              {MsgOrId, MsgProps} = parse_pub_record_body(Bin, MsgBin),
              {recover_message(ContainsCheckFun(MsgOrId), CleanShutdown,
                               Del, RelSeq, SegmentAndDirtyCount, MaxJournal),
               Bytes + case IsPersistent of
                           true  -> MsgProps#message_properties.size;
                           false -> 0
                       end}
      end,
      {{Segment #segment { unacked = UnackedCount + UnackedCountDelta }, 0}, 0},
      SegEntries1).

%% Decide the fate of one recovered publish.
%% Args: MsgInStore, CleanShutdown, DelFlag, RelSeq,
%% {Segment, DirtyCount}, MaxJournal.
%% Messages still in the msg store are kept (adding a 'del' when the
%% shutdown was dirty and none was recorded); messages missing from the
%% store are acked out of existence.
recover_message( true,  true,   _Del, _RelSeq, SegmentAndDirtyCount, _MaxJournal) ->
    SegmentAndDirtyCount;
recover_message( true, false,    del, _RelSeq, SegmentAndDirtyCount, _MaxJournal) ->
    SegmentAndDirtyCount;
recover_message( true, false, no_del,  RelSeq, {Segment, _DirtyCount},  MaxJournal) ->
    %% force to flush the segment
    {add_to_journal(RelSeq, del, Segment), MaxJournal + 1};
recover_message(false,     _,    del,  RelSeq, {Segment,  DirtyCount}, _MaxJournal) ->
    {add_to_journal(RelSeq, ack, Segment), DirtyCount + 1};
recover_message(false,     _, no_del,  RelSeq, {Segment,  DirtyCount}, _MaxJournal) ->
    {add_to_journal(RelSeq, ack,
                    add_to_journal(RelSeq, del, Segment)),
     DirtyCount + 2}.

%%----------------------------------------------------------------------------
%% msg store startup delta function
%%----------------------------------------------------------------------------

%% Walk every durable queue's index in parallel (via the worker pool),
%% emitting {MsgId, RefCount} pairs the msg store uses to rebuild its
%% reference counts. Driven as a generator: each call yields either
%% {MsgId, Count, Continuation} or 'finished'.
queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) ->
    {ok, Gatherer} = gatherer:start_link(),
    [begin
         ok = gatherer:fork(Gatherer),
         ok = worker_pool:submit_async(
                fun () -> link(Gatherer),
                          ok = queue_index_walker_reader(QueueName, Gatherer),
                          unlink(Gatherer),
                          ok
                end)
     end || QueueName <- DurableQueues],
    queue_index_walker({next, Gatherer});

queue_index_walker({next, Gatherer}) when is_pid(Gatherer) ->
    case gatherer:out(Gatherer) of
        empty ->
            ok = gatherer:stop(Gatherer),
            finished;
        {value, {MsgId, Count}} ->
            {MsgId, Count, {next, Gatherer}}
    end.

%% Scan one queue's segments, pushing every persistent, unacked MsgId
%% (with a reference count of 1) into the gatherer.
queue_index_walker_reader(QueueName, Gatherer) ->
    ok = scan_queue_segments(
           fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, ok)
                 when is_binary(MsgId) ->
                   gatherer:sync_in(Gatherer, {MsgId, 1});
               (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered,
                _IsAcked, Acc) ->
                   Acc
           end, ok, QueueName),
    ok = gatherer:finish(Gatherer).

%% Fold Fun over every entry of every segment of QueueName, resolving
%% the vhost message-store directory first.
scan_queue_segments(Fun, Acc, #resource{ virtual_host = VHost } = QueueName) ->
    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
    scan_queue_segments(Fun, Acc, VHostDir, QueueName).

%% As above with an explicit vhost directory. Recovers the journal into
%% a blank state, folds over all segments, then terminates the state so
%% no file handles are leaked.
scan_queue_segments(Fun, Acc, VHostDir, QueueName) ->
    State = #qistate { segments = Segments, dir = Dir } =
        recover_journal(blank_state(VHostDir, QueueName)),
    Result = lists:foldr(
               fun (Seg, AccN) ->
                       segment_entries_foldr(
                         fun (RelSeq, {{MsgOrId, MsgProps, IsPersistent},
                                       IsDelivered, IsAcked}, AccM) ->
                                 Fun(reconstruct_seq_id(Seg, RelSeq), MsgOrId, MsgProps,
                                     IsPersistent, IsDelivered, IsAcked, AccM)
                         end, AccN, segment_find_or_new(Seg, Dir, Segments))
               end, Acc, all_segment_nums(State)),
    {_SegmentCounts, _State} = terminate(State),
    Result.

%%----------------------------------------------------------------------------
%% expiry/binary manipulation
%%----------------------------------------------------------------------------

%% Serialise a publish record body. For a plain MsgId binary the body is
%% MsgId ++ Expiry ++ Size with no embedded blob; for an embedded
%% #basic_message{} the message itself is also term_to_binary'd and
%% returned as the second tuple element.
create_pub_record_body(MsgOrId, #message_properties { expiry = Expiry,
                                                      size   = Size }) ->
    ExpiryBin = expiry_to_binary(Expiry),
    case MsgOrId of
        MsgId when is_binary(MsgId) ->
            {<<MsgId/binary, ExpiryBin/binary, Size:?SIZE_BITS>>, <<>>};
        #basic_message{id = MsgId} ->
            MsgBin = term_to_binary(MsgOrId),
            {<<MsgId/binary, ExpiryBin/binary, Size:?SIZE_BITS>>, MsgBin}
    end.

%% 'undefined' expiry is encoded as the reserved ?NO_EXPIRY value.
expiry_to_binary(undefined) -> <<?NO_EXPIRY:?EXPIRY_BITS>>;
expiry_to_binary(Expiry)    -> <<Expiry:?EXPIRY_BITS>>.

%% Inverse of create_pub_record_body/2: decode the MsgId (or embedded
%% #basic_message{}) plus #message_properties{} from a publish record.
parse_pub_record_body(<<MsgIdNum:?MSG_ID_BITS, Expiry:?EXPIRY_BITS,
                        Size:?SIZE_BITS>>, MsgBin) ->
    %% work around for binary data fragmentation. See
    %% rabbit_msg_file:read_next/2
    <<MsgId:?MSG_ID_BYTES/binary>> = <<MsgIdNum:?MSG_ID_BITS>>,
    Props = #message_properties{expiry = case Expiry of
                                             ?NO_EXPIRY -> undefined;
                                             X          -> X
                                         end,
                                size   = Size},
    case MsgBin of
        <<>> -> {MsgId, Props};
        _    -> Msg = #basic_message{id = MsgId} = binary_to_term(MsgBin),
                {Msg, Props}
    end.

%%----------------------------------------------------------------------------
%% journal manipulation
%%----------------------------------------------------------------------------

%% State-level form: locate the segment owning SeqId, record Action in
%% it, and bump the dirty count (callers flush via maybe_flush_journal).
add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount,
                                                 segments = Segments,
                                                 dir = Dir }) ->
    {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
    Segment = segment_find_or_new(Seg, Dir, Segments),
    Segment1 = add_to_journal(RelSeq, Action, Segment),
    State #qistate { dirty_count = DCount + 1,
                     segments = segment_store(Segment1, Segments) };

%% Segment-level form: fold Action into the segment's journal entry for
%% RelSeq, keeping the entries_to_segment cache and the unacked count
%% in step (publish +1, del 0, ack -1).
add_to_journal(RelSeq, Action,
               Segment = #segment { journal_entries = JEntries,
                                    entries_to_segment = EToSeg,
                                    unacked = UnackedCount }) ->

    {Fun, Entry} = action_to_entry(RelSeq, Action, JEntries),

    {JEntries1, EToSeg1} =
        case Fun of
            set ->
                {array:set(RelSeq, Entry, JEntries),
                 array:set(RelSeq, entry_to_segment(RelSeq, Entry, []),
                           EToSeg)};
            reset ->
                {array:reset(RelSeq, JEntries),
                 array:reset(RelSeq, EToSeg)}
        end,

    Segment #segment {
      journal_entries = JEntries1,
      entries_to_segment = EToSeg1,
      unacked = UnackedCount + case Action of
                                   ?PUB -> +1;
                                   del  ->  0;
                                   ack  -> -1
                               end}.

%% Compute the new journal entry for RelSeq given Action and the current
%% entry. Returns {set, Entry} to store it, or {reset, none} when a
%% pub+del+ack sequence has fully cancelled out and can be dropped.
action_to_entry(RelSeq, Action, JEntries) ->
    case array:get(RelSeq, JEntries) of
        undefined ->
            {set,
             case Action of
                 ?PUB -> {Action, no_del, no_ack};
                 del  -> {no_pub,    del, no_ack};
                 ack  -> {no_pub, no_del,    ack}
             end};
        ({Pub,    no_del, no_ack}) when Action == del ->
            {set, {Pub, del, no_ack}};
        ({no_pub,    del, no_ack}) when Action == ack ->
            {set, {no_pub, del, ack}};
        ({?PUB,      del, no_ack}) when Action == ack ->
            {reset, none}
    end.

maybe_flush_journal(State) ->
    maybe_flush_journal(infinity, State).

%% Flush once the dirty entry count exceeds either the configured
%% maximum or the caller-supplied Hint ('infinity' disables the hint).
maybe_flush_journal(Hint, State = #qistate { dirty_count = DCount,
                                             max_journal_entries = MaxJournal })
  when DCount > MaxJournal orelse (Hint =/= infinity andalso DCount > Hint) ->
    flush_journal(State);
maybe_flush_journal(_Hint, State) ->
    State.

%% Push every journalled entry down into its segment file (deleting
%% segment files left with zero unacked entries), truncate the journal
%% file, and fire any pending confirms via notify_sync/1.
flush_journal(State = #qistate { segments = Segments }) ->
    Segments1 =
        segment_fold(
          fun (#segment { unacked = 0, path = Path }, SegmentsN) ->
                  case rabbit_file:is_file(Path) of
                      true  -> ok = rabbit_file:delete(Path);
                      false -> ok
                  end,
                  SegmentsN;
              (#segment {} = Segment, SegmentsN) ->
                  segment_store(append_journal_to_segment(Segment), SegmentsN)
          end, segments_new(), Segments),
    {JournalHdl, State1} =
        get_journal_handle(State #qistate { segments = Segments1 }),
    ok = file_handle_cache:clear(JournalHdl),
    notify_sync(State1 #qistate { dirty_count = 0 }).

%% Append this segment's cached journal entries to its file on disk and
%% reset the in-memory arrays. A no-op when there is nothing to write.
append_journal_to_segment(#segment { journal_entries = JEntries,
                                     entries_to_segment = EToSeg,
                                     path = Path } = Segment) ->
    case array:sparse_size(JEntries) of
        0 -> Segment;
        _ ->
            file_handle_cache_stats:update(queue_index_write),

            {ok, Hdl} = file_handle_cache:open_with_absolute_path(
                          Path, ?WRITE_MODE,
                          [{write_buffer, infinity}]),
            %% the file_handle_cache also does a list reverse, so this
            %% might not be required here, but before we were doing a
            %% sparse_foldr, a lists:reverse/1 seems to be the correct
            %% thing to do for now.
            file_handle_cache:append(Hdl, lists:reverse(array:to_list(EToSeg))),
            ok = file_handle_cache:close(Hdl),
            Segment #segment { journal_entries = array_new(),
                               entries_to_segment = array_new([]) }
    end.

%% Lazily open (and cache in the state) the journal file handle,
%% creating the queue directory and its name stub file on first use.
get_journal_handle(State = #qistate { journal_handle = undefined,
                                      dir = Dir,
                                      queue_name = Name }) ->
    Path = filename:join(Dir, ?JOURNAL_FILENAME),
    ok = rabbit_file:ensure_dir(Path),
    ok = ensure_queue_name_stub_file(Dir, Name),
    {ok, Hdl} = file_handle_cache:open_with_absolute_path(
                  Path, ?WRITE_MODE, [{write_buffer, infinity}]),
    {Hdl, State #qistate { journal_handle = Hdl }};
get_journal_handle(State = #qistate { journal_handle = Hdl }) ->
    {Hdl, State}.

%% Loading Journal. This isn't idempotent and will mess up the counts
%% if you call it more than once on the same state. Assumes the counts
%% are 0 to start with.
load_journal(State = #qistate { dir = Dir }) ->
    Path = filename:join(Dir, ?JOURNAL_FILENAME),
    case rabbit_file:is_file(Path) of
        true  -> {JournalHdl, State1} = get_journal_handle(State),
                 Size = rabbit_file:file_size(Path),
                 {ok, 0} = file_handle_cache:position(JournalHdl, 0),
                 {ok, JournalBin} = file_handle_cache:read(JournalHdl, Size),
                 parse_journal_entries(JournalBin, State1);
        false -> State
    end.

%% ditto
recover_journal(State) ->
    State1 = #qistate { segments = Segments } = load_journal(State),
    Segments1 =
        segment_map(
          fun (Segment = #segment { journal_entries = JEntries,
                                    entries_to_segment = EToSeg,
                                    unacked = UnackedCountInJournal }) ->
                  %% We want to keep ack'd entries in so that we can
                  %% remove them if duplicates are in the journal. The
                  %% counts here are purely from the segment itself.
                  {SegEntries, UnackedCountInSeg} = load_segment(true, Segment),
                  {JEntries1, EToSeg1, UnackedCountDuplicates} =
                      journal_minus_segment(JEntries, EToSeg, SegEntries),
                  Segment #segment { journal_entries = JEntries1,
                                     entries_to_segment = EToSeg1,
                                     unacked = (UnackedCountInJournal +
                                                    UnackedCountInSeg -
                                                    UnackedCountDuplicates) }
          end, Segments),
    State1 #qistate { segments = Segments1 }.

%% Parse the raw journal binary, replaying each del/ack/publish record
%% into the state via add_to_journal/3. An all-zero record marks where a
%% dirty shutdown truncated the file, so parsing stops there.
parse_journal_entries(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                        Rest/binary>>, State) ->
    parse_journal_entries(Rest, add_to_journal(SeqId, del, State));

parse_journal_entries(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                        Rest/binary>>, State) ->
    parse_journal_entries(Rest, add_to_journal(SeqId, ack, State));
parse_journal_entries(<<0:?JPREFIX_BITS, 0:?SEQ_BITS,
                        0:?PUB_RECORD_SIZE_BYTES/unit:8, _/binary>>, State) ->
    %% Journal entry composed only of zeroes was probably
    %% produced during a dirty shutdown so stop reading
    State;
parse_journal_entries(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                        Bin:?PUB_RECORD_BODY_BYTES/binary,
                        MsgSize:?EMBEDDED_SIZE_BITS, MsgBin:MsgSize/binary,
                        Rest/binary>>, State) ->
    IsPersistent = case Prefix of
                       ?PUB_PERSIST_JPREFIX -> true;
                       ?PUB_TRANS_JPREFIX   -> false
                   end,
    parse_journal_entries(
      Rest, add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State));
parse_journal_entries(_ErrOrEoF, State) ->
    State.

%% Append a batch of del or ack records for SeqIds to the journal file,
%% mirror them in the in-memory journal, and flush if now over limit.
deliver_or_ack(_Kind, [], State) ->
    State;
deliver_or_ack(Kind, SeqIds, State) ->
    JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end,
    {JournalHdl, State1} = get_journal_handle(State),
    file_handle_cache_stats:update(queue_index_journal_write),
    ok = file_handle_cache:append(
           JournalHdl,
           [<<JPrefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>> || SeqId <- SeqIds]),
    maybe_flush_journal(lists:foldl(fun (SeqId, StateN) ->
                                            add_to_journal(SeqId, Kind, StateN)
                                    end, State1, SeqIds)).

%% Fire the on_sync / on_sync_msg confirm callbacks for any pending
%% unconfirmed sets and clear them.
notify_sync(State = #qistate{unconfirmed     = UC,
                             unconfirmed_msg = UCM,
                             on_sync         = OnSyncFun,
                             on_sync_msg     = OnSyncMsgFun}) ->
    State1 = case gb_sets:is_empty(UC) of
                 true  -> State;
                 false -> OnSyncFun(UC),
                          State#qistate{unconfirmed = gb_sets:new()}
             end,
    case gb_sets:is_empty(UCM) of
        true  -> State1;
        false -> OnSyncMsgFun(UCM),
                 State1#qistate{unconfirmed_msg = gb_sets:new()}
    end.

%%----------------------------------------------------------------------------
%% segment manipulation
%%----------------------------------------------------------------------------

%% Split an absolute sequence id into {SegmentNumber, RelativeSeqId}.
seq_id_to_seg_and_rel_seq_id(SeqId) ->
    Seg = SeqId div ?SEGMENT_ENTRY_COUNT,
    Rel = SeqId rem ?SEGMENT_ENTRY_COUNT,
    {Seg, Rel}.

%% Inverse of seq_id_to_seg_and_rel_seq_id/1.
reconstruct_seq_id(Seg, RelSeq) ->
    Seg * ?SEGMENT_ENTRY_COUNT + RelSeq.

%% All segment numbers for this queue, sorted ascending and de-duplicated:
%% the union of the segments held in memory and the segment files found
%% on disk (whose file names begin with the decimal segment number).
all_segment_nums(#qistate { dir = Dir, segments = Segments }) ->
    OnDiskNames = rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir),
    OnDiskNums =
        [list_to_integer(
           lists:takewhile(fun (C) -> C >= $0 andalso C =< $9 end, SegName))
         || SegName <- OnDiskNames],
    lists:usort(OnDiskNums ++ segment_nums(Segments)).

%% Fetch the #segment{} for Seg from the segment store, or build a
%% fresh, empty one pointing at the right file path when unknown.
segment_find_or_new(Seg, Dir, Segments) ->
    case segment_find(Seg, Segments) of
        {ok, Segment} ->
            Segment;
        error ->
            Path = filename:join(Dir,
                                 integer_to_list(Seg) ++ ?SEGMENT_EXTENSION),
            #segment { num                = Seg,
                       path               = Path,
                       journal_entries    = array_new(),
                       entries_to_segment = array_new([]),
                       unacked            = 0 }
    end.

%% Look Seg up first in the (at most two element) recently-used cache
%% list, falling back to the map of all other segments.
segment_find(Seg, {_Map, [Segment = #segment { num = Seg } | _]}) ->
    {ok, Segment};                          %% 1 or (2, matches head)
segment_find(Seg, {_Map, [_, Segment = #segment { num = Seg }]}) ->
    {ok, Segment};                          %% 2, matches tail
segment_find(Seg, {Map, _}) ->              %% no match
    maps:find(Seg, Map).

%% Store Segment in the {Map, CacheList} structure, keeping the cache as
%% the (at most two) most recently used segments and everything else in
%% the map. The clause comments refer to the current cache population.
segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head)
              {Segments, [#segment { num = Seg } | Tail]}) ->
    {Segments, [Segment | Tail]};
segment_store(Segment = #segment { num = Seg }, %% 2, matches tail
              {Segments, [SegmentA, #segment { num = Seg }]}) ->
    {Segments, [Segment, SegmentA]};
segment_store(Segment = #segment { num = Seg }, {Segments, []}) ->
    {maps:remove(Seg, Segments), [Segment]};
segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) ->
    {maps:remove(Seg, Segments), [Segment, SegmentA]};
segment_store(Segment = #segment { num = Seg },
              {Segments, [SegmentA, SegmentB]}) ->
    %% Cache full: SegmentB is demoted back into the map.
    {maps:put(SegmentB#segment.num, SegmentB, maps:remove(Seg, Segments)),
     [Segment, SegmentA]}.

%% Fold Fun over every segment, cached segments first.
segment_fold(Fun, Acc, {Segments, CachedSegments}) ->
    maps:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end,
              lists:foldl(Fun, Acc, CachedSegments), Segments).

%% Map Fun over every segment, preserving the {Map, CacheList} shape.
segment_map(Fun, {Segments, CachedSegments}) ->
    {maps:map(fun (_Seg, Segment) -> Fun(Segment) end, Segments),
     lists:map(Fun, CachedSegments)}.

%% All segment numbers currently held in memory (cache ++ map keys).
segment_nums({Segments, CachedSegments}) ->
    lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++
        maps:keys(Segments).

%% Empty segment store: {Map, MRU cache of at most two segments}.
segments_new() ->
    {#{}, []}.

%% Render one journal entry as the iolist to prepend onto Initial for
%% writing into a segment file. A fully cancelled pub+del+ack entry
%% contributes nothing.
entry_to_segment(_RelSeq, {?PUB, del, ack}, Initial) ->
    Initial;
entry_to_segment(RelSeq, {Pub, Del, Ack}, Initial) ->
    %% NB: we are assembling the segment in reverse order here, so
    %% del/ack comes first.
    Buf1 = case {Del, Ack} of
               {no_del, no_ack} ->
                   Initial;
               _ ->
                   Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
                              RelSeq:?REL_SEQ_BITS>>,
                   case {Del, Ack} of
                       {del, ack} -> [[Binary, Binary] | Initial];
                       _          -> [Binary | Initial]
                   end
           end,
    case Pub of
        no_pub ->
            Buf1;
        {IsPersistent, Bin, MsgBin} ->
            [[<<?PUB_PREFIX:?PUB_PREFIX_BITS,
                (bool_to_int(IsPersistent)):1,
                RelSeq:?REL_SEQ_BITS, Bin/binary,
                (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin] | Buf1]
    end.

%% Read the unacked messages of segment Seg falling inside the inclusive
%% bounds {StartSeg, StartRelSeq}..{EndSeg, EndRelSeq}, accumulating onto
%% Messages; also returns the updated segment store.
read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq},
                     {Messages, Segments}, Dir) ->
    Segment = segment_find_or_new(Seg, Dir, Segments),
    {segment_entries_foldr(
       fun (RelSeq, {{MsgOrId, MsgProps, IsPersistent}, IsDelivered, no_ack},
            Acc)
             when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso
                  (Seg < EndSeg orelse EndRelSeq >= RelSeq) ->
               %% NOTE(review): the seq id is rebuilt from StartSeg, not
               %% Seg — presumably callers only ever read within a single
               %% segment (Seg =:= StartSeg); verify against the caller.
               [{MsgOrId, reconstruct_seq_id(StartSeg, RelSeq), MsgProps,
                 IsPersistent, IsDelivered == del} | Acc];
           (_RelSeq, _Value, Acc) ->
               Acc
       end, Messages, Segment),
     segment_store(Segment, Segments)}.

%% Fold Fun (right-to-left, i.e. descending RelSeq) over the combined
%% on-disk + journalled view of a segment.
segment_entries_foldr(Fun, Init,
                      Segment = #segment { journal_entries = JEntries }) ->
    {SegEntries, _UnackedCount} = load_segment(false, Segment),
    {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries),
    array:sparse_foldr(
      fun (RelSeq, {{IsPersistent, Bin, MsgBin}, Del, Ack}, Acc) ->
              {MsgOrId, MsgProps} = parse_pub_record_body(Bin, MsgBin),
              Fun(RelSeq, {{MsgOrId, MsgProps, IsPersistent}, Del, Ack}, Acc)
      end, Init, SegEntries1).

%% Loading segments
%%
%% Does not do any combining with the journal at all.
%% Returns {EntriesArray, UnackedCount}; KeepAcked controls whether
%% fully acked entries are retained (needed by recover_journal/1).
load_segment(KeepAcked, #segment { path = Path }) ->
    Empty = {array_new(), 0},
    case rabbit_file:is_file(Path) of
        false -> Empty;
        true  -> Size = rabbit_file:file_size(Path),
                 file_handle_cache_stats:update(queue_index_read),
                 {ok, Hdl} = file_handle_cache:open_with_absolute_path(
                               Path, ?READ_MODE, []),
                 {ok, 0} = file_handle_cache:position(Hdl, bof),
                 {ok, SegBin} = file_handle_cache:read(Hdl, Size),
                 ok = file_handle_cache:close(Hdl),
                 Res = parse_segment_entries(SegBin, KeepAcked, Empty),
                 Res
    end.

%% Parse a raw segment file binary into {EntriesArray, UnackedCount}.
parse_segment_entries(<<?PUB_PREFIX:?PUB_PREFIX_BITS,
                        IsPersistNum:1, RelSeq:?REL_SEQ_BITS, Rest/binary>>,
                      KeepAcked, Acc) ->
    parse_segment_publish_entry(
      Rest, 1 == IsPersistNum, RelSeq, KeepAcked, Acc);
parse_segment_entries(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
                        RelSeq:?REL_SEQ_BITS, Rest/binary>>, KeepAcked, Acc) ->
    parse_segment_entries(
      Rest, KeepAcked, add_segment_relseq_entry(KeepAcked, RelSeq, Acc));
parse_segment_entries(<<>>, _KeepAcked, Acc) ->
    Acc.

%% Decode a publish record body and store it as a fresh
%% {Pub, no_del, no_ack} entry. A short/truncated body falls through to
%% plain entry parsing of the remainder.
parse_segment_publish_entry(<<Bin:?PUB_RECORD_BODY_BYTES/binary,
                              MsgSize:?EMBEDDED_SIZE_BITS,
                              MsgBin:MsgSize/binary, Rest/binary>>,
                            IsPersistent, RelSeq, KeepAcked,
                            {SegEntries, Unacked}) ->
    Obj = {{IsPersistent, Bin, MsgBin}, no_del, no_ack},
    SegEntries1 = array:set(RelSeq, Obj, SegEntries),
    parse_segment_entries(Rest, KeepAcked, {SegEntries1, Unacked + 1});
parse_segment_publish_entry(Rest, _IsPersistent, _RelSeq, KeepAcked, Acc) ->
    parse_segment_entries(Rest, KeepAcked, Acc).

%% A rel-seq-only record is a delivery on first occurrence and an ack on
%% the second; acked entries are dropped unless KeepAcked is true.
add_segment_relseq_entry(KeepAcked, RelSeq, {SegEntries, Unacked}) ->
    case array:get(RelSeq, SegEntries) of
        {Pub, no_del, no_ack} ->
            {array:set(RelSeq, {Pub, del, no_ack}, SegEntries), Unacked};
        {Pub, del, no_ack} when KeepAcked ->
            {array:set(RelSeq, {Pub, del, ack}, SegEntries), Unacked - 1};
        {_Pub, del, no_ack} ->
            {array:reset(RelSeq, SegEntries), Unacked - 1}
    end.

array_new() ->
    array_new(undefined).

%% Fixed-size array spanning exactly one segment's worth of entries.
array_new(Default) ->
    array:new([{default, Default}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]).

bool_to_int(true ) -> 1;
bool_to_int(false) -> 0.

%%----------------------------------------------------------------------------
%% journal & segment combination
%%----------------------------------------------------------------------------

%% Combine what we have just read from a segment file with what we're
%% holding for that segment in memory. There must be no duplicates.
%% Returns {CombinedEntries, AdditionalUnackedCount}.
segment_plus_journal(SegEntries, JEntries) ->
    array:sparse_foldl(
      fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) ->
              SegEntry = array:get(RelSeq, SegEntriesOut),
              {Obj, AdditionalUnackedDelta} =
                  segment_plus_journal1(SegEntry, JObj),
              {case Obj of
                   undefined -> array:reset(RelSeq, SegEntriesOut);
                   _         -> array:set(RelSeq, Obj, SegEntriesOut)
               end,
               AdditionalUnacked + AdditionalUnackedDelta}
      end, {SegEntries, 0}, JEntries).

%% Here, the result is a tuple with the first element containing the
%% item which we may be adding to (for items only in the journal),
%% modifying in (bits in both), or, when returning 'undefined',
%% erasing from (ack in journal, not segment) the segment array. The
%% other element of the tuple is the delta for AdditionalUnacked.
segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) ->
    {Obj, 1};
segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) ->
    {Obj, 1};
segment_plus_journal1(undefined, {?PUB, del, ack}) ->
    {undefined, 0};

segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) ->
    {{Pub, del, no_ack}, 0};
segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack}) ->
    {undefined, -1};
segment_plus_journal1({?PUB, del, no_ack}, {no_pub, no_del, ack}) ->
    {undefined, -1}.

%% Remove from the journal entries for a segment, items that are
%% duplicates of entries found in the segment itself. Used on start up
%% to clean up the journal.
%%
%% We need to update the entries_to_segment since they are just a
%% cache of what's on the journal.
%% Returns {JEntries, EToSeg, UnackedRemovedCount}.
journal_minus_segment(JEntries, EToSeg, SegEntries) ->
    array:sparse_foldl(
      fun (RelSeq, JObj, {JEntriesOut, EToSegOut, UnackedRemoved}) ->
              SegEntry = array:get(RelSeq, SegEntries),
              {Obj, UnackedRemovedDelta} =
                  journal_minus_segment1(JObj, SegEntry),
              {JEntriesOut1, EToSegOut1} =
                  case Obj of
                      keep ->
                          {JEntriesOut, EToSegOut};
                      undefined ->
                          {array:reset(RelSeq, JEntriesOut),
                           array:reset(RelSeq, EToSegOut)};
                      _ ->
                          {array:set(RelSeq, Obj, JEntriesOut),
                           array:set(RelSeq, entry_to_segment(RelSeq, Obj, []),
                                     EToSegOut)}
                  end,
              {JEntriesOut1, EToSegOut1, UnackedRemoved + UnackedRemovedDelta}
      end, {JEntries, EToSeg, 0}, JEntries).

%% Here, the result is a tuple with the first element containing the
%% item we are adding to or modifying in the (initially fresh) journal
%% array. If the item is 'undefined' we leave the journal array
%% alone. The other element of the tuple is the deltas for
%% UnackedRemoved.

%% Both the same. Must be at least the publish
journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) ->
    {undefined, 1};
journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) ->
    {undefined, 0};

%% Just publish in journal
journal_minus_segment1({?PUB, no_del, no_ack}, undefined) ->
    {keep, 0};

%% Publish and deliver in journal
journal_minus_segment1({?PUB, del, no_ack}, undefined) ->
    {keep, 0};
journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) ->
    {{no_pub, del, no_ack}, 1};

%% Publish, deliver and ack in journal
journal_minus_segment1({?PUB, del, ack}, undefined) ->
    {keep, 0};
journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) ->
    {{no_pub, del, ack}, 1};
journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) ->
    {{no_pub, no_del, ack}, 1};

%% Just deliver in journal
journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) ->
    {keep, 0};
journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) ->
    {undefined, 0};

%% Just ack in journal
journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) ->
    {keep, 0};
journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) ->
    {undefined, -1};

%% Deliver and ack in journal
journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) ->
    {keep, 0};
journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) ->
    {{no_pub, no_del, ack}, 0};
journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) ->
    {undefined, -1};

%% Missing segment. If flush_journal/1 is interrupted after deleting
%% the segment but before truncating the journal we can get these
%% cases: a delivery and an acknowledgement in the journal, or just an
%% acknowledgement in the journal, but with no segment. In both cases
%% we have really forgotten the message; so ignore what's in the
%% journal.
journal_minus_segment1({no_pub, no_del, ack}, undefined) ->
    {undefined, 0};
journal_minus_segment1({no_pub, del, ack}, undefined) ->
    {undefined, 0}.

%%----------------------------------------------------------------------------
%% upgrade
%%----------------------------------------------------------------------------

-spec add_queue_ttl() -> 'ok'.

%% Upgrade step: rewrite every journal and segment record to append the
%% (then new) expiry field, defaulting to 'no expiry'.
add_queue_ttl() ->
    foreach_queue_index({fun add_queue_ttl_journal/1,
                         fun add_queue_ttl_segment/1}).

%% Transform one journal record: del/ack records pass through unchanged,
%% publish records gain an expiry field. Returns {OutputIolist, Rest} or
%% 'stop' when no full record remains.
add_queue_ttl_journal(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                        Rest/binary>>) ->
    {<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
add_queue_ttl_journal(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                        Rest/binary>>) ->
    {<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
add_queue_ttl_journal(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                        MsgId:?MSG_ID_BYTES/binary, Rest/binary>>) ->
    {[<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, MsgId,
      expiry_to_binary(undefined)], Rest};
add_queue_ttl_journal(_) ->
    stop.

%% Segment-side counterpart of add_queue_ttl_journal/1.
add_queue_ttl_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1,
                        RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BYTES/binary,
                        Rest/binary>>) ->
    {[<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>,
      MsgId, expiry_to_binary(undefined)], Rest};
add_queue_ttl_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
                        RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
    {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
     Rest};
add_queue_ttl_segment(_) ->
    stop.

%% Upgrade step: rewrite rel-seq-only segment records whose prefix bits
%% are all zero to use the proper ?REL_SEQ_ONLY_PREFIX encoding.
avoid_zeroes() ->
    foreach_queue_index({none, fun avoid_zeroes_segment/1}).

avoid_zeroes_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1,
                       RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BITS,
                       Expiry:?EXPIRY_BITS, Rest/binary>>) ->
    {<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS,
       MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS>>, Rest};
avoid_zeroes_segment(<<0:?REL_SEQ_ONLY_PREFIX_BITS,
                       RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
    {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
     Rest};
avoid_zeroes_segment(_) ->
    stop.

%% At upgrade time we just define every message's size as 0 - that
%% will save us a load of faff with the message store, and means we
%% can actually use the clean recovery terms in VQ. It does mean we
%% don't count message bodies from before the migration, but we can
%% live with that.
store_msg_size() ->
    foreach_queue_index({fun store_msg_size_journal/1,
                         fun store_msg_size_segment/1}).

%% Journal transform for store_msg_size/0: publish records gain a zeroed
%% size field; del/ack records pass through unchanged.
store_msg_size_journal(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                         Rest/binary>>) ->
    {<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
store_msg_size_journal(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                         Rest/binary>>) ->
    {<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
store_msg_size_journal(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                         MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS,
                         Rest/binary>>) ->
    {<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS, MsgId:?MSG_ID_BITS,
       Expiry:?EXPIRY_BITS, 0:?SIZE_BITS>>, Rest};
store_msg_size_journal(_) ->
    stop.

%% Segment-side counterpart of store_msg_size_journal/1.
store_msg_size_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1,
                         RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BITS,
                         Expiry:?EXPIRY_BITS, Rest/binary>>) ->
    {<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS,
       MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS, 0:?SIZE_BITS>>, Rest};
store_msg_size_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
                         RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
    {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
     Rest};
store_msg_size_segment(_) ->
    stop.

%% Upgrade step: append a zeroed embedded-message-size field to publish
%% records (pre-existing entries have no embedded message).
store_msg() ->
    foreach_queue_index({fun store_msg_journal/1,
                         fun store_msg_segment/1}).

store_msg_journal(<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                    Rest/binary>>) ->
    {<<?DEL_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
store_msg_journal(<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                    Rest/binary>>) ->
    {<<?ACK_JPREFIX:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Rest};
store_msg_journal(<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS,
                    MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS, Size:?SIZE_BITS,
                    Rest/binary>>) ->
    {<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS, MsgId:?MSG_ID_BITS,
       Expiry:?EXPIRY_BITS, Size:?SIZE_BITS,
       0:?EMBEDDED_SIZE_BITS>>, Rest};
store_msg_journal(_) ->
    stop.

%% Segment-side counterpart of store_msg_journal/1.
store_msg_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1,
                    RelSeq:?REL_SEQ_BITS, MsgId:?MSG_ID_BITS,
                    Expiry:?EXPIRY_BITS, Size:?SIZE_BITS, Rest/binary>>) ->
    {<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS,
       MsgId:?MSG_ID_BITS, Expiry:?EXPIRY_BITS, Size:?SIZE_BITS,
       0:?EMBEDDED_SIZE_BITS>>, Rest};
store_msg_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
                    RelSeq:?REL_SEQ_BITS, Rest/binary>>) ->
    {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>,
     Rest};
store_msg_segment(_) ->
    stop.



%%----------------------------------------------------------------------------
%% Migration functions
%%----------------------------------------------------------------------------

%% Apply {JournalFun, SegmentFun} to every queue index directory, in
%% parallel via the worker pool; blocks until all workers have finished.
foreach_queue_index(Funs) ->
    QueueDirNames = all_queue_directory_names(),
    {ok, Gatherer} = gatherer:start_link(),
    [begin
         ok = gatherer:fork(Gatherer),
         ok = worker_pool:submit_async(
                fun () ->
                        transform_queue(QueueDirName, Gatherer, Funs)
                end)
     end || QueueDirName <- QueueDirNames],
    empty = gatherer:out(Gatherer),
    ok = gatherer:stop(Gatherer).

%% Transform one queue directory: the journal file plus every segment.
transform_queue(Dir, Gatherer, {JournalFun, SegmentFun}) ->
    ok = transform_file(filename:join(Dir, ?JOURNAL_FILENAME), JournalFun),
    [ok = transform_file(filename:join(Dir, Seg), SegmentFun)
     || Seg <- rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir)],
    ok = gatherer:finish(Gatherer).

%% Rewrite Path record-by-record through Fun into Path ++ ".upgrade",
%% then rename over the original. 'none' means skip this file; empty
%% files are left untouched.
transform_file(_Path, none) ->
    ok;
transform_file(Path, Fun) when is_function(Fun)->
    PathTmp = Path ++ ".upgrade",
    case rabbit_file:file_size(Path) of
        0    -> ok;
        Size -> {ok, PathTmpHdl} =
                    file_handle_cache:open_with_absolute_path(
                      PathTmp, ?WRITE_MODE,
                      [{write_buffer, infinity}]),

                {ok, PathHdl} = file_handle_cache:open_with_absolute_path(
                                  Path, ?READ_MODE, [{read_buffer, Size}]),
                {ok, Content} = file_handle_cache:read(PathHdl, Size),
                ok = file_handle_cache:close(PathHdl),

                ok = drive_transform_fun(Fun, PathTmpHdl, Content),

                ok = file_handle_cache:close(PathTmpHdl),
                ok = rabbit_file:rename(PathTmp, Path)
    end.

%% Repeatedly apply Fun to the remaining content, appending each
%% transformed record to Hdl, until Fun returns 'stop'.
drive_transform_fun(Fun, Hdl, Contents) ->
    case Fun(Contents) of
        stop                -> ok;
        {Output, Contents1} -> ok = file_handle_cache:append(Hdl, Output),
                               drive_transform_fun(Fun, Hdl, Contents1)
    end.

%% Migrate a queue's index directory from the legacy global location to
%% its per-vhost message store directory; logs (but does not fail) when
%% the legacy directory is missing.
move_to_per_vhost_stores(#resource{virtual_host = VHost} = QueueName) ->
    OldQueueDir = filename:join([queues_base_dir(), "queues",
                                 queue_name_to_dir_name_legacy(QueueName)]),
    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
    NewQueueDir = queue_dir(VHostDir, QueueName),
    rabbit_log_upgrade:info("About to migrate queue directory '~s' to '~s'",
                            [OldQueueDir, NewQueueDir]),
    case rabbit_file:is_dir(OldQueueDir) of
        true ->
            ok = rabbit_file:ensure_dir(NewQueueDir),
            ok = rabbit_file:rename(OldQueueDir, NewQueueDir),
            ok = ensure_queue_name_stub_file(NewQueueDir, QueueName);
        false ->
            Msg  = "Queue index directory '~s' not found for ~s~n",
            Args = [OldQueueDir, rabbit_misc:rs(QueueName)],
            rabbit_log_upgrade:error(Msg, Args),
            rabbit_log:error(Msg, Args)
    end,
    ok.

%% Write the human-readable stub file recording which queue owns Dir.
ensure_queue_name_stub_file(Dir, #resource{virtual_host = VHost, name = QName}) ->
    QueueNameFile = filename:join(Dir, ?QUEUE_NAME_STUB_FILE),
    file:write_file(QueueNameFile, <<"VHOST: ", VHost/binary, "\n",
                                     "QUEUE: ", QName/binary, "\n">>).

%% Read the legacy global recovery-terms table for DurableQueueNames.
%% Returns the terms in the same order as the input names (required by
%% the backing queue interface) plus the msg-store walker continuation.
read_global_recovery_terms(DurableQueueNames) ->
    ok = rabbit_recovery_terms:open_global_table(),

    DurableTerms =
        lists:foldl(
          fun(QName, RecoveryTerms) ->
                  DirName = queue_name_to_dir_name_legacy(QName),
                  RecoveryInfo = case rabbit_recovery_terms:read_global(DirName) of
                                     {error, _}  -> non_clean_shutdown;
                                     {ok, Terms} -> Terms
                                 end,
                  [RecoveryInfo | RecoveryTerms]
          end, [], DurableQueueNames),

    ok = rabbit_recovery_terms:close_global_table(),
    %% The backing queue interface requires that the queue recovery terms
    %% which come back from start/1 are in the same order as DurableQueueNames
    OrderedTerms = lists:reverse(DurableTerms),
    {OrderedTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}.

%% Remove the legacy global queues directory and recovery-terms table.
cleanup_global_recovery_terms() ->
    rabbit_file:recursive_delete([filename:join([queues_base_dir(), "queues"])]),
    rabbit_recovery_terms:delete_global_table(),
    ok.


%% Persist Term as the recovery information for QueueName in its
%% vhost's recovery-terms store.
update_recovery_term(#resource{virtual_host = VHost} = QueueName, Term) ->
    Key = queue_name_to_dir_name(QueueName),
    rabbit_recovery_terms:store(VHost, Key, Term).
diff --git a/deps/rabbit/src/rabbit_queue_location_client_local.erl b/deps/rabbit/src/rabbit_queue_location_client_local.erl
new file mode 100644
index 0000000000..2df1608534
--- /dev/null
+++ b/deps/rabbit/src/rabbit_queue_location_client_local.erl
@@ -0,0 +1,39 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Queue master locator that always picks the node the declaring client
%% is connected to.
-module(rabbit_queue_location_client_local).
-behaviour(rabbit_queue_master_locator).

-include_lib("rabbit_common/include/rabbit.hrl").
-include("amqqueue.hrl").

-export([description/0, queue_master_location/1]).
-rabbit_boot_step({?MODULE,
                   [{description, "locate queue master client local"},
                    {mfa,         {rabbit_registry, register,
                                   [queue_master_locator,
                                    <<"client-local">>, ?MODULE]}},
                    {requires,    rabbit_registry},
                    {enables,     kernel_ready}]}).

%%---------------------------------------------------------------------------
%% Queue Master Location Callbacks
%%---------------------------------------------------------------------------

%% Human-readable description of this locator strategy.
description() ->
    [{description, <<"Locate queue master node as the client local node">>}].

%% Always choose the local node.  Unlike the other locator strategies
%% we deliberately skip the node-maintenance check because:
%%   * nodes in maintenance mode drop their client connections anyway;
%%   * with other strategies, when no node is available the current
%%     node is returned - which is exactly what this strategy does.
queue_master_location(Q) when ?is_amqqueue(Q) ->
    {ok, node()}.

%% ===================================================================
%% deps/rabbit/src/rabbit_queue_location_min_masters.erl
%% ===================================================================

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Queue-master location strategy that picks the cluster node currently
%% hosting the fewest queue masters.
-module(rabbit_queue_location_min_masters).
-behaviour(rabbit_queue_master_locator).

-include_lib("rabbit_common/include/rabbit.hrl").
-include("amqqueue.hrl").

-export([description/0, queue_master_location/1]).

-rabbit_boot_step({?MODULE,
                   [{description, "locate queue master min bound queues"},
                    {mfa,         {rabbit_registry, register,
                                   [queue_master_locator,
                                    <<"min-masters">>, ?MODULE]}},
                    {requires,    rabbit_registry},
                    {enables,     kernel_ready}]}).
%%---------------------------------------------------------------------------
%% Queue Master Location Callbacks
%%---------------------------------------------------------------------------

description() ->
    [{description,
      <<"Locate queue master node from cluster node with least bound queues">>}].

%% Count the masters currently hosted by each candidate node, discard
%% nodes that are being drained, and pick a node with the smallest
%% count.  Returns 'undefined' when every candidate is being drained.
queue_master_location(Q) when ?is_amqqueue(Q) ->
    Cluster = rabbit_queue_master_location_misc:all_nodes(Q),
    QueueNames = rabbit_amqqueue:list_names(),
    Counts0 =
        lists:foldl(
          fun(#resource{virtual_host = VHost, name = QueueName}, Acc) ->
                  case rabbit_queue_master_location_misc:lookup_master(
                         QueueName, VHost) of
                      {ok, Master} when is_atom(Master) ->
                          %% only count masters hosted on candidate nodes
                          case maps:is_key(Master, Acc) of
                              true  -> maps:update_with(
                                         Master, fun(N) -> N + 1 end, Acc);
                              false -> Acc
                          end;
                      _ ->
                          Acc
                  end
          end,
          maps:from_list([{Node, 0} || Node <- Cluster]),
          QueueNames),
    Counts = maps:filter(
               fun(Node, _N) ->
                       not rabbit_maintenance:is_being_drained_local_read(Node)
               end, Counts0),
    case map_size(Counts) of
        0 -> undefined;
        _ -> {Winner, _NMasters} = maps:fold(fun pick_min/3, init, Counts),
             {ok, Winner}
    end.

%% maps:fold/3 helper: keep the node with the strictly smallest count.
pick_min(Node, N, init) ->
    {Node, N};
pick_min(Node, N, {_BestNode, BestN}) when N < BestN ->
    {Node, N};
pick_min(_Node, _N, Best) ->
    Best.

%% ===================================================================
%% deps/rabbit/src/rabbit_queue_location_random.erl
%% ===================================================================

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Queue-master location strategy that picks a random, non-drained
%% cluster node.
-module(rabbit_queue_location_random).
-behaviour(rabbit_queue_master_locator).

-include_lib("rabbit_common/include/rabbit.hrl").
-include("amqqueue.hrl").

-export([description/0, queue_master_location/1]).

-rabbit_boot_step({?MODULE,
                   [{description, "locate queue master random"},
                    {mfa,         {rabbit_registry, register,
                                   [queue_master_locator,
                                    <<"random">>, ?MODULE]}},
                    {requires,    rabbit_registry},
                    {enables,     kernel_ready}]}).

%%---------------------------------------------------------------------------
%% Queue Master Location Callbacks
%%---------------------------------------------------------------------------

description() ->
    [{description,
      <<"Locate queue master node from cluster in a random manner">>}].

%% Pick a candidate node uniformly at random, excluding nodes that are
%% being drained.  Returns 'undefined' when no candidate remains.
queue_master_location(Q) when ?is_amqqueue(Q) ->
    Cluster0 = rabbit_queue_master_location_misc:all_nodes(Q),
    Cluster = rabbit_maintenance:filter_out_drained_nodes_local_read(Cluster0),
    case Cluster of
        [] ->
            undefined;
        Candidates when is_list(Candidates) ->
            %% BUGFIX: previously this hashed erlang:monotonic_time(),
            %% which is constant within a clock tick, so queues declared
            %% at (nearly) the same instant all landed on the same node.
            %% rand:uniform/1 is per-process seeded and uniform.
            MasterNode = lists:nth(rand:uniform(length(Candidates)),
                                   Candidates),
            {ok, MasterNode}
    end.
-rabbit_boot_step({?MODULE,
                   [{description, "Queue location policy validation"},
                    {mfa, {rabbit_registry, register,
                           [policy_validator,
                            <<"queue-master-locator">>,
                            ?MODULE]}},
                    {requires, rabbit_registry},
                    {enables, recovery}]}).

%% rabbit_policy_validator callback: accept the policy key list iff it
%% carries a resolvable "queue-master-locator" strategy.
validate_policy(KeyList) ->
    case proplists:lookup(<<"queue-master-locator">>, KeyList) of
        {_, Strategy} -> case validate_strategy(Strategy) of
                             {error, _, _} = Err -> Err;
                             _                   -> ok
                         end;
        _             -> {error, "queue-master-locator undefined"}
    end.

%% Resolve a strategy name to its locator module, or return a
%% three-element error tuple (format string plus args).
validate_strategy(Strategy) ->
    case module(Strategy) of
        {ok, _Module} = Ok -> Ok;
        _ ->
            {error, "~p invalid queue-master-locator value", [Strategy]}
    end.

%% Look up Policy on Q, normalising 'undefined' to 'none'.
policy(Policy, Q) ->
    case rabbit_policy:get(Policy, Q) of
        undefined -> none;
        P         -> P
    end.

module(Q) when ?is_amqqueue(Q) ->
    case policy(<<"queue-master-locator">>, Q) of
        %% BUGFIX: policy/2 maps 'undefined' to 'none', so the original
        %% 'undefined' clause was dead code and the no-policy case took
        %% a pointless detour through module(none).  Match 'none' here
        %% (keep 'undefined' as belt-and-braces).
        none      -> no_location_strategy;
        undefined -> no_location_strategy;
        Mode      -> module(Mode)
    end;
module(Strategy) when is_binary(Strategy) ->
    case rabbit_registry:binary_to_type(Strategy) of
        {error, not_found} -> no_location_strategy;
        T ->
            case rabbit_registry:lookup_module(queue_master_locator, T) of
                {ok, Module} ->
                    %% check the module is actually loadable
                    case code:which(Module) of
                        non_existing -> no_location_strategy;
                        _            -> {ok, Module}
                    end;
                _ ->
                    no_location_strategy
            end
    end;
module(Strategy) ->
    module(rabbit_data_coercion:to_binary(Strategy)).

%% ===================================================================
%% deps/rabbit/src/rabbit_queue_master_location_misc.erl
%% ===================================================================

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_queue_master_location_misc).
-include_lib("rabbit_common/include/rabbit.hrl").
-include("amqqueue.hrl").

-export([lookup_master/2,
         lookup_queue/2,
         get_location/1,
         get_location_mod_by_config/1,
         get_location_mod_by_args/1,
         get_location_mod_by_policy/1,
         all_nodes/1]).

%% Resolve the node currently hosting the master of the named queue.
-spec lookup_master(binary(), binary()) -> {ok, node()} | {error, not_found}.
lookup_master(QueueNameBin, VHostPath) when is_binary(QueueNameBin),
                                            is_binary(VHostPath) ->
    QueueR = rabbit_misc:r(VHostPath, queue, QueueNameBin),
    case rabbit_amqqueue:lookup(QueueR) of
        {ok, Queue} when ?amqqueue_has_valid_pid(Queue) ->
            {ok, node(amqqueue:get_pid(Queue))};
        Error ->
            Error
    end.

%% Fetch the amqqueue record for the named queue.
lookup_queue(QueueNameBin, VHostPath) when is_binary(QueueNameBin),
                                           is_binary(VHostPath) ->
    QueueR = rabbit_misc:r(VHostPath, queue, QueueNameBin),
    case rabbit_amqqueue:lookup(QueueR) of
        {ok, Queue} = Reply when ?is_amqqueue(Queue) -> Reply;
        Error                                        -> Error
    end.

%% Determine the master node for Queue, consulting - in order of
%% precedence - queue arguments, then policy, then node configuration.
%% When no source yields a strategy, the last lookup's error is
%% returned.
get_location(Queue) when ?is_amqqueue(Queue) ->
    Lookups = [fun get_location_mod_by_args/1,
               fun get_location_mod_by_policy/1,
               fun get_location_mod_by_config/1],
    case first_strategy(Lookups, Queue) of
        {ok, Module} -> Module:queue_master_location(Queue);
        Error        -> Error
    end.

%% Try each lookup in turn; the final lookup's result (ok or error) is
%% returned as-is.
first_strategy([Lookup], Queue) ->
    Lookup(Queue);
first_strategy([Lookup | Rest], Queue) ->
    case Lookup(Queue) of
        {ok, _Module} = Ok -> Ok;
        {error, _}         -> first_strategy(Rest, Queue)
    end.

%% Strategy taken from the queue's own "x-queue-master-locator"
%% declaration argument.
get_location_mod_by_args(Queue) when ?is_amqqueue(Queue) ->
    Args = amqqueue:get_arguments(Queue),
    case rabbit_misc:table_lookup(Args, <<"x-queue-master-locator">>) of
        {_Type, Strategy} ->
            %% validate_strategy/1 already returns {ok, Mod} | {error, ...}
            rabbit_queue_location_validator:validate_strategy(Strategy);
        _ ->
            {error, "x-queue-master-locator undefined"}
    end.
%% Strategy taken from the effective "queue-master-locator" policy.
get_location_mod_by_policy(Queue) when ?is_amqqueue(Queue) ->
    case rabbit_policy:get(<<"queue-master-locator">>, Queue) of
        undefined ->
            {error, "queue-master-locator policy undefined"};
        Strategy ->
            %% validate_strategy/1 already returns {ok, Mod} | {error, ...}
            rabbit_queue_location_validator:validate_strategy(Strategy)
    end.

%% Strategy taken from the node's 'queue_master_locator' application
%% environment setting.
get_location_mod_by_config(Queue) when ?is_amqqueue(Queue) ->
    case application:get_env(rabbit, queue_master_locator) of
        {ok, Strategy} ->
            rabbit_queue_location_validator:validate_strategy(Strategy);
        _ ->
            {error, "queue_master_locator undefined"}
    end.

%% Candidate nodes eligible to host Queue's master.
all_nodes(Queue) when ?is_amqqueue(Queue) ->
    handle_is_mirrored_ha_nodes(
      rabbit_mirror_queue_misc:is_mirrored_ha_nodes(Queue), Queue).

handle_is_mirrored_ha_nodes(false, _Queue) ->
    %% ha-mode is NOT 'nodes' (it is 'exactly' or 'all'), so any running
    %% cluster node is eligible to become the new queue master.
    rabbit_nodes:all_running();
handle_is_mirrored_ha_nodes(true, Queue) ->
    %% ha-mode is 'nodes': the policy explicitly names the permitted
    %% nodes, so start from the suggested queue node set.
    handle_suggested_queue_nodes(
      rabbit_mirror_queue_misc:suggested_queue_nodes(Queue)).

handle_suggested_queue_nodes({_MNode, []}) ->
    rabbit_nodes:all_running();
handle_suggested_queue_nodes({MNode, SNodes}) ->
    [MNode | SNodes].

%% ===================================================================
%% deps/rabbit/src/rabbit_queue_master_locator.erl
%% ===================================================================

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% Behaviour for pluggable queue-master location strategies, registered
%% with rabbit_registry under the 'queue_master_locator' class.
-module(rabbit_queue_master_locator).

-behaviour(rabbit_registry_class).

-export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]).

%% Human-readable strategy description, as a property list.
-callback description() -> [proplists:property()].
%% Pick the node that should host the given queue's master.
-callback queue_master_location(amqqueue:amqqueue()) ->
    {'ok', node()} | {'error', term()}.

%% rabbit_registry_class callbacks: no bookkeeping is needed on
%% (un)registration.
added_to_rabbit_registry(_Type, _ModuleName) -> ok.
removed_from_rabbit_registry(_Type) -> ok.

%% ===================================================================
%% deps/rabbit/src/rabbit_queue_type.erl
%% ===================================================================

%% Facade that dispatches queue operations to the queue-type
%% implementation modules and tracks per-queue session contexts on
%% behalf of channel-like processes.
-module(rabbit_queue_type).
-include("amqqueue.hrl").
-include_lib("rabbit_common/include/resource.hrl").

-export([
         init/0,
         close/1,
         discover/1,
         default/0,
         is_enabled/1,
         declare/2,
         delete/4,
         is_recoverable/1,
         recover/2,
         purge/1,
         policy_changed/1,
         stat/1,
         remove/2,
         info/2,
         state_info/1,
         info_down/2,
         info_down/3,
         %% stateful client API
         new/2,
         consume/3,
         cancel/5,
         handle_down/3,
         handle_event/3,
         module/2,
         deliver/3,
         settle/5,
         credit/5,
         dequeue/5,
         fold_state/3,
         is_policy_applicable/2,
         is_server_named_allowed/1
        ]).

%% gah what is a good identity of a classic queue including all replicas
-type queue_name() :: rabbit_types:r(queue).
-type queue_ref() :: queue_name() | atom().
-type queue_state() :: term().
-type msg_tag() :: term().

-define(STATE, ?MODULE).

%% Recoverable slaves shouldn't really be a generic one, but let's keep it
%% here until mirrored queues are deprecated.
-define(DOWN_KEYS, [name, durable, auto_delete, arguments, pid,
                    recoverable_slaves, type, state]).

%% Guard macro: true for a queue name #resource{} or a registered-name
%% atom.
-define(QREF(QueueReference),
    (is_tuple(QueueReference) andalso element(1, QueueReference) == resource)
    orelse is_atom(QueueReference)).
%% anything that the host process needs to do on behalf of the queue type
%% session, like knowing when to notify on monitor down
-type action() ::
    {monitor, Pid :: pid(), queue_ref()} |
    %% indicate to the queue type module that a message has been delivered
    %% fully to the queue
    {settled, Success :: boolean(), [msg_tag()]} |
    {deliver, rabbit_types:ctag(), boolean(), [rabbit_amqqueue:qmsg()]}.

-type actions() :: [action()].

%% Raw events forwarded from queue processes to handle_event/3.
-type event() ::
    {down, pid(), Info :: term()} |
    term().

%% Per-queue session context: the owning type module, the queue name,
%% and the type module's opaque session state.
-record(ctx, {module :: module(),
              name :: queue_name(),
              %% "publisher confirm queue accounting"
              %% queue type implementation should emit a:
              %% {settle, Success :: boolean(), msg_tag()}
              %% to either settle or reject the delivery of a
              %% message to the queue instance
              %% The queue type module will then emit a {confirm | reject, [msg_tag()}
              %% action to the channel or channel like process when a msg_tag
              %% has reached its conclusion
              state :: queue_state()}).

%% Top-level session state: contexts keyed by queue ref, plus a map
%% from monitored pid back to the queue ref that requested the monitor.
-record(?STATE, {ctxs = #{} :: #{queue_ref() => #ctx{} | queue_ref()},
                 monitor_registry = #{} :: #{pid() => queue_ref()}
                }).

-opaque state() :: #?STATE{}.

%% Options accepted by consume/3; ':=' keys are mandatory.
-type consume_spec() :: #{no_ack := boolean(),
                          channel_pid := pid(),
                          limiter_pid => pid(),
                          limiter_active => boolean(),
                          prefetch_count => non_neg_integer(),
                          consumer_tag := rabbit_types:ctag(),
                          exclusive_consume => boolean(),
                          args => rabbit_framing:amqp_table(),
                          ok_msg := term(),
                          acting_user := rabbit_types:username()}.

% copied from rabbit_amqqueue
-type absent_reason() :: 'nodedown' | 'crashed' | stopped | timeout.

%% How a batch of message ids should be resolved by settle/5.
-type settle_op() :: 'complete' | 'requeue' | 'discard'.

-export_type([state/0,
              consume_spec/0,
              action/0,
              actions/0,
              settle_op/0]).

%% is the queue type feature enabled
-callback is_enabled() -> boolean().
%% Declare (create or look up) the queue on the given node.
-callback declare(amqqueue:amqqueue(), node()) ->
    {'new' | 'existing' | 'owner_died', amqqueue:amqqueue()} |
    {'absent', amqqueue:amqqueue(), absent_reason()} |
    {'protocol_error', Type :: atom(), Reason :: string(), Args :: term()}.

%% Delete the queue, honouring if-unused / if-empty conditions.
-callback delete(amqqueue:amqqueue(),
                 boolean(),
                 boolean(),
                 rabbit_types:username()) ->
    rabbit_types:ok(non_neg_integer()) |
    rabbit_types:error(in_use | not_empty) |
    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.

%% Recover this type's queues for a vhost after (re)start.
-callback recover(rabbit_types:vhost(), [amqqueue:amqqueue()]) ->
    {Recovered :: [amqqueue:amqqueue()],
     Failed :: [amqqueue:amqqueue()]}.

%% checks if the queue should be recovered
-callback is_recoverable(amqqueue:amqqueue()) ->
    boolean().

%% Drop all ready messages; returns the purged count.
-callback purge(amqqueue:amqqueue()) ->
    {ok, non_neg_integer()} | {error, term()}.

%% Notify the queue that its effective policy changed.
-callback policy_changed(amqqueue:amqqueue()) -> ok.

%% stateful
%% initialise and return a queue type specific session context
-callback init(amqqueue:amqqueue()) -> queue_state().

%% Release any resources held by the session context.
-callback close(queue_state()) -> ok.
%% update the queue type state from the amqqueue record
-callback update(amqqueue:amqqueue(), queue_state()) -> queue_state().

%% Attach a consumer as described by the consume_spec().
-callback consume(amqqueue:amqqueue(),
                  consume_spec(),
                  queue_state()) ->
    {ok, queue_state(), actions()} | {error, term()} |
    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.

%% Detach the consumer identified by its ctag.
-callback cancel(amqqueue:amqqueue(),
                 rabbit_types:ctag(),
                 term(),
                 rabbit_types:username(),
                 queue_state()) ->
    {ok, queue_state()} | {error, term()}.

%% any async events returned from the queue system should be processed through
%% this
-callback handle_event(Event :: event(),
                       queue_state()) ->
    {ok, queue_state(), actions()} | {error, term()} | eol |
    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.

%% Deliver one message to a batch of same-type queue sessions.
-callback deliver([{amqqueue:amqqueue(), queue_state()}],
                  Delivery :: term()) ->
    {[{amqqueue:amqqueue(), queue_state()}], actions()}.
%% Resolve (ack/requeue/discard) a batch of message ids for a consumer.
-callback settle(settle_op(), rabbit_types:ctag(), [non_neg_integer()], queue_state()) ->
    {queue_state(), actions()} |
    {'protocol_error', Type :: atom(), Reason :: string(), Args :: term()}.

%% Grant consumer credit (AMQP 1.0 style flow control).
-callback credit(rabbit_types:ctag(),
                 non_neg_integer(), Drain :: boolean(), queue_state()) ->
    {queue_state(), actions()}.

%% Synchronous basic.get-style single message fetch.
-callback dequeue(NoAck :: boolean(), LimiterPid :: pid(),
                  rabbit_types:ctag(), queue_state()) ->
    {ok, Count :: non_neg_integer(), rabbit_amqqueue:qmsg(), queue_state()} |
    {empty, queue_state()} |
    {error, term()} |
    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.

%% return a map of state summary information
-callback state_info(queue_state()) ->
    #{atom() := term()}.

%% general queue info
-callback info(amqqueue:amqqueue(), all_keys | rabbit_types:info_keys()) ->
    rabbit_types:infos().

%% Message and consumer counts for the queue.
-callback stat(amqqueue:amqqueue()) ->
    {'ok', non_neg_integer(), non_neg_integer()}.

%% Static capability map (e.g. applicable policies, server-named
%% queue support).
-callback capabilities() ->
    #{atom() := term()}.

%% Map a queue type name to its implementation module.
%% TODO: this should be controlled by a registry that is populated on boot
discover(<<"quorum">>) ->
    rabbit_quorum_queue;
discover(<<"classic">>) ->
    rabbit_classic_queue;
discover(<<"stream">>) ->
    rabbit_stream_queue.

%% The queue type used when none is specified.
default() ->
    rabbit_classic_queue.

%% is the queue type feature enabled on this node
-spec is_enabled(module()) -> boolean().
is_enabled(Type) ->
    Type:is_enabled().

%% Dispatch declare to the queue's type module.
-spec declare(amqqueue:amqqueue(), node()) ->
    {'new' | 'existing' | 'owner_died', amqqueue:amqqueue()} |
    {'absent', amqqueue:amqqueue(), absent_reason()} |
    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
declare(Q, Node) ->
    Mod = amqqueue:get_type(Q),
    Mod:declare(Q, Node).

-spec delete(amqqueue:amqqueue(), boolean(),
             boolean(), rabbit_types:username()) ->
    rabbit_types:ok(non_neg_integer()) |
    rabbit_types:error(in_use | not_empty) |
    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
%% Dispatch delete to the queue's type module.
delete(Q, IfUnused, IfEmpty, ActingUser) ->
    Mod = amqqueue:get_type(Q),
    Mod:delete(Q, IfUnused, IfEmpty, ActingUser).

-spec purge(amqqueue:amqqueue()) ->
    {'ok', non_neg_integer()} | {error, term()}.
%% Drop all ready messages; returns the purged message count.
purge(Q) ->
    Mod = amqqueue:get_type(Q),
    Mod:purge(Q).

-spec policy_changed(amqqueue:amqqueue()) -> 'ok'.
%% Notify the queue's type module of a policy change.
policy_changed(Q) ->
    Mod = amqqueue:get_type(Q),
    Mod:policy_changed(Q).

-spec stat(amqqueue:amqqueue()) ->
    {'ok', non_neg_integer(), non_neg_integer()}.
%% Message and consumer counts for the queue.
stat(Q) ->
    Mod = amqqueue:get_type(Q),
    Mod:stat(Q).

-spec remove(queue_ref(), state()) -> state().
%% Forget the session context for QRef, if any.
remove(QRef, #?STATE{ctxs = Contexts} = State) ->
    State#?STATE{ctxs = maps:remove(QRef, Contexts)}.

-spec info(amqqueue:amqqueue(), all_keys | rabbit_types:info_keys()) ->
    rabbit_types:infos().
%% Queue info items; crashed/stopped queues get the reduced "down" set.
info(Q, Items) when ?amqqueue_state_is(Q, crashed) ->
    info_down(Q, Items, crashed);
info(Q, Items) when ?amqqueue_state_is(Q, stopped) ->
    info_down(Q, Items, stopped);
info(Q, Items) ->
    Mod = amqqueue:get_type(Q),
    Mod:info(Q, Items).

%% Fold Fun over every queue-ref => context pair in the session state.
fold_state(Fun, Acc0, #?STATE{ctxs = Contexts}) ->
    maps:fold(Fun, Acc0, Contexts).

%% Summary of a single context's type-specific state.
state_info(#ctx{state = TypeState, module = Mod}) ->
    Mod:state_info(TypeState);
state_info(_) ->
    #{}.

%% The info keys reported for queues that are down.
down_keys() -> ?DOWN_KEYS.

info_down(Q, DownReason) ->
    info_down(Q, down_keys(), DownReason).

info_down(Q, all_keys, DownReason) ->
    info_down(Q, down_keys(), DownReason);
info_down(Q, Items, DownReason) ->
    [{Item, i_down(Item, Q, DownReason)} || Item <- Items].

%% Per-item accessor for "down" queue info; unknown items map to ''.
i_down(name,               Q, _) -> amqqueue:get_name(Q);
i_down(durable,            Q, _) -> amqqueue:is_durable(Q);
i_down(auto_delete,        Q, _) -> amqqueue:is_auto_delete(Q);
i_down(arguments,          Q, _) -> amqqueue:get_arguments(Q);
i_down(pid,                Q, _) -> amqqueue:get_pid(Q);
i_down(recoverable_slaves, Q, _) -> amqqueue:get_recoverable_slaves(Q);
i_down(type,               Q, _) -> amqqueue:get_type(Q);
i_down(state, _Q, DownReason)    -> DownReason;
i_down(_K, _Q, _DownReason)      -> ''.
%% True iff every key in Policy is listed in the queue type's
%% 'policies' capability.
is_policy_applicable(Q, Policy) ->
    Mod = amqqueue:get_type(Q),
    Applicable = maps:get(policies, Mod:capabilities(), []),
    lists:all(fun({Key, _Value}) ->
                      lists:member(Key, Applicable)
              end, Policy).

%% True iff the queue type supports server-generated queue names.
is_server_named_allowed(Type) ->
    maps:get(server_named, Type:capabilities(), false).

-spec init() -> state().
%% Fresh, empty session state.
init() ->
    #?STATE{}.

-spec close(state()) -> ok.
%% Close every per-queue context held in the session state.
close(#?STATE{ctxs = Contexts}) ->
    maps:fold(
      fun(_Ref, #ctx{module = Mod, state = TypeState}, ok) ->
              ok = Mod:close(TypeState)
      end, ok, Contexts),
    ok.

-spec new(amqqueue:amqqueue(), state()) -> state().
%% Ensure a context exists for Q (initialising one if necessary).
new(Q, State) when ?is_amqqueue(Q) ->
    set_ctx(Q, get_ctx(Q, State), State).

-spec consume(amqqueue:amqqueue(), consume_spec(), state()) ->
    {ok, state(), actions()} | {error, term()}.
%% Attach a consumer to Q via its type module.
consume(Q, Spec, State) ->
    #ctx{state = TypeState0} = Ctx = get_ctx(Q, State),
    Mod = amqqueue:get_type(Q),
    case Mod:consume(Q, Spec, TypeState0) of
        {ok, TypeState, Actions} ->
            return_ok(set_ctx(Q, Ctx#ctx{state = TypeState}, State), Actions);
        Err ->
            Err
    end.

%% TODO switch to cancel spec api
-spec cancel(amqqueue:amqqueue(),
             rabbit_types:ctag(),
             term(),
             rabbit_types:username(),
             state()) ->
    {ok, state()} | {error, term()}.
%% Detach the consumer identified by Tag.
cancel(Q, Tag, OkMsg, ActiveUser, Ctxs) ->
    #ctx{state = TypeState0} = Ctx = get_ctx(Q, Ctxs),
    Mod = amqqueue:get_type(Q),
    case Mod:cancel(Q, Tag, OkMsg, ActiveUser, TypeState0) of
        {ok, TypeState} ->
            {ok, set_ctx(Q, Ctx#ctx{state = TypeState}, Ctxs)};
        Err ->
            Err
    end.

-spec is_recoverable(amqqueue:amqqueue()) ->
    boolean().
%% Should this queue be recovered on node start?
is_recoverable(Q) ->
    Mod = amqqueue:get_type(Q),
    Mod:is_recoverable(Q).

-spec recover(rabbit_types:vhost(), [amqqueue:amqqueue()]) ->
    {Recovered :: [amqqueue:amqqueue()],
     Failed :: [amqqueue:amqqueue()]}.
%% Group queues by type, let each type module recover its own set, and
%% concatenate the per-type {Recovered, Failed} results.
recover(VHost, Qs) ->
    ByType = lists:foldl(
               fun (Q, Acc) ->
                       T = amqqueue:get_type(Q),
                       %% BUGFIX/generalisation: use the update_with/4
                       %% default so a queue type module outside the
                       %% three seeded keys is still recovered instead
                       %% of crashing with {badkey, T}.  (Resolves the
                       %% old TODO about resolving types via a
                       %% registry.)
                       maps:update_with(T, fun (X) ->
                                                   [Q | X]
                                           end, [Q], Acc)
               end, #{rabbit_classic_queue => [],
                      rabbit_quorum_queue => [],
                      rabbit_stream_queue => []}, Qs),
    maps:fold(fun (Mod, Queues, {R0, F0}) ->
                      {R, F} = Mod:recover(VHost, Queues),
                      {R0 ++ R, F0 ++ F}
              end, {[], []}, ByType).

-spec handle_down(pid(), term(), state()) ->
    {ok, state(), actions()} | {eol, queue_ref()} | {error, term()}.
%% A monitored queue process exited: route the down event to the
%% context that requested the monitor.  Unknown pids are ignored.
handle_down(Pid, Info, #?STATE{monitor_registry = Reg0} = State0) ->
    %% lookup queue ref in monitor registry
    case maps:take(Pid, Reg0) of
        {QRef, Reg} ->
            case handle_event(QRef, {down, Pid, Info}, State0) of
                {ok, State, Actions} ->
                    {ok, State#?STATE{monitor_registry = Reg}, Actions};
                eol ->
                    {eol, QRef};
                Err ->
                    Err
            end;
        error ->
            {ok, State0, []}
    end.

%% messages sent from queues
-spec handle_event(queue_ref(), term(), state()) ->
    {ok, state(), actions()} | eol | {error, term()} |
    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
handle_event(QRef, Evt, Ctxs) ->
    %% events can arrive after a queue state has been cleared up
    %% so need to be defensive here
    case get_ctx(QRef, Ctxs, undefined) of
        #ctx{module = Mod,
             state = State0} = Ctx ->
            case Mod:handle_event(Evt, State0) of
                {ok, State, Actions} ->
                    return_ok(set_ctx(QRef, Ctx#ctx{state = State}, Ctxs),
                              Actions);
                Err ->
                    Err
            end;
        undefined ->
            {ok, Ctxs, []}
    end.

-spec module(queue_ref(), state()) ->
    {ok, module()} | {error, not_found}.
%% Queue type module tracked for QRef, if a context still exists.
module(QRef, Ctxs) ->
    %% events can arrive after a queue state has been cleared up
    %% so need to be defensive here
    case get_ctx(QRef, Ctxs, undefined) of
        #ctx{module = Mod} ->
            {ok, Mod};
        undefined ->
            {error, not_found}
    end.

-spec deliver([amqqueue:amqqueue()], Delivery :: term(),
              stateless | state()) ->
    {ok, state(), actions()}.
%% Deliver a message to a set of queues.  In 'stateless' mode (e.g.
%% from processes that keep no session state) each queue is handed the
%% delivery individually and any returned actions are discarded.
deliver(Qs, Delivery, stateless) ->
    lists:foreach(fun(Q) ->
                          Mod = amqqueue:get_type(Q),
                          _ = Mod:deliver([{Q, stateless}], Delivery)
                  end, Qs),
    {ok, stateless, []};
deliver(Qs, Delivery, #?STATE{} = State0) ->
    %% sort by queue type - then dispatch each group
    Grouped = lists:foldl(
                fun (Q, Acc) ->
                        T = amqqueue:get_type(Q),
                        Ctx = get_ctx(Q, State0),
                        Entry = {Q, Ctx#ctx.state},
                        maps:update_with(T, fun (Batch) ->
                                                    [Entry | Batch]
                                            end, [Entry], Acc)
                end, #{}, Qs),
    %% dispatch each group to its queue type module and collect the
    %% updated per-queue states plus any actions
    {Updated, Actions} =
        maps:fold(fun(Mod, QSs, {Xs0, As0}) ->
                          {Xs, As} = Mod:deliver(QSs, Delivery),
                          {Xs0 ++ Xs, As0 ++ As}
                  end, {[], []}, Grouped),
    State = lists:foldl(
              fun({Q, S}, Acc) ->
                      Ctx = get_ctx(Q, Acc),
                      set_ctx(qref(Q), Ctx#ctx{state = S}, Acc)
              end, State0, Updated),
    return_ok(State, Actions).

-spec settle(queue_ref(), settle_op(), rabbit_types:ctag(),
             [non_neg_integer()], state()) ->
    {ok, state(), actions()} |
    {'protocol_error', Type :: atom(), Reason :: string(), Args :: term()}.
%% Resolve (ack/requeue/discard) message ids for a consumer on QRef.
settle(QRef, Op, CTag, MsgIds, Ctxs)
  when ?QREF(QRef) ->
    case get_ctx(QRef, Ctxs, undefined) of
        undefined ->
            %% if we receive a settlement and there is no queue state it means
            %% the queue was deleted with active consumers
            {ok, Ctxs, []};
        #ctx{state = State0,
             module = Mod} = Ctx ->
            case Mod:settle(Op, CTag, MsgIds, State0) of
                {State, Actions} ->
                    {ok, set_ctx(QRef, Ctx#ctx{state = State}, Ctxs), Actions};
                Err ->
                    Err
            end
    end.

-spec credit(amqqueue:amqqueue() | queue_ref(),
             rabbit_types:ctag(), non_neg_integer(),
             boolean(), state()) -> {ok, state(), actions()}.
%% Grant flow-control credit to a consumer.
credit(Q, CTag, Credit, Drain, Ctxs) ->
    #ctx{state = State0,
         module = Mod} = Ctx = get_ctx(Q, Ctxs),
    {State, Actions} = Mod:credit(CTag, Credit, Drain, State0),
    {ok, set_ctx(Q, Ctx#ctx{state = State}, Ctxs), Actions}.
-spec dequeue(amqqueue:amqqueue(), boolean(),
              pid(), rabbit_types:ctag(), state()) ->
    {ok, non_neg_integer(), term(), state()} |
    {empty, state()}.
%% Synchronous basic.get-style fetch of a single message from Q.
dequeue(Q, NoAck, LimiterPid, CTag, Ctxs) ->
    #ctx{state = TypeState0} = Ctx = get_ctx(Q, Ctxs),
    Mod = amqqueue:get_type(Q),
    case Mod:dequeue(NoAck, LimiterPid, CTag, TypeState0) of
        {ok, Count, Msg, TypeState} ->
            {ok, Count, Msg, set_ctx(Q, Ctx#ctx{state = TypeState}, Ctxs)};
        {empty, TypeState} ->
            {empty, set_ctx(Q, Ctx#ctx{state = TypeState}, Ctxs)};
        {error, _} = Err ->
            Err;
        {protocol_error, _, _, _} = Err ->
            Err
    end.

%% Fetch the context for an amqqueue record, refreshing the type state
%% from the record, or initialise a fresh context when none exists.
get_ctx(Q, #?STATE{ctxs = Contexts}) when ?is_amqqueue(Q) ->
    Ref = qref(Q),
    case Contexts of
        #{Ref := #ctx{module = Mod,
                      state = TypeState} = Ctx} ->
            Ctx#ctx{state = Mod:update(Q, TypeState)};
        _ ->
            %% not found - initialize
            Mod = amqqueue:get_type(Q),
            #ctx{module = Mod,
                 name = amqqueue:get_name(Q),
                 state = Mod:init(Q)}
    end;
get_ctx(QRef, Contexts) when ?QREF(QRef) ->
    %% a bare queue ref must already have an initialised context
    case get_ctx(QRef, Contexts, undefined) of
        undefined -> exit({queue_context_not_found, QRef});
        Ctx       -> Ctx
    end.

%% Fetch the context for a queue ref, or Default when absent.
get_ctx(QRef, #?STATE{ctxs = Contexts}, Default) ->
    case maps:get(qref(QRef), Contexts, undefined) of
        #ctx{} = Ctx -> Ctx;
        undefined    -> Default
    end.

%% Store Ctx under the queue's canonical ref (qref/1 accepts both
%% amqqueue records and bare refs, so one clause covers both).
set_ctx(QueueOrRef, Ctx, #?STATE{ctxs = Contexts} = State) ->
    Ref = qref(QueueOrRef),
    State#?STATE{ctxs = maps:put(Ref, Ctx, Contexts)}.

%% Canonical queue reference: the #resource{} queue name.
qref(#resource{kind = queue} = QName) ->
    QName;
qref(Q) when ?is_amqqueue(Q) ->
    amqqueue:get_name(Q).
%% Wrap State and Actions into an {ok, ...} result, intercepting
%% 'monitor' actions on the way: each monitored pid is recorded in the
%% registry (and monitored at most once); every other action is passed
%% through in its original order.
return_ok(State0, []) ->
    {ok, State0, []};
return_ok(State0, Actions0) ->
    {State, RevActions} =
        lists:foldl(fun intercept_monitor/2, {State0, []}, Actions0),
    {ok, State, lists:reverse(RevActions)}.

%% Fold helper for return_ok/2.
intercept_monitor({monitor, Pid, QRef},
                  {#?STATE{monitor_registry = Reg0} = S0, Acc}) ->
    case Reg0 of
        #{Pid := QRef} ->
            %% already monitored by the qref
            {S0, Acc};
        #{Pid := _} ->
            %% TODO: allow multiple QRefs to monitor the same pid
            exit(return_ok_duplicate_monitored_pid);
        _ ->
            _ = erlang:monitor(process, Pid),
            {S0#?STATE{monitor_registry = Reg0#{Pid => QRef}}, Acc}
    end;
intercept_monitor(Action, {S, Acc}) ->
    {S, [Action | Acc]}.

%% ===================================================================
%% deps/rabbit/src/rabbit_queue_type_util.erl
%% ===================================================================

%% The contents of this file are subject to the Mozilla Public License
%% Version 1.1 (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License
%% at https://www.mozilla.org/MPL/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and
%% limitations under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% The Initial Developer of the Original Code is GoPivotal, Inc.
%% Copyright (c) 2018-2020 Pivotal Software, Inc. All rights reserved.
%%

%% Shared helpers for queue type implementations (argument/policy
%% resolution and declaration-time property checks).
-module(rabbit_queue_type_util).

-export([args_policy_lookup/3,
         qname_to_internal_name/1,
         check_auto_delete/1,
         check_exclusive/1,
         check_non_durable/1,
         run_checks/2]).

-include("rabbit.hrl").
-include("amqqueue.hrl").
%% Resolve a setting that may come from either policy or an "x-"
%% queue argument; when both are present, Resolve/2 combines them.
args_policy_lookup(Name, Resolve, Q) when ?is_amqqueue(Q) ->
    Args = amqqueue:get_arguments(Q),
    ArgName = <<"x-", Name/binary>>,
    PolicyVal = rabbit_policy:get(Name, Q),
    ArgVal = rabbit_misc:table_lookup(Args, ArgName),
    case {PolicyVal, ArgVal} of
        {undefined, undefined}   -> undefined;
        {undefined, {_Type, V}}  -> V;
        {V, undefined}           -> V;
        {PolV, {_Type, AV}}      -> Resolve(PolV, AV)
    end.

%% TODO escape hack
%% NOTE(review): this creates atoms from queue names at runtime; atoms
%% are never garbage-collected, so very large numbers of distinct
%% queue names grow the atom table - confirm this is bounded by design.
qname_to_internal_name(#resource{virtual_host = <<"/">>, name = Name}) ->
    erlang:binary_to_atom(<<"%2F_", Name/binary>>, utf8);
qname_to_internal_name(#resource{virtual_host = VHost, name = Name}) ->
    erlang:binary_to_atom(<<VHost/binary, "_", Name/binary>>, utf8).

%% Reject queues declared with auto-delete (unsupported by this type).
check_auto_delete(Q) when ?amqqueue_is_auto_delete(Q) ->
    Name = amqqueue:get_name(Q),
    {protocol_error, precondition_failed,
     "invalid property 'auto-delete' for ~s", [rabbit_misc:rs(Name)]};
check_auto_delete(_) ->
    ok.

%% Reject queues declared with an exclusive owner.
check_exclusive(Q) when ?amqqueue_exclusive_owner_is(Q, none) ->
    ok;
check_exclusive(Q) when ?is_amqqueue(Q) ->
    Name = amqqueue:get_name(Q),
    {protocol_error, precondition_failed,
     "invalid property 'exclusive-owner' for ~s", [rabbit_misc:rs(Name)]}.

%% Reject non-durable queues (this queue type requires durability).
check_non_durable(Q) when ?amqqueue_is_durable(Q) ->
    ok;
check_non_durable(Q) when not ?amqqueue_is_durable(Q) ->
    Name = amqqueue:get_name(Q),
    {protocol_error, precondition_failed,
     "invalid property 'non-durable' for ~s", [rabbit_misc:rs(Name)]}.

%% Run each check fun over Q, stopping at the first non-ok result.
run_checks([], _Q) ->
    ok;
run_checks([Check | Rest], Q) ->
    case Check(Q) of
        ok  -> run_checks(Rest, Q);
        Err -> Err
    end.

%% ===================================================================
%% deps/rabbit/src/rabbit_quorum_memory_manager.erl
%% ===================================================================

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0.
If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. +%% +-module(rabbit_quorum_memory_manager). + +-include_lib("rabbit_common/include/rabbit.hrl"). + +-export([init/1, handle_call/2, handle_event/2, handle_info/2, + terminate/2, code_change/3]). +-export([register/0, unregister/0]). + +-record(state, {last_roll_over, + interval}). + +-rabbit_boot_step({rabbit_quorum_memory_manager, + [{description, "quorum memory manager"}, + {mfa, {?MODULE, register, []}}, + {cleanup, {?MODULE, unregister, []}}, + {requires, rabbit_event}, + {enables, recovery}]}). + +register() -> + gen_event:add_handler(rabbit_alarm, ?MODULE, []). + +unregister() -> + gen_event:delete_handler(rabbit_alarm, ?MODULE, []). + +init([]) -> + {ok, #state{interval = interval()}}. + +handle_call( _, State) -> + {ok, ok, State}. + +handle_event({set_alarm, {{resource_limit, memory, Node}, []}}, + #state{last_roll_over = undefined} = State) when Node == node() -> + {ok, force_roll_over(State)}; +handle_event({set_alarm, {{resource_limit, memory, Node}, []}}, + #state{last_roll_over = Last, interval = Interval } = State) + when Node == node() -> + Now = erlang:system_time(millisecond), + case Now > (Last + Interval) of + true -> + {ok, force_roll_over(State)}; + false -> + {ok, State} + end; +handle_event(_, State) -> + {ok, State}. + +handle_info(_, State) -> + {ok, State}. + +terminate(_, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +force_roll_over(State) -> + ra_log_wal:force_roll_over(ra_log_wal), + State#state{last_roll_over = erlang:system_time(millisecond)}. + +interval() -> + application:get_env(rabbit, min_wal_roll_over_interval, 20000). 
diff --git a/deps/rabbit/src/rabbit_quorum_queue.erl b/deps/rabbit/src/rabbit_quorum_queue.erl
new file mode 100644
index 0000000000..95cc93d728
--- /dev/null
+++ b/deps/rabbit/src/rabbit_quorum_queue.erl
@@ -0,0 +1,1523 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Quorum queue implementation of the rabbit_queue_type behaviour,
+%% backed by ra (Raft) clusters running the rabbit_fifo state machine.
+-module(rabbit_quorum_queue).
+
+-behaviour(rabbit_queue_type).
+
+%% rabbit_queue_type callbacks
+-export([init/1,
+         close/1,
+         update/2,
+         handle_event/2]).
+-export([is_recoverable/1, recover/2, stop/1, delete/4, delete_immediately/2]).
+-export([state_info/1, info/2, stat/1, infos/1]).
+-export([settle/4, dequeue/4, consume/3, cancel/5]).
+-export([credit/4]).
+-export([purge/1]).
+-export([stateless_deliver/2, deliver/3, deliver/2]).
+-export([dead_letter_publish/4]).
+-export([queue_name/1]).
+-export([cluster_state/1, status/2]).
+%% consumer metrics handlers (may run locally or via rpc on the channel node)
+-export([update_consumer_handler/8, update_consumer/9]).
+-export([cancel_consumer_handler/2, cancel_consumer/3]).
+-export([become_leader/2, handle_tick/3, spawn_deleter/1]).
+-export([rpc_delete_metrics/1]).
+-export([format/1]).
+-export([open_files/1]).
+-export([peek/2, peek/3]).
+%% membership management (CLI operations)
+-export([add_member/4]).
+-export([delete_member/3]).
+-export([requeue/3]).
+-export([policy_changed/1]).
+-export([format_ra_event/3]).
+-export([cleanup_data_dir/0]).
+-export([shrink_all/1,
+         grow/4]).
+-export([transfer_leadership/2, get_replicas/1, queue_length/1]).
+-export([file_handle_leader_reservation/1, file_handle_other_reservation/0]).
+-export([file_handle_release_reservation/0]).
+-export([list_with_minimum_quorum/0, list_with_minimum_quorum_for_cli/0,
+         filter_quorum_critical/1, filter_quorum_critical/2,
+         all_replica_states/0]).
+-export([capabilities/0]).
+-export([repair_amqqueue_nodes/1,
+         repair_amqqueue_nodes/2
+         ]).
+-export([reclaim_memory/2]).
+
+-export([is_enabled/0,
+         declare/2]).
+
+-import(rabbit_queue_type_util, [args_policy_lookup/3,
+                                 qname_to_internal_name/1]).
+
+-include_lib("stdlib/include/qlc.hrl").
+-include("rabbit.hrl").
+-include("amqqueue.hrl").
+
+-type msg_id() :: non_neg_integer().
+-type qmsg() :: {rabbit_types:r('queue'), pid(), msg_id(), boolean(), rabbit_types:message()}.
+
+%% keys served by infos/2 and emitted in queue_stats events
+-define(STATISTICS_KEYS,
+        [policy,
+         operator_policy,
+         effective_policy_definition,
+         consumers,
+         memory,
+         state,
+         garbage_collection,
+         leader,
+         online,
+         members,
+         open_files,
+         single_active_consumer_pid,
+         single_active_consumer_ctag,
+         messages_ram,
+         message_bytes_ram
+        ]).
+
+-define(INFO_KEYS, [name, durable, auto_delete, arguments, pid, messages, messages_ready,
+                    messages_unacknowledged, local_state, type] ++ ?STATISTICS_KEYS).
+
+-define(RPC_TIMEOUT, 1000).
+-define(TICK_TIMEOUT, 5000). %% the ra server tick time
+-define(DELETE_TIMEOUT, 5000).
+-define(ADD_MEMBER_TIMEOUT, 5000).
+
+%%----------- rabbit_queue_type ---------------------------------------------
+
+%% Quorum queues are only usable once the corresponding feature flag is
+%% enabled cluster-wide.
+-spec is_enabled() -> boolean().
+is_enabled() ->
+    rabbit_feature_flags:is_enabled(quorum_queue).
+
+%%----------------------------------------------------------------------------
+
+%% Build the channel-side rabbit_fifo_client session state for Q,
+%% listing the current leader first and wiring credit_flow block/unblock
+%% callbacks for flow control.
+-spec init(amqqueue:amqqueue()) -> rabbit_fifo_client:state().
+init(Q) when ?is_amqqueue(Q) ->
+    {ok, SoftLimit} = application:get_env(rabbit, quorum_commands_soft_limit),
+    %% This lookup could potentially return an {error, not_found}, but we do not
+    %% know what to do if the queue has `disappeared`. Let it crash.
+    {Name, _LeaderNode} = Leader = amqqueue:get_pid(Q),
+    Nodes = get_nodes(Q),
+    QName = amqqueue:get_name(Q),
+    %% Ensure the leader is listed first
+    Servers0 = [{Name, N} || N <- Nodes],
+    Servers = [Leader | lists:delete(Leader, Servers0)],
+    rabbit_fifo_client:init(QName, Servers, SoftLimit,
+                            fun() -> credit_flow:block(Name) end,
+                            fun() -> credit_flow:unblock(Name), ok end).
+
+-spec close(rabbit_fifo_client:state()) -> ok.
+%% Nothing to tear down client-side; session state is garbage collected.
+close(_State) ->
+    ok.
+
+%% The amqqueue record changing (e.g. policy update) does not affect the
+%% fifo client session.
+-spec update(amqqueue:amqqueue(), rabbit_fifo_client:state()) ->
+    rabbit_fifo_client:state().
+update(Q, State) when ?amqqueue_is_quorum(Q) ->
+    %% QQ state maintains it's own updates
+    State.
+
+%% Feed a raw ra event (sent to the channel process) into the fifo
+%% client, which translates it into queue-type actions.
+-spec handle_event({amqqueue:ra_server_id(), any()},
+                   rabbit_fifo_client:state()) ->
+    {ok, rabbit_fifo_client:state(), rabbit_queue_type:actions()} |
+    eol |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+handle_event({From, Evt}, QState) ->
+    rabbit_fifo_client:handle_ra_event(From, Evt, QState).
+
+%% Validate declare-time properties unsupported by quorum queues
+%% (auto-delete, exclusive, non-durable) and then start the ra cluster.
+-spec declare(amqqueue:amqqueue(), node()) ->
+    {new | existing, amqqueue:amqqueue()} |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+declare(Q, _Node) when ?amqqueue_is_quorum(Q) ->
+    case rabbit_queue_type_util:run_checks(
+           [fun rabbit_queue_type_util:check_auto_delete/1,
+            fun rabbit_queue_type_util:check_exclusive/1,
+            fun rabbit_queue_type_util:check_non_durable/1],
+           Q) of
+        ok ->
+            start_cluster(Q);
+        Err ->
+            Err
+    end.
+
+%% Create the ra cluster for a newly declared quorum queue: pick the
+%% member nodes (up to the configured initial group size), store the
+%% amqqueue record, start one ra server per member and emit the
+%% queue_created event. On ra start failure the record is deleted again
+%% and a protocol error is returned.
+start_cluster(Q) ->
+    QName = amqqueue:get_name(Q),
+    Durable = amqqueue:is_durable(Q),
+    AutoDelete = amqqueue:is_auto_delete(Q),
+    Arguments = amqqueue:get_arguments(Q),
+    Opts = amqqueue:get_options(Q),
+    ActingUser = maps:get(user, Opts, ?UNKNOWN_USER),
+    QuorumSize = get_default_quorum_initial_group_size(Arguments),
+    RaName = qname_to_internal_name(QName),
+    %% the declaring node always hosts the initial leader
+    Id = {RaName, node()},
+    Nodes = select_quorum_nodes(QuorumSize, rabbit_mnesia:cluster_nodes(all)),
+    NewQ0 = amqqueue:set_pid(Q, Id),
+    NewQ1 = amqqueue:set_type_state(NewQ0, #{nodes => Nodes}),
+    case rabbit_amqqueue:internal_declare(NewQ1, false) of
+        {created, NewQ} ->
+            TickTimeout = application:get_env(rabbit, quorum_tick_interval, ?TICK_TIMEOUT),
+            RaConfs = [make_ra_conf(NewQ, ServerId, TickTimeout)
+                       || ServerId <- members(NewQ)],
+            case ra:start_cluster(RaConfs) of
+                {ok, _, _} ->
+                    %% TODO: handle error - what should be done if the
+                    %% config cannot be updated
+                    ok = rabbit_fifo_client:update_machine_state(Id,
+                                                                 ra_machine_config(NewQ)),
+                    %% force a policy change to ensure the latest config is
+                    %% updated even when running the machine version from 0
+                    rabbit_event:notify(queue_created,
+                                        [{name, QName},
+                                         {durable, Durable},
+                                         {auto_delete, AutoDelete},
+                                         {arguments, Arguments},
+                                         {user_who_performed_action,
+                                          ActingUser}]),
+                    {new, NewQ};
+                {error, Error} ->
+                    _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
+                    {protocol_error, internal_error,
+                     "Cannot declare a queue '~s' on node '~s': ~255p",
+                     [rabbit_misc:rs(QName), node(), Error]}
+            end;
+        {existing, _} = Ex ->
+            Ex
+    end.
+
+%% ra machine spec: the rabbit_fifo module plus its per-queue config.
+ra_machine(Q) ->
+    {module, rabbit_fifo, ra_machine_config(Q)}.
+
+%% Build the rabbit_fifo machine configuration from queue arguments and
+%% policies. Length/byte/memory/delivery limits take the minimum of
+%% policy and argument; overflow and expires prefer the policy value.
+ra_machine_config(Q) when ?is_amqqueue(Q) ->
+    QName = amqqueue:get_name(Q),
+    {Name, _} = amqqueue:get_pid(Q),
+    %% take the minimum value of the policy and the queue arg if present
+    MaxLength = args_policy_lookup(<<"max-length">>, fun min/2, Q),
+    %% prefer the policy defined strategy if available
+    Overflow = args_policy_lookup(<<"overflow">>, fun (A, _B) -> A end , Q),
+    MaxBytes = args_policy_lookup(<<"max-length-bytes">>, fun min/2, Q),
+    MaxMemoryLength = args_policy_lookup(<<"max-in-memory-length">>, fun min/2, Q),
+    MaxMemoryBytes = args_policy_lookup(<<"max-in-memory-bytes">>, fun min/2, Q),
+    DeliveryLimit = args_policy_lookup(<<"delivery-limit">>, fun min/2, Q),
+    Expires = args_policy_lookup(<<"expires">>,
+                                 fun (A, _B) -> A end,
+                                 Q),
+    #{name => Name,
+      queue_resource => QName,
+      dead_letter_handler => dlx_mfa(Q),
+      become_leader_handler => {?MODULE, become_leader, [QName]},
+      max_length => MaxLength,
+      max_bytes => MaxBytes,
+      max_in_memory_length => MaxMemoryLength,
+      max_in_memory_bytes => MaxMemoryBytes,
+      single_active_consumer_on => single_active_consumer_on(Q),
+      delivery_limit => DeliveryLimit,
+      overflow_strategy => overflow(Overflow, drop_head, QName),
+      created => erlang:system_time(millisecond),
+      expires => Expires
+     }.
+
+%% True when the x-single-active-consumer argument is set to true.
+single_active_consumer_on(Q) ->
+    QArguments = amqqueue:get_arguments(Q),
+    case rabbit_misc:table_lookup(QArguments, <<"x-single-active-consumer">>) of
+        {bool, true} -> true;
+        _ -> false
+    end.
+
+%% Publish a consumer_updated metric on the node hosting the channel
+%% process (locally or via rpc:cast).
+update_consumer_handler(QName, {ConsumerTag, ChPid}, Exclusive, AckRequired, Prefetch, Active, ActivityStatus, Args) ->
+    local_or_remote_handler(ChPid, rabbit_quorum_queue, update_consumer,
+                            [QName, ChPid, ConsumerTag, Exclusive, AckRequired, Prefetch, Active, ActivityStatus, Args]).
+
+%% Record consumer_updated in core metrics; 'catch' because metrics must
+%% never crash the caller.
+update_consumer(QName, ChPid, ConsumerTag, Exclusive, AckRequired, Prefetch, Active, ActivityStatus, Args) ->
+    catch rabbit_core_metrics:consumer_updated(ChPid, ConsumerTag, Exclusive, AckRequired,
+                                               QName, Prefetch, Active, ActivityStatus, Args).
+
+%% Run cancel_consumer/3 on the node hosting the channel process.
+cancel_consumer_handler(QName, {ConsumerTag, ChPid}) ->
+    local_or_remote_handler(ChPid, rabbit_quorum_queue, cancel_consumer,
+                            [QName, ChPid, ConsumerTag]).
+
+%% Delete the consumer from core metrics and emit consumer_deleted.
+cancel_consumer(QName, ChPid, ConsumerTag) ->
+    catch rabbit_core_metrics:consumer_deleted(ChPid, ConsumerTag, QName),
+    emit_consumer_deleted(ChPid, ConsumerTag, QName, ?INTERNAL_USER).
+
+%% Apply M:F(Args) locally when ChPid lives on this node, otherwise
+%% fire-and-forget via rpc:cast.
+local_or_remote_handler(ChPid, Module, Function, Args) ->
+    Node = node(ChPid),
+    case Node == node() of
+        true ->
+            erlang:apply(Module, Function, Args);
+        false ->
+            %% this could potentially block for a while if the node is
+            %% in disconnected state or tcp buffers are full
+            rpc:cast(Node, Module, Function, Args)
+    end.
+
+%% Invoked by the ra machine when this node becomes leader for QName:
+%% update the amqqueue record's pid to the new leader and clear stale
+%% queue metrics on the other member nodes.
+become_leader(QName, Name) ->
+    Fun = fun (Q1) ->
+                  amqqueue:set_state(
+                    amqqueue:set_pid(Q1, {Name, node()}),
+                    live)
+          end,
+    %% as this function is called synchronously when a ra node becomes leader
+    %% we need to ensure there is no chance of blocking as else the ra node
+    %% may not be able to establish it's leadership
+    spawn(fun() ->
+                  rabbit_misc:execute_mnesia_transaction(
+                    fun() ->
+                            rabbit_amqqueue:update(QName, Fun)
+                    end),
+                  case rabbit_amqqueue:lookup(QName) of
+                      {ok, Q0} when ?is_amqqueue(Q0) ->
+                          Nodes = get_nodes(Q0),
+                          [rpc:call(Node, ?MODULE, rpc_delete_metrics,
+                                    [QName], ?RPC_TIMEOUT)
+                           || Node <- Nodes, Node =/= node()];
+                      _ ->
+                          ok
+                  end
+          end).
+
+%% Snapshot of this node's ra_state ETS table: ra server name -> state.
+-spec all_replica_states() -> {node(), #{atom() => atom()}}.
+all_replica_states() ->
+    Rows = ets:tab2list(ra_state),
+    {node(), maps:from_list(Rows)}.
+
+%% Local quorum queues that would lose availability if one more member
+%% went down.
+-spec list_with_minimum_quorum() -> [amqqueue:amqqueue()].
+list_with_minimum_quorum() ->
+    filter_quorum_critical(
+      rabbit_amqqueue:list_local_quorum_queues()).
+
+%% CLI-friendly rendering of list_with_minimum_quorum/0 as maps with
+%% binary keys.
+-spec list_with_minimum_quorum_for_cli() -> [#{binary() => term()}].
+list_with_minimum_quorum_for_cli() ->
+    QQs = list_with_minimum_quorum(),
+    [begin
+         #resource{name = Name} = amqqueue:get_name(Q),
+         #{
+           <<"readable_name">> => rabbit_data_coercion:to_binary(rabbit_misc:rs(amqqueue:get_name(Q))),
+           <<"name">> => Name,
+           <<"virtual_host">> => amqqueue:get_vhost(Q),
+           <<"type">> => <<"quorum">>
+          }
+     end || Q <- QQs].
+
+%% Collect replica states from every running node (via rpc) and filter
+%% the given queues down to those at minimum quorum.
+-spec filter_quorum_critical([amqqueue:amqqueue()]) -> [amqqueue:amqqueue()].
+filter_quorum_critical(Queues) ->
+    %% Example map of QQ replica states:
+    %%    #{rabbit@warp10 =>
+    %%          #{'%2F_qq.636' => leader,'%2F_qq.243' => leader,
+    %%            '%2F_qq.1939' => leader,'%2F_qq.1150' => leader,
+    %%            '%2F_qq.1109' => leader,'%2F_qq.1654' => leader,
+    %%            '%2F_qq.1679' => leader,'%2F_qq.1003' => leader,
+    %%            '%2F_qq.1593' => leader,'%2F_qq.1765' => leader,
+    %%            '%2F_qq.933' => leader,'%2F_qq.38' => leader,
+    %%            '%2F_qq.1357' => leader,'%2F_qq.1345' => leader,
+    %%            '%2F_qq.1694' => leader,'%2F_qq.994' => leader,
+    %%            '%2F_qq.490' => leader,'%2F_qq.1704' => leader,
+    %%            '%2F_qq.58' => leader,'%2F_qq.564' => leader,
+    %%            '%2F_qq.683' => leader,'%2F_qq.386' => leader,
+    %%            '%2F_qq.753' => leader,'%2F_qq.6' => leader,
+    %%            '%2F_qq.1590' => leader,'%2F_qq.1363' => leader,
+    %%            '%2F_qq.882' => leader,'%2F_qq.1161' => leader,...}}
+    ReplicaStates = maps:from_list(
+                        rabbit_misc:append_rpc_all_nodes(rabbit_nodes:all_running(),
+                                                         ?MODULE, all_replica_states, [])),
+    filter_quorum_critical(Queues, ReplicaStates).
+
+-spec filter_quorum_critical([amqqueue:amqqueue()], #{node() => #{atom() => atom()}}) -> [amqqueue:amqqueue()].
+
+%% Keep only queues where the count of members that are currently
+%% leader/follower is at or below the minimum quorum (N div 2 + 1) —
+%% i.e. one more member loss would make the queue unavailable.
+filter_quorum_critical(Queues, ReplicaStates) ->
+    lists:filter(fun (Q) ->
+                         MemberNodes = rabbit_amqqueue:get_quorum_nodes(Q),
+                         {Name, _Node} = amqqueue:get_pid(Q),
+                         AllUp = lists:filter(fun (N) ->
+                                                      {Name, _} = amqqueue:get_pid(Q),
+                                                      case maps:get(N, ReplicaStates, undefined) of
+                                                          #{Name := State} when State =:= follower orelse State =:= leader ->
+                                                              true;
+                                                          _ -> false
+                                                      end
+                                              end, MemberNodes),
+                         MinQuorum = length(MemberNodes) div 2 + 1,
+                         length(AllUp) =< MinQuorum
+                 end, Queues).
+
+%% Policies, queue arguments and consumer arguments this queue type
+%% understands; quorum queues are never server-named.
+capabilities() ->
+    #{policies => [<<"max-length">>, <<"max-length-bytes">>, <<"overflow">>,
+                   <<"expires">>, <<"max-in-memory-length">>, <<"max-in-memory-bytes">>,
+                   <<"delivery-limit">>, <<"dead-letter-exchange">>, <<"dead-letter-routing-key">>],
+      queue_arguments => [<<"x-expires">>, <<"x-dead-letter-exchange">>,
+                          <<"x-dead-letter-routing-key">>, <<"x-max-length">>,
+                          <<"x-max-length-bytes">>, <<"x-max-in-memory-length">>,
+                          <<"x-max-in-memory-bytes">>, <<"x-overflow">>,
+                          <<"x-single-active-consumer">>, <<"x-queue-type">>,
+                          <<"x-quorum-initial-group-size">>, <<"x-delivery-limit">>],
+      consumer_arguments => [<<"x-priority">>, <<"x-credit">>],
+      server_named => false}.
+
+%% Drop per-queue metric rows on this node (called over rpc after a
+%% leader change).
+rpc_delete_metrics(QName) ->
+    ets:delete(queue_coarse_metrics, QName),
+    ets:delete(queue_metrics, QName),
+    ok.
+
+%% Delete an expired queue from a throwaway process so the caller (e.g.
+%% the ra tick) is never blocked.
+spawn_deleter(QName) ->
+    spawn(fun () ->
+                  {ok, Q} = rabbit_amqqueue:lookup(QName),
+                  delete(Q, false, false, <<"expired">>)
+          end).
+
+%% Periodic ra tick callback: publish queue metrics/stats events, repair
+%% the leader record if stale, and purge members on nodes that have left
+%% the cluster. All work happens in a spawned process (see comment).
+handle_tick(QName,
+            {Name, MR, MU, M, C, MsgBytesReady, MsgBytesUnack},
+            Nodes) ->
+    %% this makes calls to remote processes so cannot be run inside the
+    %% ra server
+    Self = self(),
+    _ = spawn(fun() ->
+                      R = reductions(Name),
+                      rabbit_core_metrics:queue_stats(QName, MR, MU, M, R),
+                      Util = case C of
+                                 0 -> 0;
+                                 _ -> rabbit_fifo:usage(Name)
+                             end,
+                      Infos = [{consumers, C},
+                               {consumer_utilisation, Util},
+                               {message_bytes_ready, MsgBytesReady},
+                               {message_bytes_unacknowledged, MsgBytesUnack},
+                               {message_bytes, MsgBytesReady + MsgBytesUnack},
+                               %% all quorum queue messages are persistent
+                               {message_bytes_persistent, MsgBytesReady + MsgBytesUnack},
+                               {messages_persistent, M}
+
+                               | infos(QName, ?STATISTICS_KEYS -- [consumers])],
+                      rabbit_core_metrics:queue_stats(QName, Infos),
+                      rabbit_event:notify(queue_stats,
+                                          Infos ++ [{name, QName},
+                                                    {messages, M},
+                                                    {messages_ready, MR},
+                                                    {messages_unacknowledged, MU},
+                                                    {reductions, R}]),
+                      ok = repair_leader_record(QName, Self),
+                      ExpectedNodes = rabbit_mnesia:cluster_nodes(all),
+                      case Nodes -- ExpectedNodes of
+                          [] ->
+                              ok;
+                          Stale ->
+                              rabbit_log:info("~s: stale nodes detected. Purging ~w~n",
+                                              [rabbit_misc:rs(QName), Stale]),
+                              %% pipeline purge command
+                              {ok, Q} = rabbit_amqqueue:lookup(QName),
+                              ok = ra:pipeline_command(amqqueue:get_pid(Q),
+                                                       rabbit_fifo:make_purge_nodes(Stale)),
+
+                              ok
+                      end
+              end),
+    ok.
+
+%% If the amqqueue record does not list this node as leader, rewrite it
+%% via become_leader/2 (Self is the ra server process, used to obtain
+%% its registered name).
+repair_leader_record(QName, Self) ->
+    {ok, Q} = rabbit_amqqueue:lookup(QName),
+    Node = node(),
+    case amqqueue:get_pid(Q) of
+        {_, Node} ->
+            %% it's ok - we don't need to do anything
+            ok;
+        _ ->
+            rabbit_log:debug("~s: repairing leader record",
+                             [rabbit_misc:rs(QName)]),
+            {_, Name} = erlang:process_info(Self, registered_name),
+            become_leader(QName, Name)
+    end,
+    ok.
+
+%% Convenience wrapper building the #resource{} before delegating.
+repair_amqqueue_nodes(VHost, QueueName) ->
+    QName = #resource{virtual_host = VHost, name = QueueName, kind = queue},
+    repair_amqqueue_nodes(QName).
+
+-spec repair_amqqueue_nodes(rabbit_types:r('queue') | amqqueue:amqqueue()) ->
+    ok | repaired.
+%% Sync the node list stored in the amqqueue record's type_state with
+%% the actual ra cluster membership. Returns 'repaired' when an update
+%% was needed, 'ok' otherwise.
+repair_amqqueue_nodes(QName = #resource{}) ->
+    {ok, Q0} = rabbit_amqqueue:lookup(QName),
+    repair_amqqueue_nodes(Q0);
+repair_amqqueue_nodes(Q0) ->
+    QName = amqqueue:get_name(Q0),
+    Leader = amqqueue:get_pid(Q0),
+    {ok, Members, _} = ra:members(Leader),
+    RaNodes = [N || {_, N} <- Members],
+    #{nodes := Nodes} = amqqueue:get_type_state(Q0),
+    case lists:sort(RaNodes) =:= lists:sort(Nodes) of
+        true ->
+            %% up to date
+            ok;
+        false ->
+            %% update amqqueue record
+            Fun = fun (Q) ->
+                          TS0 = amqqueue:get_type_state(Q),
+                          TS = TS0#{nodes => RaNodes},
+                          amqqueue:set_type_state(Q, TS)
+                  end,
+            rabbit_misc:execute_mnesia_transaction(
+              fun() ->
+                      rabbit_amqqueue:update(QName, Fun)
+              end),
+            repaired
+    end.
+
+%% Reductions of the registered ra server process; 0 when the process
+%% is not alive (whereis/1 returns undefined -> badarg).
+reductions(Name) ->
+    try
+        {reductions, R} = process_info(whereis(Name), reductions),
+        R
+    catch
+        error:badarg ->
+            0
+    end.
+
+%% A quorum queue is recoverable on this node iff the node is a member.
+is_recoverable(Q) ->
+    Node = node(),
+    Nodes = get_nodes(Q),
+    lists:member(Node, Nodes).
+
+%% Restart the local ra server for each queue during vhost recovery,
+%% falling back to a fresh start when it was never started here.
+%% Partitions the queues into {Recovered, Failed}; every queue is also
+%% re-inserted into the transient rabbit_queue table (see comment).
+-spec recover(binary(), [amqqueue:amqqueue()]) ->
+    {[amqqueue:amqqueue()], [amqqueue:amqqueue()]}.
+recover(_Vhost, Queues) ->
+    lists:foldl(
+      fun (Q0, {R0, F0}) ->
+              {Name, _} = amqqueue:get_pid(Q0),
+              QName = amqqueue:get_name(Q0),
+              Nodes = get_nodes(Q0),
+              Formatter = {?MODULE, format_ra_event, [QName]},
+              Res = case ra:restart_server({Name, node()},
+                                           #{ra_event_formatter => Formatter}) of
+                        ok ->
+                            % queue was restarted, good
+                            ok;
+                        {error, Err1}
+                          when Err1 == not_started orelse
+                               Err1 == name_not_registered ->
+                            % queue was never started on this node
+                            % so needs to be started from scratch.
+                            Machine = ra_machine(Q0),
+                            RaNodes = [{Name, Node} || Node <- Nodes],
+                            case ra:start_server(Name, {Name, node()}, Machine, RaNodes) of
+                                ok -> ok;
+                                Err2 ->
+                                    rabbit_log:warning("recover: quorum queue ~w could not"
+                                                       " be started ~w", [Name, Err2]),
+                                    fail
+                            end;
+                        {error, {already_started, _}} ->
+                            %% this is fine and can happen if a vhost crashes and performs
+                            %% recovery whilst the ra application and servers are still
+                            %% running
+                            ok;
+                        Err ->
+                            %% catch all clause to avoid causing the vhost not to start
+                            rabbit_log:warning("recover: quorum queue ~w could not be "
+                                               "restarted ~w", [Name, Err]),
+                            fail
+                    end,
+              %% we have to ensure the quorum queue is
+              %% present in the rabbit_queue table and not just in
+              %% rabbit_durable_queue
+              %% So many code paths are dependent on this.
+              {ok, Q} = rabbit_amqqueue:ensure_rabbit_queue_record_is_initialized(Q0),
+              case Res of
+                  ok ->
+                      {[Q | R0], F0};
+                  fail ->
+                      {R0, [Q | F0]}
+              end
+      end, {[], []}, Queues).
+
+%% Stop every local ra server belonging to quorum queues of VHost.
+-spec stop(rabbit_types:vhost()) -> ok.
+stop(VHost) ->
+    _ = [begin
+             Pid = amqqueue:get_pid(Q),
+             ra:stop_server(Pid)
+         end || Q <- find_quorum_queues(VHost)],
+    ok.
+
+%% Delete a quorum queue. The if-unused / if-empty AMQP flags are not
+%% supported and yield protocol errors. Otherwise the whole ra cluster
+%% is deleted, waiting up to ?DELETE_TIMEOUT for the leader to go down
+%% before force-deleting each member; queue record and metrics are then
+%% cleaned up. Returns the number of ready messages at deletion time.
+-spec delete(amqqueue:amqqueue(),
+             boolean(), boolean(),
+             rabbit_types:username()) ->
+    {ok, QLen :: non_neg_integer()} |
+    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
+delete(Q, true, _IfEmpty, _ActingUser) when ?amqqueue_is_quorum(Q) ->
+    {protocol_error, not_implemented,
+     "cannot delete ~s. queue.delete operations with if-unused flag set are not supported by quorum queues",
+     [rabbit_misc:rs(amqqueue:get_name(Q))]};
+delete(Q, _IfUnused, true, _ActingUser) when ?amqqueue_is_quorum(Q) ->
+    {protocol_error, not_implemented,
+     "cannot delete ~s. queue.delete operations with if-empty flag set are not supported by quorum queues",
+     [rabbit_misc:rs(amqqueue:get_name(Q))]};
+delete(Q, _IfUnused, _IfEmpty, ActingUser) when ?amqqueue_is_quorum(Q) ->
+    {Name, _} = amqqueue:get_pid(Q),
+    QName = amqqueue:get_name(Q),
+    QNodes = get_nodes(Q),
+    %% TODO Quorum queue needs to support consumer tracking for IfUnused
+    Timeout = ?DELETE_TIMEOUT,
+    {ok, ReadyMsgs, _} = stat(Q),
+    Servers = [{Name, Node} || Node <- QNodes],
+    case ra:delete_cluster(Servers, Timeout) of
+        {ok, {_, LeaderNode} = Leader} ->
+            MRef = erlang:monitor(process, Leader),
+            receive
+                {'DOWN', MRef, process, _, _} ->
+                    ok
+            after Timeout ->
+                    ok = force_delete_queue(Servers)
+            end,
+            ok = delete_queue_data(QName, ActingUser),
+            rpc:call(LeaderNode, rabbit_core_metrics, queue_deleted, [QName],
+                     ?RPC_TIMEOUT),
+            {ok, ReadyMsgs};
+        {error, {no_more_servers_to_try, Errs}} ->
+            case lists:all(fun({{error, noproc}, _}) -> true;
+                              (_) -> false
+                           end, Errs) of
+                true ->
+                    %% If all ra nodes were already down, the delete
+                    %% has succeed
+                    delete_queue_data(QName, ActingUser),
+                    {ok, ReadyMsgs};
+                false ->
+                    %% attempt forced deletion of all servers
+                    rabbit_log:warning(
+                      "Could not delete quorum queue '~s', not enough nodes "
+                      " online to reach a quorum: ~255p."
+                      " Attempting force delete.",
+                      [rabbit_misc:rs(QName), Errs]),
+                    ok = force_delete_queue(Servers),
+                    delete_queue_data(QName, ActingUser),
+                    {ok, ReadyMsgs}
+            end
+    end.
+
+%% Best-effort force delete of every ra server; failures are logged and
+%% ignored (may leave on-disk data requiring manual cleanup).
+force_delete_queue(Servers) ->
+    [begin
+         case catch(ra:force_delete_server(S)) of
+             ok -> ok;
+             Err ->
+                 rabbit_log:warning(
+                   "Force delete of ~w failed with: ~w"
+                   "This may require manual data clean up~n",
+                   [S, Err]),
+                 ok
+         end
+     end || S <- Servers],
+    ok.
+
+%% Remove the queue's metadata (bindings, record) ignoring the result.
+delete_queue_data(QName, ActingUser) ->
+    _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
+    ok.
+
+
+%% CLI-style immediate delete: remove the record, delete the ra cluster
+%% and drop metrics. QPid is the leader's {Name, Node} server id.
+delete_immediately(Resource, {_Name, _} = QPid) ->
+    _ = rabbit_amqqueue:internal_delete(Resource, ?INTERNAL_USER),
+    {ok, _} = ra:delete_cluster([QPid]),
+    rabbit_core_metrics:queue_deleted(Resource),
+    ok.
+
+%% Map the queue-type settle operations onto fifo client commands:
+%% complete -> settle, requeue -> return, discard -> discard.
+settle(complete, CTag, MsgIds, QState) ->
+    rabbit_fifo_client:settle(quorum_ctag(CTag), MsgIds, QState);
+settle(requeue, CTag, MsgIds, QState) ->
+    rabbit_fifo_client:return(quorum_ctag(CTag), MsgIds, QState);
+settle(discard, CTag, MsgIds, QState) ->
+    rabbit_fifo_client:discard(quorum_ctag(CTag), MsgIds, QState).
+
+%% Grant consumer credit (credited consumer mode).
+credit(CTag, Credit, Drain, QState) ->
+    rabbit_fifo_client:credit(quorum_ctag(CTag), Credit, Drain, QState).
+
+%% basic.get equivalent; the limiter pid is unused for quorum queues.
+-spec dequeue(NoAck :: boolean(), pid(),
+              rabbit_types:ctag(), rabbit_fifo_client:state()) ->
+    {empty, rabbit_fifo_client:state()} |
+    {ok, QLen :: non_neg_integer(), qmsg(), rabbit_fifo_client:state()} |
+    {error, term()}.
+dequeue(NoAck, _LimiterPid, CTag0, QState0) ->
+    CTag = quorum_ctag(CTag0),
+    Settlement = case NoAck of
+                     true ->
+                         settled;
+                     false ->
+                         unsettled
+                 end,
+    rabbit_fifo_client:dequeue(CTag, Settlement, QState0).
+
+%% Register a consumer (basic.consume). Global QoS is rejected; a
+%% checkout command is sent to the fifo machine and consumer metrics /
+%% events are emitted based on the single-active-consumer query result.
+-spec consume(amqqueue:amqqueue(),
+              rabbit_queue_type:consume_spec(),
+              rabbit_fifo_client:state()) ->
+    {ok, rabbit_fifo_client:state(), rabbit_queue_type:actions()} |
+    {error, global_qos_not_supported_for_queue_type}.
+consume(Q, #{limiter_active := true}, _State)
+  when ?amqqueue_is_quorum(Q) ->
+    {error, global_qos_not_supported_for_queue_type};
+consume(Q, Spec, QState0) when ?amqqueue_is_quorum(Q) ->
+    #{no_ack := NoAck,
+      channel_pid := ChPid,
+      prefetch_count := ConsumerPrefetchCount,
+      consumer_tag := ConsumerTag0,
+      exclusive_consume := ExclusiveConsume,
+      args := Args,
+      ok_msg := OkMsg,
+      acting_user :=  ActingUser} = Spec,
+    %% TODO: validate consumer arguments
+    %% currently quorum queues do not support any arguments
+    QName = amqqueue:get_name(Q),
+    QPid = amqqueue:get_pid(Q),
+    maybe_send_reply(ChPid, OkMsg),
+    ConsumerTag = quorum_ctag(ConsumerTag0),
+    %% A prefetch count of 0 means no limitation,
+    %% let's make it into something large for ra
+    Prefetch0 = case ConsumerPrefetchCount of
+                    0 -> 2000;
+                    Other -> Other
+                end,
+    %% consumer info is used to describe the consumer properties
+    AckRequired = not NoAck,
+    ConsumerMeta = #{ack => AckRequired,
+                     prefetch => ConsumerPrefetchCount,
+                     args => Args,
+                     username => ActingUser},
+
+    {CreditMode, Credit, Drain} = parse_credit_args(Prefetch0, Args),
+    %% if the mode is credited we should send a separate credit command
+    %% after checkout and give 0 credits initally
+    Prefetch = case CreditMode of
+                   credited -> 0;
+                   simple_prefetch -> Prefetch0
+               end,
+    {ok, QState1} = rabbit_fifo_client:checkout(ConsumerTag, Prefetch,
+                                                CreditMode, ConsumerMeta,
+                                                QState0),
+    QState = case CreditMode of
+                 credited when Credit > 0 ->
+                     rabbit_fifo_client:credit(ConsumerTag, Credit, Drain,
+                                               QState1);
+                 _ -> QState1
+             end,
+    case ra:local_query(QPid,
+                        fun rabbit_fifo:query_single_active_consumer/1) of
+        {ok, {_, SacResult}, _} ->
+            SingleActiveConsumerOn = single_active_consumer_on(Q),
+            {IsSingleActiveConsumer, ActivityStatus} = case {SingleActiveConsumerOn, SacResult} of
+                                                           {false, _} ->
+                                                               {true, up};
+                                                           {true, {value, {ConsumerTag, ChPid}}} ->
+                                                               {true, single_active};
+                                                           _ ->
+                                                               {false, waiting}
+                                                       end,
+            rabbit_core_metrics:consumer_created(
+              ChPid, ConsumerTag, ExclusiveConsume,
+              AckRequired, QName,
+              ConsumerPrefetchCount, IsSingleActiveConsumer,
+              ActivityStatus, Args),
+            emit_consumer_created(ChPid, ConsumerTag, ExclusiveConsume,
+                                  AckRequired, QName, Prefetch,
+                                  Args, none, ActingUser),
+            {ok, QState, []};
+        {error, Error} ->
+            Error;
+        {timeout, _} ->
+            {error, timeout}
+    end.
+
+% -spec basic_cancel(rabbit_types:ctag(), ChPid :: pid(), any(), rabbit_fifo_client:state()) ->
+%                           {'ok', rabbit_fifo_client:state()}.
+
+%% basic.cancel: reply to the channel (self) and cancel the checkout.
+cancel(_Q, ConsumerTag, OkMsg, _ActingUser, State) ->
+    maybe_send_reply(self(), OkMsg),
+    rabbit_fifo_client:cancel_checkout(quorum_ctag(ConsumerTag), State).
+
+%% Emit a consumer_created event to rabbit_event.
+emit_consumer_created(ChPid, CTag, Exclusive, AckRequired, QName, PrefetchCount, Args, Ref, ActingUser) ->
+    rabbit_event:notify(consumer_created,
+                        [{consumer_tag,   CTag},
+                         {exclusive,      Exclusive},
+                         {ack_required,   AckRequired},
+                         {channel,        ChPid},
+                         {queue,          QName},
+                         {prefetch_count, PrefetchCount},
+                         {arguments,      Args},
+                         {user_who_performed_action, ActingUser}],
+                        Ref).
+
+%% Emit a consumer_deleted event to rabbit_event.
+emit_consumer_deleted(ChPid, ConsumerTag, QName, ActingUser) ->
+    rabbit_event:notify(consumer_deleted,
+                        [{consumer_tag, ConsumerTag},
+                         {channel,      ChPid},
+                         {queue,        QName},
+                         {user_who_performed_action, ActingUser}]).
+
+%% Fire-and-forget publish without a client session (no confirms).
+-spec stateless_deliver(amqqueue:ra_server_id(), rabbit_types:delivery()) -> 'ok'.
+
+stateless_deliver(ServerId, Delivery) ->
+    ok = rabbit_fifo_client:untracked_enqueue([ServerId],
+                                              Delivery#delivery.message).
+
+-spec deliver(Confirm :: boolean(), rabbit_types:delivery(),
+              rabbit_fifo_client:state()) ->
+    {ok | slow, rabbit_fifo_client:state()} |
+    {reject_publish, rabbit_fifo_client:state()}.
+%% Publish a message through the session. Without publisher confirms a
+%% reject_publish outcome is silently mapped to ok (the client cannot be
+%% informed anyway); with confirms the sequence number is tracked.
+deliver(false, Delivery, QState0) ->
+    case rabbit_fifo_client:enqueue(Delivery#delivery.message, QState0) of
+        {ok, _} = Res -> Res;
+        {slow, _} = Res -> Res;
+        {reject_publish, State} ->
+            {ok, State}
+    end;
+deliver(true, Delivery, QState0) ->
+    rabbit_fifo_client:enqueue(Delivery#delivery.msg_seq_no,
+                               Delivery#delivery.message, QState0).
+
+%% Deliver to a set of {Q, SessionState} pairs; 'stateless' targets use
+%% untracked enqueue. Collects updated states plus 'rejected' actions
+%% for confirm handling.
+deliver(QSs, #delivery{confirm = Confirm} = Delivery) ->
+    lists:foldl(
+      fun({Q, stateless}, {Qs, Actions}) ->
+              QRef = amqqueue:get_pid(Q),
+              ok = rabbit_fifo_client:untracked_enqueue(
+                     [QRef], Delivery#delivery.message),
+              {Qs, Actions};
+         ({Q, S0}, {Qs, Actions}) ->
+              case deliver(Confirm, Delivery, S0) of
+                  {reject_publish, S} ->
+                      Seq = Delivery#delivery.msg_seq_no,
+                      QName = rabbit_fifo_client:cluster_name(S),
+                      {[{Q, S} | Qs], [{rejected, QName, [Seq]} | Actions]};
+                  {_, S} ->
+                      {[{Q, S} | Qs], Actions}
+              end
+      end, {[], []}, QSs).
+
+
+%% Session-state introspection for the channel.
+state_info(S) ->
+    #{pending_raft_commands => rabbit_fifo_client:pending_size(S)}.
+
+
+
+-spec infos(rabbit_types:r('queue')) -> rabbit_types:infos().
+infos(QName) ->
+    infos(QName, ?STATISTICS_KEYS).
+
+%% Like info/2 but looked up by name; missing queues yield [].
+infos(QName, Keys) ->
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} ->
+            info(Q, Keys);
+        {error, not_found} ->
+            []
+    end.
+
+%% Item-by-item info collection; 'totals' and 'type_specific' expand to
+%% groups of items, everything else goes through i/2.
+info(Q, all_keys) ->
+    info(Q, ?INFO_KEYS);
+info(Q, Items) ->
+    lists:foldr(fun(totals, Acc) ->
+                        i_totals(Q) ++ Acc;
+                   (type_specific, Acc) ->
+                        format(Q) ++ Acc;
+                   (Item, Acc) ->
+                        [{Item, i(Item, Q)} | Acc]
+                end, [], Items).
+
+-spec stat(amqqueue:amqqueue()) ->
+    {'ok', non_neg_integer(), non_neg_integer()}.
+stat(Q) when ?is_amqqueue(Q) ->
+    %% same short default timeout as in rabbit_fifo_client:stat/1
+    stat(Q, 250).
+
+%% {ok, ReadyCount, ConsumerCount}; degrades to {ok, 0, 0} on any error
+%% or timeout (e.g. cluster in minority) rather than failing.
+-spec stat(amqqueue:amqqueue(), non_neg_integer()) -> {'ok', non_neg_integer(), non_neg_integer()}.
+
+stat(Q, Timeout) when ?is_amqqueue(Q) ->
+    Leader = amqqueue:get_pid(Q),
+    try
+        case rabbit_fifo_client:stat(Leader, Timeout) of
+            {ok, _, _} = Success -> Success;
+            {error, _} -> {ok, 0, 0};
+            {timeout, _} -> {ok, 0, 0}
+        end
+    catch
+        _:_ ->
+            %% Leader is not available, cluster might be in minority
+            {ok, 0, 0}
+    end.
+
+%% Drop all ready messages; returns the number purged.
+-spec purge(amqqueue:amqqueue()) ->
+    {ok, non_neg_integer()}.
+purge(Q) when ?is_amqqueue(Q) ->
+    Node = amqqueue:get_pid(Q),
+    rabbit_fifo_client:purge(Node).
+
+%% Requeue (return) previously delivered messages.
+requeue(ConsumerTag, MsgIds, QState) ->
+    rabbit_fifo_client:return(quorum_ctag(ConsumerTag), MsgIds, QState).
+
+%% Delete on-disk ra data directories belonging to rabbit_fifo servers
+%% that are no longer registered as local quorum queue members (nor as
+%% known non-queue ra clusters).
+cleanup_data_dir() ->
+    Names = [begin
+                 {Name, _} = amqqueue:get_pid(Q),
+                 Name
+             end
+             || Q <- rabbit_amqqueue:list_by_type(?MODULE),
+                lists:member(node(), get_nodes(Q))],
+    NoQQClusters = rabbit_ra_registry:list_not_quorum_clusters(),
+    Registered = ra_directory:list_registered(),
+    Running = Names ++ NoQQClusters,
+    _ = [maybe_delete_data_dir(UId) || {Name, UId} <- Registered,
+                                       not lists:member(Name, Running)],
+    ok.
+
+%% Only delete a server directory if its stored config says it ran the
+%% rabbit_fifo machine (i.e. it really was a quorum queue member).
+maybe_delete_data_dir(UId) ->
+    Dir = ra_env:server_data_dir(UId),
+    {ok, Config} = ra_log:read_config(Dir),
+    case maps:get(machine, Config) of
+        {module, rabbit_fifo, _} ->
+            ra_lib:recursive_delete(Dir),
+            ra_directory:unregister_name(UId);
+        _ ->
+            ok
+    end.
+
+%% Push the (possibly changed) machine config derived from policies to
+%% the ra cluster.
+policy_changed(Q) ->
+    QPid = amqqueue:get_pid(Q),
+    _ = rabbit_fifo_client:update_machine_state(QPid, ra_machine_config(Q)),
+    ok.
+
+%% Coarse local replica state by registered name, from the ra_state ETS
+%% table.
+-spec cluster_state(Name :: atom()) -> 'down' | 'recovering' | 'running'.
+
+cluster_state(Name) ->
+    case whereis(Name) of
+        undefined -> down;
+        _ ->
+            case ets:lookup(ra_state, Name) of
+                [{_, recover}] -> recovering;
+                _ -> running
+            end
+    end.
+
+-spec status(rabbit_types:vhost(), Name :: rabbit_misc:resource_name()) ->
+    [[{binary(), term()}]] | {error, term()}.
+%% Per-member raft status table for the CLI: one proplist per node with
+%% raft state, log/commit/snapshot indexes, term and machine version.
+%% Unreachable members get their error atom in the "Raft State" column.
+status(Vhost, QueueName) ->
+    %% Handle not found queues
+    QName = #resource{virtual_host = Vhost, name = QueueName, kind = queue},
+    RName = qname_to_internal_name(QName),
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} when ?amqqueue_is_classic(Q) ->
+            {error, classic_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_quorum(Q) ->
+            Nodes = get_nodes(Q),
+            [begin
+                 case get_sys_status({RName, N}) of
+                     {ok, Sys} ->
+                         {_, M} = lists:keyfind(ra_server_state, 1, Sys),
+                         {_, RaftState} = lists:keyfind(raft_state, 1, Sys),
+                         #{commit_index := Commit,
+                           machine_version := MacVer,
+                           current_term := Term,
+                           log := #{last_index := Last,
+                                    snapshot_index := SnapIdx}} = M,
+                         [{<<"Node Name">>, N},
+                          {<<"Raft State">>, RaftState},
+                          {<<"Log Index">>, Last},
+                          {<<"Commit Index">>, Commit},
+                          {<<"Snapshot Index">>, SnapIdx},
+                          {<<"Term">>, Term},
+                          {<<"Machine Version">>, MacVer}
+                         ];
+                     {error, Err} ->
+                         [{<<"Node Name">>, N},
+                          {<<"Raft State">>, Err},
+                          {<<"Log Index">>, <<>>},
+                          {<<"Commit Index">>, <<>>},
+                          {<<"Snapshot Index">>, <<>>},
+                          {<<"Term">>, <<>>},
+                          {<<"Machine Version">>, <<>>}
+                         ]
+                 end
+             end || N <- Nodes];
+        {error, not_found} = E ->
+            E
+    end.
+
+%% Extract the ra server's sys status entry; any exception (dead
+%% process, timeout, ...) is mapped to {error, Reason-ish atom}.
+get_sys_status(Proc) ->
+    try lists:nth(5, element(4, sys:get_status(Proc))) of
+        Sys -> {ok, Sys}
+    catch
+        _:Err when is_tuple(Err) ->
+            {error, element(1, Err)};
+        _:_ ->
+            {error, other}
+
+    end.
+
+
+%% CLI entry point: add a member (replica) on Node for the named queue.
+%% Idempotent when Node already hosts a member; requires Node running.
+add_member(VHost, Name, Node, Timeout) ->
+    QName = #resource{virtual_host = VHost, name = Name, kind = queue},
+    case rabbit_amqqueue:lookup(QName) of
+        {ok, Q} when ?amqqueue_is_classic(Q) ->
+            {error, classic_queue_not_supported};
+        {ok, Q} when ?amqqueue_is_quorum(Q) ->
+            QNodes = get_nodes(Q),
+            case lists:member(Node, rabbit_nodes:all_running()) of
+                false ->
+                    {error, node_not_running};
+                true ->
+                    case lists:member(Node, QNodes) of
+                        true ->
+                          %% idempotent by design
+                          ok;
+                        false ->
+                            add_member(Q, Node, Timeout)
+                    end
+            end;
+        {error, not_found} = E ->
+                    E
+    end.
%% Start a Ra server for the queue on Node and join it to the cluster.
%% On success the queue record in mnesia is updated to include the new
%% node and the (possibly changed) leader pid. On add failure the freshly
%% started server is force-deleted so no orphan is left behind; on timeout
%% the membership change is additionally retracted with ra:remove_member.
add_member(Q, Node, Timeout) when ?amqqueue_is_quorum(Q) ->
    {RaName, _} = amqqueue:get_pid(Q),
    QName = amqqueue:get_name(Q),
    %% TODO parallel calls might crash this, or add a duplicate in quorum_nodes
    ServerId = {RaName, Node},
    Members = members(Q),
    TickTimeout = application:get_env(rabbit, quorum_tick_interval,
                                      ?TICK_TIMEOUT),
    Conf = make_ra_conf(Q, ServerId, TickTimeout),
    case ra:start_server(Conf) of
        ok ->
            case ra:add_member(Members, ServerId, Timeout) of
                {ok, _, Leader} ->
                    %% Record the new node and current leader in the
                    %% durable queue record.
                    Fun = fun(Q1) ->
                              Q2 = update_type_state(
                                     Q1, fun(#{nodes := Nodes} = Ts) ->
                                             Ts#{nodes => [Node | Nodes]}
                                         end),
                              amqqueue:set_pid(Q2, Leader)
                          end,
                    rabbit_misc:execute_mnesia_transaction(
                      fun() -> rabbit_amqqueue:update(QName, Fun) end),
                    ok;
                {timeout, _} ->
                    %% roll back: kill the new server and retract the
                    %% (possibly partially applied) membership change
                    _ = ra:force_delete_server(ServerId),
                    _ = ra:remove_member(Members, ServerId),
                    {error, timeout};
                E ->
                    _ = ra:force_delete_server(ServerId),
                    E
            end;
        E ->
            E
    end.

%% Remove the member (replica) hosted on Node from the named quorum queue.
%% Idempotent: succeeds without action if Node hosts no member.
delete_member(VHost, Name, Node) ->
    QName = #resource{virtual_host = VHost, name = Name, kind = queue},
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q} when ?amqqueue_is_classic(Q) ->
            {error, classic_queue_not_supported};
        {ok, Q} when ?amqqueue_is_quorum(Q) ->
            QNodes = get_nodes(Q),
            case lists:member(Node, QNodes) of
                false ->
                    %% idempotent by design
                    ok;
                true ->
                    delete_member(Q, Node)
            end;
        {error, not_found} = E ->
            E
    end.
%% Remove the queue member on Node from the Ra cluster, update the durable
%% queue record, then force-delete the local server. Refuses to delete the
%% last remaining member. Several force-delete failure shapes are treated
%% as success because they mean the server is already effectively gone.
delete_member(Q, Node) when ?amqqueue_is_quorum(Q) ->
    QName = amqqueue:get_name(Q),
    {RaName, _} = amqqueue:get_pid(Q),
    ServerId = {RaName, Node},
    case members(Q) of
        [{_, Node}] ->

            %% deleting the last member is not allowed
            {error, last_node};
        Members ->
            case ra:remove_member(Members, ServerId) of
                {ok, _, _Leader} ->
                    Fun = fun(Q1) ->
                              update_type_state(
                                Q1,
                                fun(#{nodes := Nodes} = Ts) ->
                                    Ts#{nodes => lists:delete(Node, Nodes)}
                                end)
                          end,
                    rabbit_misc:execute_mnesia_transaction(
                      fun() -> rabbit_amqqueue:update(QName, Fun) end),
                    case ra:force_delete_server(ServerId) of
                        ok ->
                            ok;
                        {error, {badrpc, nodedown}} ->
                            %% target node is down; nothing left to delete
                            ok;
                        {error, {badrpc, {'EXIT', {badarg, _}}}} ->
                            %% DETS/ETS tables can't be found, application isn't running
                            ok;
                        {error, _} = Err ->
                            Err;
                        Err ->
                            %% normalise any other shape into an error tuple
                            {error, Err}
                    end;
                {timeout, _} ->
                    {error, timeout};
                E ->
                    E
            end
    end.

-spec shrink_all(node()) ->
    [{rabbit_amqqueue:name(),
      {ok, pos_integer()} | {error, pos_integer(), term()}}].
%% Remove the member on Node from every quorum queue that has one there.
%% Returns one result tuple per affected queue: {ok, NewSize} or
%% {error, OldSize, Reason}.
shrink_all(Node) ->
    [begin
         QName = amqqueue:get_name(Q),
         rabbit_log:info("~s: removing member (replica) on node ~w",
                         [rabbit_misc:rs(QName), Node]),
         Size = length(get_nodes(Q)),
         case delete_member(Q, Node) of
             ok ->
                 {QName, {ok, Size-1}};
             {error, Err} ->
                 rabbit_log:warning("~s: failed to remove member (replica) on node ~w, error: ~w",
                                    [rabbit_misc:rs(QName), Node, Err]),
                 {QName, {error, Size, Err}}
         end
     end || Q <- rabbit_amqqueue:list(),
            amqqueue:get_type(Q) == ?MODULE,
            lists:member(Node, get_nodes(Q))].

-spec grow(node(), binary(), binary(), all | even) ->
    [{rabbit_amqqueue:name(),
      {ok, pos_integer()} | {error, pos_integer(), term()}}].
%% Add a member on Node to every quorum queue whose vhost and name match
%% the given regex specs and whose membership satisfies Strategy
%% ('all' = every matching queue, 'even' = only queues with an even member
%% count). Skips queues that already have a member on Node; Node must be
%% running. Returns {ok, NewSize} or {error, OldSize, Reason} per queue.
grow(Node, VhostSpec, QueueSpec, Strategy) ->
    Running = rabbit_nodes:all_running(),
    [begin
         Size = length(get_nodes(Q)),
         QName = amqqueue:get_name(Q),
         rabbit_log:info("~s: adding a new member (replica) on node ~w",
                         [rabbit_misc:rs(QName), Node]),
         case add_member(Q, Node, ?ADD_MEMBER_TIMEOUT) of
             ok ->
                 {QName, {ok, Size + 1}};
             {error, Err} ->
                 rabbit_log:warning(
                   "~s: failed to add member (replica) on node ~w, error: ~w",
                   [rabbit_misc:rs(QName), Node, Err]),
                 {QName, {error, Size, Err}}
         end
     end
     || Q <- rabbit_amqqueue:list(),
        amqqueue:get_type(Q) == ?MODULE,
        %% don't add a member if there is already one on the node
        not lists:member(Node, get_nodes(Q)),
        %% node needs to be running
        lists:member(Node, Running),
        matches_strategy(Strategy, get_nodes(Q)),
        is_match(amqqueue:get_vhost(Q), VhostSpec) andalso
        is_match(get_resource_name(amqqueue:get_name(Q)), QueueSpec) ].

%% Ask Ra to move leadership of the queue to Destination, then look up the
%% resulting leader. Returns {migrated, NewNode} or {not_migrated, Reason}.
transfer_leadership(Q, Destination) ->
    {RaName, _} = Pid = amqqueue:get_pid(Q),
    case ra:transfer_leadership(Pid, {RaName, Destination}) of
        ok ->
            case ra:members(Pid) of
                {_, _, {_, NewNode}} ->
                    {migrated, NewNode};
                {timeout, _} ->
                    {not_migrated, ra_members_timeout}
            end;
        already_leader ->
            {not_migrated, already_leader};
        {error, Reason} ->
            {not_migrated, Reason};
        {timeout, _} ->
            %% TODO should we retry once? 
            {not_migrated, timeout}
    end.

%% Approximate Ra log length: last index minus snapshot index, from the
%% ra_metrics ETS table; 0 when no metrics row exists.
queue_length(Q) ->
    Name = amqqueue:get_name(Q),
    case ets:lookup(ra_metrics, Name) of
        [] -> 0;
        [{_, _, SnapIdx, _, _, LastIdx, _}] -> LastIdx - SnapIdx
    end.

%% Alias for get_nodes/1: the nodes hosting the queue's replicas.
get_replicas(Q) ->
    get_nodes(Q).

%% Extract the bare name from a #resource{} record.
get_resource_name(#resource{name = Name}) ->
    Name.

%% Strategy predicate used by grow/4.
matches_strategy(all, _) -> true;
matches_strategy(even, Members) ->
    length(Members) rem 2 == 0.

%% Regex match used for the vhost/queue specs in grow/4
%% (substring semantics, per re:run/2).
is_match(Subj, E) ->
    nomatch /= re:run(Subj, E).

%% Reserve file handles for a queue leader: two plus one per member.
file_handle_leader_reservation(QName) ->
    {ok, Q} = rabbit_amqqueue:lookup(QName),
    ClusterSize = length(get_nodes(Q)),
    file_handle_cache:set_reservation(2 + ClusterSize).
%% Reserve a fixed two file handles for non-leader members.
file_handle_other_reservation() ->
    file_handle_cache:set_reservation(2).

%% Release this process's file handle reservation.
file_handle_release_reservation() ->
    file_handle_cache:release_reservation().

-spec reclaim_memory(rabbit_types:vhost(), Name :: rabbit_misc:resource_name()) -> ok | {error, term()}.
%% Ask the queue's Ra leader to run a garbage collection (fire-and-forget
%% pipeline command). Classic queues are rejected.
reclaim_memory(Vhost, QueueName) ->
    QName = #resource{virtual_host = Vhost, name = QueueName, kind = queue},
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q} when ?amqqueue_is_classic(Q) ->
            {error, classic_queue_not_supported};
        {ok, Q} when ?amqqueue_is_quorum(Q) ->
            ok = ra:pipeline_command(amqqueue:get_pid(Q),
                                     rabbit_fifo:make_garbage_collection());
        {error, not_found} = E ->
            E
    end.

%%----------------------------------------------------------------------------
%% Build the {M, F, A} dead-lettering callback for the queue from its
%% dead-letter-exchange / dead-letter-routing-key arguments and policies
%% (argument value wins over policy, see res_arg/2).
dlx_mfa(Q) ->
    DLX = init_dlx(args_policy_lookup(<<"dead-letter-exchange">>,
                                      fun res_arg/2, Q), Q),
    DLXRKey = args_policy_lookup(<<"dead-letter-routing-key">>,
                                 fun res_arg/2, Q),
    {?MODULE, dead_letter_publish, [DLX, DLXRKey, amqqueue:get_name(Q)]}.

%% Resolve the DLX name to an exchange resource in the queue's vhost;
%% undefined means no dead-lettering configured.
init_dlx(undefined, _Q) ->
    undefined;
init_dlx(DLX, Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    rabbit_misc:r(QName, exchange, DLX).

%% Argument/policy resolver: prefer the queue-argument value.
res_arg(_PolVal, ArgVal) -> ArgVal.

%% Publish dead-lettered messages to exchange X with routing key RK.
%% No-op when no DLX is configured or the exchange no longer exists.
dead_letter_publish(undefined, _, _, _) ->
    ok;
dead_letter_publish(X, RK, QName, ReasonMsgs) ->
    case rabbit_exchange:lookup(X) of
        {ok, Exchange} ->
            [rabbit_dead_letter:publish(Msg, Reason, Exchange, RK, QName)
             || {Reason, Msg} <- ReasonMsgs];
        {error, not_found} ->
            ok
    end.

%% All durable quorum queues in VHost that have this node as their
%% home node, read dirtily from mnesia.
find_quorum_queues(VHost) ->
    Node = node(),
    mnesia:async_dirty(
      fun () ->
              qlc:e(qlc:q([Q || Q <- mnesia:table(rabbit_durable_queue),
                                ?amqqueue_is_quorum(Q),
                                amqqueue:get_vhost(Q) =:= VHost,
                                amqqueue:qnode(Q) == Node]))
      end).
%% Coarse message totals (ready / unacked / total) for the queue, read from
%% the queue_coarse_metrics ETS table; zeros when no row exists yet.
i_totals(Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    case ets:lookup(queue_coarse_metrics, QName) of
        [{_, MR, MU, M, _}] ->
            [{messages_ready, MR},
             {messages_unacknowledged, MU},
             {messages, M}];
        [] ->
            [{messages_ready, 0},
             {messages_unacknowledged, 0},
             {messages, 0}]
    end.

%% Info-item dispatch: one clause per queried key. Unknown keys fall
%% through to the final clause and yield ''.
i(name, Q) when ?is_amqqueue(Q) -> amqqueue:get_name(Q);
i(durable, Q) when ?is_amqqueue(Q) -> amqqueue:is_durable(Q);
i(auto_delete, Q) when ?is_amqqueue(Q) -> amqqueue:is_auto_delete(Q);
i(arguments, Q) when ?is_amqqueue(Q) -> amqqueue:get_arguments(Q);
i(pid, Q) when ?is_amqqueue(Q) ->
    %% the queue "pid" is a Ra server id {Name, Node}; resolve the local
    %% registered process
    {Name, _} = amqqueue:get_pid(Q),
    whereis(Name);
i(messages, Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    quorum_messages(QName);
i(messages_ready, Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    case ets:lookup(queue_coarse_metrics, QName) of
        [{_, MR, _, _, _}] ->
            MR;
        [] ->
            0
    end;
i(messages_unacknowledged, Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    case ets:lookup(queue_coarse_metrics, QName) of
        [{_, _, MU, _, _}] ->
            MU;
        [] ->
            0
    end;
i(policy, Q) ->
    case rabbit_policy:name(Q) of
        none -> '';
        Policy -> Policy
    end;
i(operator_policy, Q) ->
    case rabbit_policy:name_op(Q) of
        none -> '';
        Policy -> Policy
    end;
i(effective_policy_definition, Q) ->
    case rabbit_policy:effective_definition(Q) of
        undefined -> [];
        Def -> Def
    end;
i(consumers, Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    case ets:lookup(queue_metrics, QName) of
        [{_, M, _}] ->
            proplists:get_value(consumers, M, 0);
        [] ->
            0
    end;
i(memory, Q) when ?is_amqqueue(Q) ->
    {Name, _} = amqqueue:get_pid(Q),
    try
        {memory, M} = process_info(whereis(Name), memory),
        M
    catch
        %% whereis/1 returned undefined: process not running locally
        error:badarg ->
            0
    end;
i(state, Q) when ?is_amqqueue(Q) ->
    {Name, Node} = amqqueue:get_pid(Q),
    %% Check against the leader or last known leader
    case rpc:call(Node, ?MODULE, cluster_state, [Name], ?RPC_TIMEOUT) of

        {badrpc, _} -> down;
        State -> State
    end;
i(local_state, Q) when ?is_amqqueue(Q) ->
    {Name, _} = amqqueue:get_pid(Q),
    case ets:lookup(ra_state, Name) of
        [{_, State}] -> State;
        _ -> not_member
    end;
i(garbage_collection, Q) when ?is_amqqueue(Q) ->
    {Name, _} = amqqueue:get_pid(Q),
    try
        rabbit_misc:get_gc_info(whereis(Name))
    catch
        error:badarg ->
            []
    end;
i(members, Q) when ?is_amqqueue(Q) ->
    get_nodes(Q);
i(online, Q) -> online(Q);
i(leader, Q) -> leader(Q);
i(open_files, Q) when ?is_amqqueue(Q) ->
    {Name, _} = amqqueue:get_pid(Q),
    Nodes = get_nodes(Q),
    {Data, _} = rpc:multicall(Nodes, ?MODULE, open_files, [Name]),
    lists:flatten(Data);
i(single_active_consumer_pid, Q) when ?is_amqqueue(Q) ->
    QPid = amqqueue:get_pid(Q),
    case ra:local_query(QPid, fun rabbit_fifo:query_single_active_consumer/1) of
        {ok, {_, {value, {_ConsumerTag, ChPid}}}, _} ->
            ChPid;
        {ok, _, _} ->
            '';
        {error, _} ->
            '';
        {timeout, _} ->
            ''
    end;
i(single_active_consumer_ctag, Q) when ?is_amqqueue(Q) ->
    QPid = amqqueue:get_pid(Q),
    case ra:local_query(QPid,
                        fun rabbit_fifo:query_single_active_consumer/1) of
        {ok, {_, {value, {ConsumerTag, _ChPid}}}, _} ->
            ConsumerTag;
        {ok, _, _} ->
            '';
        {error, _} ->
            '';
        {timeout, _} ->
            ''
    end;
i(type, _) -> quorum;
i(messages_ram, Q) when ?is_amqqueue(Q) ->
    QPid = amqqueue:get_pid(Q),
    case ra:local_query(QPid,
                        fun rabbit_fifo:query_in_memory_usage/1) of
        {ok, {_, {Length, _}}, _} ->
            Length;
        {error, _} ->
            0;
        {timeout, _} ->
            0
    end;
i(message_bytes_ram, Q) when ?is_amqqueue(Q) ->
    QPid = amqqueue:get_pid(Q),
    case ra:local_query(QPid,
                        fun rabbit_fifo:query_in_memory_usage/1) of
        {ok, {_, {_, Bytes}}, _} ->
            Bytes;
        {error, _} ->
            0;
        {timeout, _} ->
            0
    end;
i(_K, _Q) -> ''.
%% Number of open file handles for the locally running Ra server, as
%% {node(), Count}; 0 when the server is not running here or unmetered.
open_files(Name) ->
    case whereis(Name) of
        undefined -> {node(), 0};
        Pid -> case ets:lookup(ra_open_file_metrics, Pid) of
                   [] -> {node(), 0};
                   [{_, Count}] -> {node(), Count}
               end
    end.

%% The leader node recorded in the queue record, or '' when the leader
%% process cannot be confirmed alive on that node.
leader(Q) when ?is_amqqueue(Q) ->
    {Name, Leader} = amqqueue:get_pid(Q),
    case is_process_alive(Name, Leader) of
        true -> Leader;
        false -> ''
    end.

%% Convenience wrapper: peek by vhost/queue name strings.
peek(Vhost, Queue, Pos) ->
    peek(Pos, rabbit_misc:r(Vhost, queue, Queue)).

%% Peek at (without consuming) the message at position Pos in the queue.
%% The x-delivery-count header is added from the rabbit_fifo message
%% header before formatting. Classic queues are rejected.
peek(Pos, #resource{} = QName) ->
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q} ->
            peek(Pos, Q);
        Err ->
            Err
    end;
peek(Pos, Q) when ?is_amqqueue(Q) andalso ?amqqueue_is_quorum(Q) ->
    LeaderPid = amqqueue:get_pid(Q),
    case ra:aux_command(LeaderPid, {peek, Pos}) of
        {ok, {MsgHeader, Msg0}} ->
            Count = case MsgHeader of
                        #{delivery_count := C} -> C;
                        _ -> 0
                    end,
            Msg = rabbit_basic:add_header(<<"x-delivery-count">>, long,
                                          Count, Msg0),
            {ok, rabbit_basic:peek_fmt_message(Msg)};
        {error, Err} ->
            {error, Err};
        Err ->
            Err
    end;
peek(_Pos, Q) when ?is_amqqueue(Q) andalso ?amqqueue_is_classic(Q) ->
    {error, classic_queue_not_supported}.

%% Member nodes on which the queue's Ra server process is currently alive.
online(Q) when ?is_amqqueue(Q) ->
    Nodes = get_nodes(Q),
    {Name, _} = amqqueue:get_pid(Q),
    [Node || Node <- Nodes, is_process_alive(Name, Node)].

%% Membership summary used for display purposes.
format(Q) when ?is_amqqueue(Q) ->
    Nodes = get_nodes(Q),
    [{members, Nodes}, {online, online(Q)}, {leader, leader(Q)}].

%% True if a process registered as Name exists on Node (checked via rpc;
%% any rpc failure yields a non-pid and therefore false).
is_process_alive(Name, Node) ->
    erlang:is_pid(rpc:call(Node, erlang, whereis, [Name], ?RPC_TIMEOUT)).

-spec quorum_messages(rabbit_amqqueue:name()) -> non_neg_integer().

%% Total message count from the queue_coarse_metrics ETS table; 0 when
%% no row exists yet.
quorum_messages(QName) ->
    case ets:lookup(queue_coarse_metrics, QName) of
        [{_, _, _, M, _}] ->
            M;
        [] ->
            0
    end.

%% Normalise a consumer tag to a binary (rabbit_fifo uses binary tags).
quorum_ctag(Int) when is_integer(Int) ->
    integer_to_binary(Int);
quorum_ctag(Other) ->
    Other.

%% Send a protocol command to the channel, unless there is none to send.
maybe_send_reply(_ChPid, undefined) -> ok;
maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg).

%% The queue (cluster) name embedded in a rabbit_fifo client state.
queue_name(RaFifoState) ->
    rabbit_fifo_client:cluster_name(RaFifoState).
%% Resolve the initial replica count for a new quorum queue: an explicit
%% x-quorum-initial-group-size queue argument wins, otherwise fall back to
%% the rabbit application's configured default.
get_default_quorum_initial_group_size(Arguments) ->
    case rabbit_misc:table_lookup(Arguments, <<"x-quorum-initial-group-size">>) of
        {_Type, Size} ->
            Size;
        undefined ->
            application:get_env(rabbit, default_quorum_initial_group_size)
    end.

%% Choose Size nodes out of All to host the queue's replicas. If there are
%% not enough candidates, every node is used. The local node, when present,
%% is always selected; the rest are drawn uniformly at random.
select_quorum_nodes(Size, All) when length(All) =< Size ->
    All;
select_quorum_nodes(Size, All) ->
    Local = node(),
    case lists:member(Local, All) of
        true ->
            select_quorum_nodes(Size - 1, lists:delete(Local, All), [Local]);
        false ->
            select_quorum_nodes(Size, All, [])
    end.

%% Draw random nodes from Remaining until Wanted more have been picked.
select_quorum_nodes(0, _Remaining, Picked) ->
    Picked;
select_quorum_nodes(Wanted, Remaining, Picked) ->
    Choice = lists:nth(rand:uniform(length(Remaining)), Remaining),
    select_quorum_nodes(Wanted - 1,
                        lists:delete(Choice, Remaining),
                        [Choice | Picked]).

%% All Ra server ids of the queue, with the current leader first.
members(Q) when ?amqqueue_is_quorum(Q) ->
    {RaName, LeaderNode} = amqqueue:get_pid(Q),
    Followers = lists:delete(LeaderNode, get_nodes(Q)),
    [{RaName, Node} || Node <- [LeaderNode | Followers]].

%% Wrap a raw Ra event into the queue_event cast that channel processes
%% understand; installed as the ra_event_formatter in make_ra_conf/3.
format_ra_event(ServerId, Evt, QRef) ->
    {'$gen_cast', {queue_event, QRef, {ServerId, Evt}}}.

%% Build the ra:start_server/1 configuration map for one member of the
%% queue's Ra cluster.
make_ra_conf(Q, ServerId, TickTimeout) ->
    QName = amqqueue:get_name(Q),
    RaMachine = ra_machine(Q),
    [{ClusterName, _} | _] = Members = members(Q),
    UId = ra:new_uid(ra_lib:to_binary(ClusterName)),
    #{cluster_name => ClusterName,
      id => ServerId,
      uid => UId,
      friendly_name => rabbit_misc:rs(QName),
      metrics_key => QName,
      initial_members => Members,
      log_init_args => #{uid => UId},
      tick_timeout => TickTimeout,
      machine => RaMachine,
      ra_event_formatter => {?MODULE, format_ra_event, [QName]}}.

%% Nodes recorded in the queue's type state.
get_nodes(Q) when ?is_amqqueue(Q) ->
    #{nodes := Nodes} = amqqueue:get_type_state(Q),
    Nodes.

%% Apply Fun to the queue's type state and store the result back.
update_type_state(Q, Fun) when ?is_amqqueue(Q) ->
    amqqueue:set_type_state(Q, Fun(amqqueue:get_type_state(Q))).
%% Map the x-overflow argument/policy value to the rabbit_fifo overflow
%% strategy atom. reject-publish-dlx is not supported by quorum queues:
%% it is logged and the default is used instead.
overflow(undefined, Def, _QName) -> Def;
overflow(<<"reject-publish">>, _Def, _QName) -> reject_publish;
overflow(<<"drop-head">>, _Def, _QName) -> drop_head;
overflow(<<"reject-publish-dlx">> = V, Def, QName) ->
    rabbit_log:warning("Invalid overflow strategy ~p for quorum queue: ~p",
                       [V, rabbit_misc:rs(QName)]),
    Def.

%% Parse the x-credit consumer argument into a credit mode tuple:
%% {credited, Credit, Drain} when a well-formed table is present,
%% otherwise {simple_prefetch, Default, false}.
parse_credit_args(Default, Args) ->
    case rabbit_misc:table_lookup(Args, <<"x-credit">>) of
        {table, T} ->
            case {rabbit_misc:table_lookup(T, <<"credit">>),
                  rabbit_misc:table_lookup(T, <<"drain">>)} of
                {{long, C}, {bool, D}} ->
                    {credited, C, D};
                _ ->
                    {simple_prefetch, Default, false}
            end;
        undefined ->
            {simple_prefetch, Default, false}
    end.
diff --git a/deps/rabbit/src/rabbit_ra_registry.erl b/deps/rabbit/src/rabbit_ra_registry.erl
new file mode 100644
index 0000000000..b02d89eda5
--- /dev/null
+++ b/deps/rabbit/src/rabbit_ra_registry.erl
@@ -0,0 +1,25 @@
%% The contents of this file are subject to the Mozilla Public License
%% Version 1.1 (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License
%% at https://www.mozilla.org/MPL/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and
%% limitations under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% The Initial Developer of the Original Code is GoPivotal, Inc.
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_ra_registry).

-export([list_not_quorum_clusters/0]).

%% Not all ra clusters are quorum queues. We need to keep a list of these so we don't
%% take them into account in operations such as memory calculation and data cleanup.
%% Hardcoded atm
list_not_quorum_clusters() ->
    [rabbit_stream_coordinator].
diff --git a/deps/rabbit/src/rabbit_reader.erl b/deps/rabbit/src/rabbit_reader.erl new file mode 100644 index 0000000000..c91dbbc105 --- /dev/null +++ b/deps/rabbit/src/rabbit_reader.erl @@ -0,0 +1,1803 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_reader). + +%% Transitional step until we can require Erlang/OTP 21 and +%% use the now recommended try/catch syntax for obtaining the stack trace. +-compile(nowarn_deprecated_function). + +%% This is an AMQP 0-9-1 connection implementation. If AMQP 1.0 plugin is enabled, +%% this module passes control of incoming AMQP 1.0 connections to it. +%% +%% Every connection (as in, a process using this module) +%% is a controlling process for a server socket. +%% +%% Connections have a number of responsibilities: +%% +%% * Performing protocol handshake +%% * Parsing incoming data and dispatching protocol methods +%% * Authenticating clients (with the help of authentication backends) +%% * Enforcing TCP backpressure (throttling clients) +%% * Enforcing connection limits, e.g. channel_max +%% * Channel management +%% * Setting up heartbeater and alarm notifications +%% * Emitting connection and network activity metric events +%% * Gracefully handling client disconnects, channel termination, etc +%% +%% and a few more. +%% +%% Every connection has +%% +%% * a queue collector which is responsible for keeping +%% track of exclusive queues on the connection and their cleanup. 
+%% * a heartbeater that's responsible for sending heartbeat frames to clients, +%% keeping track of the incoming ones and notifying connection about +%% heartbeat timeouts +%% * Stats timer, a timer that is used to periodically emit metric events +%% +%% Some dependencies are started under a separate supervisor to avoid deadlocks +%% during system shutdown. See rabbit_channel_sup:start_link/0 for details. +%% +%% Reader processes are special processes (in the OTP sense). + +-include("rabbit_framing.hrl"). +-include("rabbit.hrl"). + +-export([start_link/2, info_keys/0, info/1, info/2, force_event_refresh/2, + shutdown/2]). + +-export([system_continue/3, system_terminate/4, system_code_change/4]). + +-export([init/3, mainloop/4, recvloop/4]). + +-export([conserve_resources/3, server_properties/1]). + +-define(NORMAL_TIMEOUT, 3). +-define(CLOSING_TIMEOUT, 30). +-define(CHANNEL_TERMINATION_TIMEOUT, 3). +%% we wait for this many seconds before closing TCP connection +%% with a client that failed to log in. Provides some relief +%% from connection storms and DoS. +-define(SILENT_CLOSE_DELAY, 3). +-define(CHANNEL_MIN, 1). 
%%--------------------------------------------------------------------------

%% Per-connection state threaded through the reader loop.
-record(v1, {
          %% parent process
          parent,
          %% socket
          sock,
          %% connection state, see connection record
          connection,
          %% current parser callback (e.g. handshake, frame_header)
          callback,
          %% bytes the current callback needs before it can run
          recv_len,
          pending_recv,
          %% pre_init | securing | running | blocking | blocked | closing | closed | {become, F}
          connection_state,
          %% see comment in rabbit_connection_sup:start_link/0
          helper_sup,
          %% takes care of cleaning up exclusive queues,
          %% see rabbit_queue_collector
          queue_collector,
          %% sends and receives heartbeat frames,
          %% see rabbit_heartbeat
          heartbeater,
          %% timer used to emit statistics
          stats_timer,
          %% channel supervisor
          channel_sup_sup_pid,
          %% how many channels this connection has
          channel_count,
          %% throttling state, for both
          %% credit- and resource-driven flow control
          throttle,
          proxy_socket}).

%% Flow-control bookkeeping for a connection (see #v1.throttle).
-record(throttle, {
  %% never | timestamp()
  last_blocked_at,
  %% a set of the reasons why we are
  %% blocked: {resource, memory}, {resource, disk}.
  %% More reasons can be added in the future.
  blocked_by,
  %% true if received any publishes, false otherwise
  %% note that this will also be true when connection is
  %% already blocked
  should_block,
  %% true if we had we sent a connection.blocked,
  %% false otherwise
  connection_blocked_message_sent
}).

%% Info keys reported for statistics queries.
-define(STATISTICS_KEYS, [pid, recv_oct, recv_cnt, send_oct, send_cnt,
                          send_pend, state, channels, reductions,
                          garbage_collection]).

-define(SIMPLE_METRICS, [pid, recv_oct, send_oct, reductions]).
-define(OTHER_METRICS, [recv_cnt, send_cnt, send_pend, state, channels,
                        garbage_collection]).

%% Info keys emitted with the connection_created event.
-define(CREATION_EVENT_KEYS,
        [pid, name, port, peer_port, host,
         peer_host, ssl, peer_cert_subject, peer_cert_issuer,
         peer_cert_validity, auth_mechanism, ssl_protocol,
         ssl_key_exchange, ssl_cipher, ssl_hash, protocol, user, vhost,
         timeout, frame_max, channel_max, client_properties, connected_at,
         node, user_who_performed_action]).
%% All info keys: creation keys plus statistics keys, minus the duplicate pid.
-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]).

%% Keys included in authentication failure notifications.
-define(AUTH_NOTIFICATION_INFO_KEYS,
        [host, name, peer_host, peer_port, protocol, auth_mechanism,
         ssl, ssl_protocol, ssl_cipher, peer_cert_issuer, peer_cert_subject,
         peer_cert_validity]).

%% A connection counts as running while it is running or merely blocked.
-define(IS_RUNNING(State),
        (State#v1.connection_state =:= running orelse
         State#v1.connection_state =:= blocked)).

-define(IS_STOPPING(State),
        (State#v1.connection_state =:= closing orelse
         State#v1.connection_state =:= closed)).

%%--------------------------------------------------------------------------

%% Shape of resource alarm notifications delivered to conserve_resources/3.
-type resource_alert() :: {WasAlarmSetForNode :: boolean(),
                           IsThereAnyAlarmsWithSameSourceInTheCluster :: boolean(),
                           NodeForWhichAlarmWasSetOrCleared :: node()}.

%%--------------------------------------------------------------------------

-spec start_link(pid(), any()) -> rabbit_types:ok(pid()).

%% Spawn the reader as a linked special process; init/3 completes the
%% socket handshake.
start_link(HelperSup, Ref) ->
    Pid = proc_lib:spawn_link(?MODULE, init, [self(), HelperSup, Ref]),

    {ok, Pid}.

-spec shutdown(pid(), string()) -> 'ok'.

%% Synchronously ask the reader to shut the connection down, giving
%% Explanation as the reason.
shutdown(Pid, Explanation) ->
    gen_server:call(Pid, {shutdown, Explanation}, infinity).

-spec init(pid(), pid(), any()) -> no_return().

%% Special-process entry point: take ownership of the socket (optionally
%% parsing the proxy-protocol header) and enter start_connection/4.
init(Parent, HelperSup, Ref) ->
    ?LG_PROCESS_TYPE(reader),
    {ok, Sock} = rabbit_networking:handshake(Ref,
        application:get_env(rabbit, proxy_protocol, false)),
    Deb = sys:debug_options([]),
    start_connection(Parent, HelperSup, Deb, Sock).

-spec system_continue(_,_,{[binary()], non_neg_integer(), #v1{}}) -> any().

%% sys callback: resume the main loop after a system message.
system_continue(Parent, Deb, {Buf, BufLen, State}) ->
    mainloop(Deb, Buf, BufLen, State#v1{parent = Parent}).

-spec system_terminate(_,_,_,_) -> no_return().

system_terminate(Reason, _Parent, _Deb, _State) ->
    exit(Reason).

-spec system_code_change(_,_,_,_) -> {'ok',_}.

system_code_change(Misc, _Module, _OldVsn, _Extra) ->
    {ok, Misc}.

-spec info_keys() -> rabbit_types:info_keys().

info_keys() -> ?INFO_KEYS.
-spec info(pid()) -> rabbit_types:infos().

%% Query all info items from the reader process.
info(Pid) ->
    gen_server:call(Pid, info, infinity).

-spec info(pid(), rabbit_types:info_keys()) -> rabbit_types:infos().

%% Query a specific set of info items; errors raised by the reader are
%% re-thrown in the caller.
info(Pid, Items) ->
    case gen_server:call(Pid, {info, Items}, infinity) of
        {ok, Res} -> Res;
        {error, Error} -> throw(Error)
    end.

-spec force_event_refresh(pid(), reference()) -> 'ok'.

% Note: https://www.pivotaltracker.com/story/show/166962656
% This event is necessary for the stats timer to be initialized with
% the correct values once the management agent has started
force_event_refresh(Pid, Ref) ->
    gen_server:cast(Pid, {force_event_refresh, Ref}).

-spec conserve_resources(pid(), atom(), resource_alert()) -> 'ok'.

%% Resource-alarm callback: tell the reader whether to conserve (block)
%% or resume, for the given alarm source.
conserve_resources(Pid, Source, {_, Conserve, _}) ->
    Pid ! {conserve_resources, Source, Conserve},
    ok.

-spec server_properties(rabbit_types:protocol()) ->
    rabbit_framing:amqp_table().

%% Build the server-properties table sent in connection.start: built-in
%% properties (product, version, capabilities, ...) merged with any
%% configured overrides, with config-provided values winning on key clash.
server_properties(Protocol) ->
    {ok, Product} = application:get_key(rabbit, description),
    {ok, Version} = application:get_key(rabbit, vsn),

    %% Get any configuration-specified server properties
    {ok, RawConfigServerProps} = application:get_env(rabbit,
                                                     server_properties),

    %% Normalize the simplified (2-tuple) and unsimplified (3-tuple) forms
    %% from the config and merge them with the generated built-in properties
    NormalizedConfigServerProps =
        [{<<"capabilities">>, table, server_capabilities(Protocol)} |
         [case X of
              {KeyAtom, Value} -> {list_to_binary(atom_to_list(KeyAtom)),
                                   longstr,
                                   maybe_list_to_binary(Value)};
              {BinKey, Type, Value} -> {BinKey, Type, Value}
          end || X <- RawConfigServerProps ++
                     [{product, Product},
                      {version, Version},
                      {cluster_name, rabbit_nodes:cluster_name()},
                      {platform, rabbit_misc:platform_and_version()},
                      {copyright, ?COPYRIGHT_MESSAGE},
                      {information, ?INFORMATION_MESSAGE}]]],

    %% Filter duplicated properties in favour of config file provided values
    lists:usort(fun ({K1,_,_}, {K2,_,_}) -> K1 =< K2
                end,
                NormalizedConfigServerProps).

%% Config values may be given as strings or binaries; normalise to binary.
maybe_list_to_binary(V) when is_binary(V) -> V;
maybe_list_to_binary(V) when is_list(V) -> list_to_binary(V).

%% Protocol capabilities advertised to clients; only AMQP 0-9-1 has any.
server_capabilities(rabbit_framing_amqp_0_9_1) ->
    [{<<"publisher_confirms">>, bool, true},
     {<<"exchange_exchange_bindings">>, bool, true},
     {<<"basic.nack">>, bool, true},
     {<<"consumer_cancel_notify">>, bool, true},
     {<<"connection.blocked">>, bool, true},
     {<<"consumer_priorities">>, bool, true},
     {<<"authentication_failure_close">>, bool, true},
     {<<"per_consumer_qos">>, bool, true},
     {<<"direct_reply_to">>, bool, true}];
server_capabilities(_) ->
    [].

%%--------------------------------------------------------------------------

%% Log a socket error; atoms get the compact inet-error formatting.
socket_error(Reason) when is_atom(Reason) ->
    rabbit_log_connection:error("Error on AMQP connection ~p: ~s~n",
        [self(), rabbit_misc:format_inet_error(Reason)]);
socket_error(Reason) ->
    Fmt = "Error on AMQP connection ~p:~n~p~n",
    Args = [self(), Reason],
    case Reason of
        %% The socket was closed while upgrading to SSL.
        %% This is presumably a TCP healthcheck, so don't log
        %% it unless specified otherwise.
        {ssl_upgrade_error, closed} ->
            %% Lager sinks (rabbit_log_connection)
            %% are handled by the lager parse_transform.
            %% Hence have to define the loglevel as a function call.
            rabbit_log_connection:debug(Fmt, Args);
        _ ->
            rabbit_log_connection:error(Fmt, Args)
    end.

%% Wrap an inet operation so failures raise {inet_error, Reason}.
inet_op(F) -> rabbit_misc:throw_on_error(inet_error, F).

%% Run Fun on the socket; on failure log, close the raw socket and exit
%% normally (the connection is unusable at that point).
socket_op(Sock, Fun) ->
    RealSocket = rabbit_net:unwrap_socket(Sock),
    case Fun(Sock) of
        {ok, Res} -> Res;
        {error, Reason} -> socket_error(Reason),
                           rabbit_net:fast_close(RealSocket),
                           exit(normal)
    end.

-spec start_connection(pid(), pid(), any(), rabbit_net:socket()) ->
    no_return().
%% Set up the connection state and run the receive loop until the
%% connection ends, then log the outcome, close the socket and emit the
%% connection_closed event. Never returns to the caller loop (the reader
%% terminates after this function completes).
start_connection(Parent, HelperSup, Deb, Sock) ->
    process_flag(trap_exit, true),
    RealSocket = rabbit_net:unwrap_socket(Sock),
    Name = case rabbit_net:connection_string(Sock, inbound) of
               {ok, Str} -> list_to_binary(Str);
               %% peer disconnected before we could read the address:
               %% close quietly, it is likely a TCP healthcheck
               {error, enotconn} -> rabbit_net:fast_close(RealSocket),
                                    exit(normal);
               {error, Reason} -> socket_error(Reason),
                                  rabbit_net:fast_close(RealSocket),
                                  exit(normal)
           end,
    {ok, HandshakeTimeout} = application:get_env(rabbit, handshake_timeout),
    InitialFrameMax = application:get_env(rabbit, initial_frame_max, ?FRAME_MIN_SIZE),
    erlang:send_after(HandshakeTimeout, self(), handshake_timeout),
    {PeerHost, PeerPort, Host, Port} =
        socket_op(Sock, fun (S) -> rabbit_net:socket_ends(S, inbound) end),
    ?store_proc_name(Name),
    State = #v1{parent              = Parent,
                sock                = RealSocket,
                connection          = #connection{
                  name               = Name,
                  log_name           = Name,
                  host               = Host,
                  peer_host          = PeerHost,
                  port               = Port,
                  peer_port          = PeerPort,
                  protocol           = none,
                  user               = none,
                  timeout_sec        = (HandshakeTimeout / 1000),
                  frame_max          = InitialFrameMax,
                  vhost              = none,
                  client_properties  = none,
                  capabilities       = [],
                  auth_mechanism     = none,
                  auth_state         = none,
                  connected_at       = os:system_time(
                                         milli_seconds)},
                callback            = uninitialized_callback,
                recv_len            = 0,
                pending_recv        = false,
                connection_state    = pre_init,
                queue_collector     = undefined,  %% started on tune-ok
                helper_sup          = HelperSup,
                heartbeater         = none,
                channel_sup_sup_pid = none,
                channel_count       = 0,
                throttle            = #throttle{
                                         last_blocked_at = never,
                                         should_block = false,
                                         blocked_by = sets:new(),
                                         connection_blocked_message_sent = false
                                        },
                proxy_socket = rabbit_net:maybe_get_proxy_socket(Sock)},
    try
        case run({?MODULE, recvloop,
                  [Deb, [], 0, switch_callback(rabbit_event:init_stats_timer(
                                                 State, #v1.stats_timer),
                                               handshake, 8)]}) of
            %% connection was closed cleanly by the client
            #v1{connection = #connection{user  = #user{username = Username},
                                         vhost = VHost}} ->

                rabbit_log_connection:info("closing AMQP connection ~p (~s, vhost: '~s', user: '~s')~n",
                    [self(), dynamic_connection_name(Name), VHost, Username]);
            %% just to be more defensive
            _ ->
                rabbit_log_connection:info("closing AMQP connection ~p (~s)~n",
                    [self(), dynamic_connection_name(Name)])
        end
    catch
        Ex ->
            log_connection_exception(dynamic_connection_name(Name), Ex)
    after
        %% We don't call gen_tcp:close/1 here since it waits for
        %% pending output to be sent, which results in unnecessary
        %% delays. We could just terminate - the reader is the
        %% controlling process and hence its termination will close
        %% the socket. However, to keep the file_handle_cache
        %% accounting as accurate as possible we ought to close the
        %% socket w/o delay before termination.
        rabbit_net:fast_close(RealSocket),
        rabbit_networking:unregister_connection(self()),
        rabbit_core_metrics:connection_closed(self()),
        ClientProperties = case get(client_properties) of
                               undefined ->
                                   [];
                               Properties ->
                                   Properties
                           end,
        EventProperties = [{name, Name},
                           {pid, self()},
                           {node, node()},
                           {client_properties, ClientProperties}],
        EventProperties1 = case get(connection_user_provided_name) of
                               undefined ->
                                   EventProperties;
                               ConnectionUserProvidedName ->
                                   [{user_provided_name, ConnectionUserProvidedName} | EventProperties]
                           end,
        rabbit_event:notify(connection_closed, EventProperties1)
    end,
    done.

%% Map the exception that ended the connection to a log severity, then
%% delegate to the per-exception logging clauses.
log_connection_exception(Name, Ex) ->
    Severity = case Ex of
                   connection_closed_with_no_data_received -> debug;
                   {connection_closed_abruptly, _} -> warning;
                   connection_closed_abruptly -> warning;
                   _ -> error
               end,
    log_connection_exception(Severity, Name, Ex).
%% One clause per known connection-ending exception shape; the final
%% clause logs anything else verbatim.
log_connection_exception(Severity, Name, {heartbeat_timeout, TimeoutSec}) ->
    %% Long line to avoid extra spaces and line breaks in log
    log_connection_exception_with_severity(Severity,
        "closing AMQP connection ~p (~s):~n"
        "missed heartbeats from client, timeout: ~ps~n",
        [self(), Name, TimeoutSec]);
log_connection_exception(Severity, Name, {connection_closed_abruptly,
                                          #v1{connection = #connection{user = #user{username = Username},
                                                                       vhost = VHost}}}) ->
    log_connection_exception_with_severity(Severity,
        "closing AMQP connection ~p (~s, vhost: '~s', user: '~s'):~nclient unexpectedly closed TCP connection~n",
        [self(), Name, VHost, Username]);
%% when client abruptly closes connection before connection.open/authentication/authorization
%% succeeded, don't log username and vhost as 'none'
log_connection_exception(Severity, Name, {connection_closed_abruptly, _}) ->
    log_connection_exception_with_severity(Severity,
        "closing AMQP connection ~p (~s):~nclient unexpectedly closed TCP connection~n",
        [self(), Name]);
%% failed connection.tune negotiations
log_connection_exception(Severity, Name, {handshake_error, tuning, _Channel,
                                          {exit, #amqp_error{explanation = Explanation},
                                           _Method, _Stacktrace}}) ->
    log_connection_exception_with_severity(Severity,
        "closing AMQP connection ~p (~s):~nfailed to negotiate connection parameters: ~s~n",
        [self(), Name, Explanation]);
%% old exception structure
log_connection_exception(Severity, Name, connection_closed_abruptly) ->
    log_connection_exception_with_severity(Severity,
        "closing AMQP connection ~p (~s):~n"
        "client unexpectedly closed TCP connection~n",
        [self(), Name]);
log_connection_exception(Severity, Name, Ex) ->
    log_connection_exception_with_severity(Severity,
        "closing AMQP connection ~p (~s):~n~p~n",
        [self(), Name, Ex]).
+
+%% Dispatch a pre-formatted message to the rabbit_log_connection function
+%% matching the requested severity (debug | warning | error).
+log_connection_exception_with_severity(Severity, Fmt, Args) ->
+ case Severity of
+ debug -> rabbit_log_connection:debug(Fmt, Args);
+ warning -> rabbit_log_connection:warning(Fmt, Args);
+ error -> rabbit_log_connection:error(Fmt, Args)
+ end.
+
+%% Apply {M, F, A}; a thrown {become, MFA} restarts the loop with the new
+%% MFA (used when the reader "becomes" another protocol handler).
+run({M, F, A}) ->
+ try apply(M, F, A)
+ catch {become, MFA} -> run(MFA)
+ end.
+
+%% Main receive-dispatch loop. Buf is a newest-first list of binaries and
+%% BufLen its total byte size; recv_len is how many bytes the current parser
+%% callback expects. When enough bytes are buffered they are assembled and
+%% fed to handle_input/3; otherwise the socket is re-armed ({active, once})
+%% and we fall through to mainloop/4 to wait for more data.
+recvloop(Deb, Buf, BufLen, State = #v1{pending_recv = true}) ->
+ mainloop(Deb, Buf, BufLen, State);
+recvloop(Deb, Buf, BufLen, State = #v1{connection_state = blocked}) ->
+ mainloop(Deb, Buf, BufLen, State);
+recvloop(Deb, Buf, BufLen, State = #v1{connection_state = {become, F}}) ->
+ throw({become, F(Deb, Buf, BufLen, State)});
+recvloop(Deb, Buf, BufLen, State = #v1{sock = Sock, recv_len = RecvLen})
+ when BufLen < RecvLen ->
+ case rabbit_net:setopts(Sock, [{active, once}]) of
+ ok -> mainloop(Deb, Buf, BufLen,
+ State#v1{pending_recv = true});
+ {error, Reason} -> stop(Reason, State)
+ end;
+%% Single-binary fast path: hand the whole binary to the parser and keep
+%% whatever it did not consume. byte_size/1 is used rather than the generic
+%% size/1 BIF: it is explicit about operating on a binary.
+recvloop(Deb, [B], _BufLen, State) ->
+ {Rest, State1} = handle_input(State#v1.callback, B, State),
+ recvloop(Deb, [Rest], byte_size(Rest), State1);
+recvloop(Deb, Buf, BufLen, State = #v1{recv_len = RecvLen}) ->
+ {DataLRev, RestLRev} = binlist_split(BufLen - RecvLen, Buf, []),
+ Data = list_to_binary(lists:reverse(DataLRev)),
+ {<<>>, State1} = handle_input(State#v1.callback, Data, State),
+ recvloop(Deb, lists:reverse(RestLRev), BufLen - RecvLen, State1).
+
+%% Split Len bytes off the head of a newest-first list of binaries,
+%% splitting an individual binary with split_binary/2 when the boundary
+%% falls inside it. Returns {Remaining, MovedRev}.
+binlist_split(0, L, Acc) ->
+ {L, Acc};
+binlist_split(Len, L, [Acc0|Acc]) when Len < 0 ->
+ {H, T} = split_binary(Acc0, -Len),
+ {[H|L], [T|Acc]};
+binlist_split(Len, [H|T], Acc) ->
+ binlist_split(Len - byte_size(H), T, [H|Acc]).
+
+-spec mainloop(_,[binary()], non_neg_integer(), #v1{}) -> any().
+
+%% Perform one socket receive and dispatch the result. The first byte (or
+%% a close) on a brand-new connection is what triggers the "accepting AMQP
+%% connection" log line; a connection closed with no data at all is logged
+%% only at debug so TCP healthchecks do not spam the log.
+mainloop(Deb, Buf, BufLen, State = #v1{sock = Sock,
+ connection_state = CS,
+ connection = #connection{
+ name = ConnName}}) ->
+ Recv = rabbit_net:recv(Sock),
+ case CS of
+ pre_init when Buf =:= [] ->
+ %% We only log incoming connections when either the
+ %% first byte was received or there was an error (eg. a
+ %% timeout).
+ %%
+ %% The goal is to not log TCP healthchecks (a connection
+ %% with no data received) unless specified otherwise.
+ Fmt = "accepting AMQP connection ~p (~s)~n",
+ Args = [self(), ConnName],
+ case Recv of
+ closed -> rabbit_log_connection:debug(Fmt, Args);
+ _ -> rabbit_log_connection:info(Fmt, Args)
+ end;
+ _ ->
+ ok
+ end,
+ case Recv of
+ {data, Data} ->
+ %% NOTE(review): size/1 on a binary here; byte_size/1 would be
+ %% the more explicit BIF.
+ recvloop(Deb, [Data | Buf], BufLen + size(Data),
+ State#v1{pending_recv = false});
+ closed when State#v1.connection_state =:= closed ->
+ State;
+ closed when CS =:= pre_init andalso Buf =:= [] ->
+ stop(tcp_healthcheck, State);
+ closed ->
+ stop(closed, State);
+ {other, {heartbeat_send_error, Reason}} ->
+ %% The only portable way to detect disconnect on blocked
+ %% connection is to wait for heartbeat send failure.
+ stop(Reason, State);
+ {error, Reason} ->
+ stop(Reason, State);
+ {other, {system, From, Request}} ->
+ sys:handle_system_msg(Request, From, State#v1.parent,
+ ?MODULE, Deb, {Buf, BufLen, State});
+ {other, Other} ->
+ case handle_other(Other, State) of
+ stop -> State;
+ NewState -> recvloop(Deb, Buf, BufLen, NewState)
+ end
+ end.
+
+%% Emit stats (if pending) and convert a stop reason into the exception the
+%% reader's top level expects; never returns normally.
+-spec stop(_, #v1{}) -> no_return().
+stop(tcp_healthcheck, State) ->
+ %% The connection was closed before any packet was received. It's
+ %% probably a load-balancer healthcheck: don't consider this a
+ %% failure.
+ maybe_emit_stats(State),
+ throw(connection_closed_with_no_data_received);
+stop(closed, State) ->
+ maybe_emit_stats(State),
+ throw({connection_closed_abruptly, State});
+stop(Reason, State) ->
+ maybe_emit_stats(State),
+ throw({inet_error, Reason}).
+
+%% Handle every non-data message the reader can receive: resource alarms,
+%% channel lifecycle notifications, parent exits, gen_server-style calls and
+%% casts, stats timers and credit-flow bumps. Returns the new #v1{} state,
+%% or the atom 'stop' to make mainloop/4 return.
+handle_other({conserve_resources, Source, Conserve},
+ State = #v1{throttle = Throttle = #throttle{blocked_by = Blockers}}) ->
+ Resource = {resource, Source},
+ Blockers1 = case Conserve of
+ true -> sets:add_element(Resource, Blockers);
+ false -> sets:del_element(Resource, Blockers)
+ end,
+ control_throttle(State#v1{throttle = Throttle#throttle{blocked_by = Blockers1}});
+handle_other({channel_closing, ChPid}, State) ->
+ ok = rabbit_channel:ready_for_close(ChPid),
+ {_, State1} = channel_cleanup(ChPid, State),
+ maybe_close(control_throttle(State1));
+handle_other({'EXIT', Parent, normal}, State = #v1{parent = Parent}) ->
+ %% rabbitmq/rabbitmq-server#544
+ %% The connection port process has exited due to the TCP socket being closed.
+ %% Handle this case in the same manner as receiving {error, closed}
+ stop(closed, State);
+handle_other({'EXIT', Parent, Reason}, State = #v1{parent = Parent}) ->
+ Msg = io_lib:format("broker forced connection closure with reason '~w'", [Reason]),
+ terminate(Msg, State),
+ %% this is what we are expected to do according to
+ %% https://www.erlang.org/doc/man/sys.html
+ %%
+ %% If we wanted to be *really* nice we should wait for a while for
+ %% clients to close the socket at their end, just as we do in the
+ %% ordinary error case. However, since this termination is
+ %% initiated by our parent it is probably more important to exit
+ %% quickly.
+ maybe_emit_stats(State),
+ exit(Reason);
+handle_other({channel_exit, _Channel, E = {writer, send_failed, _E}}, State) ->
+ maybe_emit_stats(State),
+ throw(E);
+handle_other({channel_exit, Channel, Reason}, State) ->
+ handle_exception(State, Channel, Reason);
+handle_other({'DOWN', _MRef, process, ChPid, Reason}, State) ->
+ handle_dependent_exit(ChPid, Reason, State);
+handle_other(terminate_connection, State) ->
+ maybe_emit_stats(State),
+ stop;
+handle_other(handshake_timeout, State)
+ when ?IS_RUNNING(State) orelse ?IS_STOPPING(State) ->
+ State;
+handle_other(handshake_timeout, State) ->
+ maybe_emit_stats(State),
+ throw({handshake_timeout, State#v1.callback});
+handle_other(heartbeat_timeout, State = #v1{connection_state = closed}) ->
+ State;
+handle_other(heartbeat_timeout,
+ State = #v1{connection = #connection{timeout_sec = T}}) ->
+ maybe_emit_stats(State),
+ throw({heartbeat_timeout, T});
+handle_other({'$gen_call', From, {shutdown, Explanation}}, State) ->
+ {ForceTermination, NewState} = terminate(Explanation, State),
+ gen_server:reply(From, ok),
+ case ForceTermination of
+ force -> stop;
+ normal -> NewState
+ end;
+handle_other({'$gen_call', From, info}, State) ->
+ gen_server:reply(From, infos(?INFO_KEYS, State)),
+ State;
+handle_other({'$gen_call', From, {info, Items}}, State) ->
+ gen_server:reply(From, try {ok, infos(Items, State)}
+ catch Error -> {error, Error}
+ end),
+ State;
+handle_other({'$gen_cast', {force_event_refresh, Ref}}, State)
+ when ?IS_RUNNING(State) ->
+ rabbit_event:notify(
+ connection_created,
+ augment_infos_with_user_provided_connection_name(
+ [{type, network} | infos(?CREATION_EVENT_KEYS, State)], State),
+ Ref),
+ rabbit_event:init_stats_timer(State, #v1.stats_timer);
+handle_other({'$gen_cast', {force_event_refresh, _Ref}}, State) ->
+ %% Ignore, we will emit a created event once we start running.
+ State;
+handle_other(ensure_stats, State) ->
+ ensure_stats_timer(State);
+handle_other(emit_stats, State) ->
+ emit_stats(State);
+handle_other({bump_credit, Msg}, State) ->
+ %% Here we are receiving credit by some channel process.
+ credit_flow:handle_bump_msg(Msg),
+ control_throttle(State);
+handle_other(Other, State) ->
+ %% internal error -> something worth dying for
+ maybe_emit_stats(State),
+ exit({unexpected_message, Other}).
+
+%% Install the next parser callback and the number of bytes it expects.
+switch_callback(State, Callback, Length) ->
+ State#v1{callback = Callback, recv_len = Length}.
+
+%% Broker-initiated closure: when running, raise connection_forced towards
+%% the client (normal shutdown); otherwise report that forced termination
+%% is required.
+terminate(Explanation, State) when ?IS_RUNNING(State) ->
+ {normal, handle_exception(State, 0,
+ rabbit_misc:amqp_error(
+ connection_forced, "~s", [Explanation], none))};
+terminate(_Explanation, State) ->
+ {force, State}.
+
+%% Send connection.blocked, but only to clients that advertised the
+%% connection.blocked capability.
+send_blocked(#v1{connection = #connection{protocol = Protocol,
+ capabilities = Capabilities},
+ sock = Sock}, Reason) ->
+ case rabbit_misc:table_lookup(Capabilities, <<"connection.blocked">>) of
+ {bool, true} ->
+
+ ok = send_on_channel0(Sock, #'connection.blocked'{reason = Reason},
+ Protocol);
+ _ ->
+ ok
+ end.
+
+%% Counterpart of send_blocked/2: send connection.unblocked when the
+%% capability was advertised.
+send_unblocked(#v1{connection = #connection{protocol = Protocol,
+ capabilities = Capabilities},
+ sock = Sock}) ->
+ case rabbit_misc:table_lookup(Capabilities, <<"connection.blocked">>) of
+ {bool, true} ->
+ ok = send_on_channel0(Sock, #'connection.unblocked'{}, Protocol);
+ _ ->
+ ok
+ end.
+
+%%--------------------------------------------------------------------------
+%% error handling / termination
+
+%% Move the connection into the 'closed' state: delete this connection's
+%% exclusive queues and schedule a terminate_connection message after the
+%% negotiated timeout (capped at ?CLOSING_TIMEOUT seconds).
+close_connection(State = #v1{queue_collector = Collector,
+ connection = #connection{
+ timeout_sec = TimeoutSec}}) ->
+ %% The spec says "Exclusive queues may only be accessed by the
+ %% current connection, and are deleted when that connection
+ %% closes." This does not strictly imply synchrony, but in
+ %% practice it seems to be what people assume.
+ clean_up_exclusive_queues(Collector),
+ %% We terminate the connection after the specified interval, but
+ %% no later than ?CLOSING_TIMEOUT seconds.
+ erlang:send_after((if TimeoutSec > 0 andalso
+ TimeoutSec < ?CLOSING_TIMEOUT -> TimeoutSec;
+ true -> ?CLOSING_TIMEOUT
+ end) * 1000, self(), terminate_connection),
+ State#v1{connection_state = closed}.
+
+%% queue collector will be undefined when connection
+%% tuning was never performed or didn't finish. In such cases
+%% there's also nothing to clean up.
+clean_up_exclusive_queues(undefined) ->
+ ok;
+
+clean_up_exclusive_queues(Collector) ->
+ rabbit_queue_collector:delete_all(Collector).
+
+%% React to a monitored channel process exiting. An exit from a pid we do
+%% not know about is fatal; a known channel's abnormal exit is surfaced to
+%% the client via handle_exception/3.
+handle_dependent_exit(ChPid, Reason, State) ->
+ {Channel, State1} = channel_cleanup(ChPid, State),
+ case {Channel, termination_kind(Reason)} of
+ {undefined, controlled} -> State1;
+ {undefined, uncontrolled} -> handle_uncontrolled_channel_close(ChPid),
+ exit({abnormal_dependent_exit,
+ ChPid, Reason});
+ {_, controlled} -> maybe_close(control_throttle(State1));
+ {_, uncontrolled} -> handle_uncontrolled_channel_close(ChPid),
+ State2 = handle_exception(
+ State1, Channel, Reason),
+ maybe_close(control_throttle(State2))
+ end.
+
+%% Ask every channel to shut down, then wait for them all with a timeout
+%% proportional to the channel count.
+terminate_channels(#v1{channel_count = 0} = State) ->
+ State;
+terminate_channels(#v1{channel_count = ChannelCount} = State) ->
+ lists:foreach(fun rabbit_channel:shutdown/1, all_channels()),
+ Timeout = 1000 * ?CHANNEL_TERMINATION_TIMEOUT * ChannelCount,
+ TimerRef = erlang:send_after(Timeout, self(), cancel_wait),
+ wait_for_channel_termination(ChannelCount, TimerRef, State).
+
+%% Wait for N channels to go down. When the count reaches zero, cancel the
+%% timer — and if it already fired, drain the pending cancel_wait message so
+%% it cannot be misinterpreted later. A cancel_wait arriving while channels
+%% are still alive means the termination timeout expired.
+wait_for_channel_termination(0, TimerRef, State) ->
+ case erlang:cancel_timer(TimerRef) of
+ false -> receive
+ cancel_wait -> State
+ end;
+ _ -> State
+ end;
+wait_for_channel_termination(N, TimerRef,
+ State = #v1{connection_state = CS,
+ connection = #connection{
+ log_name = ConnName,
+ user = User,
+ vhost = VHost},
+ sock = Sock}) ->
+ receive
+ {'DOWN', _MRef, process, ChPid, Reason} ->
+ {Channel, State1} = channel_cleanup(ChPid, State),
+ case {Channel, termination_kind(Reason)} of
+ {undefined, _} ->
+ exit({abnormal_dependent_exit, ChPid, Reason});
+ {_, controlled} ->
+ wait_for_channel_termination(N-1, TimerRef, State1);
+ {_, uncontrolled} ->
+ rabbit_log_connection:error(
+ "Error on AMQP connection ~p (~s, vhost: '~s',"
+ " user: '~s', state: ~p), channel ~p:"
+ "error while terminating:~n~p~n",
+ [self(), ConnName, VHost, User#user.username,
+ CS, Channel, Reason]),
+ handle_uncontrolled_channel_close(ChPid),
+ wait_for_channel_termination(N-1, TimerRef, State1)
+ end;
+ {'EXIT', Sock, _Reason} ->
+ clean_up_all_channels(State),
+ exit(normal);
+ cancel_wait ->
+ exit(channel_termination_timeout)
+ end.
+
+%% Once the last channel is gone while closing, finish the close handshake
+%% by sending connection.close_ok and entering the closed state.
+maybe_close(State = #v1{connection_state = closing,
+ channel_count = 0,
+ connection = #connection{protocol = Protocol},
+ sock = Sock}) ->
+ NewState = close_connection(State),
+ ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol),
+ NewState;
+maybe_close(State) ->
+ State.
+
+%% Classify an exit reason: 'normal' exits are controlled shutdowns.
+termination_kind(normal) -> controlled;
+termination_kind(_) -> uncontrolled.
+
+%% Render a hard (connection-level) error as printable text. AMQP errors
+%% get a structured description; anything already string-like is passed
+%% through, otherwise it is formatted with ~p.
+format_hard_error(#amqp_error{name = N, explanation = E, method = M}) ->
+ io_lib:format("operation ~s caused a connection exception ~s: ~p", [M, N, E]);
+format_hard_error(Reason) ->
+ case io_lib:deep_char_list(Reason) of
+ true -> Reason;
+ false -> rabbit_misc:format("~p", [Reason])
+ end.
+
+%% Log a connection-level error with full connection context.
+log_hard_error(#v1{connection_state = CS,
+ connection = #connection{
+ log_name = ConnName,
+ user = User,
+ vhost = VHost}}, Channel, Reason) ->
+ rabbit_log_connection:error(
+ "Error on AMQP connection ~p (~s, vhost: '~s',"
+ " user: '~s', state: ~p), channel ~p:~n ~s~n",
+ [self(), ConnName, VHost, User#user.username, CS, Channel, format_hard_error(Reason)]).
+
+%% Central error handling: depending on the connection state, either just
+%% log (already closed), respond to the client and close (running/closing/
+%% opening/tuning), or — for pre-authentication failures — delay and throw
+%% so unauthenticated clients cannot tell errors apart quickly.
+handle_exception(State = #v1{connection_state = closed}, Channel, Reason) ->
+ log_hard_error(State, Channel, Reason),
+ State;
+handle_exception(State = #v1{connection = #connection{protocol = Protocol},
+ connection_state = CS},
+ Channel, Reason)
+ when ?IS_RUNNING(State) orelse CS =:= closing ->
+ respond_and_close(State, Channel, Protocol, Reason, Reason);
+%% authentication failure
+handle_exception(State = #v1{connection = #connection{protocol = Protocol,
+ log_name = ConnName,
+ capabilities = Capabilities},
+ connection_state = starting},
+ Channel, Reason = #amqp_error{name = access_refused,
+ explanation = ErrMsg}) ->
+ rabbit_log_connection:error(
+ "Error on AMQP connection ~p (~s, state: ~p):~n~s~n",
+ [self(), ConnName, starting, ErrMsg]),
+ %% respect authentication failure notification capability
+ case rabbit_misc:table_lookup(Capabilities,
+ <<"authentication_failure_close">>) of
+ {bool, true} ->
+ send_error_on_channel0_and_close(Channel, Protocol, Reason, State);
+ _ ->
+ close_connection(terminate_channels(State))
+ end;
+%% when loopback-only user tries to connect from a non-local host
+%% when user tries to access a vhost it has no permissions for
+handle_exception(State = #v1{connection = #connection{protocol = Protocol,
+ log_name = ConnName,
+ user = User},
+ connection_state = opening},
+ Channel, Reason = #amqp_error{name = not_allowed,
+ explanation = ErrMsg}) ->
+ rabbit_log_connection:error(
+ "Error on AMQP connection ~p (~s, user: '~s', state: ~p):~n~s~n",
+ [self(), ConnName, User#user.username, opening, ErrMsg]),
+ send_error_on_channel0_and_close(Channel,
+ Protocol, Reason, State);
+handle_exception(State = #v1{connection = #connection{protocol = Protocol},
+ connection_state = CS = opening},
+ Channel, Reason = #amqp_error{}) ->
+ respond_and_close(State, Channel, Protocol, Reason,
+ {handshake_error, CS, Reason});
+%% when negotiation fails, e.g. due to channel_max being higher than the
+%% maximum allowed limit
+handle_exception(State = #v1{connection = #connection{protocol = Protocol,
+ log_name = ConnName,
+ user = User},
+ connection_state = tuning},
+ Channel, Reason = #amqp_error{name = not_allowed,
+ explanation = ErrMsg}) ->
+ rabbit_log_connection:error(
+ "Error on AMQP connection ~p (~s,"
+ " user: '~s', state: ~p):~n~s~n",
+ [self(), ConnName, User#user.username, tuning, ErrMsg]),
+ send_error_on_channel0_and_close(Channel, Protocol, Reason, State);
+handle_exception(State, Channel, Reason) ->
+ %% We don't trust the client at this point - force them to wait
+ %% for a bit so they can't DOS us with repeated failed logins etc.
+ timer:sleep(?SILENT_CLOSE_DELAY * 1000),
+ throw({handshake_error, State#v1.connection_state, Channel, Reason}).
+
+%% we've "lost sync" with the client and hence must not accept any
+%% more input
+-spec fatal_frame_error(_, _, _, _, _) -> no_return().
+fatal_frame_error(Error, Type, Channel, Payload, State) ->
+ frame_error(Error, Type, Channel, Payload, State),
+ %% grace period to allow transmission of error
+ timer:sleep(?SILENT_CLOSE_DELAY * 1000),
+ throw(fatal_frame_error).
+
+%% Report a malformed frame (includes a snippet of the offending payload).
+frame_error(Error, Type, Channel, Payload, State) ->
+ {Str, Bin} = payload_snippet(Payload),
+ handle_exception(State, Channel,
+ rabbit_misc:amqp_error(frame_error,
+ "type ~p, ~s octets = ~p: ~p",
+ [Type, Str, Bin, Error], none)).
+
+%% Report a frame that is well-formed but not allowed in this context.
+unexpected_frame(Type, Channel, Payload, State) ->
+ {Str, Bin} = payload_snippet(Payload),
+ handle_exception(State, Channel,
+ rabbit_misc:amqp_error(unexpected_frame,
+ "type ~p, ~s octets = ~p",
+ [Type, Str, Bin], none)).
+
+%% Return at most the first 16 bytes of a frame payload for inclusion in
+%% error messages, tagged with a description of how much is shown.
+%% byte_size/1 (rather than the generic size/1 BIF) makes the guard
+%% explicit about — and only satisfiable by — a binary argument.
+payload_snippet(Payload) when byte_size(Payload) =< 16 ->
+ {"all", Payload};
+payload_snippet(<<Snippet:16/binary, _/binary>>) ->
+ {"first 16", Snippet}.
+
+%%--------------------------------------------------------------------------
+
+%% Start a channel process for the given channel number, enforcing both the
+%% negotiated per-connection channel_max and the per-user channel limit.
+%% The {channel, N} and {ch_pid, Pid} mappings live in the reader's process
+%% dictionary; the channel count lives in #v1.channel_count.
+create_channel(_Channel,
+ #v1{channel_count = ChannelCount,
+ connection = #connection{channel_max = ChannelMax}})
+ when ChannelMax /= 0 andalso ChannelCount >= ChannelMax ->
+ {error, rabbit_misc:amqp_error(
+ not_allowed, "number of channels opened (~w) has reached the "
+ "negotiated channel_max (~w)",
+ [ChannelCount, ChannelMax], 'none')};
+create_channel(Channel,
+ #v1{sock = Sock,
+ queue_collector = Collector,
+ channel_sup_sup_pid = ChanSupSup,
+ channel_count = ChannelCount,
+ connection =
+ #connection{name = Name,
+ protocol = Protocol,
+ frame_max = FrameMax,
+ vhost = VHost,
+ capabilities = Capabilities,
+ user = #user{username = Username} = User}
+ } = State) ->
+ case rabbit_auth_backend_internal:is_over_channel_limit(Username) of
+ false ->
+ {ok, _ChSupPid, {ChPid, AState}} =
+ rabbit_channel_sup_sup:start_channel(
+ ChanSupSup, {tcp, Sock, Channel, FrameMax, self(), Name,
+ Protocol, User, VHost, Capabilities,
+ Collector}),
+ MRef = erlang:monitor(process, ChPid),
+ put({ch_pid, ChPid}, {Channel, MRef}),
+ put({channel, Channel}, {ChPid, AState}),
+ {ok, {ChPid, AState}, State#v1{channel_count = ChannelCount + 1}};
+ {true, Limit} ->
+ {error, rabbit_misc:amqp_error(not_allowed,
+ "number of channels opened for user '~s' has reached "
+ "the maximum allowed user limit of (~w)",
+ [Username, Limit], 'none')}
+ end.
+
+%% Remove all per-channel bookkeeping for ChPid: credit-flow state, both
+%% process-dictionary mappings and the monitor. Returns the channel number
+%% (or undefined if the pid was unknown) and the updated state.
+channel_cleanup(ChPid, State = #v1{channel_count = ChannelCount}) ->
+ case get({ch_pid, ChPid}) of
+ undefined -> {undefined, State};
+ {Channel, MRef} -> credit_flow:peer_down(ChPid),
+ erase({channel, Channel}),
+ erase({ch_pid, ChPid}),
+ erlang:demonitor(MRef, [flush]),
+ {Channel, State#v1{channel_count = ChannelCount - 1}}
+ end.
+
+%% All live channel pids, read from the reader's process dictionary.
+all_channels() -> [ChPid || {{ch_pid, ChPid}, _ChannelMRef} <- get()].
+
+%% Run channel_cleanup/2 for every channel (state updates are discarded;
+%% only the process-dictionary/monitor side effects matter here).
+clean_up_all_channels(State) ->
+ CleanupFun = fun(ChPid) ->
+ channel_cleanup(ChPid, State)
+ end,
+ lists:foreach(CleanupFun, all_channels()).
+
+%%--------------------------------------------------------------------------
+
+%% Dispatch a decoded frame. Channel 0 carries connection-level methods
+%% (heartbeats allowed); while stopping, only channel-0 methods are still
+%% processed. Frames for other channels are forwarded to the channel
+%% process via process_frame/3.
+handle_frame(Type, 0, Payload,
+ State = #v1{connection = #connection{protocol = Protocol}})
+ when ?IS_STOPPING(State) ->
+ case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of
+ {method, MethodName, FieldsBin} ->
+ handle_method0(MethodName, FieldsBin, State);
+ _Other -> State
+ end;
+handle_frame(Type, 0, Payload,
+ State = #v1{connection = #connection{protocol = Protocol}}) ->
+ case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of
+ error -> frame_error(unknown_frame, Type, 0, Payload, State);
+ heartbeat -> State;
+ {method, MethodName, FieldsBin} ->
+ handle_method0(MethodName, FieldsBin, State);
+ _Other -> unexpected_frame(Type, 0, Payload, State)
+ end;
+handle_frame(Type, Channel, Payload,
+ State = #v1{connection = #connection{protocol = Protocol}})
+ when ?IS_RUNNING(State) ->
+ case rabbit_command_assembler:analyze_frame(Type, Payload, Protocol) of
+ error -> frame_error(unknown_frame, Type, Channel, Payload, State);
+ heartbeat -> unexpected_frame(Type, Channel, Payload, State);
+ Frame -> process_frame(Frame, Channel, State)
+ end;
+handle_frame(_Type, _Channel, _Payload, State) when ?IS_STOPPING(State) ->
+ State;
+handle_frame(Type, Channel, Payload, State) ->
+ unexpected_frame(Type, Channel, Payload, State).
+
+%% Feed a frame to the per-channel command assembler, creating the channel
+%% process on first use. Complete methods are forwarded to the channel
+%% (with credit flow for content-carrying methods via do_flow/3).
+process_frame(Frame, Channel, State) ->
+ ChKey = {channel, Channel},
+ case (case get(ChKey) of
+ undefined -> create_channel(Channel, State);
+ Other -> {ok, Other, State}
+ end) of
+ {error, Error} ->
+ handle_exception(State, Channel, Error);
+ {ok, {ChPid, AState}, State1} ->
+ case rabbit_command_assembler:process(Frame, AState) of
+ {ok, NewAState} ->
+ put(ChKey, {ChPid, NewAState}),
+ post_process_frame(Frame, ChPid, State1);
+ {ok, Method, NewAState} ->
+ rabbit_channel:do(ChPid, Method),
+ put(ChKey, {ChPid, NewAState}),
+ post_process_frame(Frame, ChPid, State1);
+ {ok, Method, Content, NewAState} ->
+ rabbit_channel:do_flow(ChPid, Method, Content),
+ put(ChKey, {ChPid, NewAState}),
+ post_process_frame(Frame, ChPid, control_throttle(State1))
+ ;
+ {error, Reason} ->
+ handle_exception(State1, Channel, Reason)
+ end
+ end.
+
+%% Frame-type-specific follow-up: clean up after channel.close_ok, and
+%% count content frames as publishes for throttling/stats purposes.
+post_process_frame({method, 'channel.close_ok', _}, ChPid, State) ->
+ {_, State1} = channel_cleanup(ChPid, State),
+ %% This is not strictly necessary, but more obviously
+ %% correct. Also note that we do not need to call maybe_close/1
+ %% since we cannot possibly be in the 'closing' state.
+ control_throttle(State1);
+post_process_frame({content_header, _, _, _, _}, _ChPid, State) ->
+ publish_received(State);
+post_process_frame({content_body, _}, _ChPid, State) ->
+ publish_received(State);
+post_process_frame(_Frame, _ChPid, State) ->
+ State.
+
+%%--------------------------------------------------------------------------
+
+%% We allow clients to exceed the frame size a little bit since quite
+%% a few get it wrong - off-by 1 or 8 (empty frame size) are typical.
+-define(FRAME_SIZE_FUDGE, ?EMPTY_FRAME_SIZE).
+
+%% Byte-level parser driven by the current callback. Each clause consumes
+%% exactly what it needs and returns {RestBinary, NewState}; the callback /
+%% recv_len pair is advanced via switch_callback/2,3.
+handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32, _/binary>>,
+ State = #v1{connection = #connection{frame_max = FrameMax}})
+ when FrameMax /= 0 andalso
+ PayloadSize > FrameMax - ?EMPTY_FRAME_SIZE + ?FRAME_SIZE_FUDGE ->
+ fatal_frame_error(
+ {frame_too_large, PayloadSize, FrameMax - ?EMPTY_FRAME_SIZE},
+ Type, Channel, <<>>, State);
+%% Fast path: the whole frame (payload + end marker) is already buffered.
+handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32,
+ Payload:PayloadSize/binary, ?FRAME_END,
+ Rest/binary>>,
+ State) ->
+ {Rest, ensure_stats_timer(handle_frame(Type, Channel, Payload, State))};
+%% Header only: switch to expecting PayloadSize + 1 bytes (the +1 is the
+%% frame end marker).
+handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32, Rest/binary>>,
+ State) ->
+ {Rest, ensure_stats_timer(
+ switch_callback(State,
+ {frame_payload, Type, Channel, PayloadSize},
+ PayloadSize + 1))};
+handle_input({frame_payload, Type, Channel, PayloadSize}, Data, State) ->
+ <<Payload:PayloadSize/binary, EndMarker, Rest/binary>> = Data,
+ case EndMarker of
+ ?FRAME_END -> State1 = handle_frame(Type, Channel, Payload, State),
+ {Rest, switch_callback(State1, frame_header, 7)};
+ _ -> fatal_frame_error({invalid_frame_end_marker, EndMarker},
+ Type, Channel, Payload, State)
+ end;
+handle_input(handshake, <<"AMQP", A, B, C, D, Rest/binary>>, State) ->
+ {Rest, handshake({A, B, C, D}, State)};
+handle_input(handshake, <<Other:8/binary, _/binary>>, #v1{sock = Sock}) ->
+ refuse_connection(Sock, {bad_header, Other});
+handle_input(Callback, Data, _State) ->
+ throw({bad_input, Callback, Data}).
+
+%% The two rules pertaining to version negotiation:
+%%
+%% * If the server cannot support the protocol specified in the
+%% protocol header, it MUST respond with a valid protocol header and
+%% then close the socket connection.
+%%
+%% * The server MUST provide a protocol version that is lower than or
+%% equal to that requested by the client in the protocol header.
+%% Map the four protocol-header bytes following "AMQP" to a protocol
+%% version and framing module, or refuse the connection.
+handshake({0, 0, 9, 1}, State) ->
+ start_connection({0, 9, 1}, rabbit_framing_amqp_0_9_1, State);
+
+%% This is the protocol header for 0-9, which we can safely treat as
+%% though it were 0-9-1.
+handshake({1, 1, 0, 9}, State) ->
+ start_connection({0, 9, 0}, rabbit_framing_amqp_0_9_1, State);
+
+%% This is what most clients send for 0-8. The 0-8 spec, confusingly,
+%% defines the version as 8-0.
+handshake({1, 1, 8, 0}, State) ->
+ start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State);
+
+%% The 0-8 spec as on the AMQP web site actually has this as the
+%% protocol header; some libraries e.g., py-amqplib, send it when they
+%% want 0-8.
+handshake({1, 1, 9, 1}, State) ->
+ start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State);
+
+%% ... and finally, the 1.0 spec is crystal clear!
+handshake({Id, 1, 0, 0}, State) ->
+ become_1_0(Id, State);
+
+handshake(Vsn, #v1{sock = Sock}) ->
+ refuse_connection(Sock, {bad_version, Vsn}).
+
+%% Offer a protocol version to the client. Connection.start only
+%% includes a major and minor version number, Luckily 0-9 and 0-9-1
+%% are similar enough that clients will be happy with either.
+start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision},
+ Protocol,
+ State = #v1{sock = Sock, connection = Connection}) ->
+ rabbit_networking:register_connection(self()),
+ Start = #'connection.start'{
+ version_major = ProtocolMajor,
+ version_minor = ProtocolMinor,
+ server_properties = server_properties(Protocol),
+ mechanisms = auth_mechanisms_binary(Sock),
+ locales = <<"en_US">> },
+ ok = send_on_channel0(Sock, Start, Protocol),
+ switch_callback(State#v1{connection = Connection#connection{
+ timeout_sec = ?NORMAL_TIMEOUT,
+ protocol = Protocol},
+ connection_state = starting},
+ frame_header, 7).
+
+%% Send the protocol header we DO support back to the client, then throw;
+%% never returns normally.
+-spec refuse_connection(_, _, _) -> no_return().
+refuse_connection(Sock, Exception, {A, B, C, D}) ->
+ ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",A,B,C,D>>) end),
+ throw(Exception).
+
+-spec refuse_connection(rabbit_net:socket(), any()) -> no_return().
+
+%% Refuse with the default supported header (AMQP 0-0-9-1).
+refuse_connection(Sock, Exception) ->
+ refuse_connection(Sock, Exception, {0, 0, 9, 1}).
+
+%% Only (re)arm the stats timer once the connection is fully running.
+ensure_stats_timer(State = #v1{connection_state = running}) ->
+ rabbit_event:ensure_stats_timer(State, #v1.stats_timer, emit_stats);
+ensure_stats_timer(State) ->
+ State.
+
+%%--------------------------------------------------------------------------
+
+%% Decode a channel-0 method's fields and dispatch to handle_method0/2,
+%% converting decode/handling failures into connection exceptions tagged
+%% with the offending method name.
+handle_method0(MethodName, FieldsBin,
+ State = #v1{connection = #connection{protocol = Protocol}}) ->
+ try
+ handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin),
+ State)
+ catch throw:{inet_error, E} when E =:= closed; E =:= enotconn ->
+ maybe_emit_stats(State),
+ throw({connection_closed_abruptly, State});
+ exit:#amqp_error{method = none} = Reason ->
+ handle_exception(State, 0, Reason#amqp_error{method = MethodName});
+ Type:Reason:Stacktrace ->
+ handle_exception(State, 0, {Type, Reason, MethodName, Stacktrace})
+ end.
+
+%% connection.start_ok: record client properties/capabilities, initialise
+%% the chosen SASL mechanism and move to the 'securing' state before
+%% running the first auth round.
+handle_method0(#'connection.start_ok'{mechanism = Mechanism,
+ response = Response,
+ client_properties = ClientProperties},
+ State0 = #v1{connection_state = starting,
+ connection = Connection0,
+ sock = Sock}) ->
+ AuthMechanism = auth_mechanism_to_module(Mechanism, Sock),
+ Capabilities =
+ case rabbit_misc:table_lookup(ClientProperties, <<"capabilities">>) of
+ {table, Capabilities1} -> Capabilities1;
+ _ -> []
+ end,
+ Connection1 = Connection0#connection{
+ client_properties = ClientProperties,
+ capabilities = Capabilities,
+ auth_mechanism = {Mechanism, AuthMechanism},
+ auth_state = AuthMechanism:init(Sock)},
+ Connection2 = augment_connection_log_name(Connection1),
+ State = State0#v1{connection_state = securing,
+ connection = Connection2},
+ % adding client properties to process dictionary to send them later
+ % in the connection_closed event
+ put(client_properties, ClientProperties),
+ case user_provided_connection_name(Connection2) of
+ undefined ->
+ undefined;
+ UserProvidedConnectionName ->
+ put(connection_user_provided_name, UserProvidedConnectionName)
+ end,
+ auth_phase(Response, State);
+
+%% connection.secure_ok: next round of the SASL challenge/response cycle.
+handle_method0(#'connection.secure_ok'{response = Response},
+ State = #v1{connection_state = securing}) ->
+ auth_phase(Response, State);
+
+%% connection.tune_ok: validate the negotiated frame_max/channel_max,
+%% start the queue collector and heartbeater, then move to 'opening'.
+handle_method0(#'connection.tune_ok'{frame_max = FrameMax,
+ channel_max = ChannelMax,
+ heartbeat = ClientHeartbeat},
+ State = #v1{connection_state = tuning,
+ connection = Connection,
+ helper_sup = SupPid,
+ sock = Sock}) ->
+ ok = validate_negotiated_integer_value(
+ frame_max, ?FRAME_MIN_SIZE, FrameMax),
+ ok = validate_negotiated_integer_value(
+ channel_max, ?CHANNEL_MIN, ChannelMax),
+ {ok, Collector} = rabbit_connection_helper_sup:start_queue_collector(
+ SupPid, Connection#connection.name),
+ Frame = rabbit_binary_generator:build_heartbeat_frame(),
+ Parent = self(),
+ SendFun =
+ fun() ->
+ case catch rabbit_net:send(Sock, Frame) of
+ ok ->
+ ok;
+ {error, Reason} ->
+ Parent ! {heartbeat_send_error, Reason};
+ Unexpected ->
+ Parent ! {heartbeat_send_error, Unexpected}
+ end,
+ ok
+ end,
+ ReceiveFun = fun() -> Parent !
+ heartbeat_timeout end,
+ Heartbeater = rabbit_heartbeat:start(
+ SupPid, Sock, Connection#connection.name,
+ ClientHeartbeat, SendFun, ClientHeartbeat, ReceiveFun),
+ State#v1{connection_state = opening,
+ connection = Connection#connection{
+ frame_max = FrameMax,
+ channel_max = ChannelMax,
+ timeout_sec = ClientHeartbeat},
+ queue_collector = Collector,
+ heartbeater = Heartbeater};
+
+%% connection.open: enforce vhost/user connection limits and vhost access,
+%% register for resource alarms, start the channel supervisor tree, emit
+%% the connection_created event and enter the 'running' state.
+handle_method0(#'connection.open'{virtual_host = VHost},
+ State = #v1{connection_state = opening,
+ connection = Connection = #connection{
+ log_name = ConnName,
+ user = User = #user{username = Username},
+ protocol = Protocol},
+ helper_sup = SupPid,
+ sock = Sock,
+ throttle = Throttle}) ->
+
+ ok = is_over_vhost_connection_limit(VHost, User),
+ ok = is_over_user_connection_limit(User),
+ ok = rabbit_access_control:check_vhost_access(User, VHost, {socket, Sock}, #{}),
+ ok = is_vhost_alive(VHost, User),
+ NewConnection = Connection#connection{vhost = VHost},
+ ok = send_on_channel0(Sock, #'connection.open_ok'{}, Protocol),
+
+ Alarms = rabbit_alarm:register(self(), {?MODULE, conserve_resources, []}),
+ BlockedBy = sets:from_list([{resource, Alarm} || Alarm <- Alarms]),
+ Throttle1 = Throttle#throttle{blocked_by = BlockedBy},
+
+ {ok, ChannelSupSupPid} =
+ rabbit_connection_helper_sup:start_channel_sup_sup(SupPid),
+ State1 = control_throttle(
+ State#v1{connection_state = running,
+ connection = NewConnection,
+ channel_sup_sup_pid = ChannelSupSupPid,
+ throttle = Throttle1}),
+ Infos = augment_infos_with_user_provided_connection_name(
+ [{type, network} | infos(?CREATION_EVENT_KEYS, State1)],
+ State1
+ ),
+ rabbit_core_metrics:connection_created(proplists:get_value(pid, Infos),
+ Infos),
+ rabbit_event:notify(connection_created, Infos),
+ maybe_emit_stats(State1),
+ rabbit_log_connection:info(
+ "connection ~p (~s): "
+ "user '~s' authenticated and granted access to vhost '~s'~n",
+ [self(), dynamic_connection_name(ConnName), Username, VHost]),
+ State1;
+%% connection.close while running: shut down all channels and wait to send
+%% close_ok via maybe_close/1 once they are gone.
+handle_method0(#'connection.close'{}, State) when ?IS_RUNNING(State) ->
+ lists:foreach(fun rabbit_channel:shutdown/1, all_channels()),
+ maybe_close(State#v1{connection_state = closing});
+handle_method0(#'connection.close'{},
+ State = #v1{connection = #connection{protocol = Protocol},
+ sock = Sock})
+ when ?IS_STOPPING(State) ->
+ %% We're already closed or closing, so we don't need to cleanup
+ %% anything.
+ ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol),
+ State;
+handle_method0(#'connection.close_ok'{},
+ State = #v1{connection_state = closed}) ->
+ self() ! terminate_connection,
+ State;
+%% connection.update_secret: refresh the authenticated user's credentials
+%% and propagate the new user state to every channel.
+handle_method0(#'connection.update_secret'{new_secret = NewSecret, reason = Reason},
+ State = #v1{connection =
+ #connection{protocol = Protocol,
+ user = User = #user{username = Username},
+ log_name = ConnName} = Conn,
+ sock = Sock}) when ?IS_RUNNING(State) ->
+ rabbit_log_connection:debug(
+ "connection ~p (~s) of user '~s': "
+ "asked to update secret, reason: ~s~n",
+ [self(), dynamic_connection_name(ConnName), Username, Reason]),
+ case rabbit_access_control:update_state(User, NewSecret) of
+ {ok, User1} ->
+ %% User/auth backend state has been updated. Now we can propagate it to channels
+ %% asynchronously and return. All the channels have to do is to update their
+ %% own state.
+ %%
+ %% Any secret update errors coming from the authz backend will be handled in the other branch.
+ %% Therefore we optimistically do no error handling here. MK.
+ lists:foreach(fun(Ch) ->
+ rabbit_log:debug("Updating user/auth backend state for channel ~p", [Ch]),
+ _ = rabbit_channel:update_user_state(Ch, User1)
+ end, all_channels()),
+ ok = send_on_channel0(Sock, #'connection.update_secret_ok'{}, Protocol),
+ rabbit_log_connection:info(
+ "connection ~p (~s): "
+ "user '~s' updated secret, reason: ~s~n",
+ [self(), dynamic_connection_name(ConnName), Username, Reason]),
+ State#v1{connection = Conn#connection{user = User1}};
+ {refused, Message} ->
+ rabbit_log_connection:error("Secret update was refused for user '~p': ~p",
+ [Username, Message]),
+ rabbit_misc:protocol_error(not_allowed, "New secret was refused by one of the backends", []);
+ {error, Message} ->
+ rabbit_log_connection:error("Secret update for user '~p' failed: ~p",
+ [Username, Message]),
+ rabbit_misc:protocol_error(not_allowed,
+ "Secret update failed", [])
+ end;
+%% While stopping, silently drop any other channel-0 method.
+handle_method0(_Method, State) when ?IS_STOPPING(State) ->
+ State;
+handle_method0(_Method, #v1{connection_state = S}) ->
+ rabbit_misc:protocol_error(
+ channel_error, "unexpected method in connection state ~w", [S]).
+
+%% Raise a protocol error if the target vhost's supervision tree is down.
+is_vhost_alive(VHostPath, User) ->
+ case rabbit_vhost_sup_sup:is_vhost_alive(VHostPath) of
+ true -> ok;
+ false ->
+ rabbit_misc:protocol_error(internal_error,
+ "access to vhost '~s' refused for user '~s': "
+ "vhost '~s' is down",
+ [VHostPath, User#user.username, VHostPath])
+ end.
+
+%% Enforce the per-vhost connection limit; a missing vhost is reported as
+%% not_allowed rather than crashing the reader.
+is_over_vhost_connection_limit(VHostPath, User) ->
+ try rabbit_vhost_limit:is_over_connection_limit(VHostPath) of
+ false -> ok;
+ {true, Limit} -> rabbit_misc:protocol_error(not_allowed,
+ "access to vhost '~s' refused for user '~s': "
+ "connection limit (~p) is reached",
+ [VHostPath, User#user.username, Limit])
+ catch
+ throw:{error, {no_such_vhost, VHostPath}} ->
+ rabbit_misc:protocol_error(not_allowed, "vhost ~s not found", [VHostPath])
+ end.
+
+%% Reject the connection if the internal auth backend reports the
+%% user's per-user connection limit as reached; returns ok otherwise.
+is_over_user_connection_limit(#user{username = Username}) ->
+    case rabbit_auth_backend_internal:is_over_connection_limit(Username) of
+        false -> ok;
+        {true, Limit} -> rabbit_misc:protocol_error(not_allowed,
+                            "Connection refused for user '~s': "
+                            "user connection limit (~p) is reached",
+                            [Username, Limit])
+    end.
+
+%% Validate a value negotiated in connection.tune against both the
+%% protocol minimum (Min) and the server-configured maximum for Field.
+%% A value of 0 means "no limit" on either side, hence the special
+%% cases below.
+validate_negotiated_integer_value(Field, Min, ClientValue) ->
+    ServerValue = get_env(Field),
+    if ClientValue /= 0 andalso ClientValue < Min ->
+           fail_negotiation(Field, min, Min, ClientValue);
+       %% a client value of 0 ("unlimited") only wins if the server
+       %% imposes no limit of its own
+       ServerValue /= 0 andalso (ClientValue =:= 0 orelse
+                                 ClientValue > ServerValue) ->
+           fail_negotiation(Field, max, ServerValue, ClientValue);
+       true ->
+           ok
+    end.
+
+%% keep dialyzer happy
+-spec fail_negotiation(atom(), 'min' | 'max', integer(), integer()) ->
+          no_return().
+
+%% Abort negotiation with a protocol error naming the offending field,
+%% the violated bound and both values.
+fail_negotiation(Field, MinOrMax, ServerValue, ClientValue) ->
+    {S1, S2} = case MinOrMax of
+                   min -> {lower,  minimum};
+                   max -> {higher, maximum}
+               end,
+    ClientValueDetail = get_client_value_detail(Field, ClientValue),
+    rabbit_misc:protocol_error(
+      not_allowed, "negotiated ~w = ~w~s is ~w than the ~w allowed value (~w)",
+      [Field, ClientValue, ClientValueDetail, S1, S2, ServerValue], 'connection.tune').
+
+%% Fetch a mandatory key from the 'rabbit' application environment;
+%% crashes (badmatch) if the key is unset.
+get_env(Key) ->
+    {ok, Value} = application:get_env(rabbit, Key),
+    Value.
+
+%% Send a method frame on channel 0 (the connection channel).
+send_on_channel0(Sock, Method, Protocol) ->
+    ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol).
+
+%% Resolve the auth mechanism announced by the client to its callback
+%% module; fails with command_invalid if the mechanism is unknown or
+%% not offered for this socket.
+auth_mechanism_to_module(TypeBin, Sock) ->
+    case rabbit_registry:binary_to_type(TypeBin) of
+        {error, not_found} ->
+            rabbit_misc:protocol_error(
+              command_invalid, "unknown authentication mechanism '~s'",
+              [TypeBin]);
+        T ->
+            case {lists:member(T, auth_mechanisms(Sock)),
+                  rabbit_registry:lookup_module(auth_mechanism, T)} of
+                {true, {ok, Module}} ->
+                    Module;
+                _ ->
+                    rabbit_misc:protocol_error(
+                      command_invalid,
+                      "invalid authentication mechanism '~s'", [T])
+            end
+    end.
+
+%% The authentication mechanisms that are both configured (app env
+%% 'auth_mechanisms') and willing to be offered on this particular
+%% socket (Module:should_offer/1).
+auth_mechanisms(Sock) ->
+    {ok, Configured} = application:get_env(auth_mechanisms),
+    [Name || {Name, Module} <- rabbit_registry:lookup_all(auth_mechanism),
+             Module:should_offer(Sock), lists:member(Name, Configured)].
+
+%% Space-separated mechanism list, as advertised to clients.
+auth_mechanisms_binary(Sock) ->
+    list_to_binary(
+      string:join([atom_to_list(A) || A <- auth_mechanisms(Sock)], " ")).
+
+%% Drive one step of the SASL exchange: feed the client's response to
+%% the mechanism module and act on the outcome — refusal, protocol
+%% error, another challenge, or success followed by connection.tune.
+auth_phase(Response,
+           State = #v1{connection = Connection =
+                           #connection{protocol = Protocol,
+                                       auth_mechanism = {Name, AuthMechanism},
+                                       auth_state = AuthState},
+                       sock = Sock}) ->
+    RemoteAddress = list_to_binary(inet:ntoa(Connection#connection.host)),
+    case AuthMechanism:handle_response(Response, AuthState) of
+        {refused, Username, Msg, Args} ->
+            rabbit_core_metrics:auth_attempt_failed(RemoteAddress, Username, amqp091),
+            auth_fail(Username, Msg, Args, Name, State);
+        {protocol_error, Msg, Args} ->
+            rabbit_core_metrics:auth_attempt_failed(RemoteAddress, <<>>, amqp091),
+            notify_auth_result(none, user_authentication_failure,
+                               [{error, rabbit_misc:format(Msg, Args)}],
+                               State),
+            rabbit_misc:protocol_error(syntax_error, Msg, Args);
+        {challenge, Challenge, AuthState1} ->
+            rabbit_core_metrics:auth_attempt_succeeded(RemoteAddress, <<>>, amqp091),
+            Secure = #'connection.secure'{challenge = Challenge},
+            ok = send_on_channel0(Sock, Secure, Protocol),
+            State#v1{connection = Connection#connection{
+                                    auth_state = AuthState1}};
+        {ok, User = #user{username = Username}} ->
+            case rabbit_access_control:check_user_loopback(Username, Sock) of
+                ok ->
+                    rabbit_core_metrics:auth_attempt_succeeded(RemoteAddress, Username, amqp091),
+                    notify_auth_result(Username, user_authentication_success,
+                                       [], State);
+                not_allowed ->
+                    rabbit_core_metrics:auth_attempt_failed(RemoteAddress, Username, amqp091),
+                    auth_fail(Username, "user '~s' can only connect via "
+                              "localhost", [Username], Name, State)
+            end,
+            Tune = #'connection.tune'{frame_max   = get_env(frame_max),
+                                      channel_max = get_env(channel_max),
+                                      heartbeat   = get_env(heartbeat)},
+            ok = send_on_channel0(Sock, Tune, Protocol),
+            State#v1{connection_state = tuning,
+                     connection = Connection#connection{user = User,
+                                                        auth_state = none}}
+    end.
+
+-spec auth_fail
+        (rabbit_types:username() | none, string(), [any()], binary(), #v1{}) ->
+            no_return().
+
+%% Report an authentication failure and terminate with access_refused.
+%% If the client advertised the 'authentication_failure_close'
+%% capability, first send a connection.close carrying a sanitised
+%% message (the details stay in the broker log only).
+auth_fail(Username, Msg, Args, AuthName,
+          State = #v1{connection = #connection{protocol = Protocol,
+                                               capabilities = Capabilities}}) ->
+    notify_auth_result(Username, user_authentication_failure,
+                       [{error, rabbit_misc:format(Msg, Args)}], State),
+    AmqpError = rabbit_misc:amqp_error(
+                  access_refused, "~s login refused: ~s",
+                  [AuthName, io_lib:format(Msg, Args)], none),
+    case rabbit_misc:table_lookup(Capabilities,
+                                  <<"authentication_failure_close">>) of
+        {bool, true} ->
+            SafeMsg = io_lib:format(
+                        "Login was refused using authentication "
+                        "mechanism ~s. For details see the broker "
+                        "logfile.", [AuthName]),
+            AmqpError1 = AmqpError#amqp_error{explanation = SafeMsg},
+            {0, CloseMethod} = rabbit_binary_generator:map_exception(
+                                 0, AmqpError1, Protocol),
+            ok = send_on_channel0(State#v1.sock, CloseMethod, Protocol);
+        _ -> ok
+    end,
+    rabbit_misc:protocol_error(AmqpError).
+
+%% Emit a rabbit_event for an auth success/failure with the standard
+%% connection info items attached; empty ('') values are filtered out.
+notify_auth_result(Username, AuthResult, ExtraProps, State) ->
+    EventProps = [{connection_type, network},
+                  {name, case Username of none -> ''; _ -> Username end}] ++
+                 [case Item of
+                      name -> {connection_name, i(name, State)};
+                      _    -> {Item, i(Item, State)}
+                  end || Item <- ?AUTH_NOTIFICATION_INFO_KEYS] ++
+                 ExtraProps,
+    rabbit_event:notify(AuthResult, [P || {_, V} = P <- EventProps, V =/= '']).
+
+%%--------------------------------------------------------------------------
+
+%% Collect the requested info items for this connection.
+infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items].
+
+%% i/2: compute one info item. Items are sourced from the #v1 state,
+%% the socket, TLS/peer-certificate data, or delegated to ic/2 for
+%% #connection fields.
+i(pid, #v1{}) -> self();
+i(node, #v1{}) -> node();
+i(SockStat, S) when SockStat =:= recv_oct;
+                    SockStat =:= recv_cnt;
+                    SockStat =:= send_oct;
+                    SockStat =:= send_cnt;
+                    SockStat =:= send_pend ->
+    socket_info(fun (Sock) -> rabbit_net:getstat(Sock, [SockStat]) end,
+                fun ([{_, I}]) -> I end, S);
+i(ssl, #v1{sock = Sock}) -> rabbit_net:is_ssl(Sock);
+i(ssl_protocol, S) -> ssl_info(fun ({P, _}) -> P end, S);
+i(ssl_key_exchange, S) -> ssl_info(fun ({_, {K, _, _}}) -> K end, S);
+i(ssl_cipher, S) -> ssl_info(fun ({_, {_, C, _}}) -> C end, S);
+i(ssl_hash, S) -> ssl_info(fun ({_, {_, _, H}}) -> H end, S);
+i(peer_cert_issuer, S) -> cert_info(fun rabbit_ssl:peer_cert_issuer/1, S);
+i(peer_cert_subject, S) -> cert_info(fun rabbit_ssl:peer_cert_subject/1, S);
+i(peer_cert_validity, S) -> cert_info(fun rabbit_ssl:peer_cert_validity/1, S);
+i(channels, #v1{channel_count = ChannelCount}) -> ChannelCount;
+i(state, #v1{connection_state = ConnectionState,
+             throttle = #throttle{blocked_by = Reasons,
+                                  last_blocked_at = T} = Throttle}) ->
+    %% not throttled by resource or other longer-term reasons
+    %% TODO: come up with a sensible function name
+    %% Report 'flow' while credit flow is active now or was active
+    %% within the last 5 seconds; otherwise derive the state from the
+    %% throttle and the connection state.
+    case sets:size(sets:del_element(flow, Reasons)) =:= 0 andalso
+         (credit_flow:blocked() %% throttled by flow now
+          orelse                %% throttled by flow recently
+            (is_blocked_by_flow(Throttle) andalso T =/= never andalso
+             erlang:convert_time_unit(erlang:monotonic_time() - T,
+                                      native,
+                                      micro_seconds) < 5000000)) of
+        true  -> flow;
+        false ->
+            case {has_reasons_to_block(Throttle), ConnectionState} of
+                %% blocked
+                {_, blocked} -> blocked;
+                %% not yet blocked (there were no publishes)
+                {true, running} -> blocking;
+                %% not blocked
+                {false, _} -> ConnectionState;
+                %% catch all to be defensive
+                _ -> ConnectionState
+            end
+    end;
+i(garbage_collection, _State) ->
+    rabbit_misc:get_gc_info(self());
+i(reductions, _State) ->
+    {reductions, Reductions} = erlang:process_info(self(), reductions),
+    Reductions;
+i(Item, #v1{connection = Conn}) -> ic(Item, Conn).
+
+%% ic/2: info items sourced from the #connection record; throws
+%% {bad_argument, Item} for unknown items.
+ic(name, #connection{name = Name}) -> Name;
+ic(host, #connection{host = Host}) -> Host;
+ic(peer_host, #connection{peer_host = PeerHost}) -> PeerHost;
+ic(port, #connection{port = Port}) -> Port;
+ic(peer_port, #connection{peer_port = PeerPort}) -> PeerPort;
+ic(protocol, #connection{protocol = none}) -> none;
+ic(protocol, #connection{protocol = P}) -> P:version();
+ic(user, #connection{user = none}) -> '';
+ic(user, #connection{user = U}) -> U#user.username;
+ic(user_who_performed_action, C) -> ic(user, C);
+ic(vhost, #connection{vhost = VHost}) -> VHost;
+ic(timeout, #connection{timeout_sec = Timeout}) -> Timeout;
+ic(frame_max, #connection{frame_max = FrameMax}) -> FrameMax;
+ic(channel_max, #connection{channel_max = ChMax}) -> ChMax;
+ic(client_properties, #connection{client_properties = CP}) -> CP;
+ic(auth_mechanism, #connection{auth_mechanism = none}) -> none;
+ic(auth_mechanism, #connection{auth_mechanism = {Name, _Mod}}) -> Name;
+ic(connected_at, #connection{connected_at = T}) -> T;
+ic(Item, #connection{}) -> throw({bad_argument, Item}).
+
+%% Apply Get to the socket and Select to its result; any error or
+%% non-numeric value collapses to 0 so stats collection never crashes
+%% the caller.
+socket_info(Get, Select, #v1{sock = Sock}) ->
+    case Get(Sock) of
+        {ok,    T} -> case Select(T) of
+                          N when is_number(N) -> N;
+                          _                   -> 0
+                      end;
+        {error, _} -> 0
+    end.
+
+%% Extract a TLS property via F; '' when the socket is not TLS or the
+%% info is unavailable.
+ssl_info(F, #v1{sock = Sock}) ->
+    case rabbit_net:ssl_info(Sock) of
+        nossl       -> '';
+        {error, _}  -> '';
+        {ok, Items} ->
+            P = proplists:get_value(protocol, Items),
+            #{cipher := C,
+              key_exchange := K,
+              mac := H} = proplists:get_value(selected_cipher_suite, Items),
+            F({P, {K, C, H}})
+    end.
+
+%% Extract a peer-certificate property via F; '' when there is no TLS
+%% peer certificate available.
+cert_info(F, #v1{sock = Sock}) ->
+    case rabbit_net:peercert(Sock) of
+        nossl      -> '';
+        {error, _} -> '';
+        {ok, Cert} -> list_to_binary(F(Cert))
+    end.
+
+%% Emit connection stats iff stats emission is currently enabled.
+maybe_emit_stats(State) ->
+    rabbit_event:if_enabled(State, #v1.stats_timer,
+                            fun() -> emit_stats(State) end).
+
+%% Push the current stats to core metrics and rabbit_event, then reset
+%% and re-arm the stats timer.
+emit_stats(State) ->
+    [{_, Pid}, {_, Recv_oct}, {_, Send_oct}, {_, Reductions}] = I
+        = infos(?SIMPLE_METRICS, State),
+    Infos = infos(?OTHER_METRICS, State),
+    rabbit_core_metrics:connection_stats(Pid, Infos),
+    rabbit_core_metrics:connection_stats(Pid, Recv_oct, Send_oct, Reductions),
+    rabbit_event:notify(connection_stats, Infos ++ I),
+    State1 = rabbit_event:reset_stats_timer(State, #v1.stats_timer),
+    ensure_stats_timer(State1).
+
+%% 1.0 stub
+-spec become_1_0(non_neg_integer(), #v1{}) -> no_return().
+
+%% Hand the connection over to the AMQP 1.0 reader (if that plugin is
+%% loaded). Id 0 selects plain AMQP mode, 3 selects SASL; anything
+%% else is refused, advertising protocol version 1.0.
+become_1_0(Id, State = #v1{sock = Sock}) ->
+    case code:is_loaded(rabbit_amqp1_0_reader) of
+        false -> refuse_connection(Sock, amqp1_0_plugin_not_enabled);
+        _     -> Mode = case Id of
+                            0 -> amqp;
+                            3 -> sasl;
+                            _ -> refuse_connection(
+                                   Sock, {unsupported_amqp1_0_protocol_id, Id},
+                                   {3, 1, 0, 0})
+                        end,
+                 F = fun (_Deb, Buf, BufLen, S) ->
+                             {rabbit_amqp1_0_reader, init,
+                              [Mode, pack_for_1_0(Buf, BufLen, S)]}
+                     end,
+                 State#v1{connection_state = {become, F}}
+    end.
+
+%% Bundle the pieces of reader state the 1.0 reader needs to take over
+%% the socket.
+pack_for_1_0(Buf, BufLen, #v1{parent       = Parent,
+                              sock         = Sock,
+                              recv_len     = RecvLen,
+                              pending_recv = PendingRecv,
+                              helper_sup   = SupPid,
+                              proxy_socket = ProxySocket}) ->
+    {Parent, Sock, RecvLen, PendingRecv, SupPid, Buf, BufLen, ProxySocket}.
+
+%% Log a hard error, then send connection.close on channel 0 and shut
+%% the connection down.
+respond_and_close(State, Channel, Protocol, Reason, LogErr) ->
+    log_hard_error(State, Channel, LogErr),
+    send_error_on_channel0_and_close(Channel, Protocol, Reason, State).
+
+%% Map Reason to a connection.close method, terminate all channels,
+%% close the connection and send the method on channel 0.
+send_error_on_channel0_and_close(Channel, Protocol, Reason, State) ->
+    {0, CloseMethod} =
+        rabbit_binary_generator:map_exception(Channel, Reason, Protocol),
+    State1 = close_connection(terminate_channels(State)),
+    ok = send_on_channel0(State#v1.sock, CloseMethod, Protocol),
+    State1.
+
+%%
+%% Publisher throttling
+%%
+
+%% Human-readable summary of why this connection is blocked, e.g.
+%% <<"low on memory & disk">>.
+blocked_by_message(#throttle{blocked_by = Reasons}) ->
+    %% we don't want to report internal flow as a reason here since
+    %% it is entirely transient
+    Durable = sets:to_list(sets:del_element(flow, Reasons)),
+    Joined  = string:join(lists:map(fun format_blocked_by/1, Durable), " & "),
+    list_to_binary(rabbit_misc:format("low on ~s", [Joined])).
+
+format_blocked_by({resource, memory}) -> "memory";
+format_blocked_by({resource, disk})   -> "disk";
+format_blocked_by({resource, disc})   -> "disk".
+
+%% Stamp the throttle with the current monotonic time.
+update_last_blocked_at(#throttle{} = Throttle) ->
+    Throttle#throttle{last_blocked_at = erlang:monotonic_time()}.
+
+connection_blocked_message_sent(
+  #throttle{connection_blocked_message_sent = Sent}) -> Sent.
+
+%% A connection.blocked should go out when blocking is in effect for a
+%% non-flow reason and the client has not been told yet.
+should_send_blocked(Throttle = #throttle{blocked_by = Reasons}) ->
+    case should_block(Throttle) of
+        false -> false;
+        true  ->
+            sets:size(sets:del_element(flow, Reasons)) =/= 0
+                andalso not connection_blocked_message_sent(Throttle)
+    end.
+
+%% A connection.unblocked should go out when the client was told we
+%% were blocked and no non-flow reasons remain.
+should_send_unblocked(#throttle{blocked_by = Reasons,
+                                connection_blocked_message_sent = Sent}) ->
+    Sent andalso sets:size(sets:del_element(flow, Reasons)) == 0.
+
+%% Returns true if we have a reason to block
+%% this connection.
+has_reasons_to_block(#throttle{blocked_by = Reasons}) ->
+    sets:size(Reasons) =/= 0.
+
+is_blocked_by_flow(#throttle{blocked_by = Reasons}) ->
+    sets:is_element(flow, Reasons).
+
+should_block(#throttle{should_block = ShouldBlock}) -> ShouldBlock.
+
+should_block_connection(Throttle) ->
+    should_block(Throttle) andalso has_reasons_to_block(Throttle).
+
+should_unblock_connection(Throttle) ->
+    not should_block_connection(Throttle).
+
+%% Transition to the 'blocked' state if the throttle says so: pause
+%% heartbeat monitoring (so a blocked client is not dropped for missed
+%% heartbeats) and notify the client if needed.
+maybe_block(State = #v1{connection_state = CS, throttle = Throttle}) ->
+    case should_block_connection(Throttle) of
+        true ->
+            State1 = State#v1{connection_state = blocked,
+                              throttle = update_last_blocked_at(Throttle)},
+            case CS of
+                running ->
+                    ok = rabbit_heartbeat:pause_monitor(State#v1.heartbeater);
+                _ -> ok
+            end,
+            maybe_send_blocked_or_unblocked(State1);
+        false -> State
+    end.
+
+%% Reverse of maybe_block/1: resume heartbeats, return to 'running'
+%% and tell the client we are unblocked if we previously said blocked.
+maybe_unblock(State = #v1{throttle = Throttle}) ->
+    case should_unblock_connection(Throttle) of
+        true ->
+            ok = rabbit_heartbeat:resume_monitor(State#v1.heartbeater),
+            State1 = State#v1{connection_state = running,
+                              throttle = Throttle#throttle{should_block = false}},
+            maybe_send_unblocked(State1);
+        false -> State
+    end.
+
+%% Send connection.unblocked (at most once per blocked period).
+maybe_send_unblocked(State = #v1{throttle = Throttle}) ->
+    case should_send_unblocked(Throttle) of
+        true ->
+            ok = send_unblocked(State),
+            State#v1{throttle =
+                         Throttle#throttle{connection_blocked_message_sent = false}};
+        false -> State
+    end.
+
+%% Send connection.blocked (with the reasons) or, failing that, any
+%% pending connection.unblocked.
+maybe_send_blocked_or_unblocked(State = #v1{throttle = Throttle}) ->
+    case should_send_blocked(Throttle) of
+        true ->
+            ok = send_blocked(State, blocked_by_message(Throttle)),
+            State#v1{throttle =
+                         Throttle#throttle{connection_blocked_message_sent = true}};
+        false -> maybe_send_unblocked(State)
+    end.
+
+%% Called when the client publishes: blocking only takes effect once
+%% the client actually publishes, at which point we may block.
+publish_received(State = #v1{throttle = Throttle}) ->
+    case has_reasons_to_block(Throttle) of
+        false -> State;
+        true  ->
+            Throttle1 = Throttle#throttle{should_block = true},
+            maybe_block(State#v1{throttle = Throttle1})
+    end.
+
+%% Re-evaluate credit-flow state and transition between running and
+%% blocked as appropriate.
+control_throttle(State = #v1{connection_state = CS,
+                             throttle = #throttle{blocked_by = Reasons} = Throttle}) ->
+    Throttle1 = case credit_flow:blocked() of
+                    true ->
+                        Throttle#throttle{blocked_by = sets:add_element(flow, Reasons)};
+                    false ->
+                        Throttle#throttle{blocked_by = sets:del_element(flow, Reasons)}
+                end,
+    State1 = State#v1{throttle = Throttle1},
+    case CS of
+        running -> maybe_block(State1);
+        %% unblock or re-enable blocking
+        blocked -> maybe_block(maybe_unblock(State1));
+        _       -> State1
+    end.
+
+%% If the client supplied a "connection_name" client property, append
+%% it to the connection's log name and store it as the process name.
+augment_connection_log_name(#connection{name = Name} = Connection) ->
+    case user_provided_connection_name(Connection) of
+        undefined ->
+            Connection;
+        UserSpecifiedName ->
+            LogName = <<Name/binary, " - ", UserSpecifiedName/binary>>,
+            rabbit_log_connection:info("Connection ~p (~s) has a client-provided name: ~s~n", [self(), Name, UserSpecifiedName]),
+            ?store_proc_name(LogName),
+            Connection#connection{log_name = LogName}
+    end.
+
+%% Prepend the user-provided connection name (if any) to an info list.
+augment_infos_with_user_provided_connection_name(Infos, #v1{connection = Connection}) ->
+    case user_provided_connection_name(Connection) of
+        undefined ->
+            Infos;
+        UserProvidedConnectionName ->
+            [{user_provided_name, UserProvidedConnectionName} | Infos]
+    end.
+
+%% The optional "connection_name" client property, or undefined.
+user_provided_connection_name(#connection{client_properties = ClientProperties}) ->
+    case rabbit_misc:table_lookup(ClientProperties, <<"connection_name">>) of
+        {longstr, UserSpecifiedName} ->
+            UserSpecifiedName;
+        _ ->
+            undefined
+    end.
+
+%% The current stored process name (which may embed the user-provided
+%% connection name), falling back to Default.
+dynamic_connection_name(Default) ->
+    case rabbit_misc:get_proc_name() of
+        {ok, Name} ->
+            Name;
+        _ ->
+            Default
+    end.
+
+%% A channel went away without the normal shutdown handshake: record
+%% the closure in core metrics and events.
+handle_uncontrolled_channel_close(ChPid) ->
+    rabbit_core_metrics:channel_closed(ChPid),
+    rabbit_event:notify(channel_closed, [{pid, ChPid}]).
+
+-spec get_client_value_detail(atom(), integer()) -> string().
+%% Extra detail appended to negotiation error messages; a channel_max
+%% of 0 means "no limit".
+get_client_value_detail(channel_max, 0) ->
+    " (no limit)";
+get_client_value_detail(_Field, _ClientValue) ->
+    "".
diff --git a/deps/rabbit/src/rabbit_recovery_terms.erl b/deps/rabbit/src/rabbit_recovery_terms.erl
new file mode 100644
index 0000000000..d89de9ece3
--- /dev/null
+++ b/deps/rabbit/src/rabbit_recovery_terms.erl
@@ -0,0 +1,240 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% We use a gen_server simply so that during the terminate/2 call
+%% (i.e., during shutdown), we can sync/flush the dets table to disk.
+%%
+%% One instance runs per vhost; the DETS table is named after the
+%% vhost and keyed by queue directory base name.
+
+-module(rabbit_recovery_terms).
+
+-behaviour(gen_server).
+
+-export([start/1, stop/1, store/3, read/2, clear/1]).
+
+-export([start_link/1]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+-export([upgrade_recovery_terms/0, persistent_bytes/0]).
+-export([open_global_table/0, close_global_table/0,
+         read_global/1, delete_global_table/0]).
+-export([open_table/1, close_table/1]).
+
+-rabbit_upgrade({upgrade_recovery_terms, local, []}).
+-rabbit_upgrade({persistent_bytes, local, [upgrade_recovery_terms]}).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-spec start(rabbit_types:vhost()) -> rabbit_types:ok_or_error(term()).
+
+%% Start the recovery-terms manager for VHost under the vhost's
+%% supervisor. If the vhost disappeared concurrently we just log an
+%% error and still return ok (best effort).
+start(VHost) ->
+    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+        {ok, VHostSup} ->
+            {ok, _} = supervisor2:start_child(
+                        VHostSup,
+                        {?MODULE,
+                         {?MODULE, start_link, [VHost]},
+                         transient, ?WORKER_WAIT, worker,
+                         [?MODULE]});
+        %% we can get here if a vhost is added and removed concurrently
+        %% e.g. some integration tests do it
+        {error, {no_such_vhost, VHost}} ->
+            rabbit_log:error("Failed to start a recovery terms manager for vhost ~s: vhost no longer exists!",
+                             [VHost])
+    end,
+    ok.
+
+-spec stop(rabbit_types:vhost()) -> rabbit_types:ok_or_error(term()).
+
+%% Stop and remove the per-vhost recovery-terms child; tolerates the
+%% vhost having already gone away (see start/1).
+stop(VHost) ->
+    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
+        {ok, VHostSup} ->
+            case supervisor:terminate_child(VHostSup, ?MODULE) of
+                ok -> supervisor:delete_child(VHostSup, ?MODULE);
+                E  -> E
+            end;
+        %% see start/1
+        {error, {no_such_vhost, VHost}} ->
+            rabbit_log:error("Failed to stop a recovery terms manager for vhost ~s: vhost no longer exists!",
+                             [VHost]),
+
+            ok
+    end.
+
+-spec store(rabbit_types:vhost(), file:filename(), term()) -> rabbit_types:ok_or_error(term()).
+
+%% Persist Terms for a queue directory in the vhost's DETS table.
+store(VHost, DirBaseName, Terms) ->
+    dets:insert(VHost, {DirBaseName, Terms}).
+
+-spec read(rabbit_types:vhost(), file:filename()) -> rabbit_types:ok_or_error2(term(), not_found).
+
+%% Look up previously stored terms for a queue directory.
+read(VHost, DirBaseName) ->
+    case dets:lookup(VHost, DirBaseName) of
+        [{_, Terms}] -> {ok, Terms};
+        _            -> {error, not_found}
+    end.
+
+-spec clear(rabbit_types:vhost()) -> 'ok'.
+
+%% Delete all stored terms for the vhost and sync the table; tolerates
+%% the table no longer existing.
+clear(VHost) ->
+    try
+        dets:delete_all_objects(VHost)
+    %% see start/1
+    catch _:badarg ->
+            rabbit_log:error("Failed to clear recovery terms for vhost ~s: table no longer exists!",
+                             [VHost]),
+            ok
+    end,
+    flush(VHost).
+
+start_link(VHost) ->
+    gen_server:start_link(?MODULE, [VHost], []).
+
+%%----------------------------------------------------------------------------
+
+%% Upgrade step: fold legacy per-queue "clean.dot" files into the
+%% global recovery DETS table, deleting each file afterwards.
+upgrade_recovery_terms() ->
+    open_global_table(),
+    try
+        QueuesDir = filename:join(rabbit_mnesia:dir(), "queues"),
+        Dirs = case rabbit_file:list_dir(QueuesDir) of
+                   {ok, Entries} -> Entries;
+                   {error, _}    -> []
+               end,
+        [begin
+             File = filename:join([QueuesDir, Dir, "clean.dot"]),
+             case rabbit_file:read_term_file(File) of
+                 {ok, Terms} -> ok = store_global_table(Dir, Terms);
+                 {error, _}  -> ok
+             end,
+             file:delete(File)
+         end || Dir <- Dirs],
+        ok
+    after
+        close_global_table()
+    end.
+
+%% Upgrade step: add a persistent_bytes entry (default 0) to every
+%% stored terms proplist.
+persistent_bytes()      -> dets_upgrade(fun persistent_bytes/1).
+persistent_bytes(Props) -> Props ++ [{persistent_bytes, 0}].
+
+%% Apply Fun to every term set in the global table, rewriting each
+%% entry in place; the table is opened/closed around the fold.
+dets_upgrade(Fun)->
+    open_global_table(),
+    try
+        ok = dets:foldl(fun ({DirBaseName, Terms}, Acc) ->
+                                store_global_table(DirBaseName, Fun(Terms)),
+                                Acc
+                        end, ok, ?MODULE),
+        ok
+    after
+        close_global_table()
+    end.
+
+%% Open the node-global recovery DETS table ("recovery.dets" in the
+%% Mnesia dir), kept in RAM and only saved explicitly.
+open_global_table() ->
+    File = filename:join(rabbit_mnesia:dir(), "recovery.dets"),
+    {ok, _} = dets:open_file(?MODULE, [{file,      File},
+                                       {ram_file,  true},
+                                       {auto_save, infinity}]),
+    ok.
+
+%% Sync and close the global table; tolerates it being already gone.
+close_global_table() ->
+    try
+        dets:sync(?MODULE),
+        dets:close(?MODULE)
+    %% see clear/1
+    catch _:badarg ->
+            rabbit_log:error("Failed to clear global recovery terms: table no longer exists!",
+                             []),
+            ok
+    end.
+
+store_global_table(DirBaseName, Terms) ->
+    dets:insert(?MODULE, {DirBaseName, Terms}).
+
+read_global(DirBaseName) ->
+    case dets:lookup(?MODULE, DirBaseName) of
+        [{_, Terms}] -> {ok, Terms};
+        _            -> {error, not_found}
+    end.
+
+delete_global_table() ->
+    file:delete(filename:join(rabbit_mnesia:dir(), "recovery.dets")).
+
+%%----------------------------------------------------------------------------
+
+%% gen_server callbacks: the server's only job is to own the vhost's
+%% table and close it cleanly on shutdown (hence trap_exit).
+init([VHost]) ->
+    process_flag(trap_exit, true),
+    open_table(VHost),
+    {ok, VHost}.
+
+handle_call(Msg, _, State) -> {stop, {unexpected_call, Msg}, State}.
+
+handle_cast(Msg, State) -> {stop, {unexpected_cast, Msg}, State}.
+
+handle_info(_Info, State) -> {noreply, State}.
+
+terminate(_Reason, VHost) ->
+    close_table(VHost).
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+
+-spec open_table(vhost:name()) -> rabbit_types:ok_or_error(any()).
+
+open_table(VHost) ->
+    open_table(VHost, 10).
+
+-spec open_table(vhost:name(), non_neg_integer()) -> rabbit_types:ok_or_error(any()).
+
+%% Open the per-vhost recovery DETS file, retrying up to RetriesLeft
+%% times. On failure the (possibly corrupt) file is deleted before the
+%% next attempt, so at worst recovery terms are lost, not startup.
+open_table(VHost, RetriesLeft) ->
+    VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
+    File = filename:join(VHostDir, "recovery.dets"),
+    Opts = [{file,      File},
+            {ram_file,  true},
+            {auto_save, infinity}],
+    case dets:open_file(VHost, Opts) of
+        {ok, _}        -> ok;
+        {error, Error} ->
+            case RetriesLeft of
+                0 ->
+                    {error, Error};
+                N when is_integer(N) ->
+                    _ = file:delete(File),
+                    %% Wait before retrying
+                    DelayInMs = 1000,
+                    rabbit_log:warning("Failed to open a recovery terms DETS file at ~p. Will delete it and retry in ~p ms (~p retries left)",
+                                       [File, DelayInMs, RetriesLeft]),
+                    timer:sleep(DelayInMs),
+                    open_table(VHost, RetriesLeft - 1)
+            end
+    end.
+
+-spec flush(vhost:name()) -> rabbit_types:ok_or_error(any()).
+
+%% Sync the vhost table to disk; tolerates a missing table.
+flush(VHost) ->
+    try
+        dets:sync(VHost)
+    %% see clear/1
+    catch _:badarg ->
+            rabbit_log:error("Failed to sync recovery terms table for vhost ~s: the table no longer exists!",
+                             [VHost]),
+            ok
+    end.
+
+-spec close_table(vhost:name()) -> rabbit_types:ok_or_error(any()).
+
+%% Flush then close the vhost table; tolerates a missing table.
+close_table(VHost) ->
+    try
+        ok = flush(VHost),
+        ok = dets:close(VHost)
+    %% see clear/1
+    catch _:badarg ->
+            rabbit_log:error("Failed to close recovery terms table for vhost ~s: the table no longer exists!",
+                             [VHost]),
+            ok
+    end.
diff --git a/deps/rabbit/src/rabbit_restartable_sup.erl b/deps/rabbit/src/rabbit_restartable_sup.erl
new file mode 100644
index 0000000000..46fcace99f
--- /dev/null
+++ b/deps/rabbit/src/rabbit_restartable_sup.erl
@@ -0,0 +1,33 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% A one_for_one supervisor wrapping a single worker given as an MFA,
+%% optionally with delayed transient restarts.
+
+-module(rabbit_restartable_sup).
+
+-behaviour(supervisor2).
+
+-export([start_link/3]).
+
+-export([init/1]).
+
+-include("rabbit.hrl").
+
+%% NOTE(review): ?DELAY appears unused in this module's visible code —
+%% confirm before removing.
+-define(DELAY, 2).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(atom(), rabbit_types:mfargs(), boolean()) ->
+          rabbit_types:ok_pid_or_error().
+
+%% Start a locally-registered supervisor running the single child
+%% given by the MFA; Delay = true selects {transient, 1} (delayed)
+%% restarts, false plain transient.
+start_link(Name, {_M, _F, _A} = Fun, Delay) ->
+    supervisor2:start_link({local, Name}, ?MODULE, [Fun, Delay]).
+
+init([{Mod, _F, _A} = Fun, Delay]) ->
+    {ok, {{one_for_one, 10, 10},
+          [{Mod, Fun, case Delay of
+                          true  -> {transient, 1};
+                          false -> transient
+                      end, ?WORKER_WAIT, worker, [Mod]}]}}.
diff --git a/deps/rabbit/src/rabbit_router.erl b/deps/rabbit/src/rabbit_router.erl
new file mode 100644
index 0000000000..ed170bcd8e
--- /dev/null
+++ b/deps/rabbit/src/rabbit_router.erl
@@ -0,0 +1,65 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Binding lookups; reads the rabbit_route ETS table directly rather
+%% than going through Mnesia (see the comment on find_routes/2).
+
+-module(rabbit_router).
+-include_lib("stdlib/include/qlc.hrl").
+-include("rabbit.hrl").
+
+-export([match_bindings/2, match_routing_key/2]).
+
+%%----------------------------------------------------------------------------
+
+-export_type([routing_key/0, match_result/0]).
+
+-type routing_key() :: binary().
+-type match_result() :: [rabbit_types:binding_destination()].
+
+-spec match_bindings(rabbit_types:binding_source(),
+                     fun ((rabbit_types:binding()) -> boolean())) ->
+          match_result().
+-spec match_routing_key(rabbit_types:binding_source(),
+                        [routing_key()] | ['_']) ->
+          match_result().
+
+%%----------------------------------------------------------------------------
+
+%% All destinations bound to SrcName whose binding satisfies the
+%% caller-supplied predicate Match.
+match_bindings(SrcName, Match) ->
+    MatchHead = #route{binding = #binding{source = SrcName,
+                                          _      = '_'}},
+    Routes = ets:select(rabbit_route, [{MatchHead, [], [['$_']]}]),
+    [Dest || [#route{binding = Binding = #binding{destination = Dest}}] <-
+                 Routes, Match(Binding)].
+
+%% Destinations bound to SrcName with a binding key equal to any of
+%% the given routing keys ('_' matches every key). The single-key
+%% clause avoids building an 'orelse' match-spec guard.
+match_routing_key(SrcName, [RoutingKey]) ->
+    find_routes(#route{binding = #binding{source      = SrcName,
+                                          destination = '$1',
+                                          key         = RoutingKey,
+                                          _           = '_'}},
+                []);
+match_routing_key(SrcName, [_|_] = RoutingKeys) ->
+    find_routes(#route{binding = #binding{source      = SrcName,
+                                          destination = '$1',
+                                          key         = '$2',
+                                          _           = '_'}},
+                [list_to_tuple(['orelse' | [{'=:=', '$2', RKey} ||
+                                               RKey <- RoutingKeys]])]).
+
+%%--------------------------------------------------------------------
+
+%% Normally we'd call mnesia:dirty_select/2 here, but that is quite
+%% expensive for the same reasons as above, and, additionally, due to
+%% mnesia 'fixing' the table with ets:safe_fixtable/2, which is wholly
+%% unnecessary. According to the ets docs (and the code in erl_db.c),
+%% 'select' is safe anyway ("Functions that internally traverse over a
+%% table, like select and match, will give the same guarantee as
+%% safe_fixtable.") and, furthermore, even the lower level iterators
+%% ('first' and 'next') are safe on ordered_set tables ("Note that for
+%% tables of the ordered_set type, safe_fixtable/2 is not necessary as
+%% calls to first/1 and next/2 will always succeed."), which
+%% rabbit_route is.
+find_routes(MatchHead, Conditions) ->
+    ets:select(rabbit_route, [{MatchHead, Conditions, ['$1']}]).
diff --git a/deps/rabbit/src/rabbit_runtime_parameters.erl b/deps/rabbit/src/rabbit_runtime_parameters.erl
new file mode 100644
index 0000000000..1870b5dfa5
--- /dev/null
+++ b/deps/rabbit/src/rabbit_runtime_parameters.erl
@@ -0,0 +1,412 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+-module(rabbit_runtime_parameters).
+ +%% Runtime parameters are bits of configuration that are +%% set, as the name implies, at runtime and not in the config file. +%% +%% The benefits of storing some bits of configuration at runtime vary: +%% +%% * Some parameters are vhost-specific +%% * Others are specific to individual nodes +%% * ...or even queues, exchanges, etc +%% +%% The most obvious use case for runtime parameters is policies but +%% there are others: +%% +%% * Plugin-specific parameters that only make sense at runtime, +%% e.g. Federation and Shovel link settings +%% * Exchange and queue decorators +%% +%% Parameters are grouped by components, e.g. <<"policy">> or <<"shovel">>. +%% Components are mapped to modules that perform validation. +%% Runtime parameter values are then looked up by the modules that +%% need to use them. +%% +%% Parameters are stored in Mnesia and can be global. Their changes +%% are broadcasted over rabbit_event. +%% +%% Global parameters keys are atoms and values are JSON documents. +%% +%% See also: +%% +%% * rabbit_policies +%% * rabbit_policy +%% * rabbit_registry +%% * rabbit_event + +-include("rabbit.hrl"). + +-export([parse_set/5, set/5, set_any/5, clear/4, clear_any/4, list/0, list/1, + list_component/1, list/2, list_formatted/1, list_formatted/3, + lookup/3, value/3, value/4, info_keys/0, clear_component/2]). + +-export([parse_set_global/3, set_global/3, value_global/1, value_global/2, + list_global/0, list_global_formatted/0, list_global_formatted/2, + lookup_global/1, global_info_keys/0, clear_global/2]). + +%%---------------------------------------------------------------------------- + +-type ok_or_error_string() :: 'ok' | {'error_string', string()}. +-type ok_thunk_or_error_string() :: ok_or_error_string() | fun(() -> 'ok'). + +-spec parse_set(rabbit_types:vhost(), binary(), binary(), string(), + rabbit_types:user() | rabbit_types:username() | 'none') + -> ok_or_error_string(). 
+-spec set(rabbit_types:vhost(), binary(), binary(), term(), + rabbit_types:user() | rabbit_types:username() | 'none') + -> ok_or_error_string(). +-spec set_any(rabbit_types:vhost(), binary(), binary(), term(), + rabbit_types:user() | rabbit_types:username() | 'none') + -> ok_or_error_string(). +-spec set_global(atom(), term(), rabbit_types:username()) -> 'ok'. +-spec clear(rabbit_types:vhost(), binary(), binary(), rabbit_types:username()) + -> ok_thunk_or_error_string(). +-spec clear_any(rabbit_types:vhost(), binary(), binary(), rabbit_types:username()) + -> ok_thunk_or_error_string(). +-spec list() -> [rabbit_types:infos()]. +-spec list(rabbit_types:vhost() | '_') -> [rabbit_types:infos()]. +-spec list_component(binary()) -> [rabbit_types:infos()]. +-spec list(rabbit_types:vhost() | '_', binary() | '_') + -> [rabbit_types:infos()]. +-spec list_formatted(rabbit_types:vhost()) -> [rabbit_types:infos()]. +-spec list_formatted(rabbit_types:vhost(), reference(), pid()) -> 'ok'. +-spec lookup(rabbit_types:vhost(), binary(), binary()) + -> rabbit_types:infos() | 'not_found'. +-spec value(rabbit_types:vhost(), binary(), binary()) -> term(). +-spec value(rabbit_types:vhost(), binary(), binary(), term()) -> term(). +-spec value_global(atom()) -> term() | 'not_found'. +-spec value_global(atom(), term()) -> term(). +-spec info_keys() -> rabbit_types:info_keys(). + +%%--------------------------------------------------------------------------- + +-import(rabbit_misc, [pget/2]). + +-define(TABLE, rabbit_runtime_parameters). 
+
+%%---------------------------------------------------------------------------
+
+%% Decode a JSON definition and set a vhost-scoped parameter. JSON
+%% objects are stored as proplists. Policies have their own code path
+%% and are rejected here.
+parse_set(_, <<"policy">>, _, _, _) ->
+    {error_string, "policies may not be set using this method"};
+parse_set(VHost, Component, Name, String, User) ->
+    Definition = rabbit_data_coercion:to_binary(String),
+    case rabbit_json:try_decode(Definition) of
+        {ok, Term} when is_map(Term) -> set(VHost, Component, Name, maps:to_list(Term), User);
+        {ok, Term}                   -> set(VHost, Component, Name, Term, User);
+        {error, Reason} ->
+            {error_string,
+             rabbit_misc:format("JSON decoding error. Reason: ~ts", [Reason])}
+    end.
+
+%% Set an already-decoded vhost-scoped parameter (policies excluded).
+set(_, <<"policy">>, _, _, _) ->
+    {error_string, "policies may not be set using this method"};
+set(VHost, Component, Name, Term, User) ->
+    set_any(VHost, Component, Name, Term, User).
+
+%% Decode a JSON definition and set a global (non-vhost) parameter.
+parse_set_global(Name, String, ActingUser) ->
+    Definition = rabbit_data_coercion:to_binary(String),
+    case rabbit_json:try_decode(Definition) of
+        {ok, Term} when is_map(Term) -> set_global(Name, maps:to_list(Term), ActingUser);
+        {ok, Term}                   -> set_global(Name, Term, ActingUser);
+        {error, Reason} ->
+            {error_string,
+             rabbit_misc:format("JSON decoding error. Reason: ~ts", [Reason])}
+    end.
+
+%% Store a global parameter (key coerced to an atom) and broadcast a
+%% parameter_set event.
+set_global(Name, Term, ActingUser) ->
+    NameAsAtom = rabbit_data_coercion:to_atom(Name),
+    rabbit_log:debug("Setting global parameter '~s' to ~p", [NameAsAtom, Term]),
+    mnesia_update(NameAsAtom, Term),
+    event_notify(parameter_set, none, global, [{name,  NameAsAtom},
+                                               {value, Term},
+                                               {user_who_performed_action, ActingUser}]),
+    ok.
+
+%% Render a list of validation errors into an error string.
+format_error(L) ->
+    {error_string, rabbit_misc:format_many([{"Validation failed~n", []} | L])}.
+
+%% Like set/5 but with no component restriction; validation errors are
+%% turned into an error string.
+set_any(VHost, Component, Name, Term, User) ->
+    case set_any0(VHost, Component, Name, Term, User) of
+        ok          -> ok;
+        {errors, L} -> format_error(L)
+    end.
+
+%% Validates the value with the component's registered module, then
+%% writes it. Notification is skipped when the stored value is unchanged
+%% ({old, Term} match against the same Term).
+set_any0(VHost, Component, Name, Term, User) ->
+    rabbit_log:debug("Asked to set or update runtime parameter '~s' in vhost '~s' "
+                     "for component '~s', value: ~p",
+                     [Name, VHost, Component, Term]),
+    case lookup_component(Component) of
+        {ok, Mod} ->
+            case flatten_errors(
+                   Mod:validate(VHost, Component, Name, Term, get_user(User))) of
+                ok ->
+                    case mnesia_update(VHost, Component, Name, Term) of
+                        {old, Term} ->
+                            ok;
+                        _ ->
+                            ActingUser = get_username(User),
+                            event_notify(
+                              parameter_set, VHost, Component,
+                              [{name, Name},
+                               {value, Term},
+                               {user_who_performed_action, ActingUser}]),
+                            Mod:notify(VHost, Component, Name, Term, ActingUser)
+                    end,
+                    ok;
+                E ->
+                    E
+            end;
+        E ->
+            E
+    end.
+
+%% Validate only an user record as expected by the API before #rabbitmq-event-exchange-10
+get_user(#user{} = User) ->
+    User;
+get_user(_) ->
+    none.
+
+%% Resolves the acting username: a #user{} record, the internal user for
+%% 'none', or any other term passed through as-is (assumed to already be
+%% a username).
+get_username(#user{username = Username}) ->
+    Username;
+get_username(none) ->
+    ?INTERNAL_USER;
+get_username(Any) ->
+    Any.
+
+%% Writes a global parameter (atom key) in an Mnesia transaction.
+mnesia_update(Key, Term) ->
+    rabbit_misc:execute_mnesia_transaction(mnesia_update_fun(Key, Term)).
+
+%% Writes a vhost-scoped parameter; rabbit_vhost:with/2 ensures the
+%% vhost still exists inside the transaction.
+mnesia_update(VHost, Comp, Name, Term) ->
+    rabbit_misc:execute_mnesia_transaction(
+      rabbit_vhost:with(VHost, mnesia_update_fun({VHost, Comp, Name}, Term))).
+
+%% Returns a transaction fun whose result is 'new' or {old, PrevValue},
+%% letting the caller detect no-op updates.
+mnesia_update_fun(Key, Term) ->
+    fun () ->
+            Res = case mnesia:read(?TABLE, Key, read) of
+                      [] -> new;
+                      [Params] -> {old, Params#runtime_parameters.value}
+                  end,
+            ok = mnesia:write(?TABLE, c(Key, Term), write),
+            Res
+    end.
+
+%% Removes a vhost-scoped parameter. Policies are rejected, as above.
+clear(_, <<"policy">> , _, _) ->
+    {error_string, "policies may not be cleared using this method"};
+clear(VHost, Component, Name, ActingUser) ->
+    clear_any(VHost, Component, Name, ActingUser).
+
+%% Removes a global parameter. When called inside an Mnesia transaction
+%% the event notification cannot fire yet, so the notify thunk is
+%% returned for the caller to run after commit; otherwise it runs now.
+clear_global(Key, ActingUser) ->
+    KeyAsAtom = rabbit_data_coercion:to_atom(Key),
+    Notify = fun() ->
+                     %% Fix: emit parameter_cleared, not parameter_set —
+                     %% this is a removal, and must mirror the event
+                     %% emitted by clear_any/4 for vhost-scoped clears.
+                     event_notify(parameter_cleared, none, global,
+                                  [{name, KeyAsAtom},
+                                   {user_who_performed_action, ActingUser}]),
+                     ok
+             end,
+    case value_global(KeyAsAtom) of
+        not_found ->
+            {error_string, "Parameter does not exist"};
+        _ ->
+            F = fun () ->
+                        ok = mnesia:delete(?TABLE, KeyAsAtom, write)
+                end,
+            ok = rabbit_misc:execute_mnesia_transaction(F),
+            case mnesia:is_transaction() of
+                true  -> Notify;
+                false -> Notify()
+            end
+    end.
+
+%% Clears every parameter belonging to one component, best-effort.
+clear_component(Component, ActingUser) ->
+    case list_component(Component) of
+        [] ->
+            ok;
+        Xs ->
+            [clear(pget(vhost, X),
+                   pget(component, X),
+                   pget(name, X),
+                   ActingUser) || X <- Xs],
+            ok
+    end.
+
+%% Removes a vhost-scoped parameter and notifies the owning component.
+%% Same deferred-notification contract as clear_global/2.
+clear_any(VHost, Component, Name, ActingUser) ->
+    Notify = fun () ->
+                     case lookup_component(Component) of
+                         {ok, Mod} -> event_notify(
+                                        parameter_cleared, VHost, Component,
+                                        [{name, Name},
+                                         {user_who_performed_action, ActingUser}]),
+                                      Mod:notify_clear(VHost, Component, Name, ActingUser);
+                         _ -> ok
+                     end
+             end,
+    case lookup(VHost, Component, Name) of
+        not_found -> {error_string, "Parameter does not exist"};
+        _ -> mnesia_clear(VHost, Component, Name),
+             case mnesia:is_transaction() of
+                 true  -> Notify;
+                 false -> Notify()
+             end
+    end.
+
+%% Deletes the row inside a transaction guarded by vhost existence.
+mnesia_clear(VHost, Component, Name) ->
+    F = fun () ->
+                ok = mnesia:delete(?TABLE, {VHost, Component, Name}, write)
+        end,
+    ok = rabbit_misc:execute_mnesia_transaction(rabbit_vhost:with(VHost, F)).
+
+%% Publishes a rabbit_event; policy events are suppressed here (policies
+%% emit their own events elsewhere). 'none' means a global parameter.
+event_notify(_Event, _VHost, <<"policy">>, _Props) ->
+    ok;
+event_notify(Event, none, Component, Props) ->
+    rabbit_event:notify(Event, [{component, Component} | Props]);
+event_notify(Event, VHost, Component, Props) ->
+    rabbit_event:notify(Event, [{vhost, VHost},
+                                {component, Component} | Props]).
+
+%% Lists all vhost-scoped parameters, excluding policies.
+list() ->
+    [p(P) || #runtime_parameters{ key = {_VHost, Comp, _Name}} = P <-
+                 rabbit_misc:dirty_read_all(?TABLE), Comp /= <<"policy">>].
+
+list(VHost) -> list(VHost, '_').
+%% Lists parameters for one component across all vhosts.
+list_component(Component) -> list('_', Component).
+
+%% Not dirty_match_object since that would not be transactional when used in a
+%% tx context
+list(VHost, Component) ->
+    mnesia:async_dirty(
+      fun () ->
+              case VHost of
+                  '_' -> ok;
+                  _ -> rabbit_vhost:assert(VHost)
+              end,
+              Match = #runtime_parameters{key = {VHost, Component, '_'},
+                                          _ = '_'},
+              %% Policies are excluded unless explicitly requested.
+              [p(P) || #runtime_parameters{key = {_VHost, Comp, _Name}} = P <-
+                           mnesia:match_object(?TABLE, Match, read),
+                       Comp =/= <<"policy">> orelse Component =:= <<"policy">>]
+      end).
+
+list_global() ->
+    %% list only atom keys
+    mnesia:async_dirty(
+      fun () ->
+              Match = #runtime_parameters{key = '_', _ = '_'},
+              [p(P) || P <- mnesia:match_object(?TABLE, Match, read),
+                       is_atom(P#runtime_parameters.key)]
+      end).
+
+%% CLI/HTTP display form: values are re-encoded as JSON strings.
+list_formatted(VHost) ->
+    [ format_parameter(info_keys(), P) || P <- list(VHost) ].
+
+%% Projects proplist P onto InfoKeys, JSON-encoding the 'value' entry
+%% and dropping keys that are absent from P.
+format_parameter(InfoKeys, P) ->
+    lists:foldr(fun
+                    (value, Acc) ->
+                        [{value, rabbit_json:encode(pget(value, P))} | Acc];
+                    (Key, Acc) ->
+                        case lists:keyfind(Key, 1, P) of
+                            false -> Acc;
+                            {Key, Val} -> [{Key, Val} | Acc]
+                        end
+                end,
+                [], InfoKeys).
+
+%% Streaming variant used by `rabbitmqctl` emitters.
+list_formatted(VHost, Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map(
+      AggregatorPid, Ref,
+      fun(P) -> format_parameter(info_keys(), P) end, list(VHost)).
+
+list_global_formatted() ->
+    [ format_parameter(global_info_keys(), P) || P <- list_global() ].
+
+list_global_formatted(Ref, AggregatorPid) ->
+    rabbit_control_misc:emitting_map(
+      AggregatorPid, Ref,
+      fun(P) -> format_parameter(global_info_keys(), P) end, list_global()).
+
+%% Fetches one vhost-scoped parameter as an infos proplist.
+lookup(VHost, Component, Name) ->
+    case lookup0({VHost, Component, Name}, rabbit_misc:const(not_found)) of
+        not_found -> not_found;
+        Params -> p(Params)
+    end.
+
+lookup_global(Name) ->
+    case lookup0(Name, rabbit_misc:const(not_found)) of
+        not_found -> not_found;
+        Params -> p(Params)
+    end.
+
+%% Raw value access; value/4 inserts Def into the table on a miss (see
+%% lookup_missing/2), it does not merely return the default.
+value(VHost, Comp, Name) -> value0({VHost, Comp, Name}).
+value(VHost, Comp, Name, Def) -> value0({VHost, Comp, Name}, Def).
+
+value_global(Key) ->
+    value0(Key).
+
+value_global(Key, Default) ->
+    value0(Key, Default).
+
+value0(Key) ->
+    case lookup0(Key, rabbit_misc:const(not_found)) of
+        not_found -> not_found;
+        Params -> Params#runtime_parameters.value
+    end.
+
+%% NOTE: on a miss this *stores* Default via lookup_missing/2 — callers
+%% get read-through-with-write-back semantics, not a pure default.
+value0(Key, Default) ->
+    Params = lookup0(Key, fun () -> lookup_missing(Key, Default) end),
+    Params#runtime_parameters.value.
+
+%% Dirty read; DefaultFun supplies the fallback record/value on a miss.
+lookup0(Key, DefaultFun) ->
+    case mnesia:dirty_read(?TABLE, Key) of
+        [] -> DefaultFun();
+        [R] -> R
+    end.
+
+%% Inserts Default under Key unless a concurrent writer beat us to it;
+%% the transactional re-read makes this race-safe.
+lookup_missing(Key, Default) ->
+    rabbit_misc:execute_mnesia_transaction(
+      fun () ->
+              case mnesia:read(?TABLE, Key, read) of
+                  [] -> Record = c(Key, Default),
+                        mnesia:write(?TABLE, Record, write),
+                        Record;
+                  [R] -> R
+              end
+      end).
+
+%% Record constructor.
+c(Key, Default) ->
+    #runtime_parameters{key = Key,
+                        value = Default}.
+
+%% Converts a record to an infos proplist; the second clause handles
+%% global (atom-keyed) parameters, which carry no vhost/component.
+p(#runtime_parameters{key = {VHost, Component, Name}, value = Value}) ->
+    [{vhost, VHost},
+     {component, Component},
+     {name, Name},
+     {value, Value}];
+
+p(#runtime_parameters{key = Key, value = Value}) when is_atom(Key) ->
+    [{name, Key},
+     {value, Value}].
+
+info_keys() -> [component, name, value].
+
+global_info_keys() -> [name, value].
+
+%%---------------------------------------------------------------------------
+
+%% Resolves a component name (binary) to its registered validator
+%% module via rabbit_registry.
+lookup_component(Component) ->
+    case rabbit_registry:lookup_module(
+           runtime_parameter, list_to_atom(binary_to_list(Component))) of
+        {error, not_found} -> {errors,
+                               [{"component ~s not found", [Component]}]};
+        {ok, Module} -> {ok, Module}
+    end.
+
+%% Collapses a (possibly nested) list of validation results into either
+%% ok or {errors, [{Fmt, Args}]}.
+flatten_errors(L) ->
+    case [{F, A} || I <- lists:flatten([L]), {error, F, A} <- [I]] of
+        [] -> ok;
+        E -> {errors, E}
+    end.
diff --git a/deps/rabbit/src/rabbit_ssl.erl b/deps/rabbit/src/rabbit_ssl.erl
new file mode 100644
index 0000000000..84670b0a19
--- /dev/null
+++ b/deps/rabbit/src/rabbit_ssl.erl
@@ -0,0 +1,195 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+-module(rabbit_ssl).
+
+-include_lib("public_key/include/public_key.hrl").
+
+-export([peer_cert_issuer/1, peer_cert_subject/1, peer_cert_validity/1]).
+-export([peer_cert_subject_items/2, peer_cert_auth_name/1]).
+-export([cipher_suites_erlang/2, cipher_suites_erlang/1,
+         cipher_suites_openssl/2, cipher_suites_openssl/1,
+         cipher_suites/1]).
+
+%%--------------------------------------------------------------------------
+
+-export_type([certificate/0]).
+
+% Due to API differences between OTP releases.
+-dialyzer(no_missing_calls).
+-ignore_xref([{ssl_cipher_format, suite_legacy, 1},
+              {ssl_cipher_format, suite, 1},
+              {ssl_cipher_format, suite_to_str, 1},
+              {ssl_cipher_format, erl_suite_definition, 1},
+              {ssl_cipher_format, suite_map_to_openssl_str, 1},
+              {ssl_cipher_format, suite_map_to_bin, 1}]).
+
+-type certificate() :: rabbit_cert_info:certificate().
+
+-type cipher_suites_mode() :: default | all | anonymous.
+
+%% Cipher suites (map form) for the highest TLS version this node supports.
+-spec cipher_suites(cipher_suites_mode()) -> ssl:ciphers().
+cipher_suites(Mode) ->
+    Version = get_highest_protocol_version(),
+    ssl:cipher_suites(Mode, Version).
+
+%% Same set, in the legacy Erlang tuple naming.
+-spec cipher_suites_erlang(cipher_suites_mode()) ->
+    [ssl:old_cipher_suite()].
+cipher_suites_erlang(Mode) ->
+    Version = get_highest_protocol_version(),
+    cipher_suites_erlang(Mode, Version).
+
+-spec cipher_suites_erlang(cipher_suites_mode(),
+                           ssl:protocol_version() | tls_record:tls_version()) ->
+    [ssl:old_cipher_suite()].
+cipher_suites_erlang(Mode, Version) ->
+    [ format_cipher_erlang(C)
+      || C <- ssl:cipher_suites(Mode, Version) ].
+
+%% Same set, in OpenSSL string naming.
+-spec cipher_suites_openssl(cipher_suites_mode()) ->
+    [ssl:old_cipher_suite()].
+cipher_suites_openssl(Mode) ->
+    Version = get_highest_protocol_version(),
+    cipher_suites_openssl(Mode, Version).
+
+-spec cipher_suites_openssl(cipher_suites_mode(),
+                            ssl:protocol_version() | tls_record:tls_version()) ->
+    [ssl:old_cipher_suite()].
+%% Suites with no OpenSSL name (format returns a non-list) are dropped.
+cipher_suites_openssl(Mode, Version) ->
+    lists:filtermap(fun(C) ->
+                            OpenSSL = format_cipher_openssl(C),
+                            case is_list(OpenSSL) of
+                                true -> {true, OpenSSL};
+                                false -> false
+                            end
+                    end,
+                    ssl:cipher_suites(Mode, Version)).
+
+
+%% Dispatches on the ssl_cipher_format API available at runtime:
+%% suite_map_to_bin/1 exists from OTP 22 onwards.
+format_cipher_erlang(Cipher) ->
+    case erlang:function_exported(ssl_cipher_format, suite_map_to_bin, 1) of
+        true ->
+            format_cipher_erlang22(Cipher);
+        false ->
+            format_cipher_erlang21(Cipher)
+    end.
+
+format_cipher_erlang22(Cipher) ->
+    ssl_cipher_format:suite_legacy(ssl_cipher_format:suite_map_to_bin(Cipher)).
+
+format_cipher_erlang21(Cipher) ->
+    ssl_cipher_format:erl_suite_definition(ssl_cipher_format:suite(Cipher)).
+
+
+%% Same OTP-release dispatch, producing OpenSSL-style suite names.
+format_cipher_openssl(Cipher) ->
+    case erlang:function_exported(ssl_cipher_format, suite_map_to_bin, 1) of
+        true ->
+            format_cipher_openssl22(Cipher);
+        false ->
+            format_cipher_openssl21(Cipher)
+    end.
+
+format_cipher_openssl22(Cipher) ->
+    ssl_cipher_format:suite_map_to_openssl_str(Cipher).
+
+format_cipher_openssl21(Cipher) ->
+    ssl_cipher_format:suite_to_str(Cipher).
+
+-spec get_highest_protocol_version() -> tls_record:tls_atom_version().
+get_highest_protocol_version() ->
+    tls_record:protocol_version(
+      tls_record:highest_protocol_version([])).
+
+%%--------------------------------------------------------------------------
+%% High-level functions used by reader
+%%--------------------------------------------------------------------------
+
+%% Return a string describing the certificate's issuer.
+peer_cert_issuer(Cert) ->
+    rabbit_cert_info:issuer(Cert).
+
+%% Return a string describing the certificate's subject, as per RFC4514.
+peer_cert_subject(Cert) ->
+    rabbit_cert_info:subject(Cert).
+
+%% Return the parts of the certificate's subject.
+peer_cert_subject_items(Cert, Type) ->
+    rabbit_cert_info:subject_items(Cert, Type).
+
+%% Filters certificate SAN extensions by (OTP) SAN type name.
+peer_cert_subject_alternative_names(Cert, Type) ->
+    SANs = rabbit_cert_info:subject_alternative_names(Cert),
+    lists:filter(fun({Key, _}) -> Key =:= Type end, SANs).
+
+%% Return a string describing the certificate's validity.
+peer_cert_validity(Cert) ->
+    rabbit_cert_info:validity(Cert).
+
+%% Extract a username from the certificate
+-spec peer_cert_auth_name
+        (certificate()) -> binary() | 'not_found' | 'unsafe'.
+
+%% Mode comes from the ssl_cert_login_from app env; 'unsafe' is returned
+%% whenever peer verification is not enabled (see auth_config_sane/0).
+peer_cert_auth_name(Cert) ->
+    {ok, Mode} = application:get_env(rabbit, ssl_cert_login_from),
+    peer_cert_auth_name(Mode, Cert).
+
+peer_cert_auth_name(distinguished_name, Cert) ->
+    case auth_config_sane() of
+        true -> iolist_to_binary(peer_cert_subject(Cert));
+        false -> unsafe
+    end;
+
+%% Alias kept for config backwards compatibility.
+peer_cert_auth_name(subject_alt_name, Cert) ->
+    peer_cert_auth_name(subject_alternative_name, Cert);
+
+peer_cert_auth_name(subject_alternative_name, Cert) ->
+    case auth_config_sane() of
+        true ->
+            Type = application:get_env(rabbit, ssl_cert_login_san_type, dns),
+            %% lists:nth/2 is 1-based
+            Index = application:get_env(rabbit, ssl_cert_login_san_index, 0) + 1,
+            OfType = peer_cert_subject_alternative_names(Cert, otp_san_type(Type)),
+            rabbit_log:debug("Peer certificate SANs of type ~s: ~p, index to use with lists:nth/2: ~b", [Type, OfType, Index]),
+            case length(OfType) of
+                0 -> not_found;
+                N when N < Index -> not_found;
+                N when N >= Index ->
+                    {_, Value} = lists:nth(Index, OfType),
+                    rabbit_data_coercion:to_binary(Value)
+            end;
+        false -> unsafe
+    end;
+
+peer_cert_auth_name(common_name, Cert) ->
+    %% If there is more than one CN then we join them with "," in a
+    %% vaguely DN-like way. But this is more just so we do something
+    %% more intelligent than crashing, if you actually want to escape
+    %% things properly etc, use DN mode.
+    case auth_config_sane() of
+        true -> case peer_cert_subject_items(Cert, ?'id-at-commonName') of
+                    not_found -> not_found;
+                    CNs -> list_to_binary(string:join(CNs, ","))
+                end;
+        false -> unsafe
+    end.
+
+%% Certificate-based login is only safe when the peer was actually
+%% verified; warn and refuse otherwise.
+auth_config_sane() ->
+    {ok, Opts} = application:get_env(rabbit, ssl_options),
+    case proplists:get_value(verify, Opts) of
+        verify_peer -> true;
+        V -> rabbit_log:warning("TLS peer verification (authentication) is "
+                                "disabled, ssl_options.verify value used: ~p. "
+                                "See https://www.rabbitmq.com/ssl.html#peer-verification to learn more.", [V]),
+             false
+    end.
+
+%% Maps config-friendly SAN type names to OTP 'public_key' atoms;
+%% unknown values pass through unchanged.
+otp_san_type(dns) -> dNSName;
+otp_san_type(ip) -> iPAddress;
+otp_san_type(email) -> rfc822Name;
+otp_san_type(uri) -> uniformResourceIdentifier;
+otp_san_type(other_name) -> otherName;
+otp_san_type(Other) -> Other.
diff --git a/deps/rabbit/src/rabbit_stream_coordinator.erl b/deps/rabbit/src/rabbit_stream_coordinator.erl
new file mode 100644
index 0000000000..9e4890c894
--- /dev/null
+++ b/deps/rabbit/src/rabbit_stream_coordinator.erl
@@ -0,0 +1,949 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License
+%% at https://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and
+%% limitations under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% Copyright (c) 2012-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+-module(rabbit_stream_coordinator).
+
+-behaviour(ra_machine).
+
+-export([start/0]).
+-export([format_ra_event/2]).
+
+%% ra_machine callbacks
+-export([init/1,
+         apply/3,
+         state_enter/2,
+         init_aux/1,
+         handle_aux/6,
+         tick/2]).
+
+%% client API
+-export([recover/0,
+         start_cluster/1,
+         delete_cluster/2,
+         add_replica/2,
+         delete_replica/2]).
+
+-export([policy_changed/1]).
+
+%% Phase functions, exported so the aux handler can spawn them by name.
+-export([phase_repair_mnesia/2,
+         phase_start_cluster/1,
+         phase_delete_cluster/2,
+         phase_check_quorum/1,
+         phase_start_new_leader/1,
+         phase_stop_replicas/1,
+         phase_start_replica/3,
+         phase_delete_replica/2]).
+
+-export([log_overview/1]).
+
+%% Global lock key guarding concurrent coordinator bootstraps.
+-define(STREAM_COORDINATOR_STARTUP, {stream_coordinator_startup, self()}).
+-define(TICK_TIMEOUT, 60000).
+-define(RESTART_TIMEOUT, 1000).
+-define(PHASE_RETRY_TIMEOUT, 10000).
+-define(CMD_TIMEOUT, 30000).
+
+%% Machine state: streams maps StreamId -> per-stream state map;
+%% monitors maps monitored Pid -> {StreamId, leader | follower}.
+-record(?MODULE, {streams, monitors}).
+
+%% Starts (or restarts) the local coordinator ra server and joins it to
+%% an existing coordinator cluster if one is found on any cluster node.
+start() ->
+    Nodes = rabbit_mnesia:cluster_nodes(all),
+    ServerId = {?MODULE, node()},
+    case ra:restart_server(ServerId) of
+        {error, Reason} when Reason == not_started orelse
+                             Reason == name_not_registered ->
+            case ra:start_server(make_ra_conf(node(), Nodes)) of
+                ok ->
+                    global:set_lock(?STREAM_COORDINATOR_STARTUP),
+                    case find_members(Nodes) of
+                        [] ->
+                            %% We're the first (and maybe only) one
+                            ra:trigger_election(ServerId);
+                        Members ->
+                            %% What to do if we get a timeout?
+                            {ok, _, _} = ra:add_member(Members, ServerId, 30000)
+                    end,
+                    global:del_lock(?STREAM_COORDINATOR_STARTUP),
+                    _ = ra:members(ServerId),
+                    ok;
+                Error ->
+                    exit(Error)
+            end;
+        ok ->
+            ok;
+        Error ->
+            exit(Error)
+    end.
+
+%% Probes each node in turn for an existing coordinator member list.
+find_members([]) ->
+    [];
+find_members([Node | Nodes]) ->
+    case ra:members({?MODULE, Node}) of
+        {_, Members, _} ->
+            Members;
+        {error, noproc} ->
+            find_members(Nodes);
+        {timeout, _} ->
+            %% not sure what to do here
+            find_members(Nodes)
+    end.
+
+recover() ->
+    ra:restart_server({?MODULE, node()}).
+
+%% Client API: each call is translated into a coordinator command.
+start_cluster(Q) ->
+    process_command({start_cluster, #{queue => Q}}).
+
+delete_cluster(StreamId, ActingUser) ->
+    process_command({delete_cluster, #{stream_id => StreamId, acting_user => ActingUser}}).
+
+add_replica(StreamId, Node) ->
+    process_command({start_replica, #{stream_id => StreamId, node => Node,
+                                      retries => 1}}).
+
+policy_changed(StreamId) ->
+    process_command({policy_changed, #{stream_id => StreamId}}).
+
+delete_replica(StreamId, Node) ->
+    process_command({delete_replica, #{stream_id => StreamId, node => Node}}).
+
+%% Ensures the coordinator cluster exists (under the startup lock), then
+%% submits Cmd, trying each known server until one answers.
+process_command(Cmd) ->
+    global:set_lock(?STREAM_COORDINATOR_STARTUP),
+    Servers = ensure_coordinator_started(),
+    global:del_lock(?STREAM_COORDINATOR_STARTUP),
+    process_command(Servers, Cmd).
+
+process_command([], _Cmd) ->
+    {error, coordinator_unavailable};
+process_command([Server | Servers], {CmdName, _} = Cmd) ->
+    case ra:process_command(Server, Cmd, ?CMD_TIMEOUT) of
+        {timeout, _} ->
+            rabbit_log:warning("Coordinator timeout on server ~p when processing command ~p",
+                               [Server, CmdName]),
+            process_command(Servers, Cmd);
+        {error, noproc} ->
+            process_command(Servers, Cmd);
+        Reply ->
+            Reply
+    end.
+
+%% Restarts the local member if possible; otherwise bootstraps a new
+%% coordinator cluster or defers to members already registered globally.
+%% Returns the list of server ids to address commands to.
+ensure_coordinator_started() ->
+    Local = {?MODULE, node()},
+    AllNodes = all_nodes(),
+    case ra:restart_server(Local) of
+        {error, Reason} when Reason == not_started orelse
+                             Reason == name_not_registered ->
+            OtherNodes = all_nodes() -- [Local],
+            %% We can't use find_members/0 here as a process that timeouts means the cluster is up
+            case lists:filter(fun(N) -> global:whereis_name(N) =/= undefined end, OtherNodes) of
+                [] ->
+                    start_coordinator_cluster();
+                _ ->
+                    OtherNodes
+            end;
+        ok ->
+            AllNodes;
+        {error, {already_started, _}} ->
+            AllNodes;
+        _ ->
+            AllNodes
+    end.
+
+start_coordinator_cluster() ->
+    Nodes = rabbit_mnesia:cluster_nodes(running),
+    case ra:start_cluster([make_ra_conf(Node, Nodes) || Node <- Nodes]) of
+        {ok, Started, _} ->
+            Started;
+        {error, cluster_not_formed} ->
+            rabbit_log:warning("Stream coordinator cluster not formed", []),
+            []
+    end.
+
+%% Server ids for every running cluster node, local node first.
+all_nodes() ->
+    Nodes = rabbit_mnesia:cluster_nodes(running) -- [node()],
+    [{?MODULE, Node} || Node <- [node() | Nodes]].
+
+%% ra_machine init: empty stream and monitor maps.
+init(_Conf) ->
+    #?MODULE{streams = #{},
+             monitors = #{}}.
+ +apply(#{from := From}, {policy_changed, #{stream_id := StreamId}} = Cmd, + #?MODULE{streams = Streams0} = State) -> + case maps:get(StreamId, Streams0, undefined) of + undefined -> + {State, ok, []}; + #{conf := Conf, + state := running} -> + case rabbit_stream_queue:update_stream_conf(Conf) of + Conf -> + %% No changes, ensure we only trigger an election if it's a must + {State, ok, []}; + _ -> + {State, ok, [{mod_call, osiris_writer, stop, [Conf]}]} + end; + SState0 -> + Streams = maps:put(StreamId, add_pending_cmd(From, Cmd, SState0), Streams0), + {State#?MODULE{streams = Streams}, '$ra_no_reply', []} + + end; +apply(#{from := From}, {start_cluster, #{queue := Q}}, #?MODULE{streams = Streams} = State) -> + #{name := StreamId} = Conf0 = amqqueue:get_type_state(Q), + Conf = apply_leader_locator_strategy(Conf0, Streams), + case maps:is_key(StreamId, Streams) of + true -> + {State, '$ra_no_reply', wrap_reply(From, {error, already_started})}; + false -> + Phase = phase_start_cluster, + PhaseArgs = [amqqueue:set_type_state(Q, Conf)], + SState = #{state => start_cluster, + phase => Phase, + phase_args => PhaseArgs, + conf => Conf, + reply_to => From, + pending_cmds => [], + pending_replicas => []}, + rabbit_log:debug("rabbit_stream_coordinator: ~p entering phase_start_cluster", [StreamId]), + {State#?MODULE{streams = maps:put(StreamId, SState, Streams)}, '$ra_no_reply', + [{aux, {phase, StreamId, Phase, PhaseArgs}}]} + end; +apply(_Meta, {start_cluster_reply, Q}, #?MODULE{streams = Streams, + monitors = Monitors0} = State) -> + #{name := StreamId, + leader_pid := LeaderPid, + replica_pids := ReplicaPids} = Conf = amqqueue:get_type_state(Q), + SState0 = maps:get(StreamId, Streams), + Phase = phase_repair_mnesia, + PhaseArgs = [new, Q], + SState = SState0#{conf => Conf, + phase => Phase, + phase_args => PhaseArgs}, + Monitors = lists:foldl(fun(Pid, M) -> + maps:put(Pid, {StreamId, follower}, M) + end, maps:put(LeaderPid, {StreamId, leader}, Monitors0), ReplicaPids), + 
MonitorActions = [{monitor, process, Pid} || Pid <- ReplicaPids ++ [LeaderPid]], + rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p " + "after start_cluster_reply", [StreamId, Phase]), + {State#?MODULE{streams = maps:put(StreamId, SState, Streams), + monitors = Monitors}, ok, + MonitorActions ++ [{aux, {phase, StreamId, Phase, PhaseArgs}}]}; +apply(_Meta, {start_replica_failed, StreamId, Node, Retries, Reply}, + #?MODULE{streams = Streams0} = State) -> + rabbit_log:debug("rabbit_stream_coordinator: ~p start replica failed", [StreamId]), + case maps:get(StreamId, Streams0, undefined) of + undefined -> + {State, {error, not_found}, []}; + #{pending_replicas := Pending, + reply_to := From} = SState -> + Streams = Streams0#{StreamId => clear_stream_state(SState#{pending_replicas => + add_unique(Node, Pending)})}, + reply_and_run_pending( + From, StreamId, ok, Reply, + [{timer, {pipeline, + [{start_replica, #{stream_id => StreamId, + node => Node, + from => undefined, + retries => Retries + 1}}]}, + ?RESTART_TIMEOUT * Retries}], + State#?MODULE{streams = Streams}) + end; +apply(_Meta, {phase_finished, StreamId, Reply}, #?MODULE{streams = Streams0} = State) -> + rabbit_log:debug("rabbit_stream_coordinator: ~p phase finished", [StreamId]), + case maps:get(StreamId, Streams0, undefined) of + undefined -> + {State, {error, not_found}, []}; + #{reply_to := From} = SState -> + Streams = Streams0#{StreamId => clear_stream_state(SState)}, + reply_and_run_pending(From, StreamId, ok, Reply, [], State#?MODULE{streams = Streams}) + end; +apply(#{from := From}, {start_replica, #{stream_id := StreamId, node := Node, + retries := Retries}} = Cmd, + #?MODULE{streams = Streams0} = State) -> + case maps:get(StreamId, Streams0, undefined) of + undefined -> + case From of + undefined -> + {State, ok, []}; + _ -> + {State, '$ra_no_reply', wrap_reply(From, {error, not_found})} + end; + #{conf := Conf, + state := running} = SState0 -> + Phase = phase_start_replica, + PhaseArgs = 
[Node, Conf, Retries], + SState = update_stream_state(From, start_replica, Phase, PhaseArgs, SState0), + rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p on node ~p", + [StreamId, Phase, Node]), + {State#?MODULE{streams = Streams0#{StreamId => SState}}, '$ra_no_reply', + [{aux, {phase, StreamId, Phase, PhaseArgs}}]}; + SState0 -> + Streams = maps:put(StreamId, add_pending_cmd(From, Cmd, SState0), Streams0), + {State#?MODULE{streams = Streams}, '$ra_no_reply', []} + end; +apply(_Meta, {start_replica_reply, StreamId, Pid}, + #?MODULE{streams = Streams, monitors = Monitors0} = State) -> + case maps:get(StreamId, Streams, undefined) of + undefined -> + {State, {error, not_found}, []}; + #{conf := Conf0} = SState0 -> + #{replica_nodes := Replicas0, + replica_pids := ReplicaPids0} = Conf0, + {ReplicaPids, MaybePid} = delete_replica_pid(node(Pid), ReplicaPids0), + Conf = Conf0#{replica_pids => [Pid | ReplicaPids], + replica_nodes => add_unique(node(Pid), Replicas0)}, + Phase = phase_repair_mnesia, + PhaseArgs = [update, Conf], + rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p after start replica", [StreamId, Phase]), + #{pending_replicas := Pending} = SState0 = maps:get(StreamId, Streams), + SState = SState0#{conf => Conf, + phase => Phase, + phase_args => PhaseArgs, + pending_replicas => lists:delete(node(Pid), Pending)}, + Monitors1 = Monitors0#{Pid => {StreamId, follower}}, + Monitors = case MaybePid of + [P] -> maps:remove(P, Monitors1); + _ -> Monitors1 + end, + {State#?MODULE{streams = Streams#{StreamId => SState}, + monitors = Monitors}, ok, + [{monitor, process, Pid}, {aux, {phase, StreamId, Phase, PhaseArgs}}]} + end; +apply(#{from := From}, {delete_replica, #{stream_id := StreamId, node := Node}} = Cmd, + #?MODULE{streams = Streams0, + monitors = Monitors0} = State) -> + case maps:get(StreamId, Streams0, undefined) of + undefined -> + {State, '$ra_no_reply', wrap_reply(From, {error, not_found})}; + #{conf := Conf0, + state := running, + 
pending_replicas := Pending0} = SState0 -> + Replicas0 = maps:get(replica_nodes, Conf0), + ReplicaPids0 = maps:get(replica_pids, Conf0), + case lists:member(Node, Replicas0) of + false -> + reply_and_run_pending(From, StreamId, '$ra_no_reply', ok, [], State); + true -> + [Pid] = lists:filter(fun(P) -> node(P) == Node end, ReplicaPids0), + ReplicaPids = lists:delete(Pid, ReplicaPids0), + Replicas = lists:delete(Node, Replicas0), + Pending = lists:delete(Node, Pending0), + Conf = Conf0#{replica_pids => ReplicaPids, + replica_nodes => Replicas}, + Phase = phase_delete_replica, + PhaseArgs = [Node, Conf], + SState = update_stream_state(From, delete_replica, + Phase, PhaseArgs, + SState0#{conf => Conf0, + pending_replicas => Pending}), + Monitors = maps:remove(Pid, Monitors0), + rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p on node ~p", [StreamId, Phase, Node]), + {State#?MODULE{monitors = Monitors, + streams = Streams0#{StreamId => SState}}, + '$ra_no_reply', + [{demonitor, process, Pid}, + {aux, {phase, StreamId, Phase, PhaseArgs}}]} + end; + SState0 -> + Streams = maps:put(StreamId, add_pending_cmd(From, Cmd, SState0), Streams0), + {State#?MODULE{streams = Streams}, '$ra_no_reply', []} + end; +apply(#{from := From}, {delete_cluster, #{stream_id := StreamId, + acting_user := ActingUser}} = Cmd, + #?MODULE{streams = Streams0, monitors = Monitors0} = State) -> + case maps:get(StreamId, Streams0, undefined) of + undefined -> + {State, '$ra_no_reply', wrap_reply(From, {ok, 0})}; + #{conf := Conf, + state := running} = SState0 -> + ReplicaPids = maps:get(replica_pids, Conf), + LeaderPid = maps:get(leader_pid, Conf), + Monitors = lists:foldl(fun(Pid, M) -> + maps:remove(Pid, M) + end, Monitors0, ReplicaPids ++ [LeaderPid]), + Phase = phase_delete_cluster, + PhaseArgs = [Conf, ActingUser], + SState = update_stream_state(From, delete_cluster, Phase, PhaseArgs, SState0), + Demonitors = [{demonitor, process, Pid} || Pid <- [LeaderPid | ReplicaPids]], + 
rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p", + [StreamId, Phase]), + {State#?MODULE{monitors = Monitors, + streams = Streams0#{StreamId => SState}}, '$ra_no_reply', + Demonitors ++ [{aux, {phase, StreamId, Phase, PhaseArgs}}]}; + SState0 -> + Streams = maps:put(StreamId, add_pending_cmd(From, Cmd, SState0), Streams0), + {State#?MODULE{streams = Streams}, '$ra_no_reply', []} + end; +apply(_Meta, {delete_cluster_reply, StreamId}, #?MODULE{streams = Streams} = State0) -> + #{reply_to := From, + pending_cmds := Pending} = maps:get(StreamId, Streams), + State = State0#?MODULE{streams = maps:remove(StreamId, Streams)}, + rabbit_log:debug("rabbit_stream_coordinator: ~p finished delete_cluster_reply", + [StreamId]), + Actions = [{ra, pipeline_command, [{?MODULE, node()}, Cmd]} || Cmd <- Pending], + {State, ok, Actions ++ wrap_reply(From, {ok, 0})}; +apply(_Meta, {down, Pid, _Reason} = Cmd, #?MODULE{streams = Streams, + monitors = Monitors0} = State) -> + case maps:get(Pid, Monitors0, undefined) of + {StreamId, Role} -> + Monitors = maps:remove(Pid, Monitors0), + case maps:get(StreamId, Streams, undefined) of + #{state := delete_cluster} -> + {State#?MODULE{monitors = Monitors}, ok, []}; + undefined -> + {State#?MODULE{monitors = Monitors}, ok, []}; + #{state := running, + conf := #{replica_pids := Pids} = Conf0, + pending_cmds := Pending0} = SState0 -> + case Role of + leader -> + rabbit_log:info("rabbit_stream_coordinator: ~p leader is down, starting election", [StreamId]), + Phase = phase_stop_replicas, + PhaseArgs = [Conf0], + SState = update_stream_state(undefined, leader_election, Phase, PhaseArgs, SState0), + Events = [{demonitor, process, P} || P <- Pids], + Monitors1 = lists:foldl(fun(P, M) -> + maps:remove(P, M) + end, Monitors, Pids), + rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p", [StreamId, Phase]), + {State#?MODULE{monitors = Monitors1, + streams = Streams#{StreamId => SState}}, + ok, Events ++ [{aux, {phase, StreamId, Phase, 
PhaseArgs}}]}; + follower -> + case rabbit_misc:is_process_alive(maps:get(leader_pid, Conf0)) of + true -> + Phase = phase_start_replica, + PhaseArgs = [node(Pid), Conf0, 1], + SState = update_stream_state(undefined, + replica_restart, + Phase, PhaseArgs, + SState0), + rabbit_log:debug("rabbit_stream_coordinator: ~p replica on node ~p is down, entering ~p", [StreamId, node(Pid), Phase]), + {State#?MODULE{monitors = Monitors, + streams = Streams#{StreamId => SState}}, + ok, [{aux, {phase, StreamId, Phase, PhaseArgs}}]}; + false -> + SState = SState0#{pending_cmds => Pending0 ++ [Cmd]}, + reply_and_run_pending(undefined, StreamId, ok, ok, [], State#?MODULE{streams = Streams#{StreamId => SState}}) + end + end; + #{pending_cmds := Pending0} = SState0 -> + SState = SState0#{pending_cmds => Pending0 ++ [Cmd]}, + {State#?MODULE{streams = Streams#{StreamId => SState}}, ok, []} + end; + undefined -> + {State, ok, []} + end; +apply(_Meta, {start_leader_election, StreamId, NewEpoch, Offsets}, + #?MODULE{streams = Streams} = State) -> + #{conf := Conf0} = SState0 = maps:get(StreamId, Streams), + #{leader_node := Leader, + replica_nodes := Replicas, + replica_pids := ReplicaPids0} = Conf0, + NewLeader = find_max_offset(Offsets), + rabbit_log:info("rabbit_stream_coordinator: ~p starting new leader on node ~p", + [StreamId, NewLeader]), + {ReplicaPids, _} = delete_replica_pid(NewLeader, ReplicaPids0), + Conf = rabbit_stream_queue:update_stream_conf( + Conf0#{epoch => NewEpoch, + leader_node => NewLeader, + replica_nodes => lists:delete(NewLeader, Replicas ++ [Leader]), + replica_pids => ReplicaPids}), + Phase = phase_start_new_leader, + PhaseArgs = [Conf], + SState = SState0#{conf => Conf, + phase => Phase, + phase_args => PhaseArgs}, + rabbit_log:debug("rabbit_stream_coordinator: ~p entering phase_start_new_leader", + [StreamId]), + {State#?MODULE{streams = Streams#{StreamId => SState}}, ok, + [{aux, {phase, StreamId, Phase, PhaseArgs}}]}; +apply(_Meta, {leader_elected, 
StreamId, NewLeaderPid}, + #?MODULE{streams = Streams, monitors = Monitors0} = State) -> + rabbit_log:info("rabbit_stream_coordinator: ~p leader elected", [StreamId]), + #{conf := Conf0, + pending_cmds := Pending0} = SState0 = maps:get(StreamId, Streams), + #{leader_pid := LeaderPid, + replica_nodes := Replicas} = Conf0, + Conf = Conf0#{leader_pid => NewLeaderPid}, + Phase = phase_repair_mnesia, + PhaseArgs = [update, Conf], + Pending = Pending0 ++ [{start_replica, #{stream_id => StreamId, node => R, + retries => 1, from => undefined}} + || R <- Replicas], + SState = SState0#{conf => Conf, + phase => Phase, + phase_args => PhaseArgs, + pending_replicas => Replicas, + pending_cmds => Pending}, + Monitors = maps:put(NewLeaderPid, {StreamId, leader}, maps:remove(LeaderPid, Monitors0)), + rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p after " + "leader election", [StreamId, Phase]), + {State#?MODULE{streams = Streams#{StreamId => SState}, + monitors = Monitors}, ok, + [{monitor, process, NewLeaderPid}, + {aux, {phase, StreamId, Phase, PhaseArgs}}]}; +apply(_Meta, {replicas_stopped, StreamId}, #?MODULE{streams = Streams} = State) -> + case maps:get(StreamId, Streams, undefined) of + undefined -> + {State, {error, not_found}, []}; + #{conf := Conf0} = SState0 -> + Phase = phase_check_quorum, + Conf = Conf0#{replica_pids => []}, + PhaseArgs = [Conf], + SState = SState0#{conf => Conf, + phase => Phase, + phase_args => PhaseArgs}, + rabbit_log:info("rabbit_stream_coordinator: ~p all replicas have been stopped, " + "checking quorum available", [StreamId]), + {State#?MODULE{streams = Streams#{StreamId => SState}}, ok, + [{aux, {phase, StreamId, Phase, PhaseArgs}}]} + end; +apply(_Meta, {stream_updated, #{name := StreamId} = Conf}, #?MODULE{streams = Streams} = State) -> + SState0 = maps:get(StreamId, Streams), + Phase = phase_repair_mnesia, + PhaseArgs = [update, Conf], + SState = SState0#{conf => Conf, + phase => Phase, + phase_args => PhaseArgs}, + 
    rabbit_log:debug("rabbit_stream_coordinator: ~p entering ~p after"
                     " stream_updated", [StreamId, Phase]),
    {State#?MODULE{streams = Streams#{StreamId => SState}}, ok,
     [{aux, {phase, StreamId, Phase, PhaseArgs}}]};
%% Timer-driven re-injection: a {pipeline, Cmds} timeout re-submits the queued
%% commands to this Ra cluster via mod_call effects.
apply(_, {timeout, {pipeline, Cmds}}, State) ->
    Actions = [{mod_call, ra, pipeline_command, [{?MODULE, node()}, Cmd]} || Cmd <- Cmds],
    {State, ok, Actions};
%% An {aux, Cmd} timeout is simply forwarded to the aux handler.
apply(_, {timeout, {aux, Cmd}}, State) ->
    {State, ok, [{aux, Cmd}]};
%% Catch-all: commands carrying an embedded 'from' (queued while a phase was
%% running) are re-applied with 'from' restored into the metadata so replies
%% reach the original caller.
apply(Meta, {_, #{from := From}} = Cmd, State) ->
    ?MODULE:apply(Meta#{from => From}, Cmd, State).

%% Ra state_enter callback. On becoming leader: re-monitor all known pids,
%% restart any aux phase that was in flight, and schedule restarts for
%% pending replicas of every stream.
state_enter(leader, #?MODULE{streams = Streams, monitors = Monitors}) ->
    maps:fold(fun(_, #{conf := #{name := StreamId},
                       pending_replicas := Pending,
                       state := State,
                       phase := Phase,
                       phase_args := PhaseArgs}, Acc) ->
                      restart_aux_phase(State, Phase, PhaseArgs, StreamId) ++
                          pipeline_restart_replica_cmds(StreamId, Pending) ++
                          Acc
              end, [{monitor, process, P} || P <- maps:keys(Monitors)], Streams);
state_enter(follower, #?MODULE{monitors = Monitors}) ->
    %% Followers keep monitors alive so a later promotion sees 'down' events.
    [{monitor, process, P} || P <- maps:keys(Monitors)];
state_enter(recover, _) ->
    %% Tag this process for memory accounting during recovery.
    put('$rabbit_vm_category', ?MODULE),
    [];
state_enter(_, _) ->
    [].

%% A stream whose recorded state is 'running' has no phase to resume;
%% anything else re-emits the aux phase effect so the work restarts.
restart_aux_phase(running, _, _, _) ->
    [];
restart_aux_phase(_State, Phase, PhaseArgs, StreamId) ->
    [{aux, {phase, StreamId, Phase, PhaseArgs}}].

%% Schedules (after ?RESTART_TIMEOUT) a start_replica command for each
%% pending replica node of StreamId.
pipeline_restart_replica_cmds(StreamId, Pending) ->
    [{timer, {pipeline, [{start_replica, #{stream_id => StreamId,
                                           node => Node,
                                           from => undefined,
                                           retries => 1}}
                         || Node <- Pending]}, ?RESTART_TIMEOUT}].

%% Ra tick callback: periodically ask the aux process to check whether the
%% coordinator Ra cluster needs resizing to match the rabbit cluster.
tick(_Ts, _State) ->
    [{aux, maybe_resize_coordinator_cluster}].
%% Compares the coordinator's Ra membership with the rabbit cluster and
%% reconciles both directions: adds a coordinator member on newly running
%% rabbit nodes and removes members on nodes gone from the cluster.
%% Runs in a spawned worker (monitored by the aux handler via the returned
%% pid); returns that pid.
maybe_resize_coordinator_cluster() ->
    spawn(fun() ->
                  case ra:members({?MODULE, node()}) of
                      {_, Members, _} ->
                          MemberNodes = [Node || {_, Node} <- Members],
                          Running = rabbit_mnesia:cluster_nodes(running),
                          All = rabbit_mnesia:cluster_nodes(all),
                          case Running -- MemberNodes of
                              [] ->
                                  ok;
                              New ->
                                  rabbit_log:warning("New rabbit node(s) detected, "
                                                     "adding stream coordinator in: ~p", [New]),
                                  add_members(Members, New)
                          end,
                          case MemberNodes -- All of
                              [] ->
                                  ok;
                              Old ->
                                  rabbit_log:warning("Rabbit node(s) removed from the cluster, "
                                                     "deleting stream coordinator in: ~p", [Old]),
                                  remove_members(Members, Old)
                          end;
                      _ ->
                          %% Could not read membership (e.g. no quorum); try again
                          %% on a later tick.
                          ok
                  end
          end).

%% Starts a coordinator Ra server on each node and joins it to the cluster.
%% Failures are logged and skipped; remaining nodes are still attempted.
add_members(_, []) ->
    ok;
add_members(Members, [Node | Nodes]) ->
    Conf = make_ra_conf(Node, [N || {_, N} <- Members]),
    case ra:start_server(Conf) of
        ok ->
            case ra:add_member(Members, {?MODULE, Node}) of
                {ok, NewMembers, _} ->
                    add_members(NewMembers, Nodes);
                _ ->
                    add_members(Members, Nodes)
            end;
        Error ->
            rabbit_log:warning("Stream coordinator failed to start on node ~p : ~p",
                               [Node, Error]),
            add_members(Members, Nodes)
    end.

%% Removes the coordinator member on each listed node, carrying the updated
%% membership forward; removal failures are ignored (best effort).
remove_members(_, []) ->
    ok;
remove_members(Members, [Node | Nodes]) ->
    case ra:remove_member(Members, {?MODULE, Node}) of
        {ok, NewMembers, _} ->
            remove_members(NewMembers, Nodes);
        _ ->
            remove_members(Members, Nodes)
    end.

%% Aux state: {PhaseMonitors :: #{pid() => PhaseCmd}, ResizerPid | undefined}.
init_aux(_Name) ->
    {#{}, undefined}.

%% TODO ensure the dead writer is restarted as a replica at some point in time, increasing timeout?
%% Aux handler, active on the Ra leader only. Tracks two things:
%% - the cluster-resize worker pid (second tuple element), and
%% - monitored phase worker pids mapped to the phase command that spawned
%%   them, so a crashed phase can be retried.
handle_aux(leader, _, maybe_resize_coordinator_cluster, {Monitors, undefined}, LogState, _) ->
    Pid = maybe_resize_coordinator_cluster(),
    {no_reply, {Monitors, Pid}, LogState, [{monitor, process, aux, Pid}]};
handle_aux(leader, _, maybe_resize_coordinator_cluster, AuxState, LogState, _) ->
    %% Coordinator resizing is still happening, let's ignore this tick event
    {no_reply, AuxState, LogState};
handle_aux(leader, _, {down, Pid, _}, {Monitors, Pid}, LogState, _) ->
    %% Coordinator resizing has finished
    {no_reply, {Monitors, undefined}, LogState};
handle_aux(leader, _, {phase, _, Fun, Args} = Cmd, {Monitors, Coordinator}, LogState, _) ->
    %% Spawn the phase worker (each phase_* function spawns and returns a pid)
    %% and monitor it, remembering the command for retry on abnormal exit.
    Pid = erlang:apply(?MODULE, Fun, Args),
    Actions = [{monitor, process, aux, Pid}],
    {no_reply, {maps:put(Pid, Cmd, Monitors), Coordinator}, LogState, Actions};
handle_aux(leader, _, {down, Pid, normal}, {Monitors, Coordinator}, LogState, _) ->
    %% Phase worker completed cleanly; just drop its monitor entry.
    {no_reply, {maps:remove(Pid, Monitors), Coordinator}, LogState};
handle_aux(leader, _, {down, Pid, Reason}, {Monitors0, Coordinator}, LogState, _) ->
    %% The phase has failed, let's retry it
    case maps:get(Pid, Monitors0) of
        {phase, StreamId, phase_start_new_leader, Args} ->
            %% A failed leader start falls back to re-checking quorum, which
            %% restarts the whole election rather than retrying the start.
            rabbit_log:warning("Error while starting new leader for stream queue ~p, "
                               "restarting election: ~p", [StreamId, Reason]),
            Monitors = maps:remove(Pid, Monitors0),
            Cmd = {phase, StreamId, phase_check_quorum, Args},
            {no_reply, {Monitors, Coordinator}, LogState, [{timer, {aux, Cmd}, ?PHASE_RETRY_TIMEOUT}]};
        {phase, StreamId, Fun, _} = Cmd ->
            %% Any other phase is retried verbatim after a delay.
            rabbit_log:warning("Error while executing coordinator phase ~p for stream queue ~p ~p",
                               [Fun, StreamId, Reason]),
            Monitors = maps:remove(Pid, Monitors0),
            {no_reply, {Monitors, Coordinator}, LogState, [{timer, {aux, Cmd}, ?PHASE_RETRY_TIMEOUT}]}
    end;
handle_aux(_, _, _, AuxState, LogState, _) ->
    %% Followers and unknown messages: no-op.
    {no_reply, AuxState, LogState}.
%% Replies to From (if any), flushes all commands queued for StreamId back
%% into this Ra cluster via pipeline_command effects, and clears the queue.
%% Reply is the value returned from apply/3; WrapReply is what the caller
%% receives, wrapped so the client can tell it apart from other ra replies.
reply_and_run_pending(From, StreamId, Reply, WrapReply, Actions0, #?MODULE{streams = Streams} = State) ->
    #{pending_cmds := Pending} = SState0 = maps:get(StreamId, Streams),
    AuxActions = [{mod_call, ra, pipeline_command, [{?MODULE, node()}, Cmd]}
                  || Cmd <- Pending],
    SState = maps:put(pending_cmds, [], SState0),
    Actions = case From of
                  undefined ->
                      AuxActions ++ Actions0;
                  _ ->
                      wrap_reply(From, WrapReply) ++ AuxActions ++ Actions0
              end,
    {State#?MODULE{streams = Streams#{StreamId => SState}}, Reply, Actions}.

%% Ra reply effect carrying a {wrap_reply, _} tagged payload.
wrap_reply(From, Reply) ->
    [{reply, From, {wrap_reply, Reply}}].

%% Appends a command (with its caller recorded under 'from') to the stream's
%% pending queue, to be re-run once the current phase finishes.
add_pending_cmd(From, {CmdName, CmdMap}, #{pending_cmds := Pending0} = StreamState) ->
    %% Remove from pending the leader election and automatic replica restart when
    %% the command is delete_cluster
    Pending = case CmdName of
                  delete_cluster ->
                      lists:filter(fun({down, _, _}) ->
                                           false;
                                      (_) ->
                                           true
                                   end, Pending0);
                  _ ->
                      Pending0
              end,
    maps:put(pending_cmds, Pending ++ [{CmdName, maps:put(from, From, CmdMap)}],
             StreamState).

%% Marks a stream idle: no caller waiting, no phase in flight.
clear_stream_state(StreamState) ->
    StreamState#{reply_to => undefined,
                 state => running,
                 phase => undefined,
                 phase_args => undefined}.

%% Records the caller, logical state and the phase (plus its args) now
%% executing for this stream.
update_stream_state(From, State, Phase, PhaseArgs, StreamState) ->
    StreamState#{reply_to => From,
                 state => State,
                 phase => Phase,
                 phase_args => PhaseArgs}.

%% Phase: start an osiris replica for the stream on Node. Spawns a worker
%% which reports the outcome back to the coordinator via pipeline_command;
%% returns the worker pid (monitored by handle_aux).
phase_start_replica(Node, #{name := StreamId} = Conf0,
                    Retries) ->
    spawn(
      fun() ->
              %% If a new leader hasn't yet been elected, this will fail with a badmatch
              %% as get_reader_context returns a no proc. An unhandled failure will
              %% crash this monitored process and restart it later.
              %% TODO However, do we want that crash in the log? We might need to try/catch
              %% to provide a log message instead as it's 'expected'. We could try to
              %% verify first that the leader is alive, but there would still be potential
              %% for a race condition in here.
              try
                  case osiris_replica:start(Node, Conf0) of
                      {ok, Pid} ->
                          ra:pipeline_command({?MODULE, node()},
                                              {start_replica_reply, StreamId, Pid});
                      {error, already_present} ->
                          ra:pipeline_command({?MODULE, node()}, {phase_finished, StreamId, ok});
                      {error, {already_started, _}} ->
                          ra:pipeline_command({?MODULE, node()}, {phase_finished, StreamId, ok});
                      {error, Reason} = Error ->
                          rabbit_log:warning("Error while starting replica for ~p : ~p",
                                             [maps:get(name, Conf0), Reason]),
                          ra:pipeline_command({?MODULE, node()},
                                              {start_replica_failed, StreamId, Node, Retries, Error})
                  end
              catch _:E->
                      rabbit_log:warning("Error while starting replica for ~p : ~p",
                                         [maps:get(name, Conf0), E]),
                      ra:pipeline_command({?MODULE, node()},
                                          {start_replica_failed, StreamId, Node, Retries, {error, E}})
              end
      end).

%% Phase: delete the replica on Node, then notify the coordinator with the
%% (already pruned) stream configuration.
phase_delete_replica(Node, Conf) ->
    spawn(
      fun() ->
              ok = osiris_replica:delete(Node, Conf),
              ra:pipeline_command({?MODULE, node()}, {stream_updated, Conf})
      end).

%% Phase: stop every replica ahead of a leader election, then report
%% replicas_stopped so the coordinator can check quorum.
phase_stop_replicas(#{replica_nodes := Replicas,
                      name := StreamId} = Conf) ->
    spawn(
      fun() ->
              [try
                   osiris_replica:stop(Node, Conf)
               catch _:{{nodedown, _}, _} ->
                       %% It could be the old leader that is still down, it's normal.
                       ok
               end || Node <- Replicas],
              ra:pipeline_command({?MODULE, node()}, {replicas_stopped, StreamId})
      end).

%% Phase: promote Node to writer. Stops any replica running there first,
%% then starts the osiris writer and reports leader_elected with the new
%% (or already-known) leader pid.
phase_start_new_leader(#{name := StreamId, leader_node := Node, leader_pid := LPid} = Conf) ->
    spawn(fun() ->
                  osiris_replica:stop(Node, Conf),
                  %% If the start fails, the monitor will capture the crash and restart it
                  case osiris_writer:start(Conf) of
                      {ok, Pid} ->
                          ra:pipeline_command({?MODULE, node()},
                                              {leader_elected, StreamId, Pid});
                      {error, already_present} ->
                          ra:pipeline_command({?MODULE, node()},
                                              {leader_elected, StreamId, LPid});
                      {error, {already_started, Pid}} ->
                          ra:pipeline_command({?MODULE, node()},
                                              {leader_elected, StreamId, Pid})
                  end
          end).
%% Phase: collect replica offsets and, if a majority of members responded,
%% kick off a leader election at Epoch + 1. Crashes on purpose when quorum
%% is not available so the aux monitor retries later.
phase_check_quorum(#{name := StreamId,
                     epoch := Epoch,
                     replica_nodes := Nodes} = Conf) ->
    spawn(fun() ->
                  Offsets = find_replica_offsets(Conf),
                  case is_quorum(length(Nodes) + 1, length(Offsets)) of
                      true ->
                          ra:pipeline_command({?MODULE, node()},
                                              {start_leader_election, StreamId, Epoch + 1, Offsets});
                      false ->
                          %% Let's crash this process so the monitor will restart it
                          exit({not_enough_quorum, StreamId})
                  end
          end).

%% Queries every member (replicas plus the old leader) over rpc for its
%% highest log offset. Nodes that are down, not running rabbit, or error
%% in any way are silently excluded from the result.
find_replica_offsets(#{replica_nodes := Nodes,
                       leader_node := Leader} = Conf) ->
    lists:foldl(
      fun(Node, Acc) ->
              try
                  %% osiris_log:overview/1 needs the directory - last item of the list
                  case rpc:call(Node, rabbit, is_running, []) of
                      false ->
                          Acc;
                      true ->
                          case rpc:call(Node, ?MODULE, log_overview, [Conf]) of
                              {badrpc, nodedown} ->
                                  Acc;
                              {_Range, Offsets} ->
                                  [{Node, select_highest_offset(Offsets)} | Acc]
                          end
                  end
              catch
                  _:_ ->
                      Acc
              end
      end, [], Nodes ++ [Leader]).

%% 'empty' when the member has no log yet; otherwise the last entry.
%% NOTE(review): assumes osiris_log:overview/1 returns offsets in ascending
%% order so lists:last/1 is the highest — confirm against osiris docs.
select_highest_offset([]) ->
    empty;
select_highest_offset(Offsets) ->
    lists:last(Offsets).

%% Runs on the remote node (via rpc above): overview of the local log dir.
log_overview(Config) ->
    Dir = osiris_log:directory(Config),
    osiris_log:overview(Dir).

%% Picks the node with the most up-to-date log: highest epoch first, then
%% highest offset within the same epoch; members with an empty log sort last.
find_max_offset(Offsets) ->
    [{Node, _} | _] = lists:sort(fun({_, {Ao, E}}, {_, {Bo, E}}) ->
                                         Ao >= Bo;
                                    ({_, {_, Ae}}, {_, {_, Be}}) ->
                                         Ae >= Be;
                                    ({_, empty}, _) ->
                                         false;
                                    (_, {_, empty}) ->
                                         true
                                 end, Offsets),
    Node.

%% Majority check; the single-member cluster is trivially quorate.
is_quorum(1, 1) ->
    true;
is_quorum(NumReplicas, NumAlive) ->
    NumAlive >= ((NumReplicas div 2) + 1).
%% Phase: reconcile the mnesia queue record with the stream state.
%% 'new' declares the queue record; 'update' rewrites the stored pid and
%% type state to match the current stream configuration.
phase_repair_mnesia(new, Q) ->
    spawn(fun() ->
                  Reply = rabbit_amqqueue:internal_declare(Q, false),
                  #{name := StreamId} = amqqueue:get_type_state(Q),
                  ra:pipeline_command({?MODULE, node()}, {phase_finished, StreamId, Reply})
          end);

phase_repair_mnesia(update, #{reference := QName,
                              leader_pid := LeaderPid,
                              name := StreamId} = Conf) ->
    Fun = fun (Q) ->
                  amqqueue:set_type_state(amqqueue:set_pid(Q, LeaderPid), Conf)
          end,
    spawn(fun() ->
                  case rabbit_misc:execute_mnesia_transaction(
                         fun() ->
                                 rabbit_amqqueue:update(QName, Fun)
                         end) of
                      not_found ->
                          %% This can happen during recovery
                          %% (the transient record may not exist yet; rebuild it
                          %% from the durable copy).
                          [Q] = mnesia:dirty_read(rabbit_durable_queue, QName),
                          rabbit_amqqueue:ensure_rabbit_queue_record_is_initialized(Fun(Q));
                      _ ->
                          ok
                  end,
                  ra:pipeline_command({?MODULE, node()}, {phase_finished, StreamId, ok})
          end).

%% Phase: create the whole osiris cluster (writer + replicas) for a new
%% queue and report the resulting queue record back to the coordinator.
phase_start_cluster(Q0) ->
    spawn(
      fun() ->
              case osiris:start_cluster(amqqueue:get_type_state(Q0)) of
                  {ok, #{leader_pid := Pid} = Conf} ->
                      Q = amqqueue:set_type_state(amqqueue:set_pid(Q0, Pid), Conf),
                      ra:pipeline_command({?MODULE, node()}, {start_cluster_reply, Q});
                  {error, {already_started, _}} ->
                      ra:pipeline_command({?MODULE, node()}, {start_cluster_finished, {error, already_started}})
              end
      end).

%% Phase: tear down the osiris cluster and the queue record, then notify
%% the coordinator that deletion finished.
phase_delete_cluster(#{name := StreamId,
                       reference := QName} = Conf, ActingUser) ->
    spawn(
      fun() ->
              ok = osiris:delete_cluster(Conf),
              _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
              ra:pipeline_command({?MODULE, node()}, {delete_cluster_reply, StreamId})
      end).

%% Formatter installed in the ra conf so ra events arrive tagged for this
%% coordinator.
format_ra_event(ServerId, Evt) ->
    {stream_coordinator_event, ServerId, Evt}.
%% Builds the ra server configuration for a coordinator member on Node,
%% with all given Nodes as initial members. Tick interval comes from the
%% rabbit 'stream_tick_interval' env, defaulting to ?TICK_TIMEOUT.
make_ra_conf(Node, Nodes) ->
    UId = ra:new_uid(ra_lib:to_binary(?MODULE)),
    Formatter = {?MODULE, format_ra_event, []},
    Members = [{?MODULE, N} || N <- Nodes],
    TickTimeout = application:get_env(rabbit, stream_tick_interval,
                                      ?TICK_TIMEOUT),
    #{cluster_name => ?MODULE,
      id => {?MODULE, Node},
      uid => UId,
      friendly_name => atom_to_list(?MODULE),
      metrics_key => ?MODULE,
      initial_members => Members,
      log_init_args => #{uid => UId},
      tick_timeout => TickTimeout,
      machine => {module, ?MODULE, #{}},
      ra_event_formatter => Formatter}.

%% Prepends Node unless already present (no reordering of existing items).
add_unique(Node, Nodes) ->
    case lists:member(Node, Nodes) of
        true ->
            Nodes;
        _ ->
            [Node | Nodes]
    end.

%% Splits ReplicaPids into {PidsOnOtherNodes, PidsOnNode}.
delete_replica_pid(Node, ReplicaPids) ->
    lists:partition(fun(P) -> node(P) =/= Node end, ReplicaPids).

%% Rewrites leader_node/replica_nodes in Conf according to the queue's
%% leader-locator strategy. Must stay deterministic for a given input:
%% "client-local" keeps the declaring node; "random" hashes the stream id;
%% "least-leaders" picks the member currently leading the fewest streams.
apply_leader_locator_strategy(#{leader_locator_strategy := <<"client-local">>} = Conf, _) ->
    Conf;
apply_leader_locator_strategy(#{leader_node := Leader,
                                replica_nodes := Replicas0,
                                leader_locator_strategy := <<"random">>,
                                name := StreamId} = Conf, _) ->
    Replicas = [Leader | Replicas0],
    ClusterSize = length(Replicas),
    %% phash2 of the stream id: "random" is deterministic per stream, not
    %% per call.
    Hash = erlang:phash2(StreamId),
    Pos = (Hash rem ClusterSize) + 1,
    NewLeader = lists:nth(Pos, Replicas),
    NewReplicas = lists:delete(NewLeader, Replicas),
    Conf#{leader_node => NewLeader,
          replica_nodes => NewReplicas};
apply_leader_locator_strategy(#{leader_node := Leader,
                                replica_nodes := Replicas0,
                                leader_locator_strategy := <<"least-leaders">>} = Conf,
                              Streams) ->
    Replicas = [Leader | Replicas0],
    Counters0 = maps:from_list([{R, 0} || R <- Replicas]),
    Counters = maps:to_list(maps:fold(fun(_Key, #{conf := #{leader_node := L}}, Acc) ->
                                              maps:update_with(L, fun(V) -> V + 1 end, 0, Acc)
                                      end, Counters0, Streams)),
    Ordered = lists:sort(fun({_, V1}, {_, V2}) ->
                                 V1 =< V2
                         end, Counters),
    %% We could have potentially introduced nodes that are not in the list of replicas if
    %% initial cluster size is smaller than the cluster size. Let's select the first one
    %% that is on the list of replicas
    NewLeader = select_first_matching_node(Ordered, Replicas),
    NewReplicas = lists:delete(NewLeader, Replicas),
    Conf#{leader_node => NewLeader,
          replica_nodes => NewReplicas}.

%% First node of the ordered counter list that is also a replica.
%% Crashes (function_clause) if none matches — cannot happen while every
%% replica has an entry in the counters map.
select_first_matching_node([{N, _} | Rest], Replicas) ->
    case lists:member(N, Replicas) of
        true -> N;
        false -> select_first_matching_node(Rest, Replicas)
    end.
diff --git a/deps/rabbit/src/rabbit_stream_queue.erl b/deps/rabbit/src/rabbit_stream_queue.erl
new file mode 100644
index 0000000000..4e428495b0
--- /dev/null
+++ b/deps/rabbit/src/rabbit_stream_queue.erl
@@ -0,0 +1,734 @@
%% The contents of this file are subject to the Mozilla Public License
%% Version 1.1 (the "License"); you may not use this file except in
%% compliance with the License. You may obtain a copy of the License
%% at https://www.mozilla.org/MPL/
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and
%% limitations under the License.
%%
%% The Original Code is RabbitMQ.
%%
%% Copyright (c) 2012-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_stream_queue).

-behaviour(rabbit_queue_type).

%% rabbit_queue_type callbacks
-export([is_enabled/0,
         declare/2,
         delete/4,
         purge/1,
         policy_changed/1,
         recover/2,
         is_recoverable/1,
         consume/3,
         cancel/5,
         handle_event/2,
         deliver/2,
         settle/4,
         credit/4,
         dequeue/4,
         info/2,
         init/1,
         close/1,
         update/2,
         state_info/1,
         stat/1,
         capabilities/0]).

%% CLI/policy entry points
-export([set_retention_policy/3]).
-export([add_replica/3,
         delete_replica/3]).
-export([format_osiris_event/2]).
-export([update_stream_conf/1]).

-include("rabbit.hrl").
-include("amqqueue.hrl").
%% Keys answered by info/2 when 'all_items' is requested.
-define(INFO_KEYS, [name, durable, auto_delete, arguments, leader, members, online, state,
                    messages, messages_ready, messages_unacknowledged, committed_offset,
                    policy, operator_policy, effective_policy_definition, type]).

-type appender_seq() :: non_neg_integer().

%% Per-consumer reader state: an osiris log handle plus credit accounting.
-record(stream, {name :: rabbit_types:r('queue'),
                 credit :: integer(),
                 max :: non_neg_integer(),
                 start_offset = 0 :: non_neg_integer(),
                 listening_offset = 0 :: non_neg_integer(),
                 log :: undefined | osiris_log:state()}).

%% Per-channel client state: the writer pid, publisher sequence/correlation
%% tracking for confirms, credit-flow flag and one #stream{} per consumer tag.
-record(stream_client, {name :: term(),
                        leader :: pid(),
                        next_seq = 1 :: non_neg_integer(),
                        correlation = #{} :: #{appender_seq() => term()},
                        soft_limit :: non_neg_integer(),
                        slow = false :: boolean(),
                        readers = #{} :: #{term() => #stream{}}
                        }).

-import(rabbit_queue_type_util, [args_policy_lookup/3]).

-type client() :: #stream_client{}.

-spec is_enabled() -> boolean().
%% Stream queues are gated behind the 'stream_queue' feature flag.
is_enabled() ->
    rabbit_feature_flags:is_enabled(stream_queue).

-spec declare(amqqueue:amqqueue(), node()) ->
    {'new' | 'existing', amqqueue:amqqueue()} |
    {protocol_error, Type :: atom(), Reason :: string(), Args :: term()}.
%% Validates the declaration (no auto-delete, no exclusive, durable only)
%% then starts the osiris cluster via the stream coordinator.
declare(Q0, Node) when ?amqqueue_is_stream(Q0) ->
    case rabbit_queue_type_util:run_checks(
           [fun rabbit_queue_type_util:check_auto_delete/1,
            fun rabbit_queue_type_util:check_exclusive/1,
            fun rabbit_queue_type_util:check_non_durable/1],
           Q0) of
        ok ->
            start_cluster(Q0, Node);
        Err ->
            Err
    end.
%% Asks the stream coordinator to create the osiris cluster for Q0 with
%% Node as initial leader. Maps the coordinator's reply onto the
%% rabbit_queue_type declare contract; on failure the queue record created
%% during declaration is cleaned up.
start_cluster(Q0, Node) ->
    Arguments = amqqueue:get_arguments(Q0),
    QName = amqqueue:get_name(Q0),
    Opts = amqqueue:get_options(Q0),
    ActingUser = maps:get(user, Opts, ?UNKNOWN_USER),
    Conf0 = make_stream_conf(Node, Q0),
    case rabbit_stream_coordinator:start_cluster(
           amqqueue:set_type_state(Q0, Conf0)) of
        {ok, {error, already_started}, _} ->
            %% NOTE(review): the format argument here is Node, not the queue
            %% name the message suggests — confirm intent upstream.
            {protocol_error, precondition_failed, "safe queue name already in use '~s'",
             [Node]};
        {ok, {created, Q}, _} ->
            rabbit_event:notify(queue_created,
                                [{name, QName},
                                 {durable, true},
                                 {auto_delete, false},
                                 {arguments, Arguments},
                                 {user_who_performed_action,
                                  ActingUser}]),
            {new, Q};
        {ok, {error, Error}, _} ->
            _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
            {protocol_error, internal_error, "Cannot declare a queue '~s' on node '~s': ~255p",
             [rabbit_misc:rs(QName), node(), Error]};
        {ok, {existing, Q}, _} ->
            {existing, Q};
        {error, coordinator_unavailable} ->
            _ = rabbit_amqqueue:internal_delete(QName, ActingUser),
            {protocol_error, internal_error,
             "Cannot declare a queue '~s' on node '~s': coordinator unavailable",
             [rabbit_misc:rs(QName), node()]}
    end.

-spec delete(amqqueue:amqqueue(), boolean(),
             boolean(), rabbit_types:username()) ->
    rabbit_types:ok(non_neg_integer()) |
    rabbit_types:error(in_use | not_empty).
%% IfUnused/IfEmpty are ignored for streams; deletion goes through the
%% coordinator which tears down the whole osiris cluster.
delete(Q, _IfUnused, _IfEmpty, ActingUser) ->
    Name = maps:get(name, amqqueue:get_type_state(Q)),
    {ok, Reply, _} = rabbit_stream_coordinator:delete_cluster(Name, ActingUser),
    Reply.

-spec purge(amqqueue:amqqueue()) ->
    {ok, non_neg_integer()} | {error, term()}.
%% Streams are append-only logs; purge is not supported.
purge(_) ->
    {error, not_supported}.

-spec policy_changed(amqqueue:amqqueue()) -> 'ok'.
%% Forwards policy changes to the coordinator (best effort).
policy_changed(Q) ->
    Name = maps:get(name, amqqueue:get_type_state(Q)),
    _ = rabbit_stream_coordinator:policy_changed(Name),
    ok.

%% Declare-time stats; streams report no counts here.
stat(_) ->
    {ok, 0, 0}.
%% Consumer registration. Streams require: a prefetch count (> 0), manual
%% acks, and no global QoS; the consumer must also be on a node hosting a
%% member of the stream. The x-stream-offset argument selects where the
%% attached reader starts: first | last | next | {timestamp, T} | Offset.
consume(Q, #{prefetch_count := 0}, _)
  when ?amqqueue_is_stream(Q) ->
    {protocol_error, precondition_failed, "consumer prefetch count is not set for '~s'",
     [rabbit_misc:rs(amqqueue:get_name(Q))]};
consume(Q, #{no_ack := true}, _)
  when ?amqqueue_is_stream(Q) ->
    {protocol_error, not_implemented,
     "automatic acknowledgement not supported by stream queues ~s",
     [rabbit_misc:rs(amqqueue:get_name(Q))]};
consume(Q, #{limiter_active := true}, _State)
  when ?amqqueue_is_stream(Q) ->
    {error, global_qos_not_supported_for_queue_type};
consume(Q, Spec, QState0) when ?amqqueue_is_stream(Q) ->
    %% Messages should include the offset as a custom header.
    case check_queue_exists_in_local_node(Q) of
        ok ->
            #{no_ack := NoAck,
              channel_pid := ChPid,
              prefetch_count := ConsumerPrefetchCount,
              consumer_tag := ConsumerTag,
              exclusive_consume := ExclusiveConsume,
              args := Args,
              ok_msg := OkMsg} = Spec,
            QName = amqqueue:get_name(Q),
            Offset = case rabbit_misc:table_lookup(Args, <<"x-stream-offset">>) of
                         undefined ->
                             next;
                         {_, <<"first">>} ->
                             first;
                         {_, <<"last">>} ->
                             last;
                         {_, <<"next">>} ->
                             next;
                         {timestamp, V} ->
                             {timestamp, V};
                         {_, V} ->
                             V
                     end,
            rabbit_core_metrics:consumer_created(ChPid, ConsumerTag, ExclusiveConsume,
                                                 not NoAck, QName,
                                                 ConsumerPrefetchCount, false,
                                                 up, Args),
            %% FIXME: reply needs to be sent before the stream begins sending
            %% really it should be sent by the stream queue process like classic queues
            %% do
            maybe_send_reply(ChPid, OkMsg),
            QState = begin_stream(QState0, Q, ConsumerTag, Offset,
                                  ConsumerPrefetchCount),
            {ok, QState, []};
        Err ->
            Err
    end.

%% Pid of the stream member on this node: prefer the local writer,
%% otherwise the first local replica. Crashes (badmatch) if there is no
%% local member — consume/3 checks this beforehand.
get_local_pid(#{leader_pid := Pid}) when node(Pid) == node() ->
    Pid;
get_local_pid(#{replica_pids := ReplicaPids}) ->
    [Local | _] = lists:filter(fun(Pid) ->
                                       node(Pid) == node()
                               end, ReplicaPids),
    Local.
%% Attaches a reader to the local stream member for consumer Tag, starting
%% at Offset, and registers an offset listener so we are notified when new
%% committed data is available. Max is both initial credit and the cap.
begin_stream(#stream_client{readers = Readers0} = State,
             Q, Tag, Offset, Max) ->
    LocalPid = get_local_pid(amqqueue:get_type_state(Q)),
    {ok, Seg0} = osiris:init_reader(LocalPid, Offset),
    NextOffset = osiris_log:next_offset(Seg0) - 1,
    osiris:register_offset_listener(LocalPid, NextOffset),
    %% TODO: avoid double calls to the same process
    StartOffset = case Offset of
                      first -> NextOffset;
                      last -> NextOffset;
                      next -> NextOffset;
                      {timestamp, _} -> NextOffset;
                      _ -> Offset
                  end,
    Str0 = #stream{name = amqqueue:get_name(Q),
                   credit = Max,
                   start_offset = StartOffset,
                   listening_offset = NextOffset,
                   log = Seg0,
                   max = Max},
    State#stream_client{readers = Readers0#{Tag => Str0}}.

%% Drops the consumer's reader state and emits the usual metrics/events.
%% NOTE(review): the removed #stream's osiris log handle is not explicitly
%% closed here (close/1 does that for remaining readers) — verify leak-free.
cancel(_Q, ConsumerTag, OkMsg, ActingUser, #stream_client{readers = Readers0,
                                                          name = QName} = State) ->
    Readers = maps:remove(ConsumerTag, Readers0),
    rabbit_core_metrics:consumer_deleted(self(), ConsumerTag, QName),
    rabbit_event:notify(consumer_deleted, [{consumer_tag, ConsumerTag},
                                           {channel, self()},
                                           {queue, QName},
                                           {user_who_performed_action, ActingUser}]),
    maybe_send_reply(self(), OkMsg),
    {ok, State#stream_client{readers = Readers}}.

%% AMQP 1.0-style credit: tops up the consumer's credit, immediately reads
%% as many entries as the new credit allows, and — when Drain is true —
%% zeroes the remaining credit and reports it back via send_drained.
credit(CTag, Credit, Drain, #stream_client{readers = Readers0,
                                           name = Name,
                                           leader = Leader} = State) ->
    {Readers1, Msgs} = case Readers0 of
                           #{CTag := #stream{credit = Credit0} = Str0} ->
                               Str1 = Str0#stream{credit = Credit0 + Credit},
                               {Str, Msgs0} = stream_entries(Name, Leader, Str1),
                               {Readers0#{CTag => Str}, Msgs0};
                           _ ->
                               {Readers0, []}
                       end,
    {Readers, Actions} =
        case Drain of
            true ->
                case Readers1 of
                    #{CTag := #stream{credit = Credit1} = Str2} ->
                        {Readers0#{CTag => Str2#stream{credit = 0}}, [{send_drained, {CTag, Credit1}}]};
                    _ ->
                        {Readers1, []}
                end;
            false ->
                {Readers1, []}
        end,
    {State#stream_client{readers = Readers}, [{send_credit_reply, length(Msgs)},
                                              {deliver, CTag, true, Msgs}] ++ Actions}.
%% rabbit_queue_type deliver callback: writes the delivery into each
%% targeted stream's client state. Stateless targets are currently ignored.
deliver(QSs, #delivery{confirm = Confirm} = Delivery) ->
    lists:foldl(
      fun({_Q, stateless}, {Qs, Actions}) ->
              %% TODO what do we do with stateless?
              %% QRef = amqqueue:get_pid(Q),
              %% ok = rabbit_fifo_client:untracked_enqueue(
              %%        [QRef], Delivery#delivery.message),
              {Qs, Actions};
         ({Q, S0}, {Qs, Actions}) ->
              S = deliver(Confirm, Delivery, S0),
              {[{Q, S} | Qs], Actions}
      end, {[], []}, QSs).

%% Appends one message to the osiris writer with the next publisher
%% sequence number. Numeric MsgIds are remembered for confirm correlation;
%% when outstanding confirms reach the soft limit, the channel is blocked
%% through credit_flow until osiris_written events drain the backlog.
deliver(_Confirm, #delivery{message = Msg, msg_seq_no = MsgId},
        #stream_client{name = Name,
                       leader = LeaderPid,
                       next_seq = Seq,
                       correlation = Correlation0,
                       soft_limit = SftLmt,
                       slow = Slow0} = State) ->
    ok = osiris:write(LeaderPid, Seq, msg_to_iodata(Msg)),
    Correlation = case MsgId of
                      undefined ->
                          Correlation0;
                      _ when is_number(MsgId) ->
                          Correlation0#{Seq => MsgId}
                  end,
    Slow = case maps:size(Correlation) >= SftLmt of
               true when not Slow0 ->
                   credit_flow:block(Name),
                   true;
               Bool ->
                   Bool
           end,
    State#stream_client{next_seq = Seq + 1,
                        correlation = Correlation,
                        slow = Slow}.

-spec dequeue(_, _, _, client()) -> no_return().
%% basic.get is not meaningful for an offset-based log.
dequeue(_, _, _, #stream_client{name = Name}) ->
    {protocol_error, not_implemented, "basic.get not supported by stream queues ~s",
     [rabbit_misc:rs(Name)]}.
%% osiris_written: the writer confirmed sequence numbers Corrs. Settle the
%% corresponding publisher MsgIds and unblock credit_flow once outstanding
%% confirms fall back below the soft limit.
handle_event({osiris_written, From, Corrs}, State = #stream_client{correlation = Correlation0,
                                                                   soft_limit = SftLmt,
                                                                   slow = Slow0,
                                                                   name = Name}) ->
    MsgIds = maps:values(maps:with(Corrs, Correlation0)),
    Correlation = maps:without(Corrs, Correlation0),
    Slow = case maps:size(Correlation) < SftLmt of
               true when Slow0 ->
                   credit_flow:unblock(Name),
                   false;
               _ ->
                   Slow0
           end,
    {ok, State#stream_client{correlation = Correlation,
                             slow = Slow}, [{settled, From, MsgIds}]};
%% osiris_offset: new committed data is readable. Re-read entries for every
%% consumer and emit deliver actions per tag.
handle_event({osiris_offset, _From, _Offs}, State = #stream_client{leader = Leader,
                                                                   readers = Readers0,
                                                                   name = Name}) ->
    %% offset isn't actually needed as we use the atomic to read the
    %% current committed
    {Readers, TagMsgs} = maps:fold(
                           fun (Tag, Str0, {Acc, TM}) ->
                                   {Str, Msgs} = stream_entries(Name, Leader, Str0),
                                   %% HACK for now, better to just return but
                                   %% tricky with acks credits
                                   %% that also evaluate the stream
                                   % gen_server:cast(self(), {stream_delivery, Tag, Msgs}),
                                   {Acc#{Tag => Str}, [{Tag, Leader, Msgs} | TM]}
                           end, {#{}, []}, Readers0),
    Ack = true,
    Deliveries = [{deliver, Tag, Ack, OffsetMsg}
                  || {Tag, _LeaderPid, OffsetMsg} <- TagMsgs],
    {ok, State#stream_client{readers = Readers}, Deliveries}.

%% A stream is recoverable on this node if the node hosts any member
%% (leader or replica).
is_recoverable(Q) ->
    Node = node(),
    #{replica_nodes := Nodes,
      leader_node := Leader} = amqqueue:get_type_state(Q),
    lists:member(Node, Nodes ++ [Leader]).

%% rabbit_queue_type recover callback; per-queue recovery never fails here
%% so the failed list is always empty.
recover(_VHost, Queues) ->
    lists:foldl(
      fun (Q0, {R0, F0}) ->
              {ok, Q} = recover(Q0),
              {[Q | R0], F0}
      end, {[], []}, Queues).
%% Acking (settling) returns the consumed count as credit and immediately
%% reads more entries. nack/reject are not supported for streams.
settle(complete, CTag, MsgIds, #stream_client{readers = Readers0,
                                              name = Name,
                                              leader = Leader} = State) ->
    Credit = length(MsgIds),
    {Readers, Msgs} = case Readers0 of
                          #{CTag := #stream{credit = Credit0} = Str0} ->
                              Str1 = Str0#stream{credit = Credit0 + Credit},
                              {Str, Msgs0} = stream_entries(Name, Leader, Str1),
                              {Readers0#{CTag => Str}, Msgs0};
                          _ ->
                              {Readers0, []}
                      end,
    {State#stream_client{readers = Readers}, [{deliver, CTag, true, Msgs}]};
settle(_, _, _, #stream_client{name = Name}) ->
    {protocol_error, not_implemented,
     "basic.nack and basic.reject not supported by stream queues ~s",
     [rabbit_misc:rs(Name)]}.

%% Queue info; 'all_items' expands to ?INFO_KEYS.
info(Q, all_items) ->
    info(Q, ?INFO_KEYS);
info(Q, Items) ->
    lists:foldr(fun(Item, Acc) ->
                        [{Item, i(Item, Q)} | Acc]
                end, [], Items).

%% One clause per info key; unknown keys answer ''.
i(name, Q) when ?is_amqqueue(Q) -> amqqueue:get_name(Q);
i(durable, Q) when ?is_amqqueue(Q) -> amqqueue:is_durable(Q);
i(auto_delete, Q) when ?is_amqqueue(Q) -> amqqueue:is_auto_delete(Q);
i(arguments, Q) when ?is_amqqueue(Q) -> amqqueue:get_arguments(Q);
i(leader, Q) when ?is_amqqueue(Q) ->
    #{leader_node := Leader} = amqqueue:get_type_state(Q),
    Leader;
i(members, Q) when ?is_amqqueue(Q) ->
    #{replica_nodes := Nodes} = amqqueue:get_type_state(Q),
    Nodes;
i(online, Q) ->
    #{replica_pids := ReplicaPids,
      leader_pid := LeaderPid} = amqqueue:get_type_state(Q),
    [node(P) || P <- ReplicaPids ++ [LeaderPid], rabbit_misc:is_process_alive(P)];
i(state, Q) when ?is_amqqueue(Q) ->
    %% TODO the coordinator should answer this, I guess??
    running;
i(messages, Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    case ets:lookup(queue_coarse_metrics, QName) of
        [{_, _, _, M, _}] ->
            M;
        [] ->
            0
    end;
i(messages_ready, Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    case ets:lookup(queue_coarse_metrics, QName) of
        [{_, MR, _, _, _}] ->
            MR;
        [] ->
            0
    end;
i(messages_unacknowledged, Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    case ets:lookup(queue_coarse_metrics, QName) of
        [{_, _, MU, _, _}] ->
            MU;
        [] ->
            0
    end;
i(committed_offset, Q) ->
    %% TODO should it be on a metrics table?
    %% Read straight from the local osiris writer counters.
    Data = osiris_counters:overview(),
    maps:get(committed_offset,
             maps:get({osiris_writer, amqqueue:get_name(Q)}, Data));
i(policy, Q) ->
    case rabbit_policy:name(Q) of
        none -> '';
        Policy -> Policy
    end;
i(operator_policy, Q) ->
    case rabbit_policy:name_op(Q) of
        none -> '';
        Policy -> Policy
    end;
i(effective_policy_definition, Q) ->
    case rabbit_policy:effective_definition(Q) of
        undefined -> [];
        Def -> Def
    end;
i(type, _) ->
    stream;
i(_, _) ->
    ''.

%% Creates a fresh channel-side client for queue Q.
init(Q) when ?is_amqqueue(Q) ->
    Leader = amqqueue:get_pid(Q),
    {ok, SoftLimit} = application:get_env(rabbit, stream_messages_soft_limit),
    #stream_client{name = amqqueue:get_name(Q),
                   leader = Leader,
                   soft_limit = SoftLimit}.

%% Closes every open reader's osiris log handle.
close(#stream_client{readers = Readers}) ->
    _ = maps:map(fun (_, #stream{log = Log}) ->
                         osiris_log:close(Log)
                 end, Readers),
    ok.

%% Queue record updates need no client-side state change for streams.
update(_, State) ->
    State.

state_info(_) ->
    #{}.

%% Parses and applies a max-age retention policy (e.g. "7D") by rewriting
%% the queue's type state in mnesia.
set_retention_policy(Name, VHost, Policy) ->
    case rabbit_amqqueue:check_max_age(Policy) of
        {error, _} = E ->
            E;
        MaxAge ->
            QName = rabbit_misc:r(VHost, queue, Name),
            Fun = fun(Q) ->
                          Conf = amqqueue:get_type_state(Q),
                          amqqueue:set_type_state(Q, Conf#{max_age => MaxAge})
                  end,
            case rabbit_misc:execute_mnesia_transaction(
                   fun() -> rabbit_amqqueue:update(QName, Fun) end) of
                not_found ->
                    {error, not_found};
                _ ->
                    ok
            end
    end.
%% Adds a stream replica on Node for queue Name in VHost via the stream
%% coordinator. Returns ok | {error, Reason} (classic/quorum queues and
%% nodes not running are rejected up front).
add_replica(VHost, Name, Node) ->
    with_stream_queue(
      VHost, Name, Node,
      fun(StreamId) -> rabbit_stream_coordinator:add_replica(StreamId, Node) end).

%% Removes the stream replica on Node for queue Name in VHost via the
%% stream coordinator. Same return contract as add_replica/3.
delete_replica(VHost, Name, Node) ->
    with_stream_queue(
      VHost, Name, Node,
      fun(StreamId) -> rabbit_stream_coordinator:delete_replica(StreamId, Node) end).

%% Shared lookup/validation for replica management (previously duplicated
%% verbatim in add_replica/3 and delete_replica/3): resolves the queue,
%% rejects non-stream queue types and nodes that are not running, then
%% invokes CoordFun with the stream id and unwraps the coordinator reply.
with_stream_queue(VHost, Name, Node, CoordFun) ->
    QName = rabbit_misc:r(VHost, queue, Name),
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q} when ?amqqueue_is_classic(Q) ->
            {error, classic_queue_not_supported};
        {ok, Q} when ?amqqueue_is_quorum(Q) ->
            {error, quorum_queue_not_supported};
        {ok, Q} when ?amqqueue_is_stream(Q) ->
            case lists:member(Node, rabbit_mnesia:cluster_nodes(running)) of
                false ->
                    {error, node_not_running};
                true ->
                    #{name := StreamId} = amqqueue:get_type_state(Q),
                    {ok, Reply, _} = CoordFun(StreamId),
                    Reply
            end;
        E ->
            %% {error, not_found} from the lookup is passed through.
            E
    end.
%% Builds the initial osiris stream configuration for a queue declared on
%% Node: retention (max bytes/age), segment size, leader locator, the
%% replica set (initial-cluster-size minus the leader, chosen from the
%% whole cluster) and the event formatter routed back to this module.
make_stream_conf(Node, Q) ->
    QName = amqqueue:get_name(Q),
    Name = queue_name(QName),
    %% MaxLength = args_policy_lookup(<<"max-length">>, fun min/2, Q),
    MaxBytes = args_policy_lookup(<<"max-length-bytes">>, fun min/2, Q),
    MaxAge = max_age(args_policy_lookup(<<"max-age">>, fun max_age/2, Q)),
    MaxSegmentSize = args_policy_lookup(<<"max-segment-size">>, fun min/2, Q),
    LeaderLocator = queue_leader_locator(args_policy_lookup(<<"queue-leader-locator">>,
                                                            fun res_arg/2, Q)),
    InitialClusterSize = initial_cluster_size(args_policy_lookup(<<"initial-cluster-size">>,
                                                                 fun res_arg/2, Q)),
    Replicas0 = rabbit_mnesia:cluster_nodes(all) -- [Node],
    Replicas = select_stream_nodes(InitialClusterSize - 1, Replicas0),
    Formatter = {?MODULE, format_osiris_event, [QName]},
    %% Drop retention criteria that were not configured.
    Retention = lists:filter(fun({_, R}) ->
                                     R =/= undefined
                             end, [{max_bytes, MaxBytes},
                                   {max_age, MaxAge}]),
    add_if_defined(max_segment_size, MaxSegmentSize, #{reference => QName,
                                                       name => Name,
                                                       retention => Retention,
                                                       leader_locator_strategy => LeaderLocator,
                                                       leader_node => Node,
                                                       replica_nodes => Replicas,
                                                       event_formatter => Formatter,
                                                       epoch => 1}).

%% Picks Size replica nodes out of All. The local node is always included
%% when available; the rest are chosen at random without replacement.
select_stream_nodes(Size, All) when length(All) =< Size ->
    All;
select_stream_nodes(Size, All) ->
    Node = node(),
    case lists:member(Node, All) of
        true ->
            select_stream_nodes(Size - 1, lists:delete(Node, All), [Node]);
        false ->
            select_stream_nodes(Size, All, [])
    end.

select_stream_nodes(0, _, Selected) ->
    Selected;
select_stream_nodes(Size, Rest, Selected) ->
    S = lists:nth(rand:uniform(length(Rest)), Rest),
    select_stream_nodes(Size - 1, lists:delete(S, Rest), [S | Selected]).
%% Refresh the retention-related settings of an existing stream
%% configuration from the queue's current arguments and policy.
%% Returns Conf unchanged if the queue cannot be looked up.
update_stream_conf(#{reference := QName} = Conf) ->
    case rabbit_amqqueue:lookup(QName) of
        {ok, Q} ->
            MaxBytes = args_policy_lookup(<<"max-length-bytes">>, fun min/2, Q),
            MaxAge = max_age(args_policy_lookup(<<"max-age">>, fun max_age/2, Q)),
            MaxSegmentSize = args_policy_lookup(<<"max-segment-size">>, fun min/2, Q),
            %% Keep only the retention criteria that are configured.
            Retention = lists:filter(fun({_, R}) ->
                                             R =/= undefined
                                     end, [{max_bytes, MaxBytes},
                                           {max_age, MaxAge}]),
            add_if_defined(max_segment_size, MaxSegmentSize, Conf#{retention => Retention});
        _ ->
            Conf
    end.

%% Put Key => Value into Map unless Value is undefined.
add_if_defined(_, undefined, Map) ->
    Map;
add_if_defined(Key, Value, Map) ->
    maps:put(Key, Value, Map).

%% Wrap an osiris event so it arrives at the session as a queue_event cast.
format_osiris_event(Evt, QRef) ->
    {'$gen_cast', {queue_event, QRef, Evt}}.

%% Normalise a max-age value: binaries are parsed and validated, anything
%% else is passed through untouched.
max_age(undefined) ->
    undefined;
max_age(Bin) when is_binary(Bin) ->
    rabbit_amqqueue:check_max_age(Bin);
max_age(Age) ->
    Age.

%% Merge an argument and a policy max-age: the smaller (stricter) wins.
max_age(Age1, Age2) ->
    min(rabbit_amqqueue:check_max_age(Age1), rabbit_amqqueue:check_max_age(Age2)).

%% Default leader locator strategy when none is configured.
queue_leader_locator(undefined) -> <<"client-local">>;
queue_leader_locator(Val) -> Val.

%% Default initial cluster size is the number of currently running nodes.
initial_cluster_size(undefined) ->
    length(rabbit_mnesia:cluster_nodes(running));
initial_cluster_size(Val) ->
    Val.

%% Resolve policy value vs queue argument: the argument takes precedence.
res_arg(PolVal, undefined) -> PolVal;
res_arg(_, ArgVal) -> ArgVal.

%% Derive a filesystem-safe osiris stream name from the queue resource.
%% A timestamp is appended so a re-declared queue gets a fresh name.
queue_name(#resource{virtual_host = VHost, name = Name}) ->
    Timestamp = erlang:integer_to_binary(erlang:system_time()),
    osiris_util:to_base64uri(erlang:binary_to_list(<<VHost/binary, "_", Name/binary, "_",
                                                     Timestamp/binary>>)).

%% Stream recovery is handled centrally by the stream coordinator.
recover(Q) ->
    rabbit_stream_coordinator:recover(),
    {ok, Q}.

%% Ensure the stream has a member (leader or replica) on this node; used
%% to reject operations that require local access to the log.
check_queue_exists_in_local_node(Q) ->
    Conf = amqqueue:get_type_state(Q),
    AllNodes = [maps:get(leader_node, Conf) | maps:get(replica_nodes, Conf)],
    case lists:member(node(), AllNodes) of
        true ->
            ok;
        false ->
            %% Fixed wording: was "does not a have a replica".
            {protocol_error, precondition_failed,
             "queue '~s' does not have a replica on the local node",
             [rabbit_misc:rs(amqqueue:get_name(Q))]}
    end.
%% Send a protocol command back to the channel, unless there is nothing
%% to send (undefined).
maybe_send_reply(_ChPid, undefined) -> ok;
maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg).

%% Read messages for a consumer from its current stream position,
%% bounded by its available credit.
stream_entries(Name, Id, Str) ->
    stream_entries(Name, Id, Str, []).

%% Main read loop. While credit remains, read the next parsed chunk from
%% the osiris log; when the end of the stream is reached, register an
%% offset listener with the leader so the consumer is woken when new
%% data arrives past the currently listened-to offset.
stream_entries(Name, LeaderPid,
               #stream{name = QName,
                       credit = Credit,
                       start_offset = StartOffs,
                       listening_offset = LOffs,
                       log = Seg0} = Str0, MsgIn)
  when Credit > 0 ->
    case osiris_log:read_chunk_parsed(Seg0) of
        {end_of_stream, Seg} ->
            NextOffset = osiris_log:next_offset(Seg),
            case NextOffset > LOffs of
                true ->
                    %% Only register a listener if we are not already
                    %% listening for this (or a later) offset.
                    osiris:register_offset_listener(LeaderPid, NextOffset),
                    {Str0#stream{log = Seg,
                                 listening_offset = NextOffset}, MsgIn};
                false ->
                    {Str0#stream{log = Seg}, MsgIn}
            end;
        {Records, Seg} ->
            %% Decode each record, attach its stream offset as a header,
            %% and drop anything before the consumer's start offset.
            Msgs = [begin
                        Msg0 = binary_to_msg(QName, B),
                        Msg = rabbit_basic:add_header(<<"x-stream-offset">>,
                                                      long, O, Msg0),
                        {Name, LeaderPid, O, false, Msg}
                    end || {O, B} <- Records,
                           O >= StartOffs],

            NumMsgs = length(Msgs),

            Str = Str0#stream{credit = Credit - NumMsgs,
                              log = Seg},
            case Str#stream.credit < 1 of
                true ->
                    %% we are done here
                    {Str, MsgIn ++ Msgs};
                false ->
                    %% if there are fewer Msgs than Entries0 it means there were non-events
                    %% in the log and we should recurse and try again
                    stream_entries(Name, LeaderPid, Str, MsgIn ++ Msgs)
            end
    end;
%% No credit left: return the state and accumulated messages unchanged.
stream_entries(_Name, _Id, Str, Msgs) ->
    {Str, Msgs}.
%% Decode a raw stream record (AMQP 1.0-encoded message record) into an
%% AMQP 0-9-1 #basic_message{} routed via the exchange/routing-key stored
%% in its message annotations.
binary_to_msg(#resource{virtual_host = VHost,
                        kind = queue,
                        name = QName}, Data) ->
    R0 = rabbit_msg_record:init(Data),
    %% if the message annotation isn't present the data most likely came from
    %% the rabbitmq-stream plugin so we'll choose defaults that simulate use
    %% of the direct exchange
    {utf8, Exchange} = rabbit_msg_record:message_annotation(<<"x-exchange">>,
                                                            R0, {utf8, <<>>}),
    {utf8, RoutingKey} = rabbit_msg_record:message_annotation(<<"x-routing-key">>,
                                                              R0, {utf8, QName}),
    {Props, Payload} = rabbit_msg_record:to_amqp091(R0),
    XName = #resource{kind = exchange,
                      virtual_host = VHost,
                      name = Exchange},
    %% class_id 60 is the AMQP 0-9-1 "basic" class.
    Content = #content{class_id = 60,
                       properties = Props,
                       properties_bin = none,
                       payload_fragments_rev = [Payload]},
    {ok, Msg} = rabbit_basic:message(XName, RoutingKey, Content),
    Msg.


%% Encode an AMQP 0-9-1 message for storage in the stream log, preserving
%% its original exchange and (first) routing key as message annotations so
%% binary_to_msg/2 can reconstruct them on the way out.
msg_to_iodata(#basic_message{exchange_name = #resource{name = Exchange},
                             routing_keys = [RKey | _],
                             content = Content}) ->
    #content{properties = Props,
             payload_fragments_rev = Payload} =
        rabbit_binary_parser:ensure_content_decoded(Content),
    R0 = rabbit_msg_record:from_amqp091(Props, lists:reverse(Payload)),
    %% TODO durable?
    R = rabbit_msg_record:add_message_annotations(
          #{<<"x-exchange">> => {utf8, Exchange},
            <<"x-routing-key">> => {utf8, RKey}}, R0),
    rabbit_msg_record:to_iodata(R).

%% Declare which policies, queue arguments and consumer arguments the
%% stream queue type understands.
capabilities() ->
    #{policies => [<<"max-length-bytes">>, <<"max-age">>, <<"max-segment-size">>,
                   <<"queue-leader-locator">>, <<"initial-cluster-size">>],
      queue_arguments => [<<"x-dead-letter-exchange">>, <<"x-dead-letter-routing-key">>,
                          <<"x-max-length">>, <<"x-max-length-bytes">>,
                          <<"x-single-active-consumer">>, <<"x-queue-type">>,
                          <<"x-max-age">>, <<"x-max-segment-size">>,
                          <<"x-initial-cluster-size">>, <<"x-queue-leader-locator">>],
      consumer_arguments => [<<"x-stream-offset">>],
      server_named => false}.
diff --git a/deps/rabbit/src/rabbit_sup.erl b/deps/rabbit/src/rabbit_sup.erl new file mode 100644 index 0000000000..06643b155d --- /dev/null +++ b/deps/rabbit/src/rabbit_sup.erl @@ -0,0 +1,109 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_sup). + +-behaviour(supervisor). + +-export([start_link/0, start_child/1, start_child/2, start_child/3, start_child/4, + start_supervisor_child/1, start_supervisor_child/2, + start_supervisor_child/3, + start_restartable_child/1, start_restartable_child/2, + start_delayed_restartable_child/1, start_delayed_restartable_child/2, + stop_child/1]). + +-export([init/1]). + +-include("rabbit.hrl"). + +-define(SERVER, ?MODULE). + +%%---------------------------------------------------------------------------- + +-spec start_link() -> rabbit_types:ok_pid_or_error(). + +start_link() -> supervisor:start_link({local, ?SERVER}, ?MODULE, []). + +-spec start_child(atom()) -> 'ok'. + +start_child(Mod) -> start_child(Mod, []). + +-spec start_child(atom(), [any()]) -> 'ok'. + +start_child(Mod, Args) -> start_child(Mod, Mod, Args). + +-spec start_child(atom(), atom(), [any()]) -> 'ok'. + +start_child(ChildId, Mod, Args) -> + child_reply(supervisor:start_child( + ?SERVER, + {ChildId, {Mod, start_link, Args}, + transient, ?WORKER_WAIT, worker, [Mod]})). + +-spec start_child(atom(), atom(), atom(), [any()]) -> 'ok'. + +start_child(ChildId, Mod, Fun, Args) -> + child_reply(supervisor:start_child( + ?SERVER, + {ChildId, {Mod, Fun, Args}, + transient, ?WORKER_WAIT, worker, [Mod]})). + +-spec start_supervisor_child(atom()) -> 'ok'. + +start_supervisor_child(Mod) -> start_supervisor_child(Mod, []). + +-spec start_supervisor_child(atom(), [any()]) -> 'ok'. 
+ +start_supervisor_child(Mod, Args) -> start_supervisor_child(Mod, Mod, Args). + +-spec start_supervisor_child(atom(), atom(), [any()]) -> 'ok'. + +start_supervisor_child(ChildId, Mod, Args) -> + child_reply(supervisor:start_child( + ?SERVER, + {ChildId, {Mod, start_link, Args}, + transient, infinity, supervisor, [Mod]})). + +-spec start_restartable_child(atom()) -> 'ok'. + +start_restartable_child(M) -> start_restartable_child(M, [], false). + +-spec start_restartable_child(atom(), [any()]) -> 'ok'. + +start_restartable_child(M, A) -> start_restartable_child(M, A, false). + +-spec start_delayed_restartable_child(atom()) -> 'ok'. + +start_delayed_restartable_child(M) -> start_restartable_child(M, [], true). + +-spec start_delayed_restartable_child(atom(), [any()]) -> 'ok'. + +start_delayed_restartable_child(M, A) -> start_restartable_child(M, A, true). + +start_restartable_child(Mod, Args, Delay) -> + Name = list_to_atom(atom_to_list(Mod) ++ "_sup"), + child_reply(supervisor:start_child( + ?SERVER, + {Name, {rabbit_restartable_sup, start_link, + [Name, {Mod, start_link, Args}, Delay]}, + transient, infinity, supervisor, [rabbit_restartable_sup]})). + +-spec stop_child(atom()) -> rabbit_types:ok_or_error(any()). + +stop_child(ChildId) -> + case supervisor:terminate_child(?SERVER, ChildId) of + ok -> supervisor:delete_child(?SERVER, ChildId); + E -> E + end. + +init([]) -> {ok, {{one_for_all, 0, 1}, []}}. + + +%%---------------------------------------------------------------------------- + +child_reply({ok, _}) -> ok; +child_reply(X) -> X. diff --git a/deps/rabbit/src/rabbit_sysmon_handler.erl b/deps/rabbit/src/rabbit_sysmon_handler.erl new file mode 100644 index 0000000000..8f7298ed6e --- /dev/null +++ b/deps/rabbit/src/rabbit_sysmon_handler.erl @@ -0,0 +1,235 @@ +%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved. +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. 
+%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% https://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. + +%% @doc A custom event handler to the `sysmon_handler' application's +%% `system_monitor' event manager. +%% +%% This module attempts to discover more information about a process +%% that generates a system_monitor event. + +-module(rabbit_sysmon_handler). + +-behaviour(gen_event). + +%% API +-export([add_handler/0]). + +%% gen_event callbacks +-export([init/1, handle_event/2, handle_call/2, + handle_info/2, terminate/2, code_change/3]). + +-record(state, {timer_ref :: reference() | undefined}). + +-define(INACTIVITY_TIMEOUT, 5000). + +%%%=================================================================== +%%% gen_event callbacks +%%%=================================================================== + +add_handler() -> + %% Vulnerable to race conditions (installing handler multiple + %% times), but risk is zero in the common OTP app startup case. + case lists:member(?MODULE, gen_event:which_handlers(sysmon_handler)) of + true -> + ok; + false -> + sysmon_handler_filter:add_custom_handler(?MODULE, []) + end. 
+ +%%%=================================================================== +%%% gen_event callbacks +%%%=================================================================== + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever a new event handler is added to an event manager, +%% this function is called to initialize the event handler. +%% +%% @spec init(Args) -> {ok, State} +%% @end +%%-------------------------------------------------------------------- +init([]) -> + {ok, #state{}, hibernate}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event manager receives an event sent using +%% gen_event:notify/2 or gen_event:sync_notify/2, this function is +%% called for each installed event handler to handle the event. +%% +%% @spec handle_event(Event, State) -> +%% {ok, State} | +%% {swap_handler, Args1, State1, Mod2, Args2} | +%% remove_handler +%% @end +%%-------------------------------------------------------------------- +handle_event({monitor, Pid, Type, _Info}, + State=#state{timer_ref=TimerRef}) when Pid == self() -> + %% Reset the inactivity timeout + NewTimerRef = reset_timer(TimerRef), + maybe_collect_garbage(Type), + {ok, State#state{timer_ref=NewTimerRef}}; +handle_event({monitor, PidOrPort, Type, Info}, State=#state{timer_ref=TimerRef}) -> + %% Reset the inactivity timeout + NewTimerRef = reset_timer(TimerRef), + {Fmt, Args} = format_pretty_proc_or_port_info(PidOrPort), + rabbit_log:warning("~p ~w ~w " ++ Fmt ++ " ~w", [?MODULE, Type, PidOrPort] ++ Args ++ [Info]), + {ok, State#state{timer_ref=NewTimerRef}}; +handle_event({suppressed, Type, Info}, State=#state{timer_ref=TimerRef}) -> + %% Reset the inactivity timeout + NewTimerRef = reset_timer(TimerRef), + rabbit_log:debug("~p encountered a suppressed event of type ~w: ~w", [?MODULE, Type, Info]), + {ok, State#state{timer_ref=NewTimerRef}}; +handle_event(Event, 
State=#state{timer_ref=TimerRef}) -> + NewTimerRef = reset_timer(TimerRef), + rabbit_log:warning("~p unhandled event: ~p", [?MODULE, Event]), + {ok, State#state{timer_ref=NewTimerRef}}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event manager receives a request sent using +%% gen_event:call/3,4, this function is called for the specified +%% event handler to handle the request. +%% +%% @spec handle_call(Request, State) -> +%% {ok, Reply, State} | +%% {swap_handler, Reply, Args1, State1, Mod2, Args2} | +%% {remove_handler, Reply} +%% @end +%%-------------------------------------------------------------------- +handle_call(_Call, State) -> + Reply = not_supported, + {ok, Reply, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% This function is called for each installed event handler when +%% an event manager receives any other message than an event or a +%% synchronous request (or a system message). +%% +%% @spec handle_info(Info, State) -> +%% {ok, State} | +%% {swap_handler, Args1, State1, Mod2, Args2} | +%% remove_handler +%% @end +%%-------------------------------------------------------------------- +handle_info(inactivity_timeout, State) -> + %% No events have arrived for the timeout period + %% so hibernate to free up resources. + {ok, State, hibernate}; +handle_info(Info, State) -> + rabbit_log:info("handle_info got ~p", [Info]), + {ok, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Whenever an event handler is deleted from an event manager, this +%% function is called. It should be the opposite of Module:init/1 and +%% do any necessary cleaning up. +%% +%% @spec terminate(Reason, State) -> void() +%% @end +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. 
%%--------------------------------------------------------------------
%% @private
%% @doc
%% Convert process state when code is changed
%%
%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
%% @end
%%--------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% Build a {Format, Args} description of the process or port that
%% triggered a monitor event. Never lets a lookup/formatting failure
%% crash the handler: any exception is rendered into the result instead.
format_pretty_proc_or_port_info(PidOrPort) ->
    try
        case get_pretty_proc_or_port_info(PidOrPort) of
            undefined ->
                {"", []};
            Res ->
                Res
        end
    catch C:E:S ->
            {"Pid ~w, ~W ~W at ~w\n",
             [PidOrPort, C, 20, E, 20, S]}
    end.

%% Collect identifying information for a pid or port.
%% Returns undefined when the process has already died (process_info
%% returned undefined or an empty list).
get_pretty_proc_or_port_info(Pid) when is_pid(Pid) ->
    %% NOTE: the match below relies on process_info/2 returning results
    %% in the same order as this request list.
    Infos = [registered_name, initial_call, current_function, message_queue_len],
    case process_info(Pid, Infos) of
        undefined ->
            undefined;
        [] ->
            undefined;
        [{registered_name, RN0}, ICT1, {_, CF}, {_, MQL}] ->
            %% Prefer the proc_lib-translated initial call when available.
            ICT = case proc_lib:translate_initial_call(Pid) of
                      {proc_lib, init_p, 5} -> % not by proc_lib, see docs
                          ICT1;
                      ICT2 ->
                          {initial_call, ICT2}
                  end,
            %% Unregistered processes report [] as registered_name.
            RNL = if RN0 == [] -> [];
                     true -> [{name, RN0}]
                  end,
            {"~w", [RNL ++ [ICT, CF, {message_queue_len, MQL}]]}
    end;
get_pretty_proc_or_port_info(Port) when is_port(Port) ->
    PortInfo = erlang:port_info(Port),
    {value, {name, Name}, PortInfo2} = lists:keytake(name, 1, PortInfo),
    QueueSize = [erlang:port_info(Port, queue_size)],
    %% Also describe the process connected to the port, when it exists.
    Connected = case proplists:get_value(connected, PortInfo2) of
                    undefined ->
                        [];
                    ConnectedPid ->
                        case proc_lib:translate_initial_call(ConnectedPid) of
                            {proc_lib, init_p, 5} -> % not by proc_lib, see docs
                                [];
                            ICT ->
                                [{initial_call, ICT}]
                        end
                end,
    {"name ~s ~w", [Name, lists:append([PortInfo2, QueueSize, Connected])]}.
+ + +%% @doc If the message type is due to a large heap warning +%% and the source is ourself, go ahead and collect garbage +%% to avoid the death spiral. +-spec maybe_collect_garbage(atom()) -> ok. +maybe_collect_garbage(large_heap) -> + erlang:garbage_collect(), + ok; +maybe_collect_garbage(_) -> + ok. + +-spec reset_timer(undefined | reference()) -> reference(). +reset_timer(undefined) -> + erlang:send_after(?INACTIVITY_TIMEOUT, self(), inactivity_timeout); +reset_timer(TimerRef) -> + _ = erlang:cancel_timer(TimerRef), + reset_timer(undefined). diff --git a/deps/rabbit/src/rabbit_sysmon_minder.erl b/deps/rabbit/src/rabbit_sysmon_minder.erl new file mode 100644 index 0000000000..a0402e5ebe --- /dev/null +++ b/deps/rabbit/src/rabbit_sysmon_minder.erl @@ -0,0 +1,156 @@ +%% ------------------------------------------------------------------- +%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% https://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(rabbit_sysmon_minder). + +-behaviour(gen_server). + +%% API +-export([start_link/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {}). 
+ +%%%=================================================================== +%%% API +%%%=================================================================== + +%%-------------------------------------------------------------------- +%% @doc +%% Starts the server +%% +%% @spec start_link() -> {ok, Pid} | ignore | {error, Error} +%% @end +%%-------------------------------------------------------------------- +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Initializes the server +%% +%% @spec init(Args) -> {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%% @end +%%-------------------------------------------------------------------- +init([]) -> + %% Add our system_monitor event handler. We do that here because + %% we have a process at our disposal (i.e. ourself) to receive the + %% notification in the very unlikely event that the + %% sysmon_handler has crashed and been removed from the + %% sysmon_handler gen_event server. (If we had a supervisor + %% or app-starting process add the handler, then if the handler + %% crashes, nobody will act on the crash notification.) + rabbit_sysmon_handler:add_handler(), + {ok, #state{}}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Handling call messages +%% +%% @spec handle_call(Request, From, State) -> +%% {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | +%% {stop, Reason, State} +%% @end +%%-------------------------------------------------------------------- +handle_call(_Request, _From, State) -> + Reply = ok, + {reply, Reply, State}. 
+ +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Handling cast messages +%% +%% @spec handle_cast(Msg, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% @end +%%-------------------------------------------------------------------- +handle_cast(_Msg, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Handling all non call/cast messages +%% +%% @spec handle_info(Info, State) -> {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} +%% @end +%%-------------------------------------------------------------------- +handle_info({gen_event_EXIT, rabbit_sysmon_handler, _}, State) -> + %% SASL will create an error message, no need for us to duplicate it. + %% + %% Our handler should never crash, but it did indeed crash. If + %% there's a pathological condition somewhere that's generating + %% lots of unforseen things that crash core's custom handler, we + %% could make things worse by jumping back into the exploding + %% volcano. Wait a little bit before jumping back. Besides, the + %% system_monitor data is nice but is not critical: there is no + %% need to make things worse if things are indeed bad, and if we + %% miss a few seconds of system_monitor events, the world will not + %% end. + timer:sleep(2*1000), + rabbit_sysmon_handler:add_handler(), + {noreply, State}; +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any +%% necessary cleaning up. When it returns, the gen_server terminates +%% with Reason. The return value is ignored. 
+%% +%% @spec terminate(Reason, State) -> void() +%% @end +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%-------------------------------------------------------------------- +%% @private +%% @doc +%% Convert process state when code is changed +%% +%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState} +%% @end +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/deps/rabbit/src/rabbit_table.erl b/deps/rabbit/src/rabbit_table.erl new file mode 100644 index 0000000000..77534763d0 --- /dev/null +++ b/deps/rabbit/src/rabbit_table.erl @@ -0,0 +1,416 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_table). + +-export([ + create/0, create/2, ensure_local_copies/1, ensure_table_copy/2, + wait_for_replicated/1, wait/1, wait/2, + force_load/0, is_present/0, is_empty/0, needs_default_data/0, + check_schema_integrity/1, clear_ram_only_tables/0, retry_timeout/0, + wait_for_replicated/0, exists/1]). + +%% for testing purposes +-export([definitions/0]). + +-include_lib("rabbit_common/include/rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-type retry() :: boolean(). +-type mnesia_table() :: atom(). + +%%---------------------------------------------------------------------------- +%% Main interface +%%---------------------------------------------------------------------------- + +-spec create() -> 'ok'. + +create() -> + lists:foreach( + fun ({Table, Def}) -> create(Table, Def) end, + definitions()), + ensure_secondary_indexes(), + ok. + +-spec create(mnesia_table(), list()) -> rabbit_types:ok_or_error(any()). 
%% Create a single mnesia table from its definition. The 'match' entry is
%% stripped first: it is only used by check_schema_integrity/1, mnesia
%% does not understand it. Already-existing tables are treated as success.
create(TableName, TableDefinition) ->
    TableDefinition1 = proplists:delete(match, TableDefinition),
    rabbit_log:debug("Will create a schema database table '~s'", [TableName]),
    case mnesia:create_table(TableName, TableDefinition1) of
        {atomic, ok} -> ok;
        {aborted,{already_exists, TableName}} -> ok;
        {aborted, {already_exists, TableName, _}} -> ok;
        {aborted, Reason} ->
            throw({error, {table_creation_failed, TableName, TableDefinition1, Reason}})
    end.

-spec exists(mnesia_table()) -> boolean().
exists(Table) ->
    lists:member(Table, mnesia:system_info(tables)).

%% Sets up secondary indexes in a blank node database.
ensure_secondary_indexes() ->
    ensure_secondary_index(rabbit_queue, vhost),
    ok.

%% Idempotent: an already-existing index counts as success.
ensure_secondary_index(Table, Field) ->
    case mnesia:add_table_index(Table, Field) of
        {atomic, ok} -> ok;
        {aborted, {already_exists, Table, _}} -> ok
    end.

%% Ensure this node holds a disc copy of the given table.
-spec ensure_table_copy(mnesia_table(), node()) -> ok | {error, any()}.
ensure_table_copy(TableName, Node) ->
    rabbit_log:debug("Will add a local schema database copy for table '~s'", [TableName]),
    case mnesia:add_table_copy(TableName, Node, disc_copies) of
        {atomic, ok} -> ok;
        {aborted,{already_exists, TableName}} -> ok;
        {aborted, {already_exists, TableName, _}} -> ok;
        {aborted, Reason} -> {error, Reason}
    end.

%% This arity only exists for backwards compatibility with certain
%% plugins. See https://github.com/rabbitmq/rabbitmq-clusterer/issues/19.

-spec wait_for_replicated() -> 'ok'.

wait_for_replicated() ->
    wait_for_replicated(false).

%% Wait for every table that is replicated across the cluster, i.e. all
%% tables not marked local_content.
-spec wait_for_replicated(retry()) -> 'ok'.

wait_for_replicated(Retry) ->
    wait([Tab || {Tab, TabDef} <- definitions(),
                 not lists:member({local_content, true}, TabDef)], Retry).

-spec wait([atom()]) -> 'ok'.

wait(TableNames) ->
    wait(TableNames, _Retry = false).

%% Translate the retry flag into a timeout/retry budget and wait.
wait(TableNames, Retry) ->
    {Timeout, Retries} = retry_timeout(Retry),
    wait(TableNames, Timeout, Retries).
%% Wait for the given mnesia tables to become available, retrying up to
%% Retries times before throwing the last error.
wait(TableNames, Timeout, Retries) ->
    %% We might be in ctl here for offline ops, in which case we can't
    %% get_env() for the rabbit app.
    rabbit_log:info("Waiting for Mnesia tables for ~p ms, ~p retries left~n",
                    [Timeout, Retries - 1]),
    case wait_for_tables_once(TableNames, Timeout) of
        ok ->
            rabbit_log:info("Successfully synced tables from a peer"),
            ok;
        {error, _} = Error when Retries =:= 1 ->
            %% Retry budget exhausted: surface the last error.
            throw(Error);
        {error, Reason} ->
            rabbit_log:warning("Error while waiting for Mnesia tables: ~p~n", [Reason]),
            wait(TableNames, Timeout, Retries - 1)
    end.

%% One attempt at waiting for the tables; classifies failures and tags
%% them with the full cluster node list for diagnostics.
wait_for_tables_once(TableNames, Timeout) ->
    case mnesia:wait_for_tables(TableNames, Timeout) of
        ok ->
            ok;
        {timeout, BadTabs} ->
            {error, {timeout_waiting_for_tables,
                     rabbit_mnesia:cluster_nodes(all), BadTabs}};
        {error, Reason} ->
            {error, {failed_waiting_for_tables,
                     rabbit_mnesia:cluster_nodes(all), Reason}}
    end.

%% Map the retry flag to {Timeout, RetryBudget}: a single attempt when
%% retries are disabled, a configurable number (default 10) otherwise.
retry_timeout(false = _Retry) ->
    {retry_timeout(), 1};
retry_timeout(true = _Retry) ->
    Retries = application:get_env(rabbit, mnesia_table_loading_retry_limit, 10),
    {retry_timeout(), Retries}.

-spec retry_timeout() -> non_neg_integer() | infinity.

%% Per-attempt table-loading timeout, default 30 seconds.
retry_timeout() ->
    application:get_env(rabbit, mnesia_table_loading_retry_timeout, 30000).

-spec force_load() -> 'ok'.

%% Force-load every known table regardless of replication state.
force_load() ->
    lists:foreach(fun mnesia:force_load_table/1, names()),
    ok.

-spec is_present() -> boolean().

%% All expected tables are known to mnesia on this node.
is_present() ->
    [] =:= (names() -- mnesia:system_info(tables)).

-spec is_empty() -> boolean().

is_empty() -> is_empty(names()).

-spec needs_default_data() -> boolean().

%% A fresh node has no users, permissions or vhosts yet.
needs_default_data() -> is_empty([rabbit_user, rabbit_user_permission,
                                  rabbit_vhost]).

%% True when none of the given tables contains any record.
is_empty(Names) ->
    lists:all(fun(Tab) -> '$end_of_table' =:= mnesia:dirty_first(Tab) end,
              Names).

-spec check_schema_integrity(retry()) -> rabbit_types:ok_or_error(any()).
%% Verify that every expected table exists and has the expected
%% attributes; then, after waiting for the tables to load, verify their
%% contents match each definition's 'match' pattern.
check_schema_integrity(Retry) ->
    Tables = mnesia:system_info(tables),
    case check(fun (Tab, TabDef) ->
                       case lists:member(Tab, Tables) of
                           false -> {error, {table_missing, Tab}};
                           true -> check_attributes(Tab, TabDef)
                       end
               end) of
        ok -> wait(names(), Retry),
              check(fun check_content/2);
        Other -> Other
    end.

-spec clear_ram_only_tables() -> 'ok'.

%% Wipe the contents of every table held only in RAM on this node.
clear_ram_only_tables() ->
    Node = node(),
    lists:foreach(
      fun (TabName) ->
              case lists:member(Node, mnesia:table_info(TabName, ram_copies)) of
                  true -> {atomic, ok} = mnesia:clear_table(TabName);
                  false -> ok
              end
      end, names()),
    ok.

%% The sequence in which we delete the schema and then the other
%% tables is important: if we delete the schema first when moving to
%% RAM mnesia will loudly complain since it doesn't make much sense to
%% do that. But when moving to disc, we need to move the schema first.

-spec ensure_local_copies('disc' | 'ram') -> 'ok'.

ensure_local_copies(disc) ->
    create_local_copy(schema, disc_copies),
    create_local_copies(disc);
ensure_local_copies(ram) ->
    create_local_copies(ram),
    create_local_copy(schema, ram_copies).

%%--------------------------------------------------------------------
%% Internal helpers
%%--------------------------------------------------------------------

%% Create local copies of all tables with the storage type appropriate
%% for this node type. local_content tables always keep their defined
%% storage type even on a RAM node.
create_local_copies(Type) ->
    lists:foreach(
      fun ({Tab, TabDef}) ->
              HasDiscCopies = has_copy_type(TabDef, disc_copies),
              HasDiscOnlyCopies = has_copy_type(TabDef, disc_only_copies),
              LocalTab = proplists:get_bool(local_content, TabDef),
              StorageType =
                  if
                      Type =:= disc orelse LocalTab ->
                          if
                              HasDiscCopies -> disc_copies;
                              HasDiscOnlyCopies -> disc_only_copies;
                              true -> ram_copies
                          end;
                      Type =:= ram ->
                          ram_copies
                  end,
              ok = create_local_copy(Tab, StorageType)
      end, definitions(Type)),
    ok.
+ +create_local_copy(Tab, Type) -> + StorageType = mnesia:table_info(Tab, storage_type), + {atomic, ok} = + if + StorageType == unknown -> + mnesia:add_table_copy(Tab, node(), Type); + StorageType /= Type -> + mnesia:change_table_copy_type(Tab, node(), Type); + true -> {atomic, ok} + end, + ok. + +has_copy_type(TabDef, DiscType) -> + lists:member(node(), proplists:get_value(DiscType, TabDef, [])). + +check_attributes(Tab, TabDef) -> + {_, ExpAttrs} = proplists:lookup(attributes, TabDef), + case mnesia:table_info(Tab, attributes) of + ExpAttrs -> ok; + Attrs -> {error, {table_attributes_mismatch, Tab, ExpAttrs, Attrs}} + end. + +check_content(Tab, TabDef) -> + {_, Match} = proplists:lookup(match, TabDef), + case mnesia:dirty_first(Tab) of + '$end_of_table' -> + ok; + Key -> + ObjList = mnesia:dirty_read(Tab, Key), + MatchComp = ets:match_spec_compile([{Match, [], ['$_']}]), + case ets:match_spec_run(ObjList, MatchComp) of + ObjList -> ok; + _ -> {error, {table_content_invalid, Tab, Match, ObjList}} + end + end. + +check(Fun) -> + case [Error || {Tab, TabDef} <- definitions(), + begin + {Ret, Error} = case Fun(Tab, TabDef) of + ok -> {false, none}; + {error, E} -> {true, E} + end, + Ret + end] of + [] -> ok; + Errors -> {error, Errors} + end. + +%%-------------------------------------------------------------------- +%% Table definitions +%%-------------------------------------------------------------------- + +names() -> [Tab || {Tab, _} <- definitions()]. + +%% The tables aren't supposed to be on disk on a ram node +definitions(disc) -> + definitions(); +definitions(ram) -> + [{Tab, [{disc_copies, []}, {ram_copies, [node()]} | + proplists:delete( + ram_copies, proplists:delete(disc_copies, TabDef))]} || + {Tab, TabDef} <- definitions()]. 
%% All Mnesia tables used by the broker, as {TableName, Options} pairs:
%% the record each table stores, its attributes, copy/type options and a
%% match pattern describing a well-formed row. The gm and
%% mirrored_supervisor applications contribute their own tables at the
%% end. NOTE: list order is preserved as-is; callers may rely on it.
definitions() ->
    [{rabbit_user,
      [{record_name, internal_user},
       {attributes, internal_user:fields()},
       {disc_copies, [node()]},
       {match, internal_user:pattern_match_all()}]},
     {rabbit_user_permission,
      [{record_name, user_permission},
       {attributes, record_info(fields, user_permission)},
       {disc_copies, [node()]},
       {match, #user_permission{user_vhost = #user_vhost{_='_'},
                                permission = #permission{_='_'},
                                _='_'}}]},
     {rabbit_topic_permission,
      [{record_name, topic_permission},
       {attributes, record_info(fields, topic_permission)},
       {disc_copies, [node()]},
       {match, #topic_permission{topic_permission_key = #topic_permission_key{_='_'},
                                 permission = #permission{_='_'},
                                 _='_'}}]},
     {rabbit_vhost,
      [{record_name, vhost},
       {attributes, vhost:fields()},
       {disc_copies, [node()]},
       {match, vhost:pattern_match_all()}]},
     {rabbit_listener,
      [{record_name, listener},
       {attributes, record_info(fields, listener)},
       %% bag: one node can have several listeners
       {type, bag},
       {match, #listener{_='_'}}]},
     {rabbit_durable_route,
      [{record_name, route},
       {attributes, record_info(fields, route)},
       {disc_copies, [node()]},
       {match, #route{binding = binding_match(), _='_'}}]},
     {rabbit_semi_durable_route,
      [{record_name, route},
       {attributes, record_info(fields, route)},
       {type, ordered_set},
       {match, #route{binding = binding_match(), _='_'}}]},
     {rabbit_route,
      [{record_name, route},
       {attributes, record_info(fields, route)},
       {type, ordered_set},
       {match, #route{binding = binding_match(), _='_'}}]},
     {rabbit_reverse_route,
      [{record_name, reverse_route},
       {attributes, record_info(fields, reverse_route)},
       {type, ordered_set},
       {match, #reverse_route{reverse_binding = reverse_binding_match(),
                              _='_'}}]},
     {rabbit_topic_trie_node,
      [{record_name, topic_trie_node},
       {attributes, record_info(fields, topic_trie_node)},
       {type, ordered_set},
       {match, #topic_trie_node{trie_node = trie_node_match(), _='_'}}]},
     {rabbit_topic_trie_edge,
      [{record_name, topic_trie_edge},
       {attributes, record_info(fields, topic_trie_edge)},
       {type, ordered_set},
       {match, #topic_trie_edge{trie_edge = trie_edge_match(), _='_'}}]},
     {rabbit_topic_trie_binding,
      [{record_name, topic_trie_binding},
       {attributes, record_info(fields, topic_trie_binding)},
       {type, ordered_set},
       {match, #topic_trie_binding{trie_binding = trie_binding_match(),
                                   _='_'}}]},
     {rabbit_durable_exchange,
      [{record_name, exchange},
       {attributes, record_info(fields, exchange)},
       {disc_copies, [node()]},
       {match, #exchange{name = exchange_name_match(), _='_'}}]},
     {rabbit_exchange,
      [{record_name, exchange},
       {attributes, record_info(fields, exchange)},
       {match, #exchange{name = exchange_name_match(), _='_'}}]},
     {rabbit_exchange_serial,
      [{record_name, exchange_serial},
       {attributes, record_info(fields, exchange_serial)},
       {match, #exchange_serial{name = exchange_name_match(), _='_'}}]},
     {rabbit_runtime_parameters,
      [{record_name, runtime_parameters},
       {attributes, record_info(fields, runtime_parameters)},
       {disc_copies, [node()]},
       {match, #runtime_parameters{_='_'}}]},
     {rabbit_durable_queue,
      [{record_name, amqqueue},
       {attributes, amqqueue:fields()},
       {disc_copies, [node()]},
       {match, amqqueue:pattern_match_on_name(queue_name_match())}]},
     {rabbit_queue,
      [{record_name, amqqueue},
       {attributes, amqqueue:fields()},
       {match, amqqueue:pattern_match_on_name(queue_name_match())}]}]
    ++ gm:table_definitions()
    ++ mirrored_supervisor:table_definitions().

%% A well-formed #binding{}: source is an exchange, destination any
%% resource kind.
binding_match() ->
    #binding{source = exchange_name_match(),
             destination = binding_destination_match(),
             _='_'}.

%% Mirror of binding_match/0 for the reverse-route table.
reverse_binding_match() ->
    #reverse_binding{destination = binding_destination_match(),
                     source = exchange_name_match(),
                     _='_'}.

%% Binding destinations may be queues or exchanges, hence wildcard kind.
binding_destination_match() ->
    resource_match('_').

trie_node_match() ->
    #trie_node{exchange_name = exchange_name_match(), _='_'}.

trie_edge_match() ->
    #trie_edge{exchange_name = exchange_name_match(), _='_'}.
+trie_binding_match() -> + #trie_binding{exchange_name = exchange_name_match(), _='_'}. +exchange_name_match() -> + resource_match(exchange). +queue_name_match() -> + resource_match(queue). +resource_match(Kind) -> + #resource{kind = Kind, _='_'}. diff --git a/deps/rabbit/src/rabbit_trace.erl b/deps/rabbit/src/rabbit_trace.erl new file mode 100644 index 0000000000..74b892330e --- /dev/null +++ b/deps/rabbit/src/rabbit_trace.erl @@ -0,0 +1,128 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_trace). + +-export([init/1, enabled/1, tap_in/6, tap_out/5, start/1, stop/1]). + +-include("rabbit.hrl"). +-include("rabbit_framing.hrl"). + +-define(TRACE_VHOSTS, trace_vhosts). +-define(XNAME, <<"amq.rabbitmq.trace">>). + +%%---------------------------------------------------------------------------- + +-type state() :: rabbit_types:exchange() | 'none'. + +%%---------------------------------------------------------------------------- + +-spec init(rabbit_types:vhost()) -> state(). + +init(VHost) -> + case enabled(VHost) of + false -> none; + true -> {ok, X} = rabbit_exchange:lookup( + rabbit_misc:r(VHost, exchange, ?XNAME)), + X + end. + +-spec enabled(rabbit_types:vhost()) -> boolean(). + +enabled(VHost) -> + {ok, VHosts} = application:get_env(rabbit, ?TRACE_VHOSTS), + lists:member(VHost, VHosts). + +-spec tap_in(rabbit_types:basic_message(), [rabbit_amqqueue:name()], + binary(), rabbit_channel:channel_number(), + rabbit_types:username(), state()) -> 'ok'. 
+ +tap_in(_Msg, _QNames, _ConnName, _ChannelNum, _Username, none) -> ok; +tap_in(Msg = #basic_message{exchange_name = #resource{name = XName, + virtual_host = VHost}}, + QNames, ConnName, ChannelNum, Username, TraceX) -> + trace(TraceX, Msg, <<"publish">>, XName, + [{<<"vhost">>, longstr, VHost}, + {<<"connection">>, longstr, ConnName}, + {<<"channel">>, signedint, ChannelNum}, + {<<"user">>, longstr, Username}, + {<<"routed_queues">>, array, + [{longstr, QName#resource.name} || QName <- QNames]}]). + +-spec tap_out(rabbit_amqqueue:qmsg(), binary(), + rabbit_channel:channel_number(), + rabbit_types:username(), state()) -> 'ok'. + +tap_out(_Msg, _ConnName, _ChannelNum, _Username, none) -> ok; +tap_out({#resource{name = QName, virtual_host = VHost}, + _QPid, _QMsgId, Redelivered, Msg}, + ConnName, ChannelNum, Username, TraceX) -> + RedeliveredNum = case Redelivered of true -> 1; false -> 0 end, + trace(TraceX, Msg, <<"deliver">>, QName, + [{<<"redelivered">>, signedint, RedeliveredNum}, + {<<"vhost">>, longstr, VHost}, + {<<"connection">>, longstr, ConnName}, + {<<"channel">>, signedint, ChannelNum}, + {<<"user">>, longstr, Username}]). + +%%---------------------------------------------------------------------------- + +-spec start(rabbit_types:vhost()) -> 'ok'. + +start(VHost) -> + rabbit_log:info("Enabling tracing for vhost '~s'~n", [VHost]), + update_config(fun (VHosts) -> [VHost | VHosts -- [VHost]] end). + +-spec stop(rabbit_types:vhost()) -> 'ok'. + +stop(VHost) -> + rabbit_log:info("Disabling tracing for vhost '~s'~n", [VHost]), + update_config(fun (VHosts) -> VHosts -- [VHost] end). + +update_config(Fun) -> + {ok, VHosts0} = application:get_env(rabbit, ?TRACE_VHOSTS), + VHosts = Fun(VHosts0), + application:set_env(rabbit, ?TRACE_VHOSTS, VHosts), + rabbit_channel:refresh_config_local(), + ok. 
+ +%%---------------------------------------------------------------------------- + +trace(#exchange{name = Name}, #basic_message{exchange_name = Name}, + _RKPrefix, _RKSuffix, _Extra) -> + ok; +trace(X, Msg = #basic_message{content = #content{payload_fragments_rev = PFR}}, + RKPrefix, RKSuffix, Extra) -> + ok = rabbit_basic:publish( + X, <<RKPrefix/binary, ".", RKSuffix/binary>>, + #'P_basic'{headers = msg_to_table(Msg) ++ Extra}, PFR), + ok. + +msg_to_table(#basic_message{exchange_name = #resource{name = XName}, + routing_keys = RoutingKeys, + content = Content}) -> + #content{properties = Props} = + rabbit_binary_parser:ensure_content_decoded(Content), + {PropsTable, _Ix} = + lists:foldl(fun (K, {L, Ix}) -> + V = element(Ix, Props), + NewL = case V of + undefined -> L; + _ -> [{a2b(K), type(V), V} | L] + end, + {NewL, Ix + 1} + end, {[], 2}, record_info(fields, 'P_basic')), + [{<<"exchange_name">>, longstr, XName}, + {<<"routing_keys">>, array, [{longstr, K} || K <- RoutingKeys]}, + {<<"properties">>, table, PropsTable}, + {<<"node">>, longstr, a2b(node())}]. + +a2b(A) -> list_to_binary(atom_to_list(A)). + +type(V) when is_list(V) -> table; +type(V) when is_integer(V) -> signedint; +type(_V) -> longstr. diff --git a/deps/rabbit/src/rabbit_tracking.erl b/deps/rabbit/src/rabbit_tracking.erl new file mode 100644 index 0000000000..a124d20226 --- /dev/null +++ b/deps/rabbit/src/rabbit_tracking.erl @@ -0,0 +1,103 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_tracking). + +%% Common behaviour and processing functions for tracking components +%% +%% See in use: +%% * rabbit_connection_tracking +%% * rabbit_channel_tracking + +-callback boot() -> ok. +-callback update_tracked(term()) -> ok. 
+-callback handle_cast(term()) -> ok. +-callback register_tracked( + rabbit_types:tracked_connection() | + rabbit_types:tracked_channel()) -> 'ok'. +-callback unregister_tracked( + rabbit_types:tracked_connection_id() | + rabbit_types:tracked_channel_id()) -> 'ok'. +-callback count_tracked_items_in(term()) -> non_neg_integer(). +-callback clear_tracking_tables() -> 'ok'. +-callback shutdown_tracked_items(list(), term()) -> ok. + +-export([id/2, count_tracked_items/4, match_tracked_items/2, + clear_tracking_table/1, delete_tracking_table/3, + delete_tracked_entry/3]). + +%%---------------------------------------------------------------------------- + +-spec id(atom(), term()) -> + rabbit_types:tracked_connection_id() | rabbit_types:tracked_channel_id(). + +id(Node, Name) -> {Node, Name}. + +-spec count_tracked_items(function(), integer(), term(), string()) -> + non_neg_integer(). + +count_tracked_items(TableNameFun, CountRecPosition, Key, ContextMsg) -> + lists:foldl(fun (Node, Acc) -> + Tab = TableNameFun(Node), + try + N = case mnesia:dirty_read(Tab, Key) of + [] -> 0; + [Val] -> + element(CountRecPosition, Val) + end, + Acc + N + catch _:Err -> + rabbit_log:error( + "Failed to fetch number of ~p ~p on node ~p:~n~p~n", + [ContextMsg, Key, Node, Err]), + Acc + end + end, 0, rabbit_nodes:all_running()). + +-spec match_tracked_items(function(), tuple()) -> term(). + +match_tracked_items(TableNameFun, MatchSpec) -> + lists:foldl( + fun (Node, Acc) -> + Tab = TableNameFun(Node), + Acc ++ mnesia:dirty_match_object( + Tab, + MatchSpec) + end, [], rabbit_nodes:all_running()). + +-spec clear_tracking_table(atom()) -> ok. + +clear_tracking_table(TableName) -> + case mnesia:clear_table(TableName) of + {atomic, ok} -> ok; + {aborted, _} -> ok + end. + +-spec delete_tracking_table(atom(), node(), string()) -> ok. 
+ +delete_tracking_table(TableName, Node, ContextMsg) -> + case mnesia:delete_table(TableName) of + {atomic, ok} -> ok; + {aborted, {no_exists, _}} -> ok; + {aborted, Error} -> + rabbit_log:error("Failed to delete a ~p table for node ~p: ~p", + [ContextMsg, Node, Error]), + ok + end. + +-spec delete_tracked_entry({atom(), atom(), list()}, function(), term()) -> ok. + +delete_tracked_entry(_ExistsCheckSpec = {M, F, A}, TableNameFun, Key) -> + ClusterNodes = rabbit_nodes:all_running(), + ExistsInCluster = + lists:any(fun(Node) -> rpc:call(Node, M, F, A) end, ClusterNodes), + case ExistsInCluster of + false -> + [mnesia:dirty_delete(TableNameFun(Node), Key) || Node <- ClusterNodes]; + true -> + ok + end. diff --git a/deps/rabbit/src/rabbit_upgrade.erl b/deps/rabbit/src/rabbit_upgrade.erl new file mode 100644 index 0000000000..b1b128fecc --- /dev/null +++ b/deps/rabbit/src/rabbit_upgrade.erl @@ -0,0 +1,314 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_upgrade). + +-export([maybe_upgrade_mnesia/0, maybe_upgrade_local/0, + maybe_migrate_queues_to_per_vhost_storage/0, + nodes_running/1, secondary_upgrade/1]). + +-include("rabbit.hrl"). + +-define(VERSION_FILENAME, "schema_version"). +-define(LOCK_FILENAME, "schema_upgrade_lock"). + +%% ------------------------------------------------------------------- + +%% The upgrade logic is quite involved, due to the existence of +%% clusters. +%% +%% Firstly, we have two different types of upgrades to do: Mnesia and +%% everything else. Mnesia upgrades must only be done by one node in +%% the cluster (we treat a non-clustered node as a single-node +%% cluster). This is the primary upgrader. The other upgrades need to +%% be done by all nodes. 
%%
%% The primary upgrader has to start first (and do its Mnesia
%% upgrades). Secondary upgraders need to reset their Mnesia database
%% and then rejoin the cluster. They can't do the Mnesia upgrades as
%% well and then merge databases since the cookie for each table will
%% end up different and the merge will fail.
%%
%% This in turn means that we need to determine whether we are the
%% primary or secondary upgrader *before* Mnesia comes up. If we
%% didn't then the secondary upgrader would try to start Mnesia, and
%% either hang waiting for a node which is not yet up, or fail since
%% its schema differs from the other nodes in the cluster.
%%
%% Also, the primary upgrader needs to start Mnesia to do its
%% upgrades, but needs to forcibly load tables rather than wait for
%% them (in case it was not the last node to shut down, in which case
%% it would wait forever).
%%
%% This in turn means that maybe_upgrade_mnesia/0 has to be patched
%% into the boot process by prelaunch before the mnesia application is
%% started. By the time Mnesia is started the upgrades have happened
%% (on the primary), or Mnesia has been reset (on the secondary) and
%% rabbit_mnesia:init_db_unchecked/2 can then make the node rejoin the cluster
%% in the normal way.
%%
%% The non-mnesia upgrades are then triggered by
%% rabbit_mnesia:init_db_unchecked/2. Of course, it's possible for a given
%% upgrade process to only require Mnesia upgrades, or only require
%% non-Mnesia upgrades. In the latter case no Mnesia resets and
%% reclusterings occur.
%%
%% The primary upgrader needs to be a disc node. Ideally we would like
%% it to be the last disc node to shut down (since otherwise there's a
%% risk of data loss). On each node we therefore record the disc nodes
%% that were still running when we shut down. A disc node that knows
%% other nodes were up when it shut down, or a ram node, will refuse
%% to be the primary upgrader, and will thus not start when upgrades
%% are needed.
%%
%% However, this is racy if several nodes are shut down at once. Since
%% rabbit records the running nodes, and shuts down before mnesia, the
%% race manifests as all disc nodes thinking they are not the primary
%% upgrader. Therefore the user can remove the record of the last disc
%% node to shut down to get things going again. This may lose any
%% mnesia changes that happened after the node chosen as the primary
%% upgrader was shut down.

%% -------------------------------------------------------------------

%% Take a backup of the mnesia dir before the first upgrade step, unless
%% one already exists. A present lock file means a previous upgrade run
%% is still in flight (or died) -- refuse to proceed.
ensure_backup_taken() ->
    case filelib:is_file(lock_filename()) of
        false -> case filelib:is_dir(backup_dir()) of
                     false -> ok = take_backup();
                     _     -> ok
                 end;
        true  ->
            rabbit_log:error("Found lock file at ~s.
             Either previous upgrade is in progress or has failed.
             Database backup path: ~s",
                             [lock_filename(), backup_dir()]),
            throw({error, previous_upgrade_failed})
    end.

take_backup() ->
    BackupDir = backup_dir(),
    info("upgrades: Backing up mnesia dir to ~p~n", [BackupDir]),
    case rabbit_mnesia:copy_db(BackupDir) of
        ok         -> info("upgrades: Mnesia dir backed up to ~p~n",
                           [BackupDir]);
        {error, E} -> throw({could_not_back_up_mnesia_dir, E, BackupDir})
    end.

%% Remove the backup once all upgrade scopes have completed.
ensure_backup_removed() ->
    case filelib:is_dir(backup_dir()) of
        true -> ok = remove_backup();
        _    -> ok
    end.

remove_backup() ->
    ok = rabbit_file:recursive_delete([backup_dir()]),
    info("upgrades: Mnesia backup removed~n", []).

-spec maybe_upgrade_mnesia() -> 'ok'.

%% Entry point patched into boot before mnesia starts: decide whether
%% any upgrades are pending and, for the mnesia scope, whether this node
%% is the primary or a secondary upgrader.
maybe_upgrade_mnesia() ->
    AllNodes = rabbit_mnesia:cluster_nodes(all),
    ok = rabbit_mnesia_rename:maybe_finish(AllNodes),
    %% Mnesia upgrade is the first upgrade scope,
    %% so we should create a backup here if there are any upgrades
    case rabbit_version:all_upgrades_required([mnesia, local, message_store]) of
        {error, starting_from_scratch} ->
            ok;
        {error, version_not_available} ->
            case AllNodes of
                [] -> die("Cluster upgrade needed but upgrading from "
                          "< 2.1.1.~nUnfortunately you will need to "
                          "rebuild the cluster.", []);
                _  -> ok
            end;
        {error, _} = Err ->
            throw(Err);
        {ok, []} ->
            ok;
        {ok, Upgrades} ->
            ensure_backup_taken(),
            run_mnesia_upgrades(proplists:get_value(mnesia, Upgrades, []),
                                AllNodes)
    end.

run_mnesia_upgrades([], _) -> ok;
run_mnesia_upgrades(Upgrades, AllNodes) ->
    case upgrade_mode(AllNodes) of
        primary   -> primary_upgrade(Upgrades, AllNodes);
        secondary -> secondary_upgrade(AllNodes)
    end.

%% Decide primary vs secondary. With no other nodes running, only the
%% last disc node to shut down may be primary; otherwise compare our
%% desired mnesia-scope version with a running node's.
upgrade_mode(AllNodes) ->
    case nodes_running(AllNodes) of
        [] ->
            AfterUs = rabbit_nodes:all_running() -- [node()],
            case {node_type_legacy(), AfterUs} of
                {disc, []} ->
                    primary;
                {disc, _} ->
                    Filename = rabbit_node_monitor:running_nodes_filename(),
                    die("Cluster upgrade needed but other disc nodes shut "
                        "down after this one.~nPlease first start the last "
                        "disc node to shut down.~n~nNote: if several disc "
                        "nodes were shut down simultaneously they may "
                        "all~nshow this message. In which case, remove "
                        "the lock file on one of them and~nstart that node. "
                        "The lock file on this node is:~n~n ~s ", [Filename]);
                {ram, _} ->
                    die("Cluster upgrade needed but this is a ram node.~n"
                        "Please first start the last disc node to shut down.",
                        [])
            end;
        [Another|_] ->
            MyVersion = rabbit_version:desired_for_scope(mnesia),
            case rpc:call(Another, rabbit_version, desired_for_scope,
                          [mnesia]) of
                {badrpc, {'EXIT', {undef, _}}} ->
                    %% Other node predates rabbit_version: too old.
                    die_because_cluster_upgrade_needed(unknown_old_version,
                                                       MyVersion);
                {badrpc, Reason} ->
                    die_because_cluster_upgrade_needed({unknown, Reason},
                                                       MyVersion);
                CV -> case rabbit_version:matches(MyVersion, CV) of
                          true  -> secondary;
                          false -> die_because_cluster_upgrade_needed(
                                     CV, MyVersion)
                      end
            end
    end.

-spec die_because_cluster_upgrade_needed(any(), any()) -> no_return().

die_because_cluster_upgrade_needed(ClusterVersion, MyVersion) ->
    %% The other node(s) are running an
    %% unexpected version.
    die("Cluster upgrade needed but other nodes are "
        "running ~p~nand I want ~p",
        [ClusterVersion, MyVersion]).

-spec die(string(), list()) -> no_return().

die(Msg, Args) ->
    %% We don't throw or exit here since that gets thrown
    %% straight out into do_boot, generating an erl_crash.dump
    %% and displaying any error message in a confusing way.
    rabbit_log:error(Msg, Args),
    Str = rabbit_misc:format(
            "~n~n****~n~n" ++ Msg ++ "~n~n****~n~n~n", Args),
    %% Str is already fully formatted: print it via "~s" so any '~'
    %% surviving in the expanded arguments is not re-interpreted as a
    %% format directive by io:format/1.
    io:format("~s", [Str]),
    error_logger:logfile(close),
    case application:get_env(rabbit, halt_on_upgrade_failure) of
        {ok, false} -> throw({upgrade_error, Str});
        _           -> halt(1) %% i.e. true or undefined
    end.

%% Apply mnesia-scope upgrades on the primary: force-load tables (we may
%% not have been the last node down) and evict all other nodes from the
%% schema so they re-join cleanly after resetting.
primary_upgrade(Upgrades, Nodes) ->
    Others = Nodes -- [node()],
    ok = apply_upgrades(
           mnesia,
           Upgrades,
           fun () ->
                   rabbit_table:force_load(),
                   case Others of
                       [] -> ok;
                       _  -> info("mnesia upgrades: Breaking cluster~n", []),
                             [{atomic, ok} = mnesia:del_table_copy(schema, Node)
                              || Node <- Others]
                   end
           end),
    ok.
+ +secondary_upgrade(AllNodes) -> + %% must do this before we wipe out schema + NodeType = node_type_legacy(), + rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), + cannot_delete_schema), + rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + ok = rabbit_mnesia:init_db_unchecked(AllNodes, NodeType), + ok = rabbit_version:record_desired_for_scope(mnesia), + ok. + +nodes_running(Nodes) -> + [N || N <- Nodes, rabbit:is_running(N)]. + +%% ------------------------------------------------------------------- + +-spec maybe_upgrade_local() -> + 'ok' | + 'version_not_available' | + 'starting_from_scratch'. + +maybe_upgrade_local() -> + case rabbit_version:upgrades_required(local) of + {error, version_not_available} -> version_not_available; + {error, starting_from_scratch} -> starting_from_scratch; + {error, _} = Err -> throw(Err); + {ok, []} -> ensure_backup_removed(), + ok; + {ok, Upgrades} -> mnesia:stop(), + ok = apply_upgrades(local, Upgrades, + fun () -> ok end), + ok + end. + +%% ------------------------------------------------------------------- + +maybe_migrate_queues_to_per_vhost_storage() -> + Result = case rabbit_version:upgrades_required(message_store) of + {error, version_not_available} -> version_not_available; + {error, starting_from_scratch} -> + starting_from_scratch; + {error, _} = Err -> throw(Err); + {ok, []} -> ok; + {ok, Upgrades} -> apply_upgrades(message_store, + Upgrades, + fun() -> ok end), + ok + end, + %% Message store upgrades should be + %% the last group. + %% Backup can be deleted here. + ensure_backup_removed(), + Result. 

%% -------------------------------------------------------------------

%% Run one scope's upgrade steps under the schema lock file. Fun is a
%% pre-step hook (e.g. force-loading tables for the mnesia scope).
apply_upgrades(Scope, Upgrades, Fun) ->
    ok = rabbit_file:lock_file(lock_filename()),
    info("~s upgrades: ~w to apply~n", [Scope, length(Upgrades)]),
    rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
    Fun(),
    [apply_upgrade(Scope, Upgrade) || Upgrade <- Upgrades],
    info("~s upgrades: All upgrades applied successfully~n", [Scope]),
    ok = rabbit_version:record_desired_for_scope(Scope),
    ok = file:delete(lock_filename()).

apply_upgrade(Scope, {M, F}) ->
    info("~s upgrades: Applying ~w:~w~n", [Scope, M, F]),
    ok = apply(M, F, []).

%% -------------------------------------------------------------------

dir() -> rabbit_mnesia:dir().

lock_filename()    -> lock_filename(dir()).
lock_filename(Dir) -> filename:join(Dir, ?LOCK_FILENAME).
backup_dir()       -> dir() ++ "-upgrade-backup".

%% Determine disc vs ram node without starting mnesia.
node_type_legacy() ->
    %% This is pretty ugly but we can't start Mnesia and ask it (will
    %% hang), we can't look at the config file (may not include us
    %% even if we're a disc node).  We also can't use
    %% rabbit_mnesia:node_type/0 because that will give false
    %% positives on Rabbit up to 2.5.1.
    case filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")) of
        true  -> disc;
        false -> ram
    end.

info(Msg, Args) -> rabbit_log:info(Msg, Args).
diff --git a/deps/rabbit/src/rabbit_upgrade_functions.erl b/deps/rabbit/src/rabbit_upgrade_functions.erl
new file mode 100644
index 0000000000..59417c72bb
--- /dev/null
+++ b/deps/rabbit/src/rabbit_upgrade_functions.erl
@@ -0,0 +1,662 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.
%%

-module(rabbit_upgrade_functions).

%% If you are tempted to add include("rabbit.hrl"). here, don't. Using record
%% defs here leads to pain later.

-compile([nowarn_export_all, export_all]).

%% Each -rabbit_upgrade attribute declares an upgrade step, its scope
%% and the steps it depends on.
-rabbit_upgrade({remove_user_scope,     mnesia, []}).
-rabbit_upgrade({hash_passwords,        mnesia, []}).
-rabbit_upgrade({add_ip_to_listener,    mnesia, []}).
-rabbit_upgrade({add_opts_to_listener,  mnesia, [add_ip_to_listener]}).
-rabbit_upgrade({internal_exchanges,    mnesia, []}).
-rabbit_upgrade({user_to_internal_user, mnesia, [hash_passwords]}).
-rabbit_upgrade({topic_trie,            mnesia, []}).
-rabbit_upgrade({semi_durable_route,    mnesia, []}).
-rabbit_upgrade({exchange_event_serial, mnesia, []}).
-rabbit_upgrade({trace_exchanges,       mnesia, [internal_exchanges]}).
-rabbit_upgrade({user_admin_to_tags,    mnesia, [user_to_internal_user]}).
-rabbit_upgrade({ha_mirrors,            mnesia, []}).
-rabbit_upgrade({gm,                    mnesia, []}).
-rabbit_upgrade({exchange_scratch,      mnesia, [trace_exchanges]}).
-rabbit_upgrade({mirrored_supervisor,   mnesia, []}).
-rabbit_upgrade({topic_trie_node,       mnesia, []}).
-rabbit_upgrade({runtime_parameters,    mnesia, []}).
-rabbit_upgrade({exchange_scratches,    mnesia, [exchange_scratch]}).
-rabbit_upgrade({policy,                mnesia,
                 [exchange_scratches, ha_mirrors]}).
-rabbit_upgrade({sync_slave_pids,       mnesia, [policy]}).
-rabbit_upgrade({no_mirror_nodes,       mnesia, [sync_slave_pids]}).
-rabbit_upgrade({gm_pids,               mnesia, [no_mirror_nodes]}).
-rabbit_upgrade({exchange_decorators,   mnesia, [policy]}).
-rabbit_upgrade({policy_apply_to,       mnesia, [runtime_parameters]}).
-rabbit_upgrade({queue_decorators,      mnesia, [gm_pids]}).
-rabbit_upgrade({internal_system_x,     mnesia, [exchange_decorators]}).
-rabbit_upgrade({cluster_name,          mnesia, [runtime_parameters]}).
-rabbit_upgrade({down_slave_nodes,      mnesia, [queue_decorators]}).
-rabbit_upgrade({queue_state,           mnesia, [down_slave_nodes]}).
-rabbit_upgrade({recoverable_slaves,    mnesia, [queue_state]}).
-rabbit_upgrade({policy_version,        mnesia, [recoverable_slaves]}).
+-rabbit_upgrade({slave_pids_pending_shutdown, mnesia, [policy_version]}). +-rabbit_upgrade({user_password_hashing, mnesia, [hash_passwords]}). +-rabbit_upgrade({operator_policies, mnesia, [slave_pids_pending_shutdown, internal_system_x]}). +-rabbit_upgrade({vhost_limits, mnesia, []}). +-rabbit_upgrade({queue_vhost_field, mnesia, [operator_policies]}). +-rabbit_upgrade({topic_permission, mnesia, []}). +-rabbit_upgrade({queue_options, mnesia, [queue_vhost_field]}). +-rabbit_upgrade({exchange_options, mnesia, [operator_policies]}). + +%% ------------------------------------------------------------------- + +%% replaces vhost.dummy (used to avoid having a single-field record +%% which Mnesia doesn't like) with vhost.limits (which is actually +%% used) + +-spec vhost_limits() -> 'ok'. + +vhost_limits() -> + transform( + rabbit_vhost, + fun ({vhost, VHost, _Dummy}) -> + {vhost, VHost, undefined} + end, + [virtual_host, limits]). + +%% It's a bad idea to use records or record_info here, even for the +%% destination form. Because in the future, the destination form of +%% your current transform may not match the record any more, and it +%% would be messy to have to go back and fix old transforms at that +%% point. + +-spec remove_user_scope() -> 'ok'. + +remove_user_scope() -> + transform( + rabbit_user_permission, + fun ({user_permission, UV, {permission, _Scope, Conf, Write, Read}}) -> + {user_permission, UV, {permission, Conf, Write, Read}} + end, + [user_vhost, permission]). + +%% this is an early migration that hashes passwords using MD5, +%% only relevant to those migrating from 2.1.1. +%% all users created after in 3.6.0 or later will use SHA-256 (unless configured +%% otherwise) + +-spec hash_passwords() -> 'ok'. 
+ +hash_passwords() -> + transform( + rabbit_user, + fun ({user, Username, Password, IsAdmin}) -> + Hash = rabbit_auth_backend_internal:hash_password(rabbit_password_hashing_md5, Password), + {user, Username, Hash, IsAdmin} + end, + [username, password_hash, is_admin]). + +-spec add_ip_to_listener() -> 'ok'. + +add_ip_to_listener() -> + transform( + rabbit_listener, + fun ({listener, Node, Protocol, Host, Port}) -> + {listener, Node, Protocol, Host, {0,0,0,0}, Port} + end, + [node, protocol, host, ip_address, port]). + +-spec add_opts_to_listener() -> 'ok'. + +add_opts_to_listener() -> + transform( + rabbit_listener, + fun ({listener, Node, Protocol, Host, IP, Port}) -> + {listener, Node, Protocol, Host, IP, Port, []} + end, + [node, protocol, host, ip_address, port, opts]). + +-spec internal_exchanges() -> 'ok'. + +internal_exchanges() -> + Tables = [rabbit_exchange, rabbit_durable_exchange], + AddInternalFun = + fun ({exchange, Name, Type, Durable, AutoDelete, Args}) -> + {exchange, Name, Type, Durable, AutoDelete, false, Args} + end, + [ ok = transform(T, + AddInternalFun, + [name, type, durable, auto_delete, internal, arguments]) + || T <- Tables ], + ok. + +-spec user_to_internal_user() -> 'ok'. + +user_to_internal_user() -> + transform( + rabbit_user, + fun({user, Username, PasswordHash, IsAdmin}) -> + {internal_user, Username, PasswordHash, IsAdmin} + end, + [username, password_hash, is_admin], internal_user). + +-spec topic_trie() -> 'ok'. + +topic_trie() -> + create(rabbit_topic_trie_edge, [{record_name, topic_trie_edge}, + {attributes, [trie_edge, node_id]}, + {type, ordered_set}]), + create(rabbit_topic_trie_binding, [{record_name, topic_trie_binding}, + {attributes, [trie_binding, value]}, + {type, ordered_set}]). + +-spec semi_durable_route() -> 'ok'. + +semi_durable_route() -> + create(rabbit_semi_durable_route, [{record_name, route}, + {attributes, [binding, value]}]). + +-spec exchange_event_serial() -> 'ok'. 
+ +exchange_event_serial() -> + create(rabbit_exchange_serial, [{record_name, exchange_serial}, + {attributes, [name, next]}]). + +-spec trace_exchanges() -> 'ok'. + +trace_exchanges() -> + [declare_exchange( + rabbit_misc:r(VHost, exchange, <<"amq.rabbitmq.trace">>), topic) || + VHost <- rabbit_vhost:list_names()], + ok. + +-spec user_admin_to_tags() -> 'ok'. + +user_admin_to_tags() -> + transform( + rabbit_user, + fun({internal_user, Username, PasswordHash, true}) -> + {internal_user, Username, PasswordHash, [administrator]}; + ({internal_user, Username, PasswordHash, false}) -> + {internal_user, Username, PasswordHash, [management]} + end, + [username, password_hash, tags], internal_user). + +-spec ha_mirrors() -> 'ok'. + +ha_mirrors() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddMirrorPidsFun = + fun ({amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid}) -> + {amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid, + [], undefined} + end, + [ ok = transform(T, + AddMirrorPidsFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, mirror_nodes]) + || T <- Tables ], + ok. + +-spec gm() -> 'ok'. + +gm() -> + create(gm_group, [{record_name, gm_group}, + {attributes, [name, version, members]}]). + +-spec exchange_scratch() -> 'ok'. + +exchange_scratch() -> + ok = exchange_scratch(rabbit_exchange), + ok = exchange_scratch(rabbit_durable_exchange). + +exchange_scratch(Table) -> + transform( + Table, + fun ({exchange, Name, Type, Dur, AutoDel, Int, Args}) -> + {exchange, Name, Type, Dur, AutoDel, Int, Args, undefined} + end, + [name, type, durable, auto_delete, internal, arguments, scratch]). + +-spec mirrored_supervisor() -> 'ok'. + +mirrored_supervisor() -> + create(mirrored_sup_childspec, + [{record_name, mirrored_sup_childspec}, + {attributes, [key, mirroring_pid, childspec]}]). + +-spec topic_trie_node() -> 'ok'. 
+ +topic_trie_node() -> + create(rabbit_topic_trie_node, + [{record_name, topic_trie_node}, + {attributes, [trie_node, edge_count, binding_count]}, + {type, ordered_set}]). + +-spec runtime_parameters() -> 'ok'. + +runtime_parameters() -> + create(rabbit_runtime_parameters, + [{record_name, runtime_parameters}, + {attributes, [key, value]}, + {disc_copies, [node()]}]). + +exchange_scratches() -> + ok = exchange_scratches(rabbit_exchange), + ok = exchange_scratches(rabbit_durable_exchange). + +exchange_scratches(Table) -> + transform( + Table, + fun ({exchange, Name, Type = <<"x-federation">>, Dur, AutoDel, Int, Args, + Scratch}) -> + Scratches = orddict:store(federation, Scratch, orddict:new()), + {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches}; + %% We assert here that nothing else uses the scratch mechanism ATM + ({exchange, Name, Type, Dur, AutoDel, Int, Args, undefined}) -> + {exchange, Name, Type, Dur, AutoDel, Int, Args, undefined} + end, + [name, type, durable, auto_delete, internal, arguments, scratches]). + +-spec policy() -> 'ok'. + +policy() -> + ok = exchange_policy(rabbit_exchange), + ok = exchange_policy(rabbit_durable_exchange), + ok = queue_policy(rabbit_queue), + ok = queue_policy(rabbit_durable_queue). + +exchange_policy(Table) -> + transform( + Table, + fun ({exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches}) -> + {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches, + undefined} + end, + [name, type, durable, auto_delete, internal, arguments, scratches, + policy]). + +queue_policy(Table) -> + transform( + Table, + fun ({amqqueue, Name, Dur, AutoDel, Excl, Args, Pid, SPids, MNodes}) -> + {amqqueue, Name, Dur, AutoDel, Excl, Args, Pid, SPids, MNodes, + undefined} + end, + [name, durable, auto_delete, exclusive_owner, arguments, pid, + slave_pids, mirror_nodes, policy]). + +-spec sync_slave_pids() -> 'ok'. 
+ +sync_slave_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddSyncSlavesFun = + fun ({amqqueue, N, D, AD, Excl, Args, Pid, SPids, MNodes, Pol}) -> + {amqqueue, N, D, AD, Excl, Args, Pid, SPids, [], MNodes, Pol} + end, + [ok = transform(T, AddSyncSlavesFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, mirror_nodes, policy]) + || T <- Tables], + ok. + +-spec no_mirror_nodes() -> 'ok'. + +no_mirror_nodes() -> + Tables = [rabbit_queue, rabbit_durable_queue], + RemoveMirrorNodesFun = + fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, _MNodes, Pol}) -> + {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol} + end, + [ok = transform(T, RemoveMirrorNodesFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, policy]) + || T <- Tables], + ok. + +-spec gm_pids() -> 'ok'. + +gm_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddGMPidsFun = + fun ({amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol}) -> + {amqqueue, N, D, AD, O, A, Pid, SPids, SSPids, Pol, []} + end, + [ok = transform(T, AddGMPidsFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, slave_pids, sync_slave_pids, policy, gm_pids]) + || T <- Tables], + ok. + +-spec exchange_decorators() -> 'ok'. + +exchange_decorators() -> + ok = exchange_decorators(rabbit_exchange), + ok = exchange_decorators(rabbit_durable_exchange). + +exchange_decorators(Table) -> + transform( + Table, + fun ({exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches, + Policy}) -> + {exchange, Name, Type, Dur, AutoDel, Int, Args, Scratches, Policy, + {[], []}} + end, + [name, type, durable, auto_delete, internal, arguments, scratches, policy, + decorators]). + +-spec policy_apply_to() -> 'ok'. 

%% Schema upgrade step: prepend an explicit <<"apply-to">> entry, inferred
%% from the definition, to every policy runtime parameter, then invalidate
%% cached policy decisions. Non-policy parameters pass through unchanged.
policy_apply_to() ->
    transform(
      rabbit_runtime_parameters,
      fun ({runtime_parameters, Key = {_VHost, <<"policy">>, _Name}, Value}) ->
              ApplyTo = apply_to(proplists:get_value(<<"definition">>, Value)),
              {runtime_parameters, Key, [{<<"apply-to">>, ApplyTo} | Value]};
          ({runtime_parameters, Key, Value}) ->
              {runtime_parameters, Key, Value}
      end,
      [key, value]),
    rabbit_policy:invalidate(),
    ok.

%% Infers the legacy implicit scope of a policy definition: a
%% federation-upstream-set key alone implies exchanges, an ha-mode key
%% alone implies queues; both, or neither, imply all.
apply_to(Def) ->
    case [proplists:get_value(K, Def) ||
             K <- [<<"federation-upstream-set">>, <<"ha-mode">>]] of
        [undefined, undefined] -> <<"all">>;
        [_, undefined]         -> <<"exchanges">>;
        [undefined, _]         -> <<"queues">>;
        [_, _]                 -> <<"all">>
    end.

-spec queue_decorators() -> 'ok'.

%% Schema upgrade step: append an (empty) decorators field to queue records.
queue_decorators() ->
    ok = queue_decorators(rabbit_queue),
    ok = queue_decorators(rabbit_durable_queue).

queue_decorators(Table) ->
    transform(
      Table,
      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
            Pid, SlavePids, SyncSlavePids, Policy, GmPids}) ->
              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
               Pid, SlavePids, SyncSlavePids, Policy, GmPids, []}
      end,
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, policy, gm_pids, decorators]).

-spec internal_system_x() -> 'ok'.

%% Schema upgrade step: force the internal flag to true on the
%% amq.rabbitmq.* system exchanges in the durable table; every other
%% exchange is passed through unchanged.
internal_system_x() ->
    transform(
      rabbit_durable_exchange,
      fun ({exchange, Name = {resource, _, _, <<"amq.rabbitmq.", _/binary>>},
            Type, Dur, AutoDel, _Int, Args, Scratches, Policy, Decorators}) ->
              {exchange, Name, Type, Dur, AutoDel, true, Args, Scratches,
               Policy, Decorators};
          (X) ->
              X
      end,
      [name, type, durable, auto_delete, internal, arguments, scratches, policy,
       decorators]).

-spec cluster_name() -> 'ok'.

%% Schema upgrade step: migrate the federation 'local-nodename' runtime
%% parameter(s) into the cluster_name parameter, inside a transaction.
cluster_name() ->
    {atomic, ok} = mnesia:transaction(fun cluster_name_tx/0),
    ok.

%% Transactional body of cluster_name/0: pick the first local-nodename
%% value found, rewrite it under the cluster_name key, warn if several
%% existed, then delete all the old keys.
cluster_name_tx() ->
    %% mnesia:transform_table/4 does not let us delete records
    T = rabbit_runtime_parameters,
    mnesia:write_lock_table(T),
    Ks = [K || {_VHost, <<"federation">>, <<"local-nodename">>} = K
                   <- mnesia:all_keys(T)],
    case Ks of
        []     -> ok;
        [K|Tl] -> [{runtime_parameters, _K, Name}] = mnesia:read(T, K, write),
                  R = {runtime_parameters, cluster_name, Name},
                  mnesia:write(T, R, write),
                  case Tl of
                      [] -> ok;
                      _  -> {VHost, _, _} = K,
                            error_logger:warning_msg(
                              "Multiple local-nodenames found, picking '~s' "
                              "from '~s' for cluster name~n", [Name, VHost])
                  end
    end,
    [mnesia:delete(T, K, write) || K <- Ks],
    ok.

-spec down_slave_nodes() -> 'ok'.

%% Schema upgrade step: insert an (empty) down_slave_nodes field after
%% sync_slave_pids in queue records.
down_slave_nodes() ->
    ok = down_slave_nodes(rabbit_queue),
    ok = down_slave_nodes(rabbit_durable_queue).

down_slave_nodes(Table) ->
    transform(
      Table,
      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
            Pid, SlavePids, SyncSlavePids, Policy, GmPids, Decorators}) ->
              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
               Pid, SlavePids, SyncSlavePids, [], Policy, GmPids, Decorators}
      end,
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, down_slave_nodes, policy, gm_pids, decorators]).

-spec queue_state() -> 'ok'.

%% Schema upgrade step: append a state field (initialised to 'live') to
%% queue records.
queue_state() ->
    ok = queue_state(rabbit_queue),
    ok = queue_state(rabbit_durable_queue).

queue_state(Table) ->
    transform(
      Table,
      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
            Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators}) ->
              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
               Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
               live}
      end,
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, down_slave_nodes, policy, gm_pids, decorators, state]).

-spec recoverable_slaves() -> 'ok'.

%% Schema upgrade step: rename the down_slave_nodes attribute to
%% recoverable_slaves. The record values are untouched; only the
%% attribute list changes.
recoverable_slaves() ->
    ok = recoverable_slaves(rabbit_queue),
    ok = recoverable_slaves(rabbit_durable_queue).

recoverable_slaves(Table) ->
    transform(
      Table, fun (Q) -> Q end, %% Don't change shape of record
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, recoverable_slaves, policy, gm_pids, decorators,
       state]).

%% Schema upgrade step: append a policy_version counter, starting at 0,
%% to queue records.
policy_version() ->
    ok = policy_version(rabbit_queue),
    ok = policy_version(rabbit_durable_queue).

policy_version(Table) ->
    transform(
      Table,
      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
            Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
            State}) ->
              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
               Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
               State, 0}
      end,
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, recoverable_slaves, policy, gm_pids, decorators, state,
       policy_version]).

%% Schema upgrade step: append an (empty) slave_pids_pending_shutdown
%% field to queue records.
slave_pids_pending_shutdown() ->
    ok = slave_pids_pending_shutdown(rabbit_queue),
    ok = slave_pids_pending_shutdown(rabbit_durable_queue).

slave_pids_pending_shutdown(Table) ->
    transform(
      Table,
      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
            Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
            State, PolicyVersion}) ->
              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
               Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
               State, PolicyVersion, []}
      end,
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, recoverable_slaves, policy, gm_pids, decorators, state,
       policy_version, slave_pids_pending_shutdown]).

-spec operator_policies() -> 'ok'.

%% Schema upgrade step: add an operator_policy field (undefined) to both
%% exchange and queue records; for queues it is inserted after policy.
operator_policies() ->
    ok = exchange_operator_policies(rabbit_exchange),
    ok = exchange_operator_policies(rabbit_durable_exchange),
    ok = queue_operator_policies(rabbit_queue),
    ok = queue_operator_policies(rabbit_durable_queue).

exchange_operator_policies(Table) ->
    transform(
      Table,
      fun ({exchange, Name, Type, Dur, AutoDel, Internal,
            Args, Scratches, Policy, Decorators}) ->
              {exchange, Name, Type, Dur, AutoDel, Internal,
               Args, Scratches, Policy, undefined, Decorators}
      end,
      [name, type, durable, auto_delete, internal, arguments, scratches, policy,
       operator_policy, decorators]).

queue_operator_policies(Table) ->
    transform(
      Table,
      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
            Pid, SlavePids, SyncSlavePids, DSN, Policy, GmPids, Decorators,
            State, PolicyVersion, SlavePidsPendingShutdown}) ->
              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
               Pid, SlavePids, SyncSlavePids, DSN, Policy, undefined, GmPids,
               Decorators, State, PolicyVersion, SlavePidsPendingShutdown}
      end,
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, recoverable_slaves, policy, operator_policy,
       gm_pids, decorators, state, policy_version, slave_pids_pending_shutdown]).

-spec queue_vhost_field() -> 'ok'.

%% Schema upgrade step: append a vhost field (copied out of the queue's
%% #resource name) to queue records, then index both queue tables on it.
queue_vhost_field() ->
    ok = queue_vhost_field(rabbit_queue),
    ok = queue_vhost_field(rabbit_durable_queue),
    {atomic, ok} = mnesia:add_table_index(rabbit_queue, vhost),
    {atomic, ok} = mnesia:add_table_index(rabbit_durable_queue, vhost),
    ok.

%% Per-table body of queue_vhost_field/0: the vhost is extracted from the
%% {resource, VHost, queue, QName} name and appended as the last field.
queue_vhost_field(Table) ->
    transform(
      Table,
      fun ({amqqueue, Name = {resource, VHost, queue, _QName}, Durable, AutoDelete, ExclusiveOwner, Arguments,
            Pid, SlavePids, SyncSlavePids, DSN, Policy, OperatorPolicy, GmPids, Decorators,
            State, PolicyVersion, SlavePidsPendingShutdown}) ->
              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
               Pid, SlavePids, SyncSlavePids, DSN, Policy, OperatorPolicy, GmPids, Decorators,
               State, PolicyVersion, SlavePidsPendingShutdown, VHost}
      end,
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, recoverable_slaves, policy, operator_policy,
       gm_pids, decorators, state, policy_version, slave_pids_pending_shutdown, vhost]).

-spec queue_options() -> 'ok'.

%% Schema upgrade step: append an (empty map) options field to queue records.
queue_options() ->
    ok = queue_options(rabbit_queue),
    ok = queue_options(rabbit_durable_queue),
    ok.

queue_options(Table) ->
    transform(
      Table,
      fun ({amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
            Pid, SlavePids, SyncSlavePids, DSN, Policy, OperatorPolicy, GmPids, Decorators,
            State, PolicyVersion, SlavePidsPendingShutdown, VHost}) ->
              {amqqueue, Name, Durable, AutoDelete, ExclusiveOwner, Arguments,
               Pid, SlavePids, SyncSlavePids, DSN, Policy, OperatorPolicy, GmPids, Decorators,
               State, PolicyVersion, SlavePidsPendingShutdown, VHost, #{}}
      end,
      [name, durable, auto_delete, exclusive_owner, arguments, pid, slave_pids,
       sync_slave_pids, recoverable_slaves, policy, operator_policy,
       gm_pids, decorators, state, policy_version, slave_pids_pending_shutdown, vhost, options]).

%% Prior to 3.6.0, passwords were hashed using MD5, this populates
%% existing records with said default. Users created with 3.6.0+ will
%% have internal_user.hashing_algorithm populated by the internal
%% authn backend.

-spec user_password_hashing() -> 'ok'.

%% Schema upgrade step: append hashing_algorithm (the MD5 module) to
%% internal user records.
user_password_hashing() ->
    transform(
      rabbit_user,
      fun ({internal_user, Username, Hash, Tags}) ->
              {internal_user, Username, Hash, Tags, rabbit_password_hashing_md5}
      end,
      [username, password_hash, tags, hashing_algorithm]).

-spec topic_permission() -> 'ok'.
%% Schema upgrade step: create the disc-backed topic permission table.
topic_permission() ->
    create(rabbit_topic_permission,
           [{record_name, topic_permission},
            {attributes, [topic_permission_key, permission]},
            {disc_copies, [node()]}]).

-spec exchange_options() -> 'ok'.

%% Schema upgrade step: append an (empty map) options field to exchange
%% records in both exchange tables.
exchange_options() ->
    ok = exchange_options(rabbit_exchange),
    ok = exchange_options(rabbit_durable_exchange).

exchange_options(Table) ->
    transform(
      Table,
      fun ({exchange, Name, Type, Dur, AutoDel, Internal,
            Args, Scratches, Policy, OperatorPolicy, Decorators}) ->
              {exchange, Name, Type, Dur, AutoDel, Internal,
               Args, Scratches, Policy, OperatorPolicy, Decorators, #{}}
      end,
      [name, type, durable, auto_delete, internal, arguments, scratches, policy,
       operator_policy, decorators, options]).

%%--------------------------------------------------------------------

%% Waits for the table to be available, then rewrites every record with
%% Fun and installs FieldList as the new attribute list.
transform(TableName, Fun, FieldList) ->
    rabbit_table:wait([TableName]),
    {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList),
    ok.

%% As transform/3, but also renames the table's record to NewRecordName.
transform(TableName, Fun, FieldList, NewRecordName) ->
    rabbit_table:wait([TableName]),
    {atomic, ok} = mnesia:transform_table(TableName, Fun, FieldList,
                                          NewRecordName),
    ok.

%% Creates a mnesia table, crashing on anything but {atomic, ok}.
create(Tab, TabDef) ->
    rabbit_log:debug("Will create a schema table named '~s'", [Tab]),
    {atomic, ok} = mnesia:create_table(Tab, TabDef),
    ok.

%% Dumb replacement for rabbit_exchange:declare that does not require
%% the exchange type registry or worker pool to be running by dint of
%% not validating anything and assuming the exchange type does not
%% require serialisation.
%% NB: this assumes the
%% pre-exchange-scratch-space format
%% Dirty-writes a 7-field (pre-scratches) durable exchange record:
%% durable = true, auto_delete = false, internal = false, no arguments.
declare_exchange(XName, Type) ->
    X = {exchange, XName, Type, true, false, false, []},
    ok = mnesia:dirty_write(rabbit_durable_exchange, X).
diff --git a/deps/rabbit/src/rabbit_upgrade_preparation.erl b/deps/rabbit/src/rabbit_upgrade_preparation.erl
new file mode 100644
index 0000000000..fc1de24610
--- /dev/null
+++ b/deps/rabbit/src/rabbit_upgrade_preparation.erl
@@ -0,0 +1,51 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_upgrade_preparation).

-export([await_online_quorum_plus_one/1, await_online_synchronised_mirrors/1]).

%%
%% API
%%

%% Milliseconds to sleep between successive checks.
-define(SAMPLING_INTERVAL, 200).

%% Polls until rabbit_quorum_queue:list_with_minimum_quorum/0 returns []
%% (i.e. no quorum queue is down to its minimum quorum) or until roughly
%% Timeout milliseconds have elapsed. Returns true on success, false on
%% timeout.
await_online_quorum_plus_one(Timeout) ->
    Iterations = ceil(Timeout / ?SAMPLING_INTERVAL),
    do_await_safe_online_quorum(Iterations).


%% Polls until no local mirrored classic queue lacks a synchronised
%% mirror, or until roughly Timeout milliseconds have elapsed. Returns
%% true on success, false on timeout.
await_online_synchronised_mirrors(Timeout) ->
    Iterations = ceil(Timeout / ?SAMPLING_INTERVAL),
    do_await_online_synchronised_mirrors(Iterations).


%%
%% Implementation
%%

%% Countdown poll loop; 0 iterations left means we timed out.
do_await_safe_online_quorum(0) ->
    false;
do_await_safe_online_quorum(IterationsLeft) ->
    case rabbit_quorum_queue:list_with_minimum_quorum() of
        [] -> true;
        List when is_list(List) ->
            timer:sleep(?SAMPLING_INTERVAL),
            do_await_safe_online_quorum(IterationsLeft - 1)
    end.


%% Countdown poll loop; 0 iterations left means we timed out.
do_await_online_synchronised_mirrors(0) ->
    false;
do_await_online_synchronised_mirrors(IterationsLeft) ->
    case rabbit_amqqueue:list_local_mirrored_classic_without_synchronised_mirrors() of
        [] -> true;
        List when is_list(List) ->
            timer:sleep(?SAMPLING_INTERVAL),
            do_await_online_synchronised_mirrors(IterationsLeft - 1)
    end.
diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl new file mode 100644 index 0000000000..cf6fa4a189 --- /dev/null +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -0,0 +1,3015 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_variable_queue). + +-export([init/3, terminate/2, delete_and_terminate/2, delete_crashed/1, + purge/1, purge_acks/1, + publish/6, publish_delivered/5, + batch_publish/4, batch_publish_delivered/4, + discard/4, drain_confirmed/1, + dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, + ackfold/4, fold/3, len/1, is_empty/1, depth/1, + set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1, + handle_pre_hibernate/1, resume/1, msg_rates/1, + info/2, invoke/3, is_duplicate/2, set_queue_mode/2, + zip_msgs_and_acks/4, multiple_routing_keys/0, handle_info/2]). + +-export([start/2, stop/1]). + +%% exported for testing only +-export([start_msg_store/3, stop_msg_store/1, init/6]). + +-export([move_messages_to_vhost_store/0]). + +-export([migrate_queue/3, migrate_message/3, get_per_vhost_store_client/2, + get_global_store_client/1, log_upgrade_verbose/1, + log_upgrade_verbose/2]). + +-include_lib("stdlib/include/qlc.hrl"). + +-define(QUEUE_MIGRATION_BATCH_SIZE, 100). +-define(EMPTY_START_FUN_STATE, {fun (ok) -> finished end, ok}). + +%%---------------------------------------------------------------------------- +%% Messages, and their position in the queue, can be in memory or on +%% disk, or both. Persistent messages will have both message and +%% position pushed to disk as soon as they arrive; transient messages +%% can be written to disk (and thus both types can be evicted from +%% memory) under memory pressure. 
The question of whether a message is +%% in RAM and whether it is persistent are orthogonal. +%% +%% Messages are persisted using the queue index and the message +%% store. Normally the queue index holds the position of the message +%% *within this queue* along with a couple of small bits of metadata, +%% while the message store holds the message itself (including headers +%% and other properties). +%% +%% However, as an optimisation, small messages can be embedded +%% directly in the queue index and bypass the message store +%% altogether. +%% +%% Definitions: +%% +%% alpha: this is a message where both the message itself, and its +%% position within the queue are held in RAM +%% +%% beta: this is a message where the message itself is only held on +%% disk (if persisted to the message store) but its position +%% within the queue is held in RAM. +%% +%% gamma: this is a message where the message itself is only held on +%% disk, but its position is both in RAM and on disk. +%% +%% delta: this is a collection of messages, represented by a single +%% term, where the messages and their position are only held on +%% disk. +%% +%% Note that for persistent messages, the message and its position +%% within the queue are always held on disk, *in addition* to being in +%% one of the above classifications. +%% +%% Also note that within this code, the term gamma seldom +%% appears. It's frequently the case that gammas are defined by betas +%% who have had their queue position recorded on disk. +%% +%% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though +%% many of these steps are frequently skipped. q1 and q4 only hold +%% alphas, q2 and q3 hold both betas and gammas. When a message +%% arrives, its classification is determined. It is then added to the +%% rightmost appropriate queue. +%% +%% If a new message is determined to be a beta or gamma, q1 is +%% empty. If a new message is determined to be a delta, q1 and q2 are +%% empty (and actually q4 too). 
+%% +%% When removing messages from a queue, if q4 is empty then q3 is read +%% directly. If q3 becomes empty then the next segment's worth of +%% messages from delta are read into q3, reducing the size of +%% delta. If the queue is non empty, either q4 or q3 contain +%% entries. It is never permitted for delta to hold all the messages +%% in the queue. +%% +%% The duration indicated to us by the memory_monitor is used to +%% calculate, given our current ingress and egress rates, how many +%% messages we should hold in RAM (i.e. as alphas). We track the +%% ingress and egress rates for both messages and pending acks and +%% rates for both are considered when calculating the number of +%% messages to hold in RAM. When we need to push alphas to betas or +%% betas to gammas, we favour writing out messages that are further +%% from the head of the queue. This minimises writes to disk, as the +%% messages closer to the tail of the queue stay in the queue for +%% longer, thus do not need to be replaced as quickly by sending other +%% messages to disk. +%% +%% Whilst messages are pushed to disk and forgotten from RAM as soon +%% as requested by a new setting of the queue RAM duration, the +%% inverse is not true: we only load messages back into RAM as +%% demanded as the queue is read from. Thus only publishes to the +%% queue will take up available spare capacity. +%% +%% When we report our duration to the memory monitor, we calculate +%% average ingress and egress rates over the last two samples, and +%% then calculate our duration based on the sum of the ingress and +%% egress rates. More than two samples could be used, but it's a +%% balance between responding quickly enough to changes in +%% producers/consumers versus ignoring temporary blips. The problem +%% with temporary blips is that with just a few queues, they can have +%% substantial impact on the calculation of the average duration and +%% hence cause unnecessary I/O. 
Another alternative is to increase the +%% amqqueue_process:RAM_DURATION_UPDATE_PERIOD to beyond 5 +%% seconds. However, that then runs the risk of being too slow to +%% inform the memory monitor of changes. Thus a 5 second interval, +%% plus a rolling average over the last two samples seems to work +%% well in practice. +%% +%% The sum of the ingress and egress rates is used because the egress +%% rate alone is not sufficient. Adding in the ingress rate means that +%% queues which are being flooded by messages are given more memory, +%% resulting in them being able to process the messages faster (by +%% doing less I/O, or at least deferring it) and thus helping keep +%% their mailboxes empty and thus the queue as a whole is more +%% responsive. If such a queue also has fast but previously idle +%% consumers, the consumer can then start to be driven as fast as it +%% can go, whereas if only egress rate was being used, the incoming +%% messages may have to be written to disk and then read back in, +%% resulting in the hard disk being a bottleneck in driving the +%% consumers. Generally, we want to give Rabbit every chance of +%% getting rid of messages as fast as possible and remaining +%% responsive, and using only the egress rate impacts that goal. +%% +%% Once the queue has more alphas than the target_ram_count, the +%% surplus must be converted to betas, if not gammas, if not rolled +%% into delta. The conditions under which these transitions occur +%% reflect the conflicting goals of minimising RAM cost per msg, and +%% minimising CPU cost per msg. Once the msg has become a beta, its +%% payload is no longer in RAM, thus a read from the msg_store must +%% occur before the msg can be delivered, but the RAM cost of a beta +%% is the same as a gamma, so converting a beta to gamma will not free +%% up any further RAM. To reduce the RAM cost further, the gamma must +%% be rolled into delta. 
Whilst recovering a beta or a gamma to an +%% alpha requires only one disk read (from the msg_store), recovering +%% a msg from within delta will require two reads (queue_index and +%% then msg_store). But delta has a near-0 per-msg RAM cost. So the +%% conflict is between using delta more, which will free up more +%% memory, but require additional CPU and disk ops, versus using delta +%% less and gammas and betas more, which will cost more memory, but +%% require fewer disk ops and less CPU overhead. +%% +%% In the case of a persistent msg published to a durable queue, the +%% msg is immediately written to the msg_store and queue_index. If +%% then additionally converted from an alpha, it'll immediately go to +%% a gamma (as it's already in queue_index), and cannot exist as a +%% beta. Thus a durable queue with a mixture of persistent and +%% transient msgs in it which has more messages than permitted by the +%% target_ram_count may contain an interspersed mixture of betas and +%% gammas in q2 and q3. +%% +%% There is then a ratio that controls how many betas and gammas there +%% can be. This is based on the target_ram_count and thus expresses +%% the fact that as the number of permitted alphas in the queue falls, +%% so should the number of betas and gammas fall (i.e. delta +%% grows). If q2 and q3 contain more than the permitted number of +%% betas and gammas, then the surplus are forcibly converted to gammas +%% (as necessary) and then rolled into delta. The ratio is that +%% delta/(betas+gammas+delta) equals +%% (betas+gammas+delta)/(target_ram_count+betas+gammas+delta). I.e. as +%% the target_ram_count shrinks to 0, so must betas and gammas. +%% +%% The conversion of betas to deltas is done if there are at least +%% ?IO_BATCH_SIZE betas in q2 & q3. This value should not be too small, +%% otherwise the frequent operations on the queues of q2 and q3 will not be +%% effectively amortised (switching the direction of queue access defeats +%% amortisation). 
Note that there is a natural upper bound due to credit_flow +%% limits on the alpha to beta conversion. +%% +%% The conversion from alphas to betas is chunked due to the +%% credit_flow limits of the msg_store. This further smooths the +%% effects of changes to the target_ram_count and ensures the queue +%% remains responsive even when there is a large amount of IO work to +%% do. The 'resume' callback is utilised to ensure that conversions +%% are done as promptly as possible whilst ensuring the queue remains +%% responsive. +%% +%% In the queue we keep track of both messages that are pending +%% delivery and messages that are pending acks. In the event of a +%% queue purge, we only need to load qi segments if the queue has +%% elements in deltas (i.e. it came under significant memory +%% pressure). In the event of a queue deletion, in addition to the +%% preceding, by keeping track of pending acks in RAM, we do not need +%% to search through qi segments looking for messages that are yet to +%% be acknowledged. +%% +%% Pending acks are recorded in memory by storing the message itself. +%% If the message has been sent to disk, we do not store the message +%% content. During memory reduction, pending acks containing message +%% content have that content removed and the corresponding messages +%% are pushed out to disk. +%% +%% Messages from pending acks are returned to q4, q3 and delta during +%% requeue, based on the limits of seq_id contained in each. Requeued +%% messages retain their original seq_id, maintaining order +%% when requeued. +%% +%% The order in which alphas are pushed to betas and pending acks +%% are pushed to disk is determined dynamically. We always prefer to +%% push messages for the source (alphas or acks) that is growing the +%% fastest (with growth measured as avg. ingress - avg. egress). +%% +%% Notes on Clean Shutdown +%% (This documents behaviour in variable_queue, queue_index and +%% msg_store.) 
+%% +%% In order to try to achieve as fast a start-up as possible, if a +%% clean shutdown occurs, we try to save out state to disk to reduce +%% work on startup. In the msg_store this takes the form of the +%% index_module's state, plus the file_summary ets table, and client +%% refs. In the VQ, this takes the form of the count of persistent +%% messages in the queue and references into the msg_stores. The +%% queue_index adds to these terms the details of its segments and +%% stores the terms in the queue directory. +%% +%% Two message stores are used. One is created for persistent messages +%% to durable queues that must survive restarts, and the other is used +%% for all other messages that just happen to need to be written to +%% disk. On start up we can therefore nuke the transient message +%% store, and be sure that the messages in the persistent store are +%% all that we need. +%% +%% The references to the msg_stores are there so that the msg_store +%% knows to only trust its saved state if all of the queues it was +%% previously talking to come up cleanly. Likewise, the queues +%% themselves (esp queue_index) skips work in init if all the queues +%% and msg_store were shutdown cleanly. This gives both good speed +%% improvements and also robustness so that if anything possibly went +%% wrong in shutdown (or there was subsequent manual tampering), all +%% messages and queues that can be recovered are recovered, safely. +%% +%% To delete transient messages lazily, the variable_queue, on +%% startup, stores the next_seq_id reported by the queue_index as the +%% transient_threshold. From that point on, whenever it's reading a +%% message off disk via the queue_index, if the seq_id is below this +%% threshold and the message is transient then it drops the message +%% (the message itself won't exist on disk because it would have been +%% stored in the transient msg_store which would have had its saved +%% state nuked on startup). 
This avoids the expensive operation of +%% scanning the entire queue on startup in order to delete transient +%% messages that were only pushed to disk to save memory. +%% +%%---------------------------------------------------------------------------- + +-behaviour(rabbit_backing_queue). + +-record(vqstate, + { q1, + q2, + delta, + q3, + q4, + next_seq_id, + ram_pending_ack, %% msgs using store, still in RAM + disk_pending_ack, %% msgs in store, paged out + qi_pending_ack, %% msgs using qi, *can't* be paged out + index_state, + msg_store_clients, + durable, + transient_threshold, + qi_embed_msgs_below, + + len, %% w/o unacked + bytes, %% w/o unacked + unacked_bytes, + persistent_count, %% w unacked + persistent_bytes, %% w unacked + delta_transient_bytes, %% + + target_ram_count, + ram_msg_count, %% w/o unacked + ram_msg_count_prev, + ram_ack_count_prev, + ram_bytes, %% w unacked + out_counter, + in_counter, + rates, + msgs_on_disk, + msg_indices_on_disk, + unconfirmed, + confirmed, + ack_out_counter, + ack_in_counter, + %% Unlike the other counters these two do not feed into + %% #rates{} and get reset + disk_read_count, + disk_write_count, + + io_batch_size, + + %% default queue or lazy queue + mode, + %% number of reduce_memory_usage executions, once it + %% reaches a threshold the queue will manually trigger a runtime GC + %% see: maybe_execute_gc/1 + memory_reduction_run_count, + %% Queue data is grouped by VHost. We need to store it + %% to work with queue index. + virtual_host, + waiting_bump = false + }). + +-record(rates, { in, out, ack_in, ack_out, timestamp }). + +-record(msg_status, + { seq_id, + msg_id, + msg, + is_persistent, + is_delivered, + msg_in_store, + index_on_disk, + persist_to, + msg_props + }). + +-record(delta, + { start_seq_id, %% start_seq_id is inclusive + count, + transient, + end_seq_id %% end_seq_id is exclusive + }). + +-define(HEADER_GUESS_SIZE, 100). 
%% see determine_persist_to/2 +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + +-define(QUEUE, lqueue). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include_lib("rabbit_common/include/rabbit_framing.hrl"). +-include("amqqueue.hrl"). + +%%---------------------------------------------------------------------------- + +-rabbit_upgrade({multiple_routing_keys, local, []}). +-rabbit_upgrade({move_messages_to_vhost_store, message_store, []}). + +-type seq_id() :: non_neg_integer(). + +-type rates() :: #rates { in :: float(), + out :: float(), + ack_in :: float(), + ack_out :: float(), + timestamp :: rabbit_types:timestamp()}. + +-type delta() :: #delta { start_seq_id :: non_neg_integer(), + count :: non_neg_integer(), + end_seq_id :: non_neg_integer() }. + +%% The compiler (rightfully) complains that ack() and state() are +%% unused. For this reason we duplicate a -spec from +%% rabbit_backing_queue with the only intent being to remove +%% warnings. The problem here is that we can't parameterise the BQ +%% behaviour by these two types as we would like to. We still leave +%% these here for documentation purposes. +-type ack() :: seq_id(). 
+-type state() :: #vqstate { + q1 :: ?QUEUE:?QUEUE(), + q2 :: ?QUEUE:?QUEUE(), + delta :: delta(), + q3 :: ?QUEUE:?QUEUE(), + q4 :: ?QUEUE:?QUEUE(), + next_seq_id :: seq_id(), + ram_pending_ack :: gb_trees:tree(), + disk_pending_ack :: gb_trees:tree(), + qi_pending_ack :: gb_trees:tree(), + index_state :: any(), + msg_store_clients :: 'undefined' | {{any(), binary()}, + {any(), binary()}}, + durable :: boolean(), + transient_threshold :: non_neg_integer(), + qi_embed_msgs_below :: non_neg_integer(), + + len :: non_neg_integer(), + bytes :: non_neg_integer(), + unacked_bytes :: non_neg_integer(), + + persistent_count :: non_neg_integer(), + persistent_bytes :: non_neg_integer(), + + target_ram_count :: non_neg_integer() | 'infinity', + ram_msg_count :: non_neg_integer(), + ram_msg_count_prev :: non_neg_integer(), + ram_ack_count_prev :: non_neg_integer(), + ram_bytes :: non_neg_integer(), + out_counter :: non_neg_integer(), + in_counter :: non_neg_integer(), + rates :: rates(), + msgs_on_disk :: gb_sets:set(), + msg_indices_on_disk :: gb_sets:set(), + unconfirmed :: gb_sets:set(), + confirmed :: gb_sets:set(), + ack_out_counter :: non_neg_integer(), + ack_in_counter :: non_neg_integer(), + disk_read_count :: non_neg_integer(), + disk_write_count :: non_neg_integer(), + + io_batch_size :: pos_integer(), + mode :: 'default' | 'lazy', + memory_reduction_run_count :: non_neg_integer()}. + +-define(BLANK_DELTA, #delta { start_seq_id = undefined, + count = 0, + transient = 0, + end_seq_id = undefined }). +-define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, + count = 0, + transient = 0, + end_seq_id = Z }). + +-define(MICROS_PER_SECOND, 1000000.0). + +%% We're sampling every 5s for RAM duration; a half life that is of +%% the same order of magnitude is probably about right. +-define(RATE_AVG_HALF_LIFE, 5.0). + +%% We will recalculate the #rates{} every time we get asked for our +%% RAM duration, or every N messages published, whichever is +%% sooner. 
We do this since the priority calculations in +%% rabbit_amqqueue_process need fairly fresh rates. +-define(MSGS_PER_RATE_CALC, 100). + +%% we define the garbage collector threshold +%% it needs to tune the `reduce_memory_use` calls. Thus, the garbage collection. +%% see: rabbitmq-server-973 and rabbitmq-server-964 +-define(DEFAULT_EXPLICIT_GC_RUN_OP_THRESHOLD, 1000). +-define(EXPLICIT_GC_RUN_OP_THRESHOLD(Mode), + case get(explicit_gc_run_operation_threshold) of + undefined -> + Val = explicit_gc_run_operation_threshold_for_mode(Mode), + put(explicit_gc_run_operation_threshold, Val), + Val; + Val -> Val + end). + +explicit_gc_run_operation_threshold_for_mode(Mode) -> + {Key, Fallback} = case Mode of + lazy -> {lazy_queue_explicit_gc_run_operation_threshold, + ?DEFAULT_EXPLICIT_GC_RUN_OP_THRESHOLD}; + _ -> {queue_explicit_gc_run_operation_threshold, + ?DEFAULT_EXPLICIT_GC_RUN_OP_THRESHOLD} + end, + rabbit_misc:get_env(rabbit, Key, Fallback). + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +start(VHost, DurableQueues) -> + {AllTerms, StartFunState} = rabbit_queue_index:start(VHost, DurableQueues), + %% Group recovery terms by vhost. + ClientRefs = [Ref || Terms <- AllTerms, + Terms /= non_clean_shutdown, + begin + Ref = proplists:get_value(persistent_ref, Terms), + Ref =/= undefined + end], + start_msg_store(VHost, ClientRefs, StartFunState), + {ok, AllTerms}. + +stop(VHost) -> + ok = stop_msg_store(VHost), + ok = rabbit_queue_index:stop(VHost). + +start_msg_store(VHost, Refs, StartFunState) when is_list(Refs); Refs == undefined -> + rabbit_log:info("Starting message stores for vhost '~s'~n", [VHost]), + do_start_msg_store(VHost, ?TRANSIENT_MSG_STORE, undefined, ?EMPTY_START_FUN_STATE), + do_start_msg_store(VHost, ?PERSISTENT_MSG_STORE, Refs, StartFunState), + ok. 

%% Start one message store (transient or persistent) for the given
%% vhost, logging the outcome. A failure to start is fatal for the
%% caller: we exit with the error we got.
do_start_msg_store(VHost, Type, Refs, StartFunState) ->
    Result = rabbit_vhost_msg_store:start(VHost, Type, Refs, StartFunState),
    case Result of
        {ok, _Pid} ->
            rabbit_log:info("Started message store of type ~s for vhost '~s'~n",
                            [abbreviated_type(Type), VHost]);
        {error, {no_such_vhost, VHost}} = VHostErr ->
            %% The vhost disappeared while we were starting up.
            rabbit_log:error("Failed to start message store of type ~s for vhost '~s': the vhost no longer exists!~n",
                             [Type, VHost]),
            exit(VHostErr);
        {error, Reason} ->
            rabbit_log:error("Failed to start message store of type ~s for vhost '~s': ~p~n",
                             [Type, VHost, Reason]),
            exit({error, Reason})
    end.

%% Short, human-readable tag for a message store type, used in log lines.
abbreviated_type(?TRANSIENT_MSG_STORE)  -> transient;
abbreviated_type(?PERSISTENT_MSG_STORE) -> persistent.

%% Stop both (transient and persistent) message stores for a vhost.
stop_msg_store(VHost) ->
    lists:foreach(
      fun (Type) -> rabbit_vhost_msg_store:stop(VHost, Type) end,
      [?TRANSIENT_MSG_STORE, ?PERSISTENT_MSG_STORE]),
    ok.

%% Public init/3: wraps the caller's async Callback in the three
%% write-confirmation hooks and delegates to init/6.
init(Queue, Recover, Callback) ->
    OnMsgsWritten =
        fun (MsgIds, ActionTaken) ->
                msgs_written_to_disk(Callback, MsgIds, ActionTaken)
        end,
    OnIndicesWritten =
        fun (MsgIds) -> msg_indices_written_to_disk(Callback, MsgIds) end,
    OnBothWritten =
        fun (MsgIds) -> msgs_and_indices_written_to_disk(Callback, MsgIds) end,
    init(Queue, Recover, Callback,
         OnMsgsWritten, OnIndicesWritten, OnBothWritten).
+ +init(Q, new, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueue(Q) -> + QueueName = amqqueue:get_name(Q), + IsDurable = amqqueue:is_durable(Q), + IndexState = rabbit_queue_index:init(QueueName, + MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), + VHost = QueueName#resource.virtual_host, + init(IsDurable, IndexState, 0, 0, [], + case IsDurable of + true -> msg_store_client_init(?PERSISTENT_MSG_STORE, + MsgOnDiskFun, AsyncCallback, VHost); + false -> undefined + end, + msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, + AsyncCallback, VHost), VHost); + +%% We can be recovering a transient queue if it crashed +init(Q, Terms, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueue(Q) -> + QueueName = amqqueue:get_name(Q), + IsDurable = amqqueue:is_durable(Q), + {PRef, RecoveryTerms} = process_recovery_terms(Terms), + VHost = QueueName#resource.virtual_host, + {PersistentClient, ContainsCheckFun} = + case IsDurable of + true -> C = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef, + MsgOnDiskFun, AsyncCallback, + VHost), + {C, fun (MsgId) when is_binary(MsgId) -> + rabbit_msg_store:contains(MsgId, C); + (#basic_message{is_persistent = Persistent}) -> + Persistent + end}; + false -> {undefined, fun(_MsgId) -> false end} + end, + TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE, + undefined, AsyncCallback, + VHost), + {DeltaCount, DeltaBytes, IndexState} = + rabbit_queue_index:recover( + QueueName, RecoveryTerms, + rabbit_vhost_msg_store:successfully_recovered_state( + VHost, + ?PERSISTENT_MSG_STORE), + ContainsCheckFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), + init(IsDurable, IndexState, DeltaCount, DeltaBytes, RecoveryTerms, + PersistentClient, TransientClient, VHost). 

%% Split recovery terms into {PersistentRef, UsableTerms}. After a
%% non-clean shutdown there is no trustworthy persistent ref, so a
%% fresh one is generated; likewise when the stored terms carry no
%% persistent_ref.
process_recovery_terms(Terms=non_clean_shutdown) ->
    {rabbit_guid:gen(), Terms};
process_recovery_terms(Terms) ->
    case proplists:get_value(persistent_ref, Terms) of
        undefined -> {rabbit_guid:gen(), []};
        PRef      -> {PRef, Terms}
    end.

%% Orderly queue shutdown: flush pending acks to disk, terminate the
%% persistent msg_store client (keeping its ref so the store contents
%% can be recovered), delete-and-terminate the transient client, and
%% hand the recovery terms to the queue index.
terminate(_Reason, State) ->
    State1 = #vqstate { virtual_host      = VHost,
                        persistent_count  = PCount,
                        persistent_bytes  = PBytes,
                        index_state       = IndexState,
                        msg_store_clients = {MSCStateP, MSCStateT} } =
        purge_pending_ack(true, State),
    %% MSCStateP is undefined for non-durable queues (see init/6).
    PRef = case MSCStateP of
               undefined -> undefined;
               _         -> ok = maybe_client_terminate(MSCStateP),
                            rabbit_msg_store:client_ref(MSCStateP)
           end,
    ok = rabbit_msg_store:client_delete_and_terminate(MSCStateT),
    Terms = [{persistent_ref,   PRef},
             {persistent_count, PCount},
             {persistent_bytes, PBytes}],
    a(State1#vqstate {
        index_state = rabbit_queue_index:terminate(VHost, Terms, IndexState),
        msg_store_clients = undefined }).

%% the only difference between purge and delete is that delete also
%% needs to delete everything that's been delivered and not ack'd.
delete_and_terminate(_Reason, State) ->
    %% Normally when we purge messages we interact with the qi by
    %% issues delivers and acks for every purged message. In this case
    %% we don't need to do that, so we just delete the qi.
    State1 = purge_and_index_reset(State),
    State2 = #vqstate { msg_store_clients = {MSCStateP, MSCStateT} } =
        purge_pending_ack_delete_and_terminate(State1),
    case MSCStateP of
        undefined -> ok;
        _         -> rabbit_msg_store:client_delete_and_terminate(MSCStateP)
    end,
    rabbit_msg_store:client_delete_and_terminate(MSCStateT),
    a(State2 #vqstate { msg_store_clients = undefined }).

%% Erase the queue index of a queue that crashed; nothing else about
%% the queue is recoverable at this point.
delete_crashed(Q) when ?is_amqqueue(Q) ->
    QName = amqqueue:get_name(Q),
    ok = rabbit_queue_index:erase(QName).

%% Remove all ready messages, returning how many were removed. The
%% fast path (reset the whole index) is only safe when there are no
%% pending acks and no unconfirmed messages.
purge(State = #vqstate { len = Len }) ->
    case is_pending_ack_empty(State) and is_unconfirmed_empty(State) of
        true ->
            {Len, purge_and_index_reset(State)};
        false ->
            {Len, purge_when_pending_acks(State)}
    end.

%% Drop all pending acks (without requeueing the messages).
purge_acks(State) -> a(purge_pending_ack(false, State)).

%% Publish one message onto the tail of the queue.
publish(Msg, MsgProps, IsDelivered, ChPid, Flow, State) ->
    State1 =
        publish1(Msg, MsgProps, IsDelivered, ChPid, Flow,
                 fun maybe_write_to_disk/4,
                 State),
    a(maybe_reduce_memory_use(maybe_update_rates(State1))).

%% Publish a batch of messages; index writes are buffered per message
%% and flushed once at the end (see ui/1).
batch_publish(Publishes, ChPid, Flow, State) ->
    {ChPid, Flow, State1} =
        lists:foldl(fun batch_publish1/2, {ChPid, Flow, State}, Publishes),
    State2 = ui(State1),
    a(maybe_reduce_memory_use(maybe_update_rates(State2))).

%% Publish a message that is already considered delivered: it goes
%% straight to the pending-ack state. Returns its seq id as ack tag.
publish_delivered(Msg, MsgProps, ChPid, Flow, State) ->
    {SeqId, State1} =
        publish_delivered1(Msg, MsgProps, ChPid, Flow,
                           fun maybe_write_to_disk/4,
                           State),
    {SeqId, a(maybe_reduce_memory_use(maybe_update_rates(State1)))}.

%% Batch variant of publish_delivered/5; ack tags are returned in
%% publish order (the fold accumulates them reversed).
batch_publish_delivered(Publishes, ChPid, Flow, State) ->
    {ChPid, Flow, SeqIds, State1} =
        lists:foldl(fun batch_publish_delivered1/2,
                    {ChPid, Flow, [], State}, Publishes),
    State2 = ui(State1),
    {lists:reverse(SeqIds), a(maybe_reduce_memory_use(maybe_update_rates(State2)))}.

%% No-op: this backing queue does not track discarded messages.
discard(_MsgId, _ChPid, _Flow, State) -> State.

%% Return (and clear) the accumulated set of confirmed message ids.
drain_confirmed(State = #vqstate { confirmed = C }) ->
    case gb_sets:is_empty(C) of
        true  -> {[], State}; %% common case
        false -> {gb_sets:to_list(C), State #vqstate {
                                        confirmed = gb_sets:new() }}
    end.

%% Drop messages from the head of the queue while Pred holds.
dropwhile(Pred, State) ->
    {MsgProps, State1} =
        remove_by_predicate(Pred, State),
    {MsgProps, a(State1)}.

%% Fetch messages from the head while Pred holds, folding Fun over
%% each fetched message with accumulator Acc.
fetchwhile(Pred, Fun, Acc, State) ->
    {MsgProps, Acc1, State1} =
        fetch_by_predicate(Pred, Fun, Acc, State),
    {MsgProps, Acc1, a(State1)}.
+ +fetch(AckRequired, State) -> + case queue_out(State) of + {empty, State1} -> + {empty, a(State1)}; + {{value, MsgStatus}, State1} -> + %% it is possible that the message wasn't read from disk + %% at this point, so read it in. + {Msg, State2} = read_msg(MsgStatus, State1), + {AckTag, State3} = remove(AckRequired, MsgStatus, State2), + {{Msg, MsgStatus#msg_status.is_delivered, AckTag}, a(State3)} + end. + +drop(AckRequired, State) -> + case queue_out(State) of + {empty, State1} -> + {empty, a(State1)}; + {{value, MsgStatus}, State1} -> + {AckTag, State2} = remove(AckRequired, MsgStatus, State1), + {{MsgStatus#msg_status.msg_id, AckTag}, a(State2)} + end. + +%% Duplicated from rabbit_backing_queue +-spec ack([ack()], state()) -> {[rabbit_guid:guid()], state()}. + +ack([], State) -> + {[], State}; +%% optimisation: this head is essentially a partial evaluation of the +%% general case below, for the single-ack case. +ack([SeqId], State) -> + case remove_pending_ack(true, SeqId, State) of + {none, _} -> + {[], State}; + {#msg_status { msg_id = MsgId, + is_persistent = IsPersistent, + msg_in_store = MsgInStore, + index_on_disk = IndexOnDisk }, + State1 = #vqstate { index_state = IndexState, + msg_store_clients = MSCState, + ack_out_counter = AckOutCount }} -> + IndexState1 = case IndexOnDisk of + true -> rabbit_queue_index:ack([SeqId], IndexState); + false -> IndexState + end, + case MsgInStore of + true -> ok = msg_store_remove(MSCState, IsPersistent, [MsgId]); + false -> ok + end, + {[MsgId], + a(State1 #vqstate { index_state = IndexState1, + ack_out_counter = AckOutCount + 1 })} + end; +ack(AckTags, State) -> + {{IndexOnDiskSeqIds, MsgIdsByStore, AllMsgIds}, + State1 = #vqstate { index_state = IndexState, + msg_store_clients = MSCState, + ack_out_counter = AckOutCount }} = + lists:foldl( + fun (SeqId, {Acc, State2}) -> + case remove_pending_ack(true, SeqId, State2) of + {none, _} -> + {Acc, State2}; + {MsgStatus, State3} -> + {accumulate_ack(MsgStatus, Acc), 
State3} + end + end, {accumulate_ack_init(), State}, AckTags), + IndexState1 = rabbit_queue_index:ack(IndexOnDiskSeqIds, IndexState), + remove_msgs_by_id(MsgIdsByStore, MSCState), + {lists:reverse(AllMsgIds), + a(State1 #vqstate { index_state = IndexState1, + ack_out_counter = AckOutCount + length(AckTags) })}. + +requeue(AckTags, #vqstate { mode = default, + delta = Delta, + q3 = Q3, + q4 = Q4, + in_counter = InCounter, + len = Len } = State) -> + {SeqIds, Q4a, MsgIds, State1} = queue_merge(lists:sort(AckTags), Q4, [], + beta_limit(Q3), + fun publish_alpha/2, State), + {SeqIds1, Q3a, MsgIds1, State2} = queue_merge(SeqIds, Q3, MsgIds, + delta_limit(Delta), + fun publish_beta/2, State1), + {Delta1, MsgIds2, State3} = delta_merge(SeqIds1, Delta, MsgIds1, + State2), + MsgCount = length(MsgIds2), + {MsgIds2, a(maybe_reduce_memory_use( + maybe_update_rates(ui( + State3 #vqstate { delta = Delta1, + q3 = Q3a, + q4 = Q4a, + in_counter = InCounter + MsgCount, + len = Len + MsgCount }))))}; +requeue(AckTags, #vqstate { mode = lazy, + delta = Delta, + q3 = Q3, + in_counter = InCounter, + len = Len } = State) -> + {SeqIds, Q3a, MsgIds, State1} = queue_merge(lists:sort(AckTags), Q3, [], + delta_limit(Delta), + fun publish_beta/2, State), + {Delta1, MsgIds1, State2} = delta_merge(SeqIds, Delta, MsgIds, + State1), + MsgCount = length(MsgIds1), + {MsgIds1, a(maybe_reduce_memory_use( + maybe_update_rates(ui( + State2 #vqstate { delta = Delta1, + q3 = Q3a, + in_counter = InCounter + MsgCount, + len = Len + MsgCount }))))}. + +ackfold(MsgFun, Acc, State, AckTags) -> + {AccN, StateN} = + lists:foldl(fun(SeqId, {Acc0, State0}) -> + MsgStatus = lookup_pending_ack(SeqId, State0), + {Msg, State1} = read_msg(MsgStatus, State0), + {MsgFun(Msg, SeqId, Acc0), State1} + end, {Acc, State}, AckTags), + {AccN, a(StateN)}. 
+ +fold(Fun, Acc, State = #vqstate{index_state = IndexState}) -> + {Its, IndexState1} = lists:foldl(fun inext/2, {[], IndexState}, + [msg_iterator(State), + disk_ack_iterator(State), + ram_ack_iterator(State), + qi_ack_iterator(State)]), + ifold(Fun, Acc, Its, State#vqstate{index_state = IndexState1}). + +len(#vqstate { len = Len }) -> Len. + +is_empty(State) -> 0 == len(State). + +depth(State) -> + len(State) + count_pending_acks(State). + +set_ram_duration_target( + DurationTarget, State = #vqstate { + rates = #rates { in = AvgIngressRate, + out = AvgEgressRate, + ack_in = AvgAckIngressRate, + ack_out = AvgAckEgressRate }, + target_ram_count = TargetRamCount }) -> + Rate = + AvgEgressRate + AvgIngressRate + AvgAckEgressRate + AvgAckIngressRate, + TargetRamCount1 = + case DurationTarget of + infinity -> infinity; + _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec + end, + State1 = State #vqstate { target_ram_count = TargetRamCount1 }, + a(case TargetRamCount1 == infinity orelse + (TargetRamCount =/= infinity andalso + TargetRamCount1 >= TargetRamCount) of + true -> State1; + false -> reduce_memory_use(State1) + end). + +maybe_update_rates(State = #vqstate{ in_counter = InCount, + out_counter = OutCount }) + when InCount + OutCount > ?MSGS_PER_RATE_CALC -> + update_rates(State); +maybe_update_rates(State) -> + State. 

%% Recompute the four moving-average rates (publish in/out, ack
%% in/out) from the counters accumulated since the last update, then
%% reset the counters.
update_rates(State = #vqstate{ in_counter      =     InCount,
                               out_counter     =    OutCount,
                               ack_in_counter  =  AckInCount,
                               ack_out_counter = AckOutCount,
                               rates = #rates{ in        =     InRate,
                                               out       =    OutRate,
                                               ack_in    =  AckInRate,
                                               ack_out   = AckOutRate,
                                               timestamp = TS }}) ->
    Now = erlang:monotonic_time(),

    Rates = #rates { in        = update_rate(Now, TS,     InCount,     InRate),
                     out       = update_rate(Now, TS,    OutCount,    OutRate),
                     ack_in    = update_rate(Now, TS,  AckInCount,  AckInRate),
                     ack_out   = update_rate(Now, TS, AckOutCount, AckOutRate),
                     timestamp = Now },

    State#vqstate{ in_counter      = 0,
                   out_counter     = 0,
                   ack_in_counter  = 0,
                   ack_out_counter = 0,
                   rates           = Rates }.

%% Exponentially-weighted moving average of Count events since TS
%% (half life ?RATE_AVG_HALF_LIFE). If no measurable time has passed,
%% keep the previous rate to avoid dividing by zero.
update_rate(Now, TS, Count, Rate) ->
    Time = erlang:convert_time_unit(Now - TS, native, micro_seconds) /
        ?MICROS_PER_SECOND,
    if
        Time == 0 -> Rate;
        true      -> rabbit_misc:moving_average(Time, ?RATE_AVG_HALF_LIFE,
                                                Count / Time, Rate)
    end.

%% Estimate how long (in seconds) the current in-RAM messages and acks
%% will last at the present average rates; 'infinity' when the queue
%% is essentially idle (all four rates below 0.01/s).
ram_duration(State) ->
    State1 = #vqstate { rates = #rates { in      = AvgIngressRate,
                                         out     = AvgEgressRate,
                                         ack_in  = AvgAckIngressRate,
                                         ack_out = AvgAckEgressRate },
                        ram_msg_count      = RamMsgCount,
                        ram_msg_count_prev = RamMsgCountPrev,
                        ram_pending_ack    = RPA,
                        qi_pending_ack     = QPA,
                        ram_ack_count_prev = RamAckCountPrev } =
        update_rates(State),

    RamAckCount = gb_trees:size(RPA) + gb_trees:size(QPA),

    Duration = %% msgs+acks / (msgs+acks/sec) == sec
        case lists:all(fun (X) -> X < 0.01 end,
                       [AvgEgressRate, AvgIngressRate,
                        AvgAckEgressRate, AvgAckIngressRate]) of
            true  -> infinity;
            false -> (RamMsgCountPrev + RamMsgCount +
                          RamAckCount + RamAckCountPrev) /
                         (4 * (AvgEgressRate + AvgIngressRate +
                                   AvgAckEgressRate + AvgAckIngressRate))
        end,

    {Duration, State1}.

%% Tell the queue process whether a timer-driven index sync is needed:
%% 'timed' when there are confirms to flush, 'idle' for other pending
%% index work, 'false' when nothing is pending.
needs_timeout(#vqstate { index_state = IndexState }) ->
    case rabbit_queue_index:needs_sync(IndexState) of
        confirms -> timed;
        other    -> idle;
        false    -> false
    end.
+ +timeout(State = #vqstate { index_state = IndexState }) -> + State #vqstate { index_state = rabbit_queue_index:sync(IndexState) }. + +handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> + State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. + +handle_info(bump_reduce_memory_use, State = #vqstate{ waiting_bump = true }) -> + State#vqstate{ waiting_bump = false }; +handle_info(bump_reduce_memory_use, State) -> + State. + +resume(State) -> a(reduce_memory_use(State)). + +msg_rates(#vqstate { rates = #rates { in = AvgIngressRate, + out = AvgEgressRate } }) -> + {AvgIngressRate, AvgEgressRate}. + +info(messages_ready_ram, #vqstate{ram_msg_count = RamMsgCount}) -> + RamMsgCount; +info(messages_unacknowledged_ram, #vqstate{ram_pending_ack = RPA, + qi_pending_ack = QPA}) -> + gb_trees:size(RPA) + gb_trees:size(QPA); +info(messages_ram, State) -> + info(messages_ready_ram, State) + info(messages_unacknowledged_ram, State); +info(messages_persistent, #vqstate{persistent_count = PersistentCount}) -> + PersistentCount; +info(messages_paged_out, #vqstate{delta = #delta{transient = Count}}) -> + Count; +info(message_bytes, #vqstate{bytes = Bytes, + unacked_bytes = UBytes}) -> + Bytes + UBytes; +info(message_bytes_ready, #vqstate{bytes = Bytes}) -> + Bytes; +info(message_bytes_unacknowledged, #vqstate{unacked_bytes = UBytes}) -> + UBytes; +info(message_bytes_ram, #vqstate{ram_bytes = RamBytes}) -> + RamBytes; +info(message_bytes_persistent, #vqstate{persistent_bytes = PersistentBytes}) -> + PersistentBytes; +info(message_bytes_paged_out, #vqstate{delta_transient_bytes = PagedOutBytes}) -> + PagedOutBytes; +info(head_message_timestamp, #vqstate{ + q3 = Q3, + q4 = Q4, + ram_pending_ack = RPA, + qi_pending_ack = QPA}) -> + head_message_timestamp(Q3, Q4, RPA, QPA); +info(disk_reads, #vqstate{disk_read_count = Count}) -> + Count; +info(disk_writes, #vqstate{disk_write_count = Count}) -> + Count; +info(backing_queue_status, #vqstate { + q1 = Q1, 
q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + mode = Mode, + len = Len, + target_ram_count = TargetRamCount, + next_seq_id = NextSeqId, + rates = #rates { in = AvgIngressRate, + out = AvgEgressRate, + ack_in = AvgAckIngressRate, + ack_out = AvgAckEgressRate }}) -> + + [ {mode , Mode}, + {q1 , ?QUEUE:len(Q1)}, + {q2 , ?QUEUE:len(Q2)}, + {delta , Delta}, + {q3 , ?QUEUE:len(Q3)}, + {q4 , ?QUEUE:len(Q4)}, + {len , Len}, + {target_ram_count , TargetRamCount}, + {next_seq_id , NextSeqId}, + {avg_ingress_rate , AvgIngressRate}, + {avg_egress_rate , AvgEgressRate}, + {avg_ack_ingress_rate, AvgAckIngressRate}, + {avg_ack_egress_rate , AvgAckEgressRate} ]; +info(_, _) -> + ''. + +invoke(?MODULE, Fun, State) -> Fun(?MODULE, State); +invoke( _, _, State) -> State. + +is_duplicate(_Msg, State) -> {false, State}. + +set_queue_mode(Mode, State = #vqstate { mode = Mode }) -> + State; +set_queue_mode(lazy, State = #vqstate { + target_ram_count = TargetRamCount }) -> + %% To become a lazy queue we need to page everything to disk first. + State1 = convert_to_lazy(State), + %% restore the original target_ram_count + a(State1 #vqstate { mode = lazy, target_ram_count = TargetRamCount }); +set_queue_mode(default, State) -> + %% becoming a default queue means loading messages from disk like + %% when a queue is recovered. + a(maybe_deltas_to_betas(State #vqstate { mode = default })); +set_queue_mode(_, State) -> + State. + +zip_msgs_and_acks(Msgs, AckTags, Accumulator, _State) -> + lists:foldl(fun ({{#basic_message{ id = Id }, _Props}, AckTag}, Acc) -> + [{Id, AckTag} | Acc] + end, Accumulator, lists:zip(Msgs, AckTags)). 
+ +convert_to_lazy(State) -> + State1 = #vqstate { delta = Delta, q3 = Q3, len = Len } = + set_ram_duration_target(0, State), + case Delta#delta.count + ?QUEUE:len(Q3) == Len of + true -> + State1; + false -> + %% When pushing messages to disk, we might have been + %% blocked by the msg_store, so we need to see if we have + %% to wait for more credit, and then keep paging messages. + %% + %% The amqqueue_process could have taken care of this, but + %% between the time it receives the bump_credit msg and + %% calls BQ:resume to keep paging messages to disk, some + %% other request may arrive to the BQ which at this moment + %% is not in a proper state for a lazy BQ (unless all + %% messages have been paged to disk already). + wait_for_msg_store_credit(), + convert_to_lazy(resume(State1)) + end. + +wait_for_msg_store_credit() -> + case credit_flow:blocked() of + true -> receive + {bump_credit, Msg} -> + credit_flow:handle_bump_msg(Msg) + end; + false -> ok + end. + +%% Get the Timestamp property of the first msg, if present. This is +%% the one with the oldest timestamp among the heads of the pending +%% acks and unread queues. We can't check disk_pending_acks as these +%% are paged out - we assume some will soon be paged in rather than +%% forcing it to happen. Pending ack msgs are included as they are +%% regarded as unprocessed until acked, this also prevents the result +%% apparently oscillating during repeated rejects. Q3 is only checked +%% when Q4 is empty as any Q4 msg will be earlier. 
+head_message_timestamp(Q3, Q4, RPA, QPA) -> + HeadMsgs = [ HeadMsgStatus#msg_status.msg || + HeadMsgStatus <- + [ get_qs_head([Q4, Q3]), + get_pa_head(RPA), + get_pa_head(QPA) ], + HeadMsgStatus /= undefined, + HeadMsgStatus#msg_status.msg /= undefined ], + + Timestamps = + [Timestamp || HeadMsg <- HeadMsgs, + Timestamp <- [rabbit_basic:extract_timestamp( + HeadMsg#basic_message.content)], + Timestamp /= undefined + ], + + case Timestamps == [] of + true -> ''; + false -> lists:min(Timestamps) + end. + +get_qs_head(Qs) -> + catch lists:foldl( + fun (Q, Acc) -> + case get_q_head(Q) of + undefined -> Acc; + Val -> throw(Val) + end + end, undefined, Qs). + +get_q_head(Q) -> + get_collection_head(Q, fun ?QUEUE:is_empty/1, fun ?QUEUE:peek/1). + +get_pa_head(PA) -> + get_collection_head(PA, fun gb_trees:is_empty/1, fun gb_trees:smallest/1). + +get_collection_head(Col, IsEmpty, GetVal) -> + case IsEmpty(Col) of + false -> + {_, MsgStatus} = GetVal(Col), + MsgStatus; + true -> undefined + end. + +%%---------------------------------------------------------------------------- +%% Minor helpers +%%---------------------------------------------------------------------------- +a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + mode = default, + len = Len, + bytes = Bytes, + unacked_bytes = UnackedBytes, + persistent_count = PersistentCount, + persistent_bytes = PersistentBytes, + ram_msg_count = RamMsgCount, + ram_bytes = RamBytes}) -> + E1 = ?QUEUE:is_empty(Q1), + E2 = ?QUEUE:is_empty(Q2), + ED = Delta#delta.count == 0, + E3 = ?QUEUE:is_empty(Q3), + E4 = ?QUEUE:is_empty(Q4), + LZ = Len == 0, + + %% if q1 has messages then q3 cannot be empty. See publish/6. + true = E1 or not E3, + %% if q2 has messages then we have messages in delta (paged to + %% disk). See push_alphas_to_betas/2. + true = E2 or not ED, + %% if delta has messages then q3 cannot be empty. 
This is enforced + %% by paging, where min([?SEGMENT_ENTRY_COUNT, len(q3)]) messages + %% are always kept on RAM. + true = ED or not E3, + %% if the queue length is 0, then q3 and q4 must be empty. + true = LZ == (E3 and E4), + + true = Len >= 0, + true = Bytes >= 0, + true = UnackedBytes >= 0, + true = PersistentCount >= 0, + true = PersistentBytes >= 0, + true = RamMsgCount >= 0, + true = RamMsgCount =< Len, + true = RamBytes >= 0, + true = RamBytes =< Bytes + UnackedBytes, + + State; +a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + mode = lazy, + len = Len, + bytes = Bytes, + unacked_bytes = UnackedBytes, + persistent_count = PersistentCount, + persistent_bytes = PersistentBytes, + ram_msg_count = RamMsgCount, + ram_bytes = RamBytes}) -> + E1 = ?QUEUE:is_empty(Q1), + E2 = ?QUEUE:is_empty(Q2), + ED = Delta#delta.count == 0, + E3 = ?QUEUE:is_empty(Q3), + E4 = ?QUEUE:is_empty(Q4), + LZ = Len == 0, + L3 = ?QUEUE:len(Q3), + + %% q1 must always be empty, since q1 only gets messages during + %% publish, but for lazy queues messages go straight to delta. + true = E1, + + %% q2 only gets messages from q1 when push_alphas_to_betas is + %% called for a non empty delta, which won't be the case for a + %% lazy queue. This means q2 must always be empty. + true = E2, + + %% q4 must always be empty, since q1 only gets messages during + %% publish, but for lazy queues messages go straight to delta. + true = E4, + + %% if the queue is empty, then delta is empty and q3 is empty. + true = LZ == (ED and E3), + + %% There should be no messages in q1, q2, and q4 + true = Delta#delta.count + L3 == Len, + + true = Len >= 0, + true = Bytes >= 0, + true = UnackedBytes >= 0, + true = PersistentCount >= 0, + true = PersistentBytes >= 0, + true = RamMsgCount >= 0, + true = RamMsgCount =< Len, + true = RamBytes >= 0, + true = RamBytes =< Bytes + UnackedBytes, + + State. 

%% Assert the internal consistency of a #delta{}: the seq-id window
%% must be wide enough to hold 'count' messages.
d(#delta { start_seq_id = Start, count = Count, end_seq_id = End } = Delta)
  when Start + Count =< End ->
    Delta.

%% Assert the internal consistency of a #msg_status{}: a persistent
%% message must have its index entry on disk, and the message body
%% must live either in RAM or in the message store.
m(#msg_status { is_persistent = IsPersistent,
                msg_in_store  = MsgInStore,
                index_on_disk = IndexOnDisk } = Status) ->
    true = IndexOnDisk or not IsPersistent,
    true = MsgInStore or msg_in_ram(Status),
    Status.

%% 1 for true, 0 for false.
one_if(true)  -> 1;
one_if(false) -> 0.

%% Prepend Elem to Tail only when the condition holds.
cons_if(true,  Elem,  Tail) -> [Elem | Tail];
cons_if(false, _Elem, Tail) -> Tail.

%% Add Val to Set only when the condition holds.
gb_sets_maybe_insert(true,  Val,  Set) -> gb_sets:add(Val, Set);
gb_sets_maybe_insert(false, _Val, Set) -> Set.

%% Build a fresh #msg_status{} for a newly published message; nothing
%% has been written to disk yet, so both on-disk flags start false.
msg_status(IsPersistent, IsDelivered, SeqId,
           Msg = #basic_message {id = MsgId}, MsgProps, IndexMaxSize) ->
    #msg_status{seq_id        = SeqId,
                msg_id        = MsgId,
                msg           = Msg,
                is_persistent = IsPersistent,
                is_delivered  = IsDelivered,
                msg_in_store  = false,
                index_on_disk = false,
                persist_to    = determine_persist_to(Msg, MsgProps,
                                                     IndexMaxSize),
                msg_props     = MsgProps}.

%% Build a #msg_status{} for a message recovered from the queue index.
%% First clause: the body was embedded in the index (we have the
%% #basic_message{}); second clause: only the id was stored, the body
%% lives in the message store. Clause order matters: the second
%% pattern would also match the first shape.
beta_msg_status({Msg = #basic_message{id = MsgId},
                 SeqId, MsgProps, IsPersistent, IsDelivered}) ->
    Base = beta_msg_status0(SeqId, MsgProps, IsPersistent, IsDelivered),
    Base#msg_status{msg_id       = MsgId,
                    msg          = Msg,
                    persist_to   = queue_index,
                    msg_in_store = false};
beta_msg_status({MsgId, SeqId, MsgProps, IsPersistent, IsDelivered}) ->
    Base = beta_msg_status0(SeqId, MsgProps, IsPersistent, IsDelivered),
    Base#msg_status{msg_id       = MsgId,
                    msg          = undefined,
                    persist_to   = msg_store,
                    msg_in_store = true}.

%% Shared fields of a beta message status: anything read back from the
%% index necessarily has its index entry on disk.
beta_msg_status0(SeqId, MsgProps, IsPersistent, IsDelivered) ->
    #msg_status{seq_id        = SeqId,
                msg           = undefined,
                is_persistent = IsPersistent,
                is_delivered  = IsDelivered,
                index_on_disk = true,
                msg_props     = MsgProps}.

%% Drop the in-RAM message body when the body is (or will be) kept in
%% the message store; index-embedded messages keep their body.
trim_msg_status(MsgStatus) ->
    case persist_to(MsgStatus) of
        queue_index -> MsgStatus;
        msg_store   -> MsgStatus#msg_status{msg = undefined}
    end.

%% Run Fun against the persistent or transient msg_store client
%% (selected by the boolean flag) and thread the possibly-updated
%% client pair back out alongside Fun's result.
with_msg_store_state({Persistent, Transient}, true, Fun) ->
    {Result, Persistent1} = Fun(Persistent),
    {Result, {Persistent1, Transient}};
with_msg_store_state({Persistent, Transient}, false, Fun) ->
    {Result, Transient1} = Fun(Transient),
    {Result, {Persistent, Transient1}}.

%% As with_msg_store_state/3, but for operations that do not change
%% the client state: the match on the already-bound MSCState asserts
%% that the state came back unchanged, and only Fun's result is
%% returned.
with_immutable_msg_store_state(MSCState, IsPersistent, Fun) ->
    Wrapped = fun (ClientState) -> {Fun(ClientState), ClientState} end,
    {Res, MSCState} = with_msg_store_state(MSCState, IsPersistent, Wrapped),
    Res.

%% Initialise a msg_store client with a freshly generated ref.
msg_store_client_init(MsgStore, MsgOnDiskFun, Callback, VHost) ->
    msg_store_client_init(MsgStore, rabbit_guid:gen(), MsgOnDiskFun,
                          Callback, VHost).

%% Initialise a msg_store client with the given ref. The async
%% Callback is primed to close file descriptors for this store type
%% when the store asks for it.
msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback, VHost) ->
    CloseFDsFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE),
    rabbit_vhost_msg_store:client_init(
      VHost, MsgStore, Ref, MsgOnDiskFun,
      fun () -> Callback(?MODULE, CloseFDsFun) end).

%% Write a message body to the appropriate store (with credit flow).
msg_store_write(MSCState, IsPersistent, MsgId, Msg) ->
    with_immutable_msg_store_state(
      MSCState, IsPersistent,
      fun (ClientState) ->
              rabbit_msg_store:write_flow(MsgId, Msg, ClientState)
      end).

%% Read a message body back from the appropriate store.
msg_store_read(MSCState, IsPersistent, MsgId) ->
    with_msg_store_state(
      MSCState, IsPersistent,
      fun (ClientState) -> rabbit_msg_store:read(MsgId, ClientState) end).

%% Remove a batch of messages from the appropriate store.
msg_store_remove(MSCState, IsPersistent, MsgIds) ->
    with_immutable_msg_store_state(
      MSCState, IsPersistent,
      fun (ClientState) -> rabbit_msg_store:remove(MsgIds, ClientState) end).

%% Ask the appropriate store client to close any file descriptors it
%% was told to release.
msg_store_close_fds(MSCState, IsPersistent) ->
    with_msg_store_state(
      MSCState, IsPersistent,
      fun (ClientState) ->
              rabbit_msg_store:close_all_indicated(ClientState)
      end).

%% Build the callback invoked via BQ:invoke/3 that closes fds for one
%% store type and stores the updated clients back into the state.
msg_store_close_fds_fun(IsPersistent) ->
    fun (?MODULE, State = #vqstate { msg_store_clients = MSCState }) ->
            {ok, MSCState1} = msg_store_close_fds(MSCState, IsPersistent),
            State #vqstate { msg_store_clients = MSCState1 }
    end.
+ +maybe_write_delivered(false, _SeqId, IndexState) -> + IndexState; +maybe_write_delivered(true, SeqId, IndexState) -> + rabbit_queue_index:deliver([SeqId], IndexState). + +betas_from_index_entries(List, TransientThreshold, DelsAndAcksFun, State) -> + {Filtered, Delivers, Acks, RamReadyCount, RamBytes, TransientCount, TransientBytes} = + lists:foldr( + fun ({_MsgOrId, SeqId, _MsgProps, IsPersistent, IsDelivered} = M, + {Filtered1, Delivers1, Acks1, RRC, RB, TC, TB} = Acc) -> + case SeqId < TransientThreshold andalso not IsPersistent of + true -> {Filtered1, + cons_if(not IsDelivered, SeqId, Delivers1), + [SeqId | Acks1], RRC, RB, TC, TB}; + false -> MsgStatus = m(beta_msg_status(M)), + HaveMsg = msg_in_ram(MsgStatus), + Size = msg_size(MsgStatus), + case is_msg_in_pending_acks(SeqId, State) of + false -> {?QUEUE:in_r(MsgStatus, Filtered1), + Delivers1, Acks1, + RRC + one_if(HaveMsg), + RB + one_if(HaveMsg) * Size, + TC + one_if(not IsPersistent), + TB + one_if(not IsPersistent) * Size}; + true -> Acc %% [0] + end + end + end, {?QUEUE:new(), [], [], 0, 0, 0, 0}, List), + {Filtered, RamReadyCount, RamBytes, DelsAndAcksFun(Delivers, Acks, State), + TransientCount, TransientBytes}. +%% [0] We don't increase RamBytes here, even though it pertains to +%% unacked messages too, since if HaveMsg then the message must have +%% been stored in the QI, thus the message must have been in +%% qi_pending_ack, thus it must already have been in RAM. + +is_msg_in_pending_acks(SeqId, #vqstate { ram_pending_ack = RPA, + disk_pending_ack = DPA, + qi_pending_ack = QPA }) -> + (gb_trees:is_defined(SeqId, RPA) orelse + gb_trees:is_defined(SeqId, DPA) orelse + gb_trees:is_defined(SeqId, QPA)). 
+ +expand_delta(SeqId, ?BLANK_DELTA_PATTERN(X), IsPersistent) -> + d(#delta { start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1, + transient = one_if(not IsPersistent)}); +expand_delta(SeqId, #delta { start_seq_id = StartSeqId, + count = Count, + transient = Transient } = Delta, + IsPersistent ) + when SeqId < StartSeqId -> + d(Delta #delta { start_seq_id = SeqId, count = Count + 1, + transient = Transient + one_if(not IsPersistent)}); +expand_delta(SeqId, #delta { count = Count, + end_seq_id = EndSeqId, + transient = Transient } = Delta, + IsPersistent) + when SeqId >= EndSeqId -> + d(Delta #delta { count = Count + 1, end_seq_id = SeqId + 1, + transient = Transient + one_if(not IsPersistent)}); +expand_delta(_SeqId, #delta { count = Count, + transient = Transient } = Delta, + IsPersistent ) -> + d(Delta #delta { count = Count + 1, + transient = Transient + one_if(not IsPersistent) }). + +%%---------------------------------------------------------------------------- +%% Internal major helpers for Public API +%%---------------------------------------------------------------------------- + +init(IsDurable, IndexState, DeltaCount, DeltaBytes, Terms, + PersistentClient, TransientClient, VHost) -> + {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState), + + {DeltaCount1, DeltaBytes1} = + case Terms of + non_clean_shutdown -> {DeltaCount, DeltaBytes}; + _ -> {proplists:get_value(persistent_count, + Terms, DeltaCount), + proplists:get_value(persistent_bytes, + Terms, DeltaBytes)} + end, + Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of + true -> ?BLANK_DELTA; + false -> d(#delta { start_seq_id = LowSeqId, + count = DeltaCount1, + transient = 0, + end_seq_id = NextSeqId }) + end, + Now = erlang:monotonic_time(), + IoBatchSize = rabbit_misc:get_env(rabbit, msg_store_io_batch_size, + ?IO_BATCH_SIZE), + + {ok, IndexMaxSize} = application:get_env( + rabbit, queue_index_embed_msgs_below), + State = #vqstate { + q1 = ?QUEUE:new(), + q2 
= ?QUEUE:new(), + delta = Delta, + q3 = ?QUEUE:new(), + q4 = ?QUEUE:new(), + next_seq_id = NextSeqId, + ram_pending_ack = gb_trees:empty(), + disk_pending_ack = gb_trees:empty(), + qi_pending_ack = gb_trees:empty(), + index_state = IndexState1, + msg_store_clients = {PersistentClient, TransientClient}, + durable = IsDurable, + transient_threshold = NextSeqId, + qi_embed_msgs_below = IndexMaxSize, + + len = DeltaCount1, + persistent_count = DeltaCount1, + bytes = DeltaBytes1, + persistent_bytes = DeltaBytes1, + delta_transient_bytes = 0, + + target_ram_count = infinity, + ram_msg_count = 0, + ram_msg_count_prev = 0, + ram_ack_count_prev = 0, + ram_bytes = 0, + unacked_bytes = 0, + out_counter = 0, + in_counter = 0, + rates = blank_rates(Now), + msgs_on_disk = gb_sets:new(), + msg_indices_on_disk = gb_sets:new(), + unconfirmed = gb_sets:new(), + confirmed = gb_sets:new(), + ack_out_counter = 0, + ack_in_counter = 0, + disk_read_count = 0, + disk_write_count = 0, + + io_batch_size = IoBatchSize, + + mode = default, + memory_reduction_run_count = 0, + virtual_host = VHost}, + a(maybe_deltas_to_betas(State)). + +blank_rates(Now) -> + #rates { in = 0.0, + out = 0.0, + ack_in = 0.0, + ack_out = 0.0, + timestamp = Now}. 

%% Requeue a message status at the *head* of the in-RAM queues.
%%
%% Default mode: a message whose body is not in RAM normally goes back
%% into q3; but when q4 is non-empty the queue head must stay in q4,
%% so the body is read back from the store first (adjusting RAM stats
%% via stats/4).
in_r(MsgStatus = #msg_status { msg = undefined },
     State = #vqstate { mode = default, q3 = Q3, q4 = Q4 }) ->
    case ?QUEUE:is_empty(Q4) of
        true  -> State #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3) };
        false -> {Msg, State1 = #vqstate { q4 = Q4a }} =
                     read_msg(MsgStatus, State),
                 MsgStatus1 = MsgStatus#msg_status{msg = Msg},
                 stats(ready0, {MsgStatus, MsgStatus1}, 0,
                       State1 #vqstate { q4 = ?QUEUE:in_r(MsgStatus1, Q4a) })
    end;
%% Default mode with the body in RAM: straight back onto the head of q4.
in_r(MsgStatus,
     State = #vqstate { mode = default, q4 = Q4 }) ->
    State #vqstate { q4 = ?QUEUE:in_r(MsgStatus, Q4) };
%% lazy queues
%% Lazy mode: if q3 is empty the message is paged to disk and
%% accounted in delta; otherwise it goes back onto the head of q3.
in_r(MsgStatus = #msg_status { seq_id = SeqId, is_persistent = IsPersistent },
     State = #vqstate { mode = lazy, q3 = Q3, delta = Delta}) ->
    case ?QUEUE:is_empty(Q3) of
        true  ->
            {_MsgStatus1, State1} =
                maybe_write_to_disk(true, true, MsgStatus, State),
            State2 = stats(ready0, {MsgStatus, none}, 1, State1),
            Delta1 = expand_delta(SeqId, Delta, IsPersistent),
            State2 #vqstate{ delta = Delta1};
        false ->
            State #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3) }
    end.

%% Take the next ready message: from q4 first, falling back to q3 (and
%% thence delta, via fetch_from_q3/1) when q4 is empty. Lazy queues
%% only ever use q3/delta.
queue_out(State = #vqstate { mode = default, q4 = Q4 }) ->
    case ?QUEUE:out(Q4) of
        {empty, _Q4} ->
            case fetch_from_q3(State) of
                {empty, _State1} = Result     -> Result;
                {loaded, {MsgStatus, State1}} -> {{value, MsgStatus}, State1}
            end;
        {{value, MsgStatus}, Q4a} ->
            {{value, MsgStatus}, State #vqstate { q4 = Q4a }}
    end;
%% lazy queues
queue_out(State = #vqstate { mode = lazy }) ->
    case fetch_from_q3(State) of
        {empty, _State1} = Result     -> Result;
        {loaded, {MsgStatus, State1}} -> {{value, MsgStatus}, State1}
    end.

%% Return the message body, reading it from the message store when it
%% is not already held in RAM.
read_msg(#msg_status{msg           = undefined,
                     msg_id        = MsgId,
                     is_persistent = IsPersistent}, State) ->
    read_msg(MsgId, IsPersistent, State);
read_msg(#msg_status{msg = Msg}, State) ->
    {Msg, State}.
%% Read a payload from the message store (persistent or transient,
%% chosen by IsPersistent) and bump the disk_read_count counter.
read_msg(MsgId, IsPersistent, State = #vqstate{msg_store_clients = MSCState,
                                               disk_read_count   = Count}) ->
    {{ok, Msg = #basic_message {}}, MSCState1} =
        msg_store_read(MSCState, IsPersistent, MsgId),
    {Msg, State #vqstate {msg_store_clients = MSCState1,
                          disk_read_count   = Count + 1}}.

%% Update the queue's counters/byte-counts after a state transition of
%% one message. Signs says how ready/unacked counts move; Statuses is
%% the {before, after} pair of #msg_status (or none/lazy markers);
%% DeltaPaged counts messages paged into delta.
stats(Signs, Statuses, DeltaPaged, State) ->
    stats0(expand_signs(Signs), expand_statuses(Statuses), DeltaPaged, State).

%% Normalise the Signs shorthand into {DeltaReady, DeltaUnacked,
%% ReadyMsgPaged}.
expand_signs(ready0)   -> {0, 0, true};
expand_signs(lazy_pub) -> {1, 0, true};
expand_signs({A, B})   -> {A, B, false}.

%% Normalise the Statuses pair into {InRamBefore, InRamAfter,
%% MsgStatus-to-measure}.
expand_statuses({none, A})    -> {false,         msg_in_ram(A), A};
expand_statuses({B,    none}) -> {msg_in_ram(B), false,         B};
expand_statuses({lazy, A})    -> {false        , false,         A};
expand_statuses({B,    A})    -> {msg_in_ram(B), msg_in_ram(A), B}.

%% In this function at least, we are religious: the variable name
%% contains "Ready" or "Unacked" iff that is what it counts. If
%% neither is present it counts both.
stats0({DeltaReady, DeltaUnacked, ReadyMsgPaged},
       {InRamBefore, InRamAfter, MsgStatus}, DeltaPaged,
       State = #vqstate{len              = ReadyCount,
                        bytes            = ReadyBytes,
                        ram_msg_count    = RamReadyCount,
                        persistent_count = PersistentCount,
                        unacked_bytes    = UnackedBytes,
                        ram_bytes        = RamBytes,
                        delta_transient_bytes = DeltaBytes,
                        persistent_bytes = PersistentBytes}) ->
    S = msg_size(MsgStatus),
    DeltaTotal = DeltaReady + DeltaUnacked,
    %% Whether the payload entered (+1) or left (-1) RAM.
    DeltaRam = case {InRamBefore, InRamAfter} of
                   {false, false} ->  0;
                   {false, true}  ->  1;
                   {true,  false} -> -1;
                   {true,  true}  ->  0
               end,
    DeltaRamReady = case DeltaReady of
                        1                    -> one_if(InRamAfter);
                        -1                   -> -one_if(InRamBefore);
                        0 when ReadyMsgPaged -> DeltaRam;
                        0                    -> 0
                    end,
    DeltaPersistent = DeltaTotal * one_if(MsgStatus#msg_status.is_persistent),
    State#vqstate{len               = ReadyCount      + DeltaReady,
                  ram_msg_count     = RamReadyCount   + DeltaRamReady,
                  persistent_count  = PersistentCount + DeltaPersistent,
                  bytes             = ReadyBytes      + DeltaReady * S,
                  unacked_bytes     = UnackedBytes    + DeltaUnacked * S,
                  ram_bytes         = RamBytes        + DeltaRam * S,
                  persistent_bytes  = PersistentBytes + DeltaPersistent * S,
                  delta_transient_bytes = DeltaBytes  + DeltaPaged * one_if(not MsgStatus#msg_status.is_persistent) * S}.

%% Payload size in bytes, as recorded in the message properties.
msg_size(#msg_status{msg_props = #message_properties{size = Size}}) -> Size.

%% True iff the message payload is currently held in RAM.
msg_in_ram(#msg_status{msg = Msg}) -> Msg =/= undefined.

%% first param: AckRequired
%% Remove one message from the ready queue. With AckRequired = true it
%% becomes a pending ack (returning its SeqId so the client can ack it
%% later); with false it is deleted outright from store and index.
remove(true, MsgStatus = #msg_status {
               seq_id        = SeqId,
               is_delivered  = IsDelivered,
               index_on_disk = IndexOnDisk },
       State = #vqstate {out_counter = OutCount,
                         index_state = IndexState}) ->
    %% Mark it delivered if necessary
    IndexState1 = maybe_write_delivered(
                    IndexOnDisk andalso not IsDelivered,
                    SeqId, IndexState),

    State1 = record_pending_ack(
               MsgStatus #msg_status {
                 is_delivered = true }, State),

    State2 = stats({-1, 1}, {MsgStatus, MsgStatus}, 0, State1),

    {SeqId, maybe_update_rates(
              State2 #vqstate {out_counter = OutCount + 1,
                               index_state = IndexState1})};

%% This function body has the same behaviour as remove_queue_entries/3
%% but instead of removing messages based on a ?QUEUE, this removes
%% just one message, the one referenced by the MsgStatus provided.
remove(false, MsgStatus = #msg_status {
                seq_id        = SeqId,
                msg_id        = MsgId,
                is_persistent = IsPersistent,
                is_delivered  = IsDelivered,
                msg_in_store  = MsgInStore,
                index_on_disk = IndexOnDisk },
       State = #vqstate {out_counter       = OutCount,
                         index_state       = IndexState,
                         msg_store_clients = MSCState}) ->
    %% Mark it delivered if necessary
    IndexState1 = maybe_write_delivered(
                    IndexOnDisk andalso not IsDelivered,
                    SeqId, IndexState),

    %% Remove from msg_store and queue index, if necessary
    case MsgInStore of
        true  -> ok = msg_store_remove(MSCState, IsPersistent, [MsgId]);
        false -> ok
    end,

    IndexState2 =
        case IndexOnDisk of
            true  -> rabbit_queue_index:ack([SeqId], IndexState1);
            false -> IndexState1
        end,

    State1 = stats({-1, 0}, {MsgStatus, none}, 0, State),

    {undefined, maybe_update_rates(
                  State1 #vqstate {out_counter = OutCount + 1,
                                   index_state = IndexState2})}.

%% This function exists as a way to improve dropwhile/2
%% performance. The idea of having this function is to optimise calls
%% to rabbit_queue_index by batching delivers and acks, instead of
%% sending them one by one.
%%
%% Instead of removing every message as they are popped from the
%% queue, it first accumulates them and then removes them by calling
%% remove_queue_entries/3, since the behaviour of
%% remove_queue_entries/3 when used with
%% process_delivers_and_acks_fun(deliver_and_ack) is the same as
%% calling remove(false, MsgStatus, State).
%%
%% remove/3 also updates the out_counter in every call, but here we do
%% it just once at the end.
remove_by_predicate(Pred, State = #vqstate {out_counter = OutCount}) ->
    {MsgProps, QAcc, State1} =
        collect_by_predicate(Pred, ?QUEUE:new(), State),
    State2 =
        remove_queue_entries(
          QAcc, process_delivers_and_acks_fun(deliver_and_ack), State1),
    %% maybe_update_rates/1 is called in remove/3 for every
    %% message. Since we update out_counter only once, we call it just
    %% there.
    {MsgProps, maybe_update_rates(
                 State2 #vqstate {
                   out_counter = OutCount + ?QUEUE:len(QAcc)})}.

%% This function exists as a way to improve fetchwhile/4
%% performance. The idea of having this function is to optimise calls
%% to rabbit_queue_index by batching delivers, instead of sending them
%% one by one.
%%
%% Fun is the function passed to fetchwhile/4 that's
%% applied to every fetched message and used to build the fetchwhile/4
%% result accumulator FetchAcc.
fetch_by_predicate(Pred, Fun, FetchAcc,
                   State = #vqstate {
                              index_state = IndexState,
                              out_counter = OutCount}) ->
    {MsgProps, QAcc, State1} =
        collect_by_predicate(Pred, ?QUEUE:new(), State),

    {Delivers, FetchAcc1, State2} =
        process_queue_entries(QAcc, Fun, FetchAcc, State1),

    IndexState1 = rabbit_queue_index:deliver(Delivers, IndexState),

    {MsgProps, FetchAcc1, maybe_update_rates(
                            State2 #vqstate {
                              index_state = IndexState1,
                              out_counter = OutCount + ?QUEUE:len(QAcc)})}.

%% We try to do here the same as what remove(true, State) does but
%% processing several messages at the same time. The idea is to
%% optimize rabbit_queue_index:deliver/2 calls by sending a list of
%% SeqIds instead of one by one, thus process_queue_entries1 will
%% accumulate the required deliveries, will record_pending_ack for
%% each message, and will update stats, like remove/2 does.
%%
%% For the meaning of Fun and FetchAcc arguments see
%% fetch_by_predicate/4 above.
process_queue_entries(Q, Fun, FetchAcc, State) ->
    ?QUEUE:foldl(fun (MsgStatus, Acc) ->
                         process_queue_entries1(MsgStatus, Fun, Acc)
                 end,
                 {[], FetchAcc, State}, Q).
%% Per-message worker for process_queue_entries/4: reads the payload,
%% records the pending ack, accumulates the SeqId for a batched
%% 'deliver' (only if the index copy hasn't been marked delivered yet),
%% applies the caller's Fun, and updates stats as remove(true, ...)
%% would.
process_queue_entries1(
  #msg_status { seq_id = SeqId, is_delivered = IsDelivered,
                index_on_disk = IndexOnDisk} = MsgStatus,
  Fun,
  {Delivers, FetchAcc, State}) ->
    {Msg, State1} = read_msg(MsgStatus, State),
    State2 = record_pending_ack(
               MsgStatus #msg_status {
                 is_delivered = true }, State1),
    {cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers),
     Fun(Msg, SeqId, FetchAcc),
     stats({-1, 1}, {MsgStatus, MsgStatus}, 0, State2)}.

%% Pop messages while Pred(MsgProps) is true, accumulating them in
%% QAcc. The first message that fails the predicate is pushed back with
%% in_r/2 and its properties are returned; returns undefined when the
%% queue empties first.
collect_by_predicate(Pred, QAcc, State) ->
    case queue_out(State) of
        {empty, State1} ->
            {undefined, QAcc, State1};
        {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} ->
            case Pred(MsgProps) of
                true  -> collect_by_predicate(Pred, ?QUEUE:in(MsgStatus, QAcc),
                                              State1);
                false -> {MsgProps, QAcc, in_r(MsgStatus, State1)}
            end
    end.

%%----------------------------------------------------------------------------
%% Helpers for Public API purge/1 function
%%----------------------------------------------------------------------------

%% The difference between purge_when_pending_acks/1
%% vs. purge_and_index_reset/1 is that the first one issues a deliver
%% and an ack to the queue index for every message that's being
%% removed, while the latter just resets the queue index state.
purge_when_pending_acks(State) ->
    State1 = purge1(process_delivers_and_acks_fun(deliver_and_ack), State),
    a(State1).

purge_and_index_reset(State) ->
    State1 = purge1(process_delivers_and_acks_fun(none), State),
    a(reset_qi_state(State1)).

%% This function removes messages from each of {q1, q2, q3, q4}.
%%
%% With remove_queue_entries/3 q1 and q4 are emptied, while q2 and q3
%% are specially handled by purge_betas_and_deltas/2.
%%
%% purge_betas_and_deltas/2 loads messages from the queue index,
%% filling up q3 and in some cases moving messages from q2 to q3 while
%% resetting q2 to an empty queue (see maybe_deltas_to_betas/2). The
%% messages loaded into q3 are removed by calling
%% remove_queue_entries/3 until there are no more messages to be read
%% from the queue index. Messages are read in batches from the queue
%% index.
purge1(AfterFun, State = #vqstate { q4 = Q4}) ->
    State1 = remove_queue_entries(Q4, AfterFun, State),

    State2 = #vqstate {q1 = Q1} =
        purge_betas_and_deltas(AfterFun, State1#vqstate{q4 = ?QUEUE:new()}),

    State3 = remove_queue_entries(Q1, AfterFun, State2),

    a(State3#vqstate{q1 = ?QUEUE:new()}).

%% Throw away the queue index state and start afresh (used by
%% purge_and_index_reset/1).
reset_qi_state(State = #vqstate{index_state = IndexState}) ->
    State#vqstate{index_state =
                  rabbit_queue_index:reset_state(IndexState)}.

is_pending_ack_empty(State) ->
    count_pending_acks(State) =:= 0.

is_unconfirmed_empty(#vqstate { unconfirmed = UC }) ->
    gb_sets:is_empty(UC).

%% Total number of unacked messages across all three pending-ack trees.
count_pending_acks(#vqstate { ram_pending_ack   = RPA,
                              disk_pending_ack  = DPA,
                              qi_pending_ack    = QPA }) ->
    gb_trees:size(RPA) + gb_trees:size(DPA) + gb_trees:size(QPA).

%% Repeatedly page delta messages into q3 and remove them, until q3
%% comes back empty. In lazy mode an initial page-in is needed since
%% everything lives in delta.
purge_betas_and_deltas(DelsAndAcksFun, State = #vqstate { mode = Mode }) ->
    State0 = #vqstate { q3 = Q3 } =
        case Mode of
            lazy -> maybe_deltas_to_betas(DelsAndAcksFun, State);
            _    -> State
        end,

    case ?QUEUE:is_empty(Q3) of
        true  -> State0;
        false -> State1 = remove_queue_entries(Q3, DelsAndAcksFun, State0),
                 purge_betas_and_deltas(DelsAndAcksFun,
                                        maybe_deltas_to_betas(
                                          DelsAndAcksFun,
                                          State1#vqstate{q3 = ?QUEUE:new()}))
    end.

%% Remove every message in Q: batch-deletes payloads from the message
%% store(s), then hands the accumulated deliver/ack SeqId lists to
%% DelsAndAcksFun (see process_delivers_and_acks_fun/1).
remove_queue_entries(Q, DelsAndAcksFun,
                     State = #vqstate{msg_store_clients = MSCState}) ->
    {MsgIdsByStore, Delivers, Acks, State1} =
        ?QUEUE:foldl(fun remove_queue_entries1/2,
                     {maps:new(), [], [], State}, Q),
    remove_msgs_by_id(MsgIdsByStore, MSCState),
    DelsAndAcksFun(Delivers, Acks, State1).
%% Fold worker for remove_queue_entries/3: accumulates the message id
%% under its persistence flag (for batched store removal), the SeqIds
%% needing 'deliver' and 'ack' index records, and updates stats.
remove_queue_entries1(
  #msg_status { msg_id = MsgId, seq_id = SeqId, is_delivered = IsDelivered,
                msg_in_store = MsgInStore, index_on_disk = IndexOnDisk,
                is_persistent = IsPersistent} = MsgStatus,
  {MsgIdsByStore, Delivers, Acks, State}) ->
    {case MsgInStore of
         true  -> rabbit_misc:maps_cons(IsPersistent, MsgId, MsgIdsByStore);
         false -> MsgIdsByStore
     end,
     cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers),
     cons_if(IndexOnDisk, SeqId, Acks),
     stats({-1, 0}, {MsgStatus, none}, 0, State)}.

%% Build the DelsAndAcksFun used by remove_queue_entries/3 et al:
%% either flush delivers+acks to the queue index, or do nothing (the
%% index is reset wholesale afterwards instead).
process_delivers_and_acks_fun(deliver_and_ack) ->
    fun (Delivers, Acks, State = #vqstate { index_state = IndexState }) ->
            IndexState1 =
                rabbit_queue_index:ack(
                  Acks, rabbit_queue_index:deliver(Delivers, IndexState)),
            State #vqstate { index_state = IndexState1 }
    end;
process_delivers_and_acks_fun(_) ->
    fun (_, _, State) ->
            State
    end.

%%----------------------------------------------------------------------------
%% Internal gubbins for publishing
%%----------------------------------------------------------------------------

%% Enqueue a freshly published message. Default mode keeps it in RAM:
%% q4 if q3 is empty (nothing older is paged out), otherwise q1.
%% PersistFun decides whether payload/index are written eagerly.
publish1(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId },
         MsgProps = #message_properties { needs_confirming = NeedsConfirming },
         IsDelivered, _ChPid, _Flow, PersistFun,
         State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4,
                            mode = default,
                            qi_embed_msgs_below = IndexMaxSize,
                            next_seq_id = SeqId,
                            in_counter = InCount,
                            durable = IsDurable,
                            unconfirmed = UC }) ->
    %% Only count as persistent if both the queue and the message are.
    IsPersistent1 = IsDurable andalso IsPersistent,
    MsgStatus = msg_status(IsPersistent1, IsDelivered, SeqId, Msg, MsgProps, IndexMaxSize),
    {MsgStatus1, State1} = PersistFun(false, false, MsgStatus, State),
    State2 = case ?QUEUE:is_empty(Q3) of
                 false -> State1 #vqstate { q1 = ?QUEUE:in(m(MsgStatus1), Q1) };
                 true  -> State1 #vqstate { q4 = ?QUEUE:in(m(MsgStatus1), Q4) }
             end,
    InCount1 = InCount + 1,
    UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
    stats({1, 0}, {none, MsgStatus1}, 0,
          State2#vqstate{ next_seq_id = SeqId + 1,
                          in_counter  = InCount1,
                          unconfirmed = UC1 });
%% Lazy mode: force the message straight to disk (payload + index) and
%% account for it in delta only.
publish1(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId },
         MsgProps = #message_properties { needs_confirming = NeedsConfirming },
         IsDelivered, _ChPid, _Flow, PersistFun,
         State = #vqstate { mode = lazy,
                            qi_embed_msgs_below = IndexMaxSize,
                            next_seq_id = SeqId,
                            in_counter = InCount,
                            durable = IsDurable,
                            unconfirmed = UC,
                            delta = Delta}) ->
    IsPersistent1 = IsDurable andalso IsPersistent,
    MsgStatus = msg_status(IsPersistent1, IsDelivered, SeqId, Msg, MsgProps, IndexMaxSize),
    {MsgStatus1, State1} = PersistFun(true, true, MsgStatus, State),
    Delta1 = expand_delta(SeqId, Delta, IsPersistent),
    UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
    stats(lazy_pub, {lazy, m(MsgStatus1)}, 1,
          State1#vqstate{ delta       = Delta1,
                          next_seq_id = SeqId + 1,
                          in_counter  = InCount + 1,
                          unconfirmed = UC1}).

%% Fold worker for batch publishing: publishes one {Msg, Props,
%% IsDelivered} triple, threading ChPid/Flow through untouched.
batch_publish1({Msg, MsgProps, IsDelivered}, {ChPid, Flow, State}) ->
    {ChPid, Flow, publish1(Msg, MsgProps, IsDelivered, ChPid, Flow,
                           fun maybe_prepare_write_to_disk/4, State)}.
%% Publish a message that is already considered delivered (e.g. sent
%% straight out to a consumer): it never enters the ready queues but
%% goes directly into the pending-ack structures. Returns the SeqId the
%% caller must eventually ack. Default mode persists only if
%% PersistFun decides to; lazy mode forces payload and index to disk.
publish_delivered1(Msg = #basic_message { is_persistent = IsPersistent,
                                          id = MsgId },
                   MsgProps = #message_properties {
                                 needs_confirming = NeedsConfirming },
                   _ChPid, _Flow, PersistFun,
                   State = #vqstate { mode = default,
                                      qi_embed_msgs_below = IndexMaxSize,
                                      next_seq_id = SeqId,
                                      out_counter = OutCount,
                                      in_counter = InCount,
                                      durable = IsDurable,
                                      unconfirmed = UC }) ->
    %% Persistent only when both queue and message are durable.
    IsPersistent1 = IsDurable andalso IsPersistent,
    MsgStatus = msg_status(IsPersistent1, true, SeqId, Msg, MsgProps, IndexMaxSize),
    {MsgStatus1, State1} = PersistFun(false, false, MsgStatus, State),
    State2 = record_pending_ack(m(MsgStatus1), State1),
    UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
    State3 = stats({0, 1}, {none, MsgStatus1}, 0,
                   State2 #vqstate { next_seq_id = SeqId    + 1,
                                     out_counter = OutCount + 1,
                                     in_counter  = InCount  + 1,
                                     unconfirmed = UC1 }),
    {SeqId, State3};
publish_delivered1(Msg = #basic_message { is_persistent = IsPersistent,
                                          id = MsgId },
                   MsgProps = #message_properties {
                                 needs_confirming = NeedsConfirming },
                   _ChPid, _Flow, PersistFun,
                   State = #vqstate { mode = lazy,
                                      qi_embed_msgs_below = IndexMaxSize,
                                      next_seq_id = SeqId,
                                      out_counter = OutCount,
                                      in_counter = InCount,
                                      durable = IsDurable,
                                      unconfirmed = UC }) ->
    IsPersistent1 = IsDurable andalso IsPersistent,
    MsgStatus = msg_status(IsPersistent1, true, SeqId, Msg, MsgProps, IndexMaxSize),
    %% Lazy mode: force both payload and index writes.
    {MsgStatus1, State1} = PersistFun(true, true, MsgStatus, State),
    State2 = record_pending_ack(m(MsgStatus1), State1),
    UC1 = gb_sets_maybe_insert(NeedsConfirming, MsgId, UC),
    State3 = stats({0, 1}, {none, MsgStatus1}, 0,
                   State2 #vqstate { next_seq_id = SeqId    + 1,
                                     out_counter = OutCount + 1,
                                     in_counter  = InCount  + 1,
                                     unconfirmed = UC1 }),
    {SeqId, State3}.
%% Fold worker for batch publish-delivered: accumulates the SeqIds (in
%% reverse publish order) that the caller must ack.
batch_publish_delivered1({Msg, MsgProps}, {ChPid, Flow, SeqIds, State}) ->
    {SeqId, State1} =
        publish_delivered1(Msg, MsgProps, ChPid, Flow,
                           fun maybe_prepare_write_to_disk/4,
                           State),
    {ChPid, Flow, [SeqId | SeqIds], State1}.

%% Write the message payload to the message store if it is not there
%% already and either Force is set or the message is persistent. Small
%% messages destined to be embedded in the queue index (see
%% persist_to/1) are left alone here.
maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status {
                                  msg_in_store = true }, State) ->
    {MsgStatus, State};
maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
                                 msg = Msg, msg_id = MsgId,
                                 is_persistent = IsPersistent },
                        State = #vqstate{ msg_store_clients = MSCState,
                                          disk_write_count  = Count})
  when Force orelse IsPersistent ->
    case persist_to(MsgStatus) of
        msg_store   -> ok = msg_store_write(MSCState, IsPersistent, MsgId,
                                            prepare_to_store(Msg)),
                       {MsgStatus#msg_status{msg_in_store = true},
                        State#vqstate{disk_write_count = Count + 1}};
        queue_index -> {MsgStatus, State}
    end;
maybe_write_msg_to_disk(_Force, MsgStatus, State) ->
    {MsgStatus, State}.

%% Due to certain optimisations made inside
%% rabbit_queue_index:pre_publish/7 we need to have two separate
%% functions for index persistence. This one is only used when paging
%% during memory pressure. We didn't want to modify
%% maybe_write_index_to_disk/3 because that function is used in other
%% places.
maybe_batch_write_index_to_disk(_Force,
                                MsgStatus = #msg_status {
                                  index_on_disk = true }, State) ->
    {MsgStatus, State};
maybe_batch_write_index_to_disk(Force,
                                MsgStatus = #msg_status {
                                  msg           = Msg,
                                  msg_id        = MsgId,
                                  seq_id        = SeqId,
                                  is_persistent = IsPersistent,
                                  is_delivered  = IsDelivered,
                                  msg_props     = MsgProps},
                                State = #vqstate {
                                  target_ram_count = TargetRamCount,
                                  disk_write_count = DiskWriteCount,
                                  index_state      = IndexState})
  when Force orelse IsPersistent ->
    %% Embed the whole message in the index entry when small enough;
    %% otherwise only its id goes in (payload lives in the msg store).
    {MsgOrId, DiskWriteCount1} =
        case persist_to(MsgStatus) of
            msg_store   -> {MsgId, DiskWriteCount};
            queue_index -> {prepare_to_store(Msg), DiskWriteCount + 1}
        end,
    IndexState1 = rabbit_queue_index:pre_publish(
                    MsgOrId, SeqId, MsgProps, IsPersistent, IsDelivered,
                    TargetRamCount, IndexState),
    {MsgStatus#msg_status{index_on_disk = true},
     State#vqstate{index_state      = IndexState1,
                   disk_write_count = DiskWriteCount1}};
maybe_batch_write_index_to_disk(_Force, MsgStatus, State) ->
    {MsgStatus, State}.
%% Write this message's entry to the queue index if not already there
%% and either Force is set or the message is persistent. Unlike the
%% batched variant above this publishes immediately and also records
%% the 'delivered' flag when set.
maybe_write_index_to_disk(_Force, MsgStatus = #msg_status {
                                    index_on_disk = true }, State) ->
    {MsgStatus, State};
maybe_write_index_to_disk(Force, MsgStatus = #msg_status {
                                   msg           = Msg,
                                   msg_id        = MsgId,
                                   seq_id        = SeqId,
                                   is_persistent = IsPersistent,
                                   is_delivered  = IsDelivered,
                                   msg_props     = MsgProps},
                          State = #vqstate{target_ram_count = TargetRamCount,
                                           disk_write_count = DiskWriteCount,
                                           index_state      = IndexState})
  when Force orelse IsPersistent ->
    %% Embed the message in the index entry when it is small enough,
    %% otherwise store only its id.
    {MsgOrId, DiskWriteCount1} =
        case persist_to(MsgStatus) of
            msg_store   -> {MsgId, DiskWriteCount};
            queue_index -> {prepare_to_store(Msg), DiskWriteCount + 1}
        end,
    IndexState1 = rabbit_queue_index:publish(
                    MsgOrId, SeqId, MsgProps, IsPersistent, TargetRamCount,
                    IndexState),
    IndexState2 = maybe_write_delivered(IsDelivered, SeqId, IndexState1),
    {MsgStatus#msg_status{index_on_disk = true},
     State#vqstate{index_state      = IndexState2,
                   disk_write_count = DiskWriteCount1}};
maybe_write_index_to_disk(_Force, MsgStatus, State) ->
    {MsgStatus, State}.

%% Payload write followed by an immediate index publish.
maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) ->
    {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State),
    maybe_write_index_to_disk(ForceIndex, MsgStatus1, State1).

%% Payload write followed by a *batched* index pre-publish (used when
%% paging under memory pressure).
maybe_prepare_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) ->
    {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State),
    maybe_batch_write_index_to_disk(ForceIndex, MsgStatus1, State1).

%% Decide whether a message should live in the message store or be
%% embedded in the queue index, based on its estimated total size
%% versus the qi_embed_msgs_below limit.
determine_persist_to(#basic_message{
                        content = #content{properties     = Props,
                                           properties_bin = PropsBin}},
                     #message_properties{size = BodySize},
                     IndexMaxSize) ->
    %% The >= is so that you can set the env to 0 and never persist
    %% to the index.
    %%
    %% We want this to be fast, so we avoid size(term_to_binary())
    %% here, or using the term size estimation from truncate.erl, both
    %% of which are too slow. So instead, if the message body size
    %% goes over the limit then we avoid any other checks.
    %%
    %% If it doesn't we need to decide if the properties will push
    %% it past the limit. If we have the encoded properties (usual
    %% case) we can just check their size. If we don't (message came
    %% via the direct client), we make a guess based on the number of
    %% headers.
    case BodySize >= IndexMaxSize of
        true  -> msg_store;
        false -> Est = case is_binary(PropsBin) of
                           true  -> BodySize + size(PropsBin);
                           false -> #'P_basic'{headers = Hs} = Props,
                                    case Hs of
                                        undefined -> 0;
                                        _         -> length(Hs)
                                    end * ?HEADER_GUESS_SIZE + BodySize
                       end,
                 case Est >= IndexMaxSize of
                     true  -> msg_store;
                     false -> queue_index
                 end
    end.

%% The destination chosen by determine_persist_to/3, cached in the
%% message status.
persist_to(#msg_status{persist_to = To}) -> To.

prepare_to_store(Msg) ->
    Msg#basic_message{
      %% don't persist any recoverable decoded properties
      content = rabbit_binary_parser:clear_decoded_content(
                  Msg #basic_message.content)}.

%%----------------------------------------------------------------------------
%% Internal gubbins for acks
%%----------------------------------------------------------------------------

%% File a message under the appropriate pending-ack tree, keyed by
%% SeqId: RPA if the payload is in RAM, QPA if it is (to be) embedded
%% in the queue index, DPA otherwise.
record_pending_ack(#msg_status { seq_id = SeqId } = MsgStatus,
                   State = #vqstate { ram_pending_ack  = RPA,
                                      disk_pending_ack = DPA,
                                      qi_pending_ack   = QPA,
                                      ack_in_counter   = AckInCount}) ->
    Insert = fun (Tree) -> gb_trees:insert(SeqId, MsgStatus, Tree) end,
    {RPA1, DPA1, QPA1} =
        case {msg_in_ram(MsgStatus), persist_to(MsgStatus)} of
            {false, _}           -> {RPA, Insert(DPA), QPA};
            {_,     queue_index} -> {RPA, DPA, Insert(QPA)};
            {_,     msg_store}   -> {Insert(RPA), DPA, QPA}
        end,
    State #vqstate { ram_pending_ack  = RPA1,
                     disk_pending_ack = DPA1,
                     qi_pending_ack   = QPA1,
                     ack_in_counter   = AckInCount + 1}.

%% Find a pending ack by SeqId, searching RPA, then DPA, then QPA
%% (crashes via gb_trees:get/2 if absent everywhere).
lookup_pending_ack(SeqId, #vqstate { ram_pending_ack  = RPA,
                                     disk_pending_ack = DPA,
                                     qi_pending_ack   = QPA}) ->
    case gb_trees:lookup(SeqId, RPA) of
        {value, V} -> V;
        none       -> case gb_trees:lookup(SeqId, DPA) of
                          {value, V} -> V;
                          none       -> gb_trees:get(SeqId, QPA)
                      end
    end.
%% First parameter = UpdateStats
%% Take a pending ack out of whichever tree holds it. Returns {none,
%% State} when the SeqId is unknown. When UpdateStats is true the
%% unacked counters are decremented as well.
remove_pending_ack(true, SeqId, State) ->
    case remove_pending_ack(false, SeqId, State) of
        {none, _} ->
            {none, State};
        {MsgStatus, State1} ->
            {MsgStatus, stats({0, -1}, {MsgStatus, none}, 0, State1)}
    end;
remove_pending_ack(false, SeqId, State = #vqstate{ram_pending_ack  = RPA,
                                                  disk_pending_ack = DPA,
                                                  qi_pending_ack   = QPA}) ->
    %% Same search order as lookup_pending_ack/2: RPA, DPA, QPA.
    case gb_trees:lookup(SeqId, RPA) of
        {value, V} -> RPA1 = gb_trees:delete(SeqId, RPA),
                      {V, State #vqstate { ram_pending_ack = RPA1 }};
        none       -> case gb_trees:lookup(SeqId, DPA) of
                          {value, V} ->
                              DPA1 = gb_trees:delete(SeqId, DPA),
                              {V, State#vqstate{disk_pending_ack = DPA1}};
                          none ->
                              case gb_trees:lookup(SeqId, QPA) of
                                  {value, V} ->
                                      QPA1 = gb_trees:delete(SeqId, QPA),
                                      {V, State#vqstate{qi_pending_ack = QPA1}};
                                  none ->
                                      {none, State}
                              end
                      end
    end.

%% Drop all pending acks. With KeepPersistent = true only transient
%% payloads are deleted from the message store (index entries are kept);
%% otherwise index entries are acked and all payloads removed.
purge_pending_ack(KeepPersistent,
                  State = #vqstate { index_state       = IndexState,
                                     msg_store_clients = MSCState }) ->
    {IndexOnDiskSeqIds, MsgIdsByStore, State1} = purge_pending_ack1(State),
    case KeepPersistent of
        true  -> remove_transient_msgs_by_id(MsgIdsByStore, MSCState),
                 State1;
        false -> IndexState1 =
                     rabbit_queue_index:ack(IndexOnDiskSeqIds, IndexState),
                 remove_msgs_by_id(MsgIdsByStore, MSCState),
                 State1 #vqstate { index_state = IndexState1 }
    end.

%% Like purge_pending_ack/2 but for queue deletion: the whole queue
%% index is deleted and terminated, so individual acks are not needed.
purge_pending_ack_delete_and_terminate(
  State = #vqstate { index_state       = IndexState,
                     msg_store_clients = MSCState }) ->
    {_, MsgIdsByStore, State1} = purge_pending_ack1(State),
    IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState),
    remove_msgs_by_id(MsgIdsByStore, MSCState),
    State1 #vqstate { index_state = IndexState1 }.
%% Fold accumulate_ack/2 over all three pending-ack trees, emptying
%% them and returning the accumulated index SeqIds and per-store
%% message ids for the caller to clean up.
purge_pending_ack1(State = #vqstate { ram_pending_ack   = RPA,
                                      disk_pending_ack  = DPA,
                                      qi_pending_ack    = QPA }) ->
    F = fun (_SeqId, MsgStatus, Acc) -> accumulate_ack(MsgStatus, Acc) end,
    {IndexOnDiskSeqIds, MsgIdsByStore, _AllMsgIds} =
        rabbit_misc:gb_trees_fold(
          F, rabbit_misc:gb_trees_fold(
               F,  rabbit_misc:gb_trees_fold(
                     F, accumulate_ack_init(), RPA), DPA), QPA),
    State1 = State #vqstate { ram_pending_ack  = gb_trees:empty(),
                              disk_pending_ack = gb_trees:empty(),
                              qi_pending_ack   = gb_trees:empty()},
    {IndexOnDiskSeqIds, MsgIdsByStore, State1}.

%% MsgIdsByStore is a map with two keys:
%%
%% true: holds a list of Persistent Message Ids.
%% false: holds a list of Transient Message Ids.
%%
%% When we call maps:to_list/1 we get two sets of msg ids, where
%% IsPersistent is either true for persistent messages or false for
%% transient ones. The msg_store_remove/3 function takes this boolean
%% flag to determine from which store the messages should be removed.
remove_msgs_by_id(MsgIdsByStore, MSCState) ->
    [ok = msg_store_remove(MSCState, IsPersistent, MsgIds)
     || {IsPersistent, MsgIds} <- maps:to_list(MsgIdsByStore)].

%% As remove_msgs_by_id/2, but only touches the transient store.
remove_transient_msgs_by_id(MsgIdsByStore, MSCState) ->
    case maps:find(false, MsgIdsByStore) of
        error        -> ok;
        {ok, MsgIds} -> ok = msg_store_remove(MSCState, false, MsgIds)
    end.

%% {IndexOnDiskSeqIds, MsgIdsByStore, AllMsgIds} accumulator seed.
accumulate_ack_init() -> {[], maps:new(), []}.

%% Sort one pending ack into the accumulator: its SeqId if the index
%% entry is on disk, its MsgId under the persistence flag if the
%% payload is in a store, and its MsgId unconditionally.
accumulate_ack(#msg_status { seq_id        = SeqId,
                             msg_id        = MsgId,
                             is_persistent = IsPersistent,
                             msg_in_store  = MsgInStore,
                             index_on_disk = IndexOnDisk },
               {IndexOnDiskSeqIdsAcc, MsgIdsByStore, AllMsgIds}) ->
    {cons_if(IndexOnDisk, SeqId, IndexOnDiskSeqIdsAcc),
     case MsgInStore of
         true  -> rabbit_misc:maps_cons(IsPersistent, MsgId, MsgIdsByStore);
         false -> MsgIdsByStore
     end,
     [MsgId | AllMsgIds]}.
%%----------------------------------------------------------------------------
%% Internal plumbing for confirms (aka publisher acks)
%%----------------------------------------------------------------------------

%% Mark every message id in MsgIdSet as confirmed: the ids leave the
%% msgs_on_disk / msg_indices_on_disk / unconfirmed tracking sets and
%% join the confirmed set.
record_confirms(MsgIdSet, State = #vqstate { msgs_on_disk        = MOD,
                                             msg_indices_on_disk = MIOD,
                                             unconfirmed         = UC,
                                             confirmed           = C }) ->
    RemainingMOD  = rabbit_misc:gb_sets_difference(MOD,  MsgIdSet),
    RemainingMIOD = rabbit_misc:gb_sets_difference(MIOD, MsgIdSet),
    RemainingUC   = rabbit_misc:gb_sets_difference(UC,   MsgIdSet),
    State #vqstate { msgs_on_disk        = RemainingMOD,
                     msg_indices_on_disk = RemainingMIOD,
                     unconfirmed         = RemainingUC,
                     confirmed           = gb_sets:union(C, MsgIdSet) }.

%% Callback from the message store once payloads hit disk. 'ignored'
%% means the store had nothing to write, so the whole set confirms at
%% once; 'written' confirms only those ids whose index entry is also
%% already on disk, remembering the rest in msgs_on_disk.
msgs_written_to_disk(Callback, MsgIdSet, ignored) ->
    Callback(?MODULE,
             fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end);
msgs_written_to_disk(Callback, MsgIdSet, written) ->
    Callback(?MODULE,
             fun (?MODULE, State = #vqstate { msgs_on_disk        = MOD,
                                              msg_indices_on_disk = MIOD,
                                              unconfirmed         = UC }) ->
                     StillUnconfirmed = gb_sets:intersection(UC, MsgIdSet),
                     IndexAlsoOnDisk  = gb_sets:intersection(MsgIdSet, MIOD),
                     State1 = State #vqstate {
                                msgs_on_disk =
                                    gb_sets:union(MOD, StillUnconfirmed) },
                     record_confirms(IndexAlsoOnDisk, State1)
             end).

%% Callback from the queue index once index entries hit disk: the
%% mirror image of the 'written' clause above.
msg_indices_written_to_disk(Callback, MsgIdSet) ->
    Callback(?MODULE,
             fun (?MODULE, State = #vqstate { msgs_on_disk        = MOD,
                                              msg_indices_on_disk = MIOD,
                                              unconfirmed         = UC }) ->
                     StillUnconfirmed = gb_sets:intersection(UC, MsgIdSet),
                     PayloadAlsoOnDisk = gb_sets:intersection(MsgIdSet, MOD),
                     State1 = State #vqstate {
                                msg_indices_on_disk =
                                    gb_sets:union(MIOD, StillUnconfirmed) },
                     record_confirms(PayloadAlsoOnDisk, State1)
             end).

%% Both payload and index are on disk in one go: confirm immediately.
msgs_and_indices_written_to_disk(Callback, MsgIdSet) ->
    Callback(?MODULE,
             fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end).
%%----------------------------------------------------------------------------
%% Internal plumbing for requeue
%%----------------------------------------------------------------------------

%% Requeue a pending ack as an "alpha" (payload in RAM): reads the
%% payload back if needed and moves one count from unacked to ready.
publish_alpha(#msg_status { msg = undefined } = MsgStatus, State) ->
    {Msg, State1} = read_msg(MsgStatus, State),
    MsgStatus1 = MsgStatus#msg_status { msg = Msg },
    {MsgStatus1, stats({1, -1}, {MsgStatus, MsgStatus1}, 0, State1)};
publish_alpha(MsgStatus, State) ->
    {MsgStatus, stats({1, -1}, {MsgStatus, MsgStatus}, 0, State)}.

%% Requeue a pending ack as a "beta" (payload on disk, position in
%% RAM): forces a payload write, trims the in-RAM copy where allowed.
publish_beta(MsgStatus, State) ->
    {MsgStatus1, State1} = maybe_prepare_write_to_disk(true, false, MsgStatus, State),
    MsgStatus2 = m(trim_msg_status(MsgStatus1)),
    {MsgStatus2, stats({1, -1}, {MsgStatus, MsgStatus2}, 0, State1)}.

%% Rebuild queue, inserting sequence ids to maintain ordering
%% Merges the (sorted) SeqIds being requeued into queue Q, stopping at
%% Limit. PubFun is publish_alpha/2 or publish_beta/2. Returns the
%% SeqIds that were not consumed (>= Limit), the merged queue, and the
%% MsgIds requeued (in reverse order).
queue_merge(SeqIds, Q, MsgIds, Limit, PubFun, State) ->
    queue_merge(SeqIds, Q, ?QUEUE:new(), MsgIds,
                Limit, PubFun, State).

queue_merge([SeqId | Rest] = SeqIds, Q, Front, MsgIds,
            Limit, PubFun, State)
  when Limit == undefined orelse SeqId < Limit ->
    case ?QUEUE:out(Q) of
        {{value, #msg_status { seq_id = SeqIdQ } = MsgStatus}, Q1}
          when SeqIdQ < SeqId ->
            %% enqueue from the remaining queue
            queue_merge(SeqIds, Q1, ?QUEUE:in(MsgStatus, Front), MsgIds,
                        Limit, PubFun, State);
        {_, _Q1} ->
            %% enqueue from the remaining list of sequence ids
            case msg_from_pending_ack(SeqId, State) of
                {none, _} ->
                    queue_merge(Rest, Q, Front, MsgIds, Limit, PubFun, State);
                {MsgStatus, State1} ->
                    {#msg_status { msg_id = MsgId } = MsgStatus1, State2} =
                        PubFun(MsgStatus, State1),
                    queue_merge(Rest, Q, ?QUEUE:in(MsgStatus1, Front), [MsgId | MsgIds],
                                Limit, PubFun, State2)
            end
    end;
queue_merge(SeqIds, Q, Front, MsgIds,
            _Limit, _PubFun, State) ->
    {SeqIds, ?QUEUE:join(Front, Q), MsgIds, State}.
%% Requeue the remaining SeqIds straight into delta: each pending ack
%% is written out (payload + index) and folded into the delta record.
delta_merge([], Delta, MsgIds, State) ->
    {Delta, MsgIds, State};
delta_merge(SeqIds, Delta, MsgIds, State) ->
    lists:foldl(fun (SeqId, {Delta0, MsgIds0, State0} = Acc) ->
                        case msg_from_pending_ack(SeqId, State0) of
                            {none, _} ->
                                Acc;
                            {#msg_status { msg_id = MsgId,
                                           is_persistent = IsPersistent } = MsgStatus, State1} ->
                                {_MsgStatus, State2} =
                                    maybe_prepare_write_to_disk(true, true, MsgStatus, State1),
                                {expand_delta(SeqId, Delta0, IsPersistent), [MsgId | MsgIds0],
                                 stats({1, -1}, {MsgStatus, none}, 1, State2)}
                        end
                end, {Delta, MsgIds, State}, SeqIds).

%% Mostly opposite of record_pending_ack/2
%% Extracts a pending ack (without touching stats) and clears its
%% needs_confirming flag, since a requeued message was already
%% confirmed (or never needed it).
msg_from_pending_ack(SeqId, State) ->
    case remove_pending_ack(false, SeqId, State) of
        {none, _} ->
            {none, State};
        {#msg_status { msg_props = MsgProps } = MsgStatus, State1} ->
            {MsgStatus #msg_status {
               msg_props = MsgProps #message_properties { needs_confirming = false } },
             State1}
    end.

%% Lowest SeqId held in a beta queue, or undefined if empty.
beta_limit(Q) ->
    case ?QUEUE:peek(Q) of
        {value, #msg_status { seq_id = SeqId }} -> SeqId;
        empty                                   -> undefined
    end.

%% First SeqId covered by delta, or undefined for a blank delta.
delta_limit(?BLANK_DELTA_PATTERN(_X))             -> undefined;
delta_limit(#delta { start_seq_id = StartSeqId }) -> StartSeqId.

%%----------------------------------------------------------------------------
%% Iterator
%%----------------------------------------------------------------------------

%% Iterators over the three pending-ack trees, tagged 'ack' so next/2
%% can mark their messages as unacked.
ram_ack_iterator(State) ->
    {ack, gb_trees:iterator(State#vqstate.ram_pending_ack)}.

disk_ack_iterator(State) ->
    {ack, gb_trees:iterator(State#vqstate.disk_pending_ack)}.

qi_ack_iterator(State) ->
    {ack, gb_trees:iterator(State#vqstate.qi_pending_ack)}.

msg_iterator(State) -> istate(start, State).

%% State machine driving msg_iterator/1 over the ready messages in
%% order: q4, q3, delta, q2, q1.
istate(start, State) -> {q4,    State#vqstate.q4,    State};
istate(q4,    State) -> {q3,    State#vqstate.q3,    State};
istate(q3,    State) -> {delta, State#vqstate.delta, State};
istate(delta, State) -> {q2,    State#vqstate.q2,    State};
istate(q2,    State) -> {q1,    State#vqstate.q1,    State};
istate(q1,   _State) -> done.
%% Advance one step of an iterator (see msg_iterator/1 and the ack
%% iterators above). Yields {value, MsgStatus, Unacked, NextIter,
%% IndexState} or {empty, IndexState}. Delta segments are read from the
%% queue index one segment boundary at a time; entries that are
%% pending acks are skipped (they are covered by the ack iterators).
next({ack, It}, IndexState) ->
    case gb_trees:next(It) of
        none                     -> {empty, IndexState};
        {_SeqId, MsgStatus, It1} -> Next = {ack, It1},
                                    {value, MsgStatus, true, Next, IndexState}
    end;
next(done, IndexState) -> {empty, IndexState};
%% Exhausted delta (start == end): move on to q2.
next({delta, #delta{start_seq_id = SeqId,
                    end_seq_id   = SeqId}, State}, IndexState) ->
    next(istate(delta, State), IndexState);
next({delta, #delta{start_seq_id = SeqId,
                    end_seq_id   = SeqIdEnd} = Delta, State}, IndexState) ->
    SeqIdB = rabbit_queue_index:next_segment_boundary(SeqId),
    %% Read at most up to the next segment boundary.
    SeqId1 = lists:min([SeqIdB, SeqIdEnd]),
    {List, IndexState1} = rabbit_queue_index:read(SeqId, SeqId1, IndexState),
    next({delta, Delta#delta{start_seq_id = SeqId1}, List, State}, IndexState1);
next({delta, Delta, [], State}, IndexState) ->
    next({delta, Delta, State}, IndexState);
next({delta, Delta, [{_, SeqId, _, _, _} = M | Rest], State}, IndexState) ->
    case is_msg_in_pending_acks(SeqId, State) of
        false -> Next = {delta, Delta, Rest, State},
                 {value, beta_msg_status(M), false, Next, IndexState};
        true  -> next({delta, Delta, Rest, State}, IndexState)
    end;
%% In-RAM queues (q1..q4): pop until empty, then advance istate/2.
next({Key, Q, State}, IndexState) ->
    case ?QUEUE:out(Q) of
        {empty, _Q}              -> next(istate(Key, State), IndexState);
        {{value, MsgStatus}, QN} -> Next = {Key, QN, State},
                                    {value, MsgStatus, false, Next, IndexState}
    end.

%% Step an iterator and push its next element (if any) onto the list
%% of active iterators used by ifold/4.
inext(It, {Its, IndexState}) ->
    case next(It, IndexState) of
        {empty, IndexState1} ->
            {Its, IndexState1};
        {value, MsgStatus1, Unacked, It1, IndexState1} ->
            {[{MsgStatus1, Unacked, It1} | Its], IndexState1}
    end.
+ +ifold(_Fun, Acc, [], State0) -> + {Acc, State0}; +ifold(Fun, Acc, Its0, State0) -> + [{MsgStatus, Unacked, It} | Rest] = + lists:sort(fun ({#msg_status{seq_id = SeqId1}, _, _}, + {#msg_status{seq_id = SeqId2}, _, _}) -> + SeqId1 =< SeqId2 + end, Its0), + {Msg, State1} = read_msg(MsgStatus, State0), + case Fun(Msg, MsgStatus#msg_status.msg_props, Unacked, Acc) of + {stop, Acc1} -> + {Acc1, State1}; + {cont, Acc1} -> + IndexState0 = State1#vqstate.index_state, + {Its1, IndexState1} = inext(It, {Rest, IndexState0}), + State2 = State1#vqstate{index_state = IndexState1}, + ifold(Fun, Acc1, Its1, State2) + end. + +%%---------------------------------------------------------------------------- +%% Phase changes +%%---------------------------------------------------------------------------- + +maybe_reduce_memory_use(State = #vqstate {memory_reduction_run_count = MRedRunCount, + mode = Mode}) -> + case MRedRunCount >= ?EXPLICIT_GC_RUN_OP_THRESHOLD(Mode) of + true -> State1 = reduce_memory_use(State), + State1#vqstate{memory_reduction_run_count = 0}; + false -> State#vqstate{memory_reduction_run_count = MRedRunCount + 1} + end. + +reduce_memory_use(State = #vqstate { target_ram_count = infinity }) -> + State; +reduce_memory_use(State = #vqstate { + mode = default, + ram_pending_ack = RPA, + ram_msg_count = RamMsgCount, + target_ram_count = TargetRamCount, + io_batch_size = IoBatchSize, + rates = #rates { in = AvgIngress, + out = AvgEgress, + ack_in = AvgAckIngress, + ack_out = AvgAckEgress } }) -> + {CreditDiscBound, _} =rabbit_misc:get_env(rabbit, + msg_store_credit_disc_bound, + ?CREDIT_DISC_BOUND), + {NeedResumeA2B, State1} = {_, #vqstate { q2 = Q2, q3 = Q3 }} = + case chunk_size(RamMsgCount + gb_trees:size(RPA), TargetRamCount) of + 0 -> {false, State}; + %% Reduce memory of pending acks and alphas. The order is + %% determined based on which is growing faster. 
Whichever + %% comes second may very well get a quota of 0 if the + %% first manages to push out the max number of messages. + A2BChunk -> + %% In case there are few messages to be sent to a message store + %% and many messages to be embedded to the queue index, + %% we should limit the number of messages to be flushed + %% to avoid blocking the process. + A2BChunkActual = case A2BChunk > CreditDiscBound * 2 of + true -> CreditDiscBound * 2; + false -> A2BChunk + end, + Funs = case ((AvgAckIngress - AvgAckEgress) > + (AvgIngress - AvgEgress)) of + true -> [fun limit_ram_acks/2, + fun push_alphas_to_betas/2]; + false -> [fun push_alphas_to_betas/2, + fun limit_ram_acks/2] + end, + {Quota, State2} = lists:foldl(fun (ReduceFun, {QuotaN, StateN}) -> + ReduceFun(QuotaN, StateN) + end, {A2BChunkActual, State}, Funs), + {(Quota == 0) andalso (A2BChunk > A2BChunkActual), State2} + end, + Permitted = permitted_beta_count(State1), + {NeedResumeB2D, State3} = + %% If there are more messages with their queue position held in RAM, + %% a.k.a. betas, in Q2 & Q3 than IoBatchSize, + %% write their queue position to disk, a.k.a. push_betas_to_deltas + case chunk_size(?QUEUE:len(Q2) + ?QUEUE:len(Q3), + Permitted) of + B2DChunk when B2DChunk >= IoBatchSize -> + %% Same as for alphas to betas. Limit a number of messages + %% to be flushed to disk at once to avoid blocking the process. + B2DChunkActual = case B2DChunk > CreditDiscBound * 2 of + true -> CreditDiscBound * 2; + false -> B2DChunk + end, + StateBD = push_betas_to_deltas(B2DChunkActual, State1), + {B2DChunk > B2DChunkActual, StateBD}; + _ -> + {false, State1} + end, + %% We can be blocked by the credit flow, or limited by a batch size, + %% or finished with flushing. + %% If blocked by the credit flow - the credit grant will resume processing, + %% if limited by a batch - the batch continuation message should be sent. 
+ %% The continuation message will be prioritised over publishes, + %% but not consumptions, so the queue can make progess. + Blocked = credit_flow:blocked(), + case {Blocked, NeedResumeA2B orelse NeedResumeB2D} of + %% Credit bump will continue paging + {true, _} -> State3; + %% Finished with paging + {false, false} -> State3; + %% Planning next batch + {false, true} -> + %% We don't want to use self-credit-flow, because it's harder to + %% reason about. So the process sends a (prioritised) message to + %% itself and sets a waiting_bump value to keep the message box clean + maybe_bump_reduce_memory_use(State3) + end; +%% When using lazy queues, there are no alphas, so we don't need to +%% call push_alphas_to_betas/2. +reduce_memory_use(State = #vqstate { + mode = lazy, + ram_pending_ack = RPA, + ram_msg_count = RamMsgCount, + target_ram_count = TargetRamCount }) -> + State1 = #vqstate { q3 = Q3 } = + case chunk_size(RamMsgCount + gb_trees:size(RPA), TargetRamCount) of + 0 -> State; + S1 -> {_, State2} = limit_ram_acks(S1, State), + State2 + end, + + State3 = + case chunk_size(?QUEUE:len(Q3), + permitted_beta_count(State1)) of + 0 -> + State1; + S2 -> + push_betas_to_deltas(S2, State1) + end, + garbage_collect(), + State3. + +maybe_bump_reduce_memory_use(State = #vqstate{ waiting_bump = true }) -> + State; +maybe_bump_reduce_memory_use(State) -> + self() ! bump_reduce_memory_use, + State#vqstate{ waiting_bump = true }. 
+ +limit_ram_acks(0, State) -> + {0, ui(State)}; +limit_ram_acks(Quota, State = #vqstate { ram_pending_ack = RPA, + disk_pending_ack = DPA }) -> + case gb_trees:is_empty(RPA) of + true -> + {Quota, ui(State)}; + false -> + {SeqId, MsgStatus, RPA1} = gb_trees:take_largest(RPA), + {MsgStatus1, State1} = + maybe_prepare_write_to_disk(true, false, MsgStatus, State), + MsgStatus2 = m(trim_msg_status(MsgStatus1)), + DPA1 = gb_trees:insert(SeqId, MsgStatus2, DPA), + limit_ram_acks(Quota - 1, + stats({0, 0}, {MsgStatus, MsgStatus2}, 0, + State1 #vqstate { ram_pending_ack = RPA1, + disk_pending_ack = DPA1 })) + end. + +permitted_beta_count(#vqstate { len = 0 }) -> + infinity; +permitted_beta_count(#vqstate { mode = lazy, + target_ram_count = TargetRamCount}) -> + TargetRamCount; +permitted_beta_count(#vqstate { target_ram_count = 0, q3 = Q3 }) -> + lists:min([?QUEUE:len(Q3), rabbit_queue_index:next_segment_boundary(0)]); +permitted_beta_count(#vqstate { q1 = Q1, + q4 = Q4, + target_ram_count = TargetRamCount, + len = Len }) -> + BetaDelta = Len - ?QUEUE:len(Q1) - ?QUEUE:len(Q4), + lists:max([rabbit_queue_index:next_segment_boundary(0), + BetaDelta - ((BetaDelta * BetaDelta) div + (BetaDelta + TargetRamCount))]). + +chunk_size(Current, Permitted) + when Permitted =:= infinity orelse Permitted >= Current -> + 0; +chunk_size(Current, Permitted) -> + Current - Permitted. + +fetch_from_q3(State = #vqstate { mode = default, + q1 = Q1, + q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3, + q4 = Q4 }) -> + case ?QUEUE:out(Q3) of + {empty, _Q3} -> + {empty, State}; + {{value, MsgStatus}, Q3a} -> + State1 = State #vqstate { q3 = Q3a }, + State2 = case {?QUEUE:is_empty(Q3a), 0 == DeltaCount} of + {true, true} -> + %% q3 is now empty, it wasn't before; + %% delta is still empty. So q2 must be + %% empty, and we know q4 is empty + %% otherwise we wouldn't be loading from + %% q3. As such, we can just set q4 to Q1. 
+ true = ?QUEUE:is_empty(Q2), %% ASSERTION + true = ?QUEUE:is_empty(Q4), %% ASSERTION + State1 #vqstate { q1 = ?QUEUE:new(), q4 = Q1 }; + {true, false} -> + maybe_deltas_to_betas(State1); + {false, _} -> + %% q3 still isn't empty, we've not + %% touched delta, so the invariants + %% between q1, q2, delta and q3 are + %% maintained + State1 + end, + {loaded, {MsgStatus, State2}} + end; +%% lazy queues +fetch_from_q3(State = #vqstate { mode = lazy, + delta = #delta { count = DeltaCount }, + q3 = Q3 }) -> + case ?QUEUE:out(Q3) of + {empty, _Q3} when DeltaCount =:= 0 -> + {empty, State}; + {empty, _Q3} -> + fetch_from_q3(maybe_deltas_to_betas(State)); + {{value, MsgStatus}, Q3a} -> + State1 = State #vqstate { q3 = Q3a }, + {loaded, {MsgStatus, State1}} + end. + +maybe_deltas_to_betas(State) -> + AfterFun = process_delivers_and_acks_fun(deliver_and_ack), + maybe_deltas_to_betas(AfterFun, State). + +maybe_deltas_to_betas(_DelsAndAcksFun, + State = #vqstate {delta = ?BLANK_DELTA_PATTERN(X) }) -> + State; +maybe_deltas_to_betas(DelsAndAcksFun, + State = #vqstate { + q2 = Q2, + delta = Delta, + q3 = Q3, + index_state = IndexState, + ram_msg_count = RamMsgCount, + ram_bytes = RamBytes, + disk_read_count = DiskReadCount, + delta_transient_bytes = DeltaTransientBytes, + transient_threshold = TransientThreshold }) -> + #delta { start_seq_id = DeltaSeqId, + count = DeltaCount, + transient = Transient, + end_seq_id = DeltaSeqIdEnd } = Delta, + DeltaSeqId1 = + lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId), + DeltaSeqIdEnd]), + {List, IndexState1} = rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, + IndexState), + {Q3a, RamCountsInc, RamBytesInc, State1, TransientCount, TransientBytes} = + betas_from_index_entries(List, TransientThreshold, + DelsAndAcksFun, + State #vqstate { index_state = IndexState1 }), + State2 = State1 #vqstate { ram_msg_count = RamMsgCount + RamCountsInc, + ram_bytes = RamBytes + RamBytesInc, + disk_read_count = DiskReadCount + RamCountsInc 
}, + case ?QUEUE:len(Q3a) of + 0 -> + %% we ignored every message in the segment due to it being + %% transient and below the threshold + maybe_deltas_to_betas( + DelsAndAcksFun, + State2 #vqstate { + delta = d(Delta #delta { start_seq_id = DeltaSeqId1 })}); + Q3aLen -> + Q3b = ?QUEUE:join(Q3, Q3a), + case DeltaCount - Q3aLen of + 0 -> + %% delta is now empty, but it wasn't before, so + %% can now join q2 onto q3 + State2 #vqstate { q2 = ?QUEUE:new(), + delta = ?BLANK_DELTA, + q3 = ?QUEUE:join(Q3b, Q2), + delta_transient_bytes = 0}; + N when N > 0 -> + Delta1 = d(#delta { start_seq_id = DeltaSeqId1, + count = N, + transient = Transient - TransientCount, + end_seq_id = DeltaSeqIdEnd }), + State2 #vqstate { delta = Delta1, + q3 = Q3b, + delta_transient_bytes = DeltaTransientBytes - TransientBytes } + end + end. + +push_alphas_to_betas(Quota, State) -> + {Quota1, State1} = + push_alphas_to_betas( + fun ?QUEUE:out/1, + fun (MsgStatus, Q1a, + State0 = #vqstate { q3 = Q3, delta = #delta { count = 0, + transient = 0 } }) -> + State0 #vqstate { q1 = Q1a, q3 = ?QUEUE:in(MsgStatus, Q3) }; + (MsgStatus, Q1a, State0 = #vqstate { q2 = Q2 }) -> + State0 #vqstate { q1 = Q1a, q2 = ?QUEUE:in(MsgStatus, Q2) } + end, Quota, State #vqstate.q1, State), + {Quota2, State2} = + push_alphas_to_betas( + fun ?QUEUE:out_r/1, + fun (MsgStatus, Q4a, State0 = #vqstate { q3 = Q3 }) -> + State0 #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3), q4 = Q4a } + end, Quota1, State1 #vqstate.q4, State1), + {Quota2, State2}. + +push_alphas_to_betas(_Generator, _Consumer, Quota, _Q, + State = #vqstate { ram_msg_count = RamMsgCount, + target_ram_count = TargetRamCount }) + when Quota =:= 0 orelse + TargetRamCount =:= infinity orelse + TargetRamCount >= RamMsgCount -> + {Quota, ui(State)}; +push_alphas_to_betas(Generator, Consumer, Quota, Q, State) -> + %% We consume credits from the message_store whenever we need to + %% persist a message to disk. See: + %% rabbit_variable_queue:msg_store_write/4. 
So perhaps the + %% msg_store is trying to throttle down our queue. + case credit_flow:blocked() of + true -> {Quota, ui(State)}; + false -> case Generator(Q) of + {empty, _Q} -> + {Quota, ui(State)}; + {{value, MsgStatus}, Qa} -> + {MsgStatus1, State1} = + maybe_prepare_write_to_disk(true, false, MsgStatus, + State), + MsgStatus2 = m(trim_msg_status(MsgStatus1)), + State2 = stats( + ready0, {MsgStatus, MsgStatus2}, 0, State1), + State3 = Consumer(MsgStatus2, Qa, State2), + push_alphas_to_betas(Generator, Consumer, Quota - 1, + Qa, State3) + end + end. + +push_betas_to_deltas(Quota, State = #vqstate { mode = default, + q2 = Q2, + delta = Delta, + q3 = Q3}) -> + PushState = {Quota, Delta, State}, + {Q3a, PushState1} = push_betas_to_deltas( + fun ?QUEUE:out_r/1, + fun rabbit_queue_index:next_segment_boundary/1, + Q3, PushState), + {Q2a, PushState2} = push_betas_to_deltas( + fun ?QUEUE:out/1, + fun (Q2MinSeqId) -> Q2MinSeqId end, + Q2, PushState1), + {_, Delta1, State1} = PushState2, + State1 #vqstate { q2 = Q2a, + delta = Delta1, + q3 = Q3a }; +%% In the case of lazy queues we want to page as many messages as +%% possible from q3. +push_betas_to_deltas(Quota, State = #vqstate { mode = lazy, + delta = Delta, + q3 = Q3}) -> + PushState = {Quota, Delta, State}, + {Q3a, PushState1} = push_betas_to_deltas( + fun ?QUEUE:out_r/1, + fun (Q2MinSeqId) -> Q2MinSeqId end, + Q3, PushState), + {_, Delta1, State1} = PushState1, + State1 #vqstate { delta = Delta1, + q3 = Q3a }. + + +push_betas_to_deltas(Generator, LimitFun, Q, PushState) -> + case ?QUEUE:is_empty(Q) of + true -> + {Q, PushState}; + false -> + {value, #msg_status { seq_id = MinSeqId }} = ?QUEUE:peek(Q), + {value, #msg_status { seq_id = MaxSeqId }} = ?QUEUE:peek_r(Q), + Limit = LimitFun(MinSeqId), + case MaxSeqId < Limit of + true -> {Q, PushState}; + false -> push_betas_to_deltas1(Generator, Limit, Q, PushState) + end + end. 
+ +push_betas_to_deltas1(_Generator, _Limit, Q, {0, Delta, State}) -> + {Q, {0, Delta, ui(State)}}; +push_betas_to_deltas1(Generator, Limit, Q, {Quota, Delta, State}) -> + case Generator(Q) of + {empty, _Q} -> + {Q, {Quota, Delta, ui(State)}}; + {{value, #msg_status { seq_id = SeqId }}, _Qa} + when SeqId < Limit -> + {Q, {Quota, Delta, ui(State)}}; + {{value, MsgStatus = #msg_status { seq_id = SeqId }}, Qa} -> + {#msg_status { index_on_disk = true, + is_persistent = IsPersistent }, State1} = + maybe_batch_write_index_to_disk(true, MsgStatus, State), + State2 = stats(ready0, {MsgStatus, none}, 1, State1), + Delta1 = expand_delta(SeqId, Delta, IsPersistent), + push_betas_to_deltas1(Generator, Limit, Qa, + {Quota - 1, Delta1, State2}) + end. + +%% Flushes queue index batch caches and updates queue index state. +ui(#vqstate{index_state = IndexState, + target_ram_count = TargetRamCount} = State) -> + IndexState1 = rabbit_queue_index:flush_pre_publish_cache( + TargetRamCount, IndexState), + State#vqstate{index_state = IndexState1}. + +%%---------------------------------------------------------------------------- +%% Upgrading +%%---------------------------------------------------------------------------- + +-spec multiple_routing_keys() -> 'ok'. + +multiple_routing_keys() -> + transform_storage( + fun ({basic_message, ExchangeName, Routing_Key, Content, + MsgId, Persistent}) -> + {ok, {basic_message, ExchangeName, [Routing_Key], Content, + MsgId, Persistent}}; + (_) -> {error, corrupt_message} + end), + ok. + + +%% Assumes message store is not running +transform_storage(TransformFun) -> + transform_store(?PERSISTENT_MSG_STORE, TransformFun), + transform_store(?TRANSIENT_MSG_STORE, TransformFun). + +transform_store(Store, TransformFun) -> + rabbit_msg_store:force_recovery(rabbit_mnesia:dir(), Store), + rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun). 
+ +move_messages_to_vhost_store() -> + case list_persistent_queues() of + [] -> + log_upgrade("No durable queues found." + " Skipping message store migration"), + ok; + Queues -> + move_messages_to_vhost_store(Queues) + end, + ok = delete_old_store(), + ok = rabbit_queue_index:cleanup_global_recovery_terms(). + +move_messages_to_vhost_store(Queues) -> + log_upgrade("Moving messages to per-vhost message store"), + %% Move the queue index for each persistent queue to the new store + lists:foreach( + fun(Queue) -> + QueueName = amqqueue:get_name(Queue), + rabbit_queue_index:move_to_per_vhost_stores(QueueName) + end, + Queues), + %% Legacy (global) msg_store may require recovery. + %% This upgrade step should only be started + %% if we are upgrading from a pre-3.7.0 version. + {QueuesWithTerms, RecoveryRefs, StartFunState} = read_old_recovery_terms(Queues), + + OldStore = run_old_persistent_store(RecoveryRefs, StartFunState), + + VHosts = rabbit_vhost:list_names(), + + %% New store should not be recovered. + NewMsgStore = start_new_store(VHosts), + %% Recovery terms should be started for all vhosts for new store. + [ok = rabbit_recovery_terms:open_table(VHost) || VHost <- VHosts], + + MigrationBatchSize = application:get_env(rabbit, queue_migration_batch_size, + ?QUEUE_MIGRATION_BATCH_SIZE), + in_batches(MigrationBatchSize, + {rabbit_variable_queue, migrate_queue, [OldStore, NewMsgStore]}, + QueuesWithTerms, + "message_store upgrades: Migrating batch ~p of ~p queues. Out of total ~p ~n", + "message_store upgrades: Batch ~p of ~p queues migrated ~n. ~p total left"), + + log_upgrade("Message store migration finished"), + ok = rabbit_sup:stop_child(OldStore), + [ok= rabbit_recovery_terms:close_table(VHost) || VHost <- VHosts], + ok = stop_new_store(NewMsgStore). + +in_batches(Size, MFA, List, MessageStart, MessageEnd) -> + in_batches(Size, 1, MFA, List, MessageStart, MessageEnd). 
+ +in_batches(_, _, _, [], _, _) -> ok; +in_batches(Size, BatchNum, MFA, List, MessageStart, MessageEnd) -> + Length = length(List), + {Batch, Tail} = case Size > Length of + true -> {List, []}; + false -> lists:split(Size, List) + end, + ProcessedLength = (BatchNum - 1) * Size, + rabbit_log:info(MessageStart, [BatchNum, Size, ProcessedLength + Length]), + {M, F, A} = MFA, + Keys = [ rpc:async_call(node(), M, F, [El | A]) || El <- Batch ], + lists:foreach(fun(Key) -> + case rpc:yield(Key) of + {badrpc, Err} -> throw(Err); + _ -> ok + end + end, + Keys), + rabbit_log:info(MessageEnd, [BatchNum, Size, length(Tail)]), + in_batches(Size, BatchNum + 1, MFA, Tail, MessageStart, MessageEnd). + +migrate_queue({QueueName = #resource{virtual_host = VHost, name = Name}, + RecoveryTerm}, + OldStore, NewStore) -> + log_upgrade_verbose( + "Migrating messages in queue ~s in vhost ~s to per-vhost message store~n", + [Name, VHost]), + OldStoreClient = get_global_store_client(OldStore), + NewStoreClient = get_per_vhost_store_client(QueueName, NewStore), + %% WARNING: During scan_queue_segments queue index state is being recovered + %% and terminated. This can cause side effects! 
+ rabbit_queue_index:scan_queue_segments( + %% We migrate only persistent messages which are found in message store + %% and are not acked yet + fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, OldC) + when is_binary(MsgId) -> + migrate_message(MsgId, OldC, NewStoreClient); + (_SeqId, _MsgId, _MsgProps, + _IsPersistent, _IsDelivered, _IsAcked, OldC) -> + OldC + end, + OldStoreClient, + QueueName), + rabbit_msg_store:client_terminate(OldStoreClient), + rabbit_msg_store:client_terminate(NewStoreClient), + NewClientRef = rabbit_msg_store:client_ref(NewStoreClient), + case RecoveryTerm of + non_clean_shutdown -> ok; + Term when is_list(Term) -> + NewRecoveryTerm = lists:keyreplace(persistent_ref, 1, RecoveryTerm, + {persistent_ref, NewClientRef}), + rabbit_queue_index:update_recovery_term(QueueName, NewRecoveryTerm) + end, + log_upgrade_verbose("Finished migrating queue ~s in vhost ~s", [Name, VHost]), + {QueueName, NewClientRef}. + +migrate_message(MsgId, OldC, NewC) -> + case rabbit_msg_store:read(MsgId, OldC) of + {{ok, Msg}, OldC1} -> + ok = rabbit_msg_store:write(MsgId, Msg, NewC), + OldC1; + _ -> OldC + end. + +get_per_vhost_store_client(#resource{virtual_host = VHost}, NewStore) -> + {VHost, StorePid} = lists:keyfind(VHost, 1, NewStore), + rabbit_msg_store:client_init(StorePid, rabbit_guid:gen(), + fun(_,_) -> ok end, fun() -> ok end). + +get_global_store_client(OldStore) -> + rabbit_msg_store:client_init(OldStore, + rabbit_guid:gen(), + fun(_,_) -> ok end, + fun() -> ok end). + +list_persistent_queues() -> + Node = node(), + mnesia:async_dirty( + fun () -> + qlc:e(qlc:q([Q || Q <- mnesia:table(rabbit_durable_queue), + ?amqqueue_is_classic(Q), + amqqueue:qnode(Q) == Node, + mnesia:read(rabbit_queue, amqqueue:get_name(Q), read) =:= []])) + end). 
+ +read_old_recovery_terms([]) -> + {[], [], ?EMPTY_START_FUN_STATE}; +read_old_recovery_terms(Queues) -> + QueueNames = [amqqueue:get_name(Q) || Q <- Queues], + {AllTerms, StartFunState} = rabbit_queue_index:read_global_recovery_terms(QueueNames), + Refs = [Ref || Terms <- AllTerms, + Terms /= non_clean_shutdown, + begin + Ref = proplists:get_value(persistent_ref, Terms), + Ref =/= undefined + end], + {lists:zip(QueueNames, AllTerms), Refs, StartFunState}. + +run_old_persistent_store(Refs, StartFunState) -> + OldStoreName = ?PERSISTENT_MSG_STORE, + ok = rabbit_sup:start_child(OldStoreName, rabbit_msg_store, start_global_store_link, + [OldStoreName, rabbit_mnesia:dir(), + Refs, StartFunState]), + OldStoreName. + +start_new_store(VHosts) -> + %% Ensure vhost supervisor is started, so we can add vhosts to it. + lists:map(fun(VHost) -> + VHostDir = rabbit_vhost:msg_store_dir_path(VHost), + {ok, Pid} = rabbit_msg_store:start_link(?PERSISTENT_MSG_STORE, + VHostDir, + undefined, + ?EMPTY_START_FUN_STATE), + {VHost, Pid} + end, + VHosts). + +stop_new_store(NewStore) -> + lists:foreach(fun({_VHost, StorePid}) -> + unlink(StorePid), + exit(StorePid, shutdown) + end, + NewStore), + ok. + +delete_old_store() -> + log_upgrade("Removing the old message store data"), + rabbit_file:recursive_delete( + [filename:join([rabbit_mnesia:dir(), ?PERSISTENT_MSG_STORE])]), + %% Delete old transient store as well + rabbit_file:recursive_delete( + [filename:join([rabbit_mnesia:dir(), ?TRANSIENT_MSG_STORE])]), + ok. + +log_upgrade(Msg) -> + log_upgrade(Msg, []). + +log_upgrade(Msg, Args) -> + rabbit_log:info("message_store upgrades: " ++ Msg, Args). + +log_upgrade_verbose(Msg) -> + log_upgrade_verbose(Msg, []). + +log_upgrade_verbose(Msg, Args) -> + rabbit_log_upgrade:info(Msg, Args). 
+ +maybe_client_terminate(MSCStateP) -> + %% Queue might have been asked to stop by the supervisor, it needs a clean + %% shutdown in order for the supervising strategy to work - if it reaches max + %% restarts might bring the vhost down. + try + rabbit_msg_store:client_terminate(MSCStateP) + catch + _:_ -> + ok + end. diff --git a/deps/rabbit/src/rabbit_version.erl b/deps/rabbit/src/rabbit_version.erl new file mode 100644 index 0000000000..3f5462c7b4 --- /dev/null +++ b/deps/rabbit/src/rabbit_version.erl @@ -0,0 +1,227 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_version). + +-export([recorded/0, matches/2, desired/0, desired_for_scope/1, + record_desired/0, record_desired_for_scope/1, + upgrades_required/1, all_upgrades_required/1, + check_version_consistency/3, + check_version_consistency/4, check_otp_consistency/1, + version_error/3]). + +%% ------------------------------------------------------------------- + +-export_type([scope/0, step/0]). + +-type scope() :: atom(). +-type scope_version() :: [atom()]. +-type step() :: {atom(), atom()}. + +-type version() :: [atom()]. + +%% ------------------------------------------------------------------- + +-define(VERSION_FILENAME, "schema_version"). +-define(SCOPES, [mnesia, local]). + +%% ------------------------------------------------------------------- + +-spec recorded() -> rabbit_types:ok_or_error2(version(), any()). + +recorded() -> case rabbit_file:read_term_file(schema_filename()) of + {ok, [V]} -> {ok, V}; + {error, _} = Err -> Err + end. + +record(V) -> ok = rabbit_file:write_term_file(schema_filename(), [V]). 
+ +recorded_for_scope(Scope) -> + case recorded() of + {error, _} = Err -> + Err; + {ok, Version} -> + {ok, case lists:keysearch(Scope, 1, categorise_by_scope(Version)) of + false -> []; + {value, {Scope, SV1}} -> SV1 + end} + end. + +record_for_scope(Scope, ScopeVersion) -> + case recorded() of + {error, _} = Err -> + Err; + {ok, Version} -> + Version1 = lists:keystore(Scope, 1, categorise_by_scope(Version), + {Scope, ScopeVersion}), + ok = record([Name || {_Scope, Names} <- Version1, Name <- Names]) + end. + +%% ------------------------------------------------------------------- + +-spec matches([A], [A]) -> boolean(). + +matches(VerA, VerB) -> + lists:usort(VerA) =:= lists:usort(VerB). + +%% ------------------------------------------------------------------- + +-spec desired() -> version(). + +desired() -> [Name || Scope <- ?SCOPES, Name <- desired_for_scope(Scope)]. + +-spec desired_for_scope(scope()) -> scope_version(). + +desired_for_scope(Scope) -> with_upgrade_graph(fun heads/1, Scope). + +-spec record_desired() -> 'ok'. + +record_desired() -> record(desired()). + +-spec record_desired_for_scope + (scope()) -> rabbit_types:ok_or_error(any()). + +record_desired_for_scope(Scope) -> + record_for_scope(Scope, desired_for_scope(Scope)). + +-spec upgrades_required + (scope()) -> rabbit_types:ok_or_error2([step()], any()). + +upgrades_required(Scope) -> + case recorded_for_scope(Scope) of + {error, enoent} -> + case filelib:is_file(rabbit_guid:filename()) of + false -> {error, starting_from_scratch}; + true -> {error, version_not_available} + end; + {ok, CurrentHeads} -> + with_upgrade_graph( + fun (G) -> + case unknown_heads(CurrentHeads, G) of + [] -> {ok, upgrades_to_apply(CurrentHeads, G)}; + Unknown -> {error, {future_upgrades_found, Unknown}} + end + end, Scope) + end. 
+ +all_upgrades_required(Scopes) -> + case recorded() of + {error, enoent} -> + case filelib:is_file(rabbit_guid:filename()) of + false -> {error, starting_from_scratch}; + true -> {error, version_not_available} + end; + {ok, _} -> + lists:foldl( + fun + (_, {error, Err}) -> {error, Err}; + (Scope, {ok, Acc}) -> + case upgrades_required(Scope) of + %% Lift errors from any scope. + {error, Err} -> {error, Err}; + %% Filter non-upgradable scopes + {ok, []} -> {ok, Acc}; + {ok, Upgrades} -> {ok, [{Scope, Upgrades} | Acc]} + end + end, + {ok, []}, + Scopes) + end. + +%% ------------------------------------------------------------------- + +with_upgrade_graph(Fun, Scope) -> + case rabbit_misc:build_acyclic_graph( + fun ({_App, Module, Steps}) -> vertices(Module, Steps, Scope) end, + fun ({_App, Module, Steps}) -> edges(Module, Steps, Scope) end, + rabbit_misc:all_module_attributes(rabbit_upgrade)) of + {ok, G} -> try + Fun(G) + after + true = digraph:delete(G) + end; + {error, {vertex, duplicate, StepName}} -> + throw({error, {duplicate_upgrade_step, StepName}}); + {error, {edge, {bad_vertex, StepName}, _From, _To}} -> + throw({error, {dependency_on_unknown_upgrade_step, StepName}}); + {error, {edge, {bad_edge, StepNames}, _From, _To}} -> + throw({error, {cycle_in_upgrade_steps, StepNames}}) + end. + +vertices(Module, Steps, Scope0) -> + [{StepName, {Module, StepName}} || {StepName, Scope1, _Reqs} <- Steps, + Scope0 == Scope1]. + +edges(_Module, Steps, Scope0) -> + [{Require, StepName} || {StepName, Scope1, Requires} <- Steps, + Require <- Requires, + Scope0 == Scope1]. +unknown_heads(Heads, G) -> + [H || H <- Heads, digraph:vertex(G, H) =:= false]. + +upgrades_to_apply(Heads, G) -> + %% Take all the vertices which can reach the known heads. That's + %% everything we've already applied. Subtract that from all + %% vertices: that's what we have to apply. 
+ Unsorted = sets:to_list( + sets:subtract( + sets:from_list(digraph:vertices(G)), + sets:from_list(digraph_utils:reaching(Heads, G)))), + %% Form a subgraph from that list and find a topological ordering + %% so we can invoke them in order. + [element(2, digraph:vertex(G, StepName)) || + StepName <- digraph_utils:topsort(digraph_utils:subgraph(G, Unsorted))]. + +heads(G) -> + lists:sort([V || V <- digraph:vertices(G), digraph:out_degree(G, V) =:= 0]). + +%% ------------------------------------------------------------------- + +categorise_by_scope(Version) when is_list(Version) -> + Categorised = + [{Scope, Name} || {_App, _Module, Attributes} <- + rabbit_misc:all_module_attributes(rabbit_upgrade), + {Name, Scope, _Requires} <- Attributes, + lists:member(Name, Version)], + maps:to_list( + lists:foldl(fun ({Scope, Name}, CatVersion) -> + rabbit_misc:maps_cons(Scope, Name, CatVersion) + end, maps:new(), Categorised)). + +dir() -> rabbit_mnesia:dir(). + +schema_filename() -> filename:join(dir(), ?VERSION_FILENAME). + +%% -------------------------------------------------------------------- + +-spec check_version_consistency + (string(), string(), string()) -> rabbit_types:ok_or_error(any()). + +check_version_consistency(This, Remote, Name) -> + check_version_consistency(This, Remote, Name, fun (A, B) -> A =:= B end). + +-spec check_version_consistency + (string(), string(), string(), + fun((string(), string()) -> boolean())) -> + rabbit_types:ok_or_error(any()). + +check_version_consistency(This, Remote, Name, Comp) -> + case Comp(This, Remote) of + true -> ok; + false -> version_error(Name, This, Remote) + end. + +version_error(Name, This, Remote) -> + {error, {inconsistent_cluster, + rabbit_misc:format("~s version mismatch: local node is ~s, " + "remote node ~s", [Name, This, Remote])}}. + +-spec check_otp_consistency + (string()) -> rabbit_types:ok_or_error(any()). 
+ +check_otp_consistency(Remote) -> + check_version_consistency(rabbit_misc:otp_release(), Remote, "OTP"). diff --git a/deps/rabbit/src/rabbit_vhost.erl b/deps/rabbit/src/rabbit_vhost.erl new file mode 100644 index 0000000000..c8c5fc961a --- /dev/null +++ b/deps/rabbit/src/rabbit_vhost.erl @@ -0,0 +1,422 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_vhost). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include("vhost.hrl"). + +-export([recover/0, recover/1]). +-export([add/2, add/4, delete/2, exists/1, with/2, with_user_and_vhost/3, assert/1, update/2, + set_limits/2, vhost_cluster_state/1, is_running_on_all_nodes/1, await_running_on_all_nodes/2, + list/0, count/0, list_names/0, all/0, parse_tags/1]). +-export([info/1, info/2, info_all/0, info_all/1, info_all/2, info_all/3]). +-export([dir/1, msg_store_dir_path/1, msg_store_dir_wildcard/0]). +-export([delete_storage/1]). +-export([vhost_down/1]). +-export([put_vhost/5]). + +%% +%% API +%% + +recover() -> + %% Clear out remnants of old incarnation, in case we restarted + %% faster than other nodes handled DOWN messages from us. + rabbit_amqqueue:on_node_down(node()), + + rabbit_amqqueue:warn_file_limit(), + + %% Prepare rabbit_semi_durable_route table + rabbit_binding:recover(), + + %% rabbit_vhost_sup_sup will start the actual recovery. + %% So recovery will be run every time a vhost supervisor is restarted. + ok = rabbit_vhost_sup_sup:start(), + + [ok = rabbit_vhost_sup_sup:init_vhost(VHost) || VHost <- list_names()], + ok. 
+ +recover(VHost) -> + VHostDir = msg_store_dir_path(VHost), + rabbit_log:info("Making sure data directory '~ts' for vhost '~s' exists~n", + [VHostDir, VHost]), + VHostStubFile = filename:join(VHostDir, ".vhost"), + ok = rabbit_file:ensure_dir(VHostStubFile), + ok = file:write_file(VHostStubFile, VHost), + {Recovered, Failed} = rabbit_amqqueue:recover(VHost), + AllQs = Recovered ++ Failed, + QNames = [amqqueue:get_name(Q) || Q <- AllQs], + ok = rabbit_binding:recover(rabbit_exchange:recover(VHost), QNames), + ok = rabbit_amqqueue:start(Recovered), + %% Start queue mirrors. + ok = rabbit_mirror_queue_misc:on_vhost_up(VHost), + ok. + +-define(INFO_KEYS, vhost:info_keys()). + +-spec parse_tags(binary() | string() | atom()) -> [atom()]. +parse_tags(undefined) -> + []; +parse_tags("") -> + []; +parse_tags(<<"">>) -> + []; +parse_tags(Val) when is_binary(Val) -> + parse_tags(rabbit_data_coercion:to_list(Val)); +parse_tags(Val) when is_list(Val) -> + [trim_tag(Tag) || Tag <- re:split(Val, ",", [{return, list}])]. + +-spec add(vhost:name(), rabbit_types:username()) -> rabbit_types:ok_or_error(any()). + +add(VHost, ActingUser) -> + case exists(VHost) of + true -> ok; + false -> do_add(VHost, <<"">>, [], ActingUser) + end. + +-spec add(vhost:name(), binary(), [atom()], rabbit_types:username()) -> rabbit_types:ok_or_error(any()). + +add(Name, Description, Tags, ActingUser) -> + case exists(Name) of + true -> ok; + false -> do_add(Name, Description, Tags, ActingUser) + end. 
%% Creates the vhost record (inside a TX), declares the default
%% exchanges after commit, and starts the supervision tree everywhere.
do_add(Name, Description, Tags, ActingUser) ->
    case Description of
        undefined ->
            rabbit_log:info("Adding vhost '~s' without a description", [Name]);
        Value ->
            rabbit_log:info("Adding vhost '~s' (description: '~s')", [Name, Value])
    end,
    VHost = rabbit_misc:execute_mnesia_transaction(
              fun () ->
                      case mnesia:wread({rabbit_vhost, Name}) of
                          [] ->
                              Row = vhost:new(Name, [],
                                              #{description => Description,
                                                tags => Tags}),
                              rabbit_log:debug("Inserting a virtual host record ~p", [Row]),
                              ok = mnesia:write(rabbit_vhost, Row, write),
                              Row;
                          %% the vhost already exists
                          [Row] ->
                              Row
                      end
              end,
              fun (VHost1, true) ->
                      VHost1;
                  (VHost1, false) ->
                      [begin
                           Resource = rabbit_misc:r(Name, exchange, ExchangeName),
                           rabbit_log:debug("Will declare an exchange ~p", [Resource]),
                           _ = rabbit_exchange:declare(Resource, Type, true, false,
                                                       Internal, [], ActingUser)
                       end || {ExchangeName, Type, Internal} <-
                                  [{<<"">>,                   direct,  false},
                                   {<<"amq.direct">>,         direct,  false},
                                   {<<"amq.topic">>,          topic,   false},
                                   %% per 0-9-1 pdf
                                   {<<"amq.match">>,          headers, false},
                                   %% per 0-9-1 xml
                                   {<<"amq.headers">>,        headers, false},
                                   {<<"amq.fanout">>,         fanout,  false},
                                   {<<"amq.rabbitmq.trace">>, topic,   true}]],
                      VHost1
              end),
    case rabbit_vhost_sup_sup:start_on_all_nodes(Name) of
        ok ->
            rabbit_event:notify(vhost_created, info(VHost)
                                ++ [{user_who_performed_action, ActingUser},
                                    {description, Description},
                                    {tags, Tags}]),
            ok;
        {error, Reason} ->
            Msg = rabbit_misc:format("failed to set up vhost '~s': ~p",
                                     [Name, Reason]),
            {error, Msg}
    end.

%% Deletes a vhost together with its queues, exchanges, permissions,
%% parameters and policies, then stops its supervision tree everywhere.
-spec delete(vhost:name(), rabbit_types:username()) -> rabbit_types:ok_or_error(any()).
delete(VHost, ActingUser) ->
    %% FIXME: We are forced to delete the queues and exchanges outside
    %% the TX below. Queue deletion involves sending messages to the queue
    %% process, which in turn results in further mnesia actions and
    %% eventually the termination of that process. Exchange deletion causes
    %% notifications which must be sent outside the TX
    rabbit_log:info("Deleting vhost '~s'~n", [VHost]),
    QDelFun = fun (Q) -> rabbit_amqqueue:delete(Q, false, false, ActingUser) end,
    [begin
         Name = amqqueue:get_name(Q),
         assert_benign(rabbit_amqqueue:with(Name, QDelFun), ActingUser)
     end || Q <- rabbit_amqqueue:list(VHost)],
    [assert_benign(rabbit_exchange:delete(Name, false, ActingUser), ActingUser) ||
        #exchange{name = Name} <- rabbit_exchange:list(VHost)],
    Funs = rabbit_misc:execute_mnesia_transaction(
             with(VHost, fun () -> internal_delete(VHost, ActingUser) end)),
    ok = rabbit_event:notify(vhost_deleted, [{name, VHost},
                                             {user_who_performed_action, ActingUser}]),
    [case Fun() of
         ok                              -> ok;
         {error, {no_such_vhost, VHost}} -> ok
     end || Fun <- Funs],
    %% After the vhost was deleted from the mnesia DB, stop its vhost
    %% supervisors on all the nodes.
    rabbit_vhost_sup_sup:delete_on_all_nodes(VHost),
    ok.

%% HTTP-API-style idempotent creation: normalises tags, creates the
%% vhost if needed, waits for it cluster-wide and toggles tracing.
put_vhost(Name, Description, Tags0, Trace, Username) ->
    Tags = case Tags0 of
               undefined   -> <<"">>;
               null        -> <<"">>;
               "undefined" -> <<"">>;
               "null"      -> <<"">>;
               Other       -> Other
           end,
    Result = case exists(Name) of
                 true  -> ok;
                 false ->
                     add(Name, Description, parse_tags(Tags), Username),
                     %% wait for up to 45 seconds for the vhost to initialise
                     %% on all nodes
                     case await_running_on_all_nodes(Name, 45000) of
                         ok ->
                             maybe_grant_full_permissions(Name, Username);
                         {error, timeout} ->
                             {error, timeout}
                     end
             end,
    case Trace of
        true      -> rabbit_trace:start(Name);
        false     -> rabbit_trace:stop(Name);
        undefined -> ok
    end,
    Result.

%% when definitions are loaded on boot, Username here will be
%% ?INTERNAL_USER, which does not actually exist
maybe_grant_full_permissions(_Name, ?INTERNAL_USER) ->
    ok;
maybe_grant_full_permissions(Name, Username) ->
    U = rabbit_auth_backend_internal:lookup_user(Username),
    maybe_grant_full_permissions(U, Name, Username).
%% Grants full permissions on the new vhost to the creating user, but
%% only if that user actually exists in the internal database.
maybe_grant_full_permissions({ok, _}, Name, Username) ->
    rabbit_auth_backend_internal:set_permissions(
      Username, Name, <<".*">>, <<".*">>, <<".*">>, Username);
maybe_grant_full_permissions(_, _Name, _Username) ->
    ok.

%% 50 ms
-define(AWAIT_SAMPLE_INTERVAL, 50).

%% Polls until the vhost is running on every cluster node or the
%% Timeout (milliseconds) elapses.
-spec await_running_on_all_nodes(vhost:name(), integer()) -> ok | {error, timeout}.
await_running_on_all_nodes(VHost, Timeout) ->
    Attempts = round(Timeout / ?AWAIT_SAMPLE_INTERVAL),
    await_running_on_all_nodes0(VHost, Attempts).

await_running_on_all_nodes0(_VHost, 0) ->
    {error, timeout};
await_running_on_all_nodes0(VHost, Attempts) ->
    case is_running_on_all_nodes(VHost) of
        true -> ok;
        _ ->
            timer:sleep(?AWAIT_SAMPLE_INTERVAL),
            await_running_on_all_nodes0(VHost, Attempts - 1)
    end.

-spec is_running_on_all_nodes(vhost:name()) -> boolean().
is_running_on_all_nodes(VHost) ->
    States = vhost_cluster_state(VHost),
    lists:all(fun ({_Node, State}) -> State =:= running end, States).

%% Returns the per-node state of the vhost across all running nodes:
%% running | stopped | nodedown.
-spec vhost_cluster_state(vhost:name()) -> [{atom(), atom()}].
vhost_cluster_state(VHost) ->
    lists:map(fun (Node) ->
                      State = case rabbit_misc:rpc_call(
                                     Node, rabbit_vhost_sup_sup,
                                     is_vhost_alive, [VHost]) of
                                  {badrpc, nodedown} -> nodedown;
                                  true               -> running;
                                  false              -> stopped
                              end,
                      {Node, State}
              end, rabbit_nodes:all_running()).

%% Emits a vhost_down event for this node.
vhost_down(VHost) ->
    ok = rabbit_event:notify(vhost_down,
                             [{name, VHost},
                              {node, node()},
                              {user_who_performed_action, ?INTERNAL_USER}]).

%% Removes the vhost's message store directory from disk. The message
%% store should be closed when the vhost supervisor is closed.
delete_storage(VHost) ->
    VhostDir = msg_store_dir_path(VHost),
    rabbit_log:info("Deleting message store directory for vhost '~s' at '~s'~n",
                    [VHost, VhostDir]),
    case rabbit_file:recursive_delete([VhostDir]) of
        ok -> ok;
        {error, {_, enoent}} ->
            %% a concurrent delete did the job for us
            rabbit_log:warning("Tried to delete storage directories for vhost '~s', it failed with an ENOENT",
                               [VHost]),
            ok;
        Other ->
            rabbit_log:warning("Tried to delete storage directories for vhost '~s': ~p",
                               [VHost, Other]),
            Other
    end.

%% Treats "queue/exchange already gone" outcomes as success during
%% vhost deletion.
assert_benign(ok, _)                 -> ok;
assert_benign({ok, _}, _)            -> ok;
assert_benign({ok, _, _}, _)         -> ok;
assert_benign({error, not_found}, _) -> ok;
assert_benign({error, {absent, Q, _}}, ActingUser) ->
    %% Removing the mnesia entries here is safe. If/when the down node
    %% restarts, it will clear out the on-disk storage of the queue.
    QName = amqqueue:get_name(Q),
    rabbit_amqqueue:internal_delete(QName, ActingUser).

%% Runs inside a mnesia TX: clears permissions, runtime parameters and
%% policies, deletes the vhost record, and returns deferred cleanup
%% thunks to run after the TX commits.
internal_delete(VHost, ActingUser) ->
    [ok = rabbit_auth_backend_internal:clear_permissions(
            proplists:get_value(user, Info), VHost, ActingUser)
     || Info <- rabbit_auth_backend_internal:list_vhost_permissions(VHost)],
    TopicPermissions =
        rabbit_auth_backend_internal:list_vhost_topic_permissions(VHost),
    [ok = rabbit_auth_backend_internal:clear_topic_permissions(
            proplists:get_value(user, TopicPermission), VHost, ActingUser)
     || TopicPermission <- TopicPermissions],
    Fs1 = [rabbit_runtime_parameters:clear(VHost,
                                           proplists:get_value(component, Info),
                                           proplists:get_value(name, Info),
                                           ActingUser)
           || Info <- rabbit_runtime_parameters:list(VHost)],
    Fs2 = [rabbit_policy:delete(VHost, proplists:get_value(name, Info), ActingUser)
           || Info <- rabbit_policy:list(VHost)],
    ok = mnesia:delete({rabbit_vhost, VHost}),
    Fs1 ++ Fs2.

-spec exists(vhost:name()) -> boolean().
exists(VHost) ->
    mnesia:dirty_read({rabbit_vhost, VHost}) /= [].

-spec list_names() -> [vhost:name()].
list_names() -> mnesia:dirty_all_keys(rabbit_vhost).

%% Exists for backwards compatibility, prefer list_names/0.
-spec list() -> [vhost:name()].
list() -> list_names().
-spec all() -> [vhost:vhost()].
all() -> mnesia:dirty_match_object(rabbit_vhost, vhost:pattern_match_all()).

-spec count() -> non_neg_integer().
count() ->
    length(list()).

%% Wraps Thunk so that it aborts the enclosing mnesia TX with
%% {no_such_vhost, VHost} when the vhost does not exist.
-spec with(vhost:name(), rabbit_misc:thunk(A)) -> A.
with(VHost, Thunk) ->
    fun () ->
            case mnesia:read({rabbit_vhost, VHost}) of
                []   -> mnesia:abort({no_such_vhost, VHost});
                [_V] -> Thunk()
            end
    end.

-spec with_user_and_vhost
        (rabbit_types:username(), vhost:name(), rabbit_misc:thunk(A)) -> A.
with_user_and_vhost(Username, VHost, Thunk) ->
    rabbit_misc:with_user(Username, with(VHost, Thunk)).

%% Like with/2 but outside an Mnesia tx
-spec assert(vhost:name()) -> 'ok'.
assert(VHost) ->
    case exists(VHost) of
        true  -> ok;
        false -> throw({error, {no_such_vhost, VHost}})
    end.

%% Applies Fun to the vhost record inside a TX and writes the result back.
-spec update(vhost:name(), fun((vhost:vhost()) -> vhost:vhost())) -> vhost:vhost().
update(VHost, Fun) ->
    case mnesia:read({rabbit_vhost, VHost}) of
        [] ->
            mnesia:abort({no_such_vhost, VHost});
        [V] ->
            V1 = Fun(V),
            ok = mnesia:write(rabbit_vhost, V1, write),
            V1
    end.

%% Normalises 'undefined' limits to an empty list before storing.
set_limits(VHost, undefined) ->
    vhost:set_limits(VHost, []);
set_limits(VHost, Limits) ->
    vhost:set_limits(VHost, Limits).

%% Maps a vhost name to a stable directory name (base-36 MD5 digest).
dir(Vhost) ->
    <<Num:128>> = erlang:md5(Vhost),
    rabbit_misc:format("~.36B", [Num]).

msg_store_dir_path(VHost) ->
    rabbit_data_coercion:to_list(
      filename:join([msg_store_dir_base(), dir(VHost)])).

msg_store_dir_wildcard() ->
    rabbit_data_coercion:to_list(filename:join([msg_store_dir_base(), "*"])).

msg_store_dir_base() ->
    filename:join([rabbit_mnesia:dir(), "msg_stores", "vhosts"]).

-spec trim_tag(list() | binary() | atom()) -> atom().
trim_tag(Val) ->
    rabbit_data_coercion:to_atom(string:trim(rabbit_data_coercion:to_list(Val))).

%%----------------------------------------------------------------------------

infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items].
i(name, VHost)          -> vhost:get_name(VHost);
i(tracing, VHost)       -> rabbit_trace:enabled(vhost:get_name(VHost));
i(cluster_state, VHost) -> vhost_cluster_state(vhost:get_name(VHost));
i(description, VHost)   -> vhost:get_description(VHost);
i(tags, VHost)          -> vhost:get_tags(VHost);
i(metadata, VHost)      -> vhost:get_metadata(VHost);
i(Item, VHost) ->
    rabbit_log:error("Don't know how to compute a virtual host info item '~s' for virtual host '~p'",
                     [Item, VHost]),
    throw({bad_argument, Item}).

-spec info(vhost:vhost() | vhost:name()) -> rabbit_types:infos().
info(VHost) when ?is_vhost(VHost) ->
    infos(?INFO_KEYS, VHost);
info(Key) ->
    case mnesia:dirty_read({rabbit_vhost, Key}) of
        []      -> [];
        [VHost] -> infos(?INFO_KEYS, VHost)
    end.

-spec info(vhost:vhost(), rabbit_types:info_keys()) -> rabbit_types:infos().
info(VHost, Items) -> infos(Items, VHost).

-spec info_all() -> [rabbit_types:infos()].
info_all() -> info_all(?INFO_KEYS).

-spec info_all(rabbit_types:info_keys()) -> [rabbit_types:infos()].
info_all(Items) -> [info(VHost, Items) || VHost <- all()].

info_all(Ref, AggregatorPid) -> info_all(?INFO_KEYS, Ref, AggregatorPid).

-spec info_all(rabbit_types:info_keys(), reference(), pid()) -> 'ok'.
info_all(Items, Ref, AggregatorPid) ->
    rabbit_control_misc:emitting_map(
      AggregatorPid, Ref, fun (VHost) -> info(VHost, Items) end, all()).

%% ---------------------------------------------------------------------------
%% File: deps/rabbit/src/rabbit_vhost_limit.erl
%% ---------------------------------------------------------------------------

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_vhost_limit).

-behaviour(rabbit_runtime_parameter).

-include("rabbit.hrl").

-export([register/0]).
-export([parse_set/3, set/3, clear/2]).
-export([list/0, list/1]).
-export([update_limit/4, clear_limit/3, get_limit/2]).
-export([validate/5, notify/5, notify_clear/4]).
-export([connection_limit/1, queue_limit/1,
         is_over_queue_limit/1, would_exceed_queue_limit/2,
         is_over_connection_limit/1]).

-import(rabbit_misc, [pget/2, pget/3]).

-rabbit_boot_step({?MODULE,
                   [{description, "vhost limit parameters"},
                    {mfa, {rabbit_vhost_limit, register, []}},
                    {requires, rabbit_registry},
                    {enables, recovery}]}).

%%----------------------------------------------------------------------------

register() ->
    rabbit_registry:register(runtime_parameter, <<"vhost-limits">>, ?MODULE).

validate(_VHost, <<"vhost-limits">>, Name, Term, _User) ->
    rabbit_parameter_validation:proplist(
      Name, vhost_limit_validation(), Term).

notify(VHost, <<"vhost-limits">>, <<"limits">>, Limits, ActingUser) ->
    rabbit_event:notify(vhost_limits_set, [{name, <<"limits">>},
                                           {user_who_performed_action, ActingUser}
                                           | Limits]),
    update_vhost(VHost, Limits).

notify_clear(VHost, <<"vhost-limits">>, <<"limits">>, ActingUser) ->
    rabbit_event:notify(vhost_limits_cleared, [{name, <<"limits">>},
                                               {user_who_performed_action, ActingUser}]),
    %% If the function is called as a part of vhost deletion, the vhost
    %% can be already deleted.
    case rabbit_vhost:exists(VHost) of
        true  -> update_vhost(VHost, undefined);
        false -> ok
    end.

connection_limit(VirtualHost) ->
    get_limit(VirtualHost, <<"max-connections">>).

queue_limit(VirtualHost) ->
    get_limit(VirtualHost, <<"max-queues">>).

%% Returns [{VHostName, Limits}] for every vhost ('_' matches all) with
%% a non-undefined "limits" runtime parameter.
query_limits(VHost) ->
    case rabbit_runtime_parameters:list(VHost, <<"vhost-limits">>) of
        []     -> [];
        Params -> [{pget(vhost, Param), pget(value, Param)}
                   || Param <- Params,
                      pget(value, Param) =/= undefined,
                      pget(name, Param) == <<"limits">>]
    end.

-spec list() -> [{vhost:name(), rabbit_types:infos()}].
list() ->
    query_limits('_').

-spec list(vhost:name()) -> rabbit_types:infos().
list(VHost) ->
    case query_limits(VHost) of
        []               -> [];
        [{VHost, Value}] -> Value
    end.

-spec is_over_connection_limit(vhost:name()) -> {true, non_neg_integer()} | false.
is_over_connection_limit(VirtualHost) ->
    case rabbit_vhost_limit:connection_limit(VirtualHost) of
        %% no limit configured
        undefined -> false;
        %% with limit = 0, no connections are allowed
        {ok, 0} -> {true, 0};
        {ok, Limit} when is_integer(Limit) andalso Limit > 0 ->
            ConnectionCount =
                rabbit_connection_tracking:count_tracked_items_in(
                  {vhost, VirtualHost}),
            case ConnectionCount >= Limit of
                false -> false;
                true  -> {true, Limit}
            end;
        %% any negative value means "no limit". Note that parameter validation
        %% will replace negative integers with 'undefined', so this is to be
        %% explicit and extra defensive
        {ok, Limit} when is_integer(Limit) andalso Limit < 0 -> false;
        %% ignore non-integer limits
        {ok, _Limit} -> false
    end.

-spec would_exceed_queue_limit(non_neg_integer(), vhost:name()) ->
          {true, non_neg_integer(), non_neg_integer()} | false.
would_exceed_queue_limit(AdditionalCount, VirtualHost) ->
    case queue_limit(VirtualHost) of
        undefined ->
            %% no limit configured
            false;
        {ok, 0} ->
            %% with limit = 0, no queues can be declared (perhaps not very
            %% useful but consistent with the connection limit)
            {true, 0, 0};
        {ok, Limit} when is_integer(Limit) andalso Limit > 0 ->
            QueueCount = rabbit_amqqueue:count(VirtualHost),
            case (AdditionalCount + QueueCount) > Limit of
                false -> false;
                true  -> {true, Limit, QueueCount}
            end;
        {ok, Limit} when is_integer(Limit) andalso Limit < 0 ->
            %% any negative value means "no limit". Note that parameter
            %% validation will replace negative integers with 'undefined',
            %% so this is to be explicit and extra defensive
            false;
        {ok, _Limit} ->
            %% ignore non-integer limits
            false
    end.
-spec is_over_queue_limit(vhost:name()) -> {true, non_neg_integer()} | false.
is_over_queue_limit(VirtualHost) ->
    case would_exceed_queue_limit(1, VirtualHost) of
        {true, Limit, _QueueCount} -> {true, Limit};
        false                      -> false
    end.

%%----------------------------------------------------------------------------

%% Parses a JSON limits definition and stores it as a runtime parameter.
parse_set(VHost, Defn, ActingUser) ->
    Definition = rabbit_data_coercion:to_binary(Defn),
    case rabbit_json:try_decode(Definition) of
        {ok, Term} ->
            set(VHost, maps:to_list(Term), ActingUser);
        {error, Reason} ->
            {error_string,
             rabbit_misc:format("JSON decoding error. Reason: ~ts", [Reason])}
    end.

set(VHost, Defn, ActingUser) ->
    rabbit_runtime_parameters:set_any(VHost, <<"vhost-limits">>,
                                      <<"limits">>, Defn, ActingUser).

clear(VHost, ActingUser) ->
    rabbit_runtime_parameters:clear_any(VHost, <<"vhost-limits">>,
                                        <<"limits">>, ActingUser).

%% Replaces (or adds) a single limit in the stored definition.
update_limit(VHost, Name, Value, ActingUser) ->
    OldDef = case rabbit_runtime_parameters:list(VHost, <<"vhost-limits">>) of
                 []      -> [];
                 [Param] -> pget(value, Param, [])
             end,
    NewDef = [{Name, Value} | lists:keydelete(Name, 1, OldDef)],
    set(VHost, NewDef, ActingUser).

%% Removes a single limit from the stored definition.
clear_limit(VHost, Name, ActingUser) ->
    OldDef = case rabbit_runtime_parameters:list(VHost, <<"vhost-limits">>) of
                 []      -> [];
                 [Param] -> pget(value, Param, [])
             end,
    NewDef = lists:keydelete(Name, 1, OldDef),
    set(VHost, NewDef, ActingUser).

vhost_limit_validation() ->
    [{<<"max-connections">>, fun rabbit_parameter_validation:integer/2, optional},
     {<<"max-queues">>,      fun rabbit_parameter_validation:integer/2, optional}].

%% Mirrors the limits into the vhost record itself.
update_vhost(VHostName, Limits) ->
    rabbit_misc:execute_mnesia_transaction(
      fun () ->
              rabbit_vhost:update(VHostName,
                                  fun (VHost) ->
                                          rabbit_vhost:set_limits(VHost, Limits)
                                  end)
      end),
    ok.
%% Looks up a single limit value; negative stored values mean "no limit".
get_limit(VirtualHost, Limit) ->
    case rabbit_runtime_parameters:list(VirtualHost, <<"vhost-limits">>) of
        []      -> undefined;
        [Param] -> case pget(value, Param) of
                       undefined -> undefined;
                       Val       -> case pget(Limit, Val) of
                                        undefined -> undefined;
                                        %% no limit
                                        N when N < 0  -> undefined;
                                        N when N >= 0 -> {ok, N}
                                    end
                   end
    end.

%% ---------------------------------------------------------------------------
%% File: deps/rabbit/src/rabbit_vhost_msg_store.erl
%% ---------------------------------------------------------------------------

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_vhost_msg_store).

-include("rabbit.hrl").

-export([start/4, stop/2, client_init/5, successfully_recovered_state/2]).
-export([vhost_store_pid/2]).

%% Starts a message store of the given Type as a transient child of the
%% vhost's supervisor.
start(VHost, Type, ClientRefs, StartupFunState) when is_list(ClientRefs);
                                                     ClientRefs == undefined ->
    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
        {ok, VHostSup} ->
            VHostDir = rabbit_vhost:msg_store_dir_path(VHost),
            supervisor2:start_child(
              VHostSup,
              {Type, {rabbit_msg_store, start_link,
                      [Type, VHostDir, ClientRefs, StartupFunState]},
               transient, ?MSG_STORE_WORKER_WAIT, worker, [rabbit_msg_store]});
        %% we can get here if a vhost is added and removed concurrently
        %% e.g. some integration tests do it
        {error, {no_such_vhost, VHost}} = E ->
            rabbit_log:error("Failed to start a message store for vhost ~s: vhost no longer exists!",
                             [VHost]),
            E
    end.
%% Stops and removes the message store child of the given Type.
stop(VHost, Type) ->
    case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of
        {ok, VHostSup} ->
            ok = supervisor2:terminate_child(VHostSup, Type),
            ok = supervisor2:delete_child(VHostSup, Type);
        %% see start/4
        {error, {no_such_vhost, VHost}} ->
            rabbit_log:error("Failed to stop a message store for vhost ~s: vhost no longer exists!",
                             [VHost]),
            ok
    end.

client_init(VHost, Type, Ref, MsgOnDiskFun, CloseFDsFun) ->
    with_vhost_store(VHost, Type,
                     fun (StorePid) ->
                             rabbit_msg_store:client_init(StorePid, Ref,
                                                          MsgOnDiskFun,
                                                          CloseFDsFun)
                     end).

%% Applies Fun to the store pid; throws if the store is not running.
with_vhost_store(VHost, Type, Fun) ->
    case vhost_store_pid(VHost, Type) of
        no_pid ->
            throw({message_store_not_started, Type, VHost});
        Pid when is_pid(Pid) ->
            Fun(Pid)
    end.

vhost_store_pid(VHost, Type) ->
    {ok, VHostSup} = rabbit_vhost_sup_sup:get_vhost_sup(VHost),
    case supervisor2:find_child(VHostSup, Type) of
        [Pid] -> Pid;
        []    -> no_pid
    end.

successfully_recovered_state(VHost, Type) ->
    with_vhost_store(VHost, Type,
                     fun (StorePid) ->
                             rabbit_msg_store:successfully_recovered_state(StorePid)
                     end).

%% ---------------------------------------------------------------------------
%% File: deps/rabbit/src/rabbit_vhost_process.erl
%% ---------------------------------------------------------------------------

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2017-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

%% This module implements a vhost identity process.

%% On start this process will try to recover the vhost data and
%% processes structure (queues and message stores).
%% If recovered successfully, the process will save its PID
%% to the vhost process registry. If the vhost process PID is in the
%% registry and the process is alive - the vhost is considered running.
%% On termination, the process will notify of the vhost going down.

%% The process will also check periodically whether the vhost is still
%% present in the mnesia DB and stop the vhost supervision tree when it
%% disappears.

-module(rabbit_vhost_process).

%% Transitional step until we can require Erlang/OTP 21 and
%% use the now recommended try/catch syntax for obtaining the stack trace.
-compile(nowarn_deprecated_function).

-include("rabbit.hrl").

%% Existence of the vhost is checked every ?TICKTIME_RATIO net-tick
%% intervals (see interval/0).
-define(TICKTIME_RATIO, 4).

-behaviour(gen_server2).
-export([start_link/1]).

-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
         code_change/3]).

start_link(VHost) ->
    gen_server2:start_link(?MODULE, [VHost], []).

init([VHost]) ->
    process_flag(trap_exit, true),
    rabbit_log:debug("Recovering data for VHost ~p~n", [VHost]),
    try
        %% Recover the vhost data and save it to the vhost registry.
        ok = rabbit_vhost:recover(VHost),
        rabbit_vhost_sup_sup:save_vhost_process(VHost, self()),
        timer:send_interval(interval(), check_vhost),
        true = erlang:garbage_collect(),
        {ok, VHost}
    catch _:Reason:Stacktrace ->
            rabbit_amqqueue:mark_local_durable_queues_stopped(VHost),
            rabbit_log:error("Unable to recover vhost ~p data. Reason ~p~n"
                             " Stacktrace ~p",
                             [VHost, Reason, Stacktrace]),
            {stop, Reason}
    end.

handle_call(_, _, VHost) ->
    {reply, ok, VHost}.

handle_cast(_, VHost) ->
    {noreply, VHost}.

handle_info(check_vhost, VHost) ->
    case rabbit_vhost:exists(VHost) of
        true  -> {noreply, VHost};
        false ->
            rabbit_log:warning("Virtual host '~s' is gone. "
                               "Stopping its top level supervisor.",
                               [VHost]),
            %% Stop the vhost's top supervisor in a one-off process to avoid
            %% a deadlock: us (a child process) waiting for supervisor
            %% shutdown and our supervisor(s) waiting for us to shut down.
            spawn(
              fun () ->
                      rabbit_vhost_sup_sup:stop_and_delete_vhost(VHost)
              end),
            {noreply, VHost}
    end;
handle_info(_, VHost) ->
    {noreply, VHost}.
terminate(shutdown, VHost) ->
    %% Notify that the vhost is stopped.
    rabbit_vhost:vhost_down(VHost);
terminate(_, _VHost) ->
    ok.

code_change(_OldVsn, VHost, _Extra) ->
    {ok, VHost}.

interval() ->
    application:get_env(kernel, net_ticktime, 60000) * ?TICKTIME_RATIO.

%% ---------------------------------------------------------------------------
%% File: deps/rabbit/src/rabbit_vhost_sup.erl
%% ---------------------------------------------------------------------------

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2017-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_vhost_sup).

-include("rabbit.hrl").

%% Each vhost gets an instance of this supervisor that supervises
%% message stores and queues (via rabbit_amqqueue_sup_sup).
-behaviour(supervisor2).
-export([init/1]).
-export([start_link/1]).

start_link(VHost) ->
    supervisor2:start_link(?MODULE, [VHost]).

%% Starts with no children; they are added dynamically (message stores,
%% queue supervisors).
init([_VHost]) ->
    {ok, {{one_for_all, 0, 1}, []}}.

%% ---------------------------------------------------------------------------
%% File: deps/rabbit/src/rabbit_vhost_sup_sup.erl
%% ---------------------------------------------------------------------------

%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_vhost_sup_sup).

-include("rabbit.hrl").

-behaviour(supervisor2).

-export([init/1]).

-export([start_link/0, start/0]).
-export([init_vhost/1,
         start_vhost/1, start_vhost/2,
         get_vhost_sup/1, get_vhost_sup/2,
         save_vhost_sup/3,
         save_vhost_process/2]).
-export([delete_on_all_nodes/1, start_on_all_nodes/1]).
-export([is_vhost_alive/1]).
-export([check/0]).

%% Internal
-export([stop_and_delete_vhost/1]).

%% ETS record tracking a vhost's supervision tree: the wrapper pid, the
%% vhost supervisor pid and the vhost identity process pid.
-record(vhost_sup, {vhost, vhost_sup_pid, wrapper_pid, vhost_process_pid}).

start() ->
    case supervisor:start_child(rabbit_sup, {?MODULE,
                                             {?MODULE, start_link, []},
                                             permanent, infinity, supervisor,
                                             [?MODULE]}) of
        {ok, _}      -> ok;
        {error, Err} -> {error, Err}
    end.

start_link() ->
    supervisor2:start_link({local, ?MODULE}, ?MODULE, []).

init([]) ->
    %% This assumes that a single vhost termination should not shut down nodes
    %% unless the operator opts in.
    RestartStrategy = vhost_restart_strategy(),
    ets:new(?MODULE, [named_table, public, {keypos, #vhost_sup.vhost}]),
    {ok, {{simple_one_for_one, 0, 5},
          [{rabbit_vhost, {rabbit_vhost_sup_wrapper, start_link, []},
            RestartStrategy, ?SUPERVISOR_WAIT, supervisor,
            [rabbit_vhost_sup_wrapper, rabbit_vhost_sup]}]}}.

%% Starts the vhost supervision tree on this node and every booted peer.
start_on_all_nodes(VHost) ->
    %% Do not try to start a vhost on booting peer nodes
    AllBooted = [Node || Node <- rabbit_nodes:all_running(),
                         rabbit:is_booted(Node)],
    %% De-duplicate: once this node is booted it appears in AllBooted as
    %% well, and we should not attempt to start the vhost on it twice.
    Nodes = lists:usort([node() | AllBooted]),
    Results = [{Node, start_vhost(VHost, Node)} || Node <- Nodes],
    Failures = lists:filter(fun
                                ({_, {ok, _}}) -> false;
                                ({_, {error, {already_started, _}}}) -> false;
                                (_) -> true
                            end,
                            Results),
    case Failures of
        []     -> ok;
        Errors -> {error, {failed_to_start_vhost_on_nodes, Errors}}
    end.

delete_on_all_nodes(VHost) ->
    [stop_and_delete_vhost(VHost, Node) || Node <- rabbit_nodes:all_running()],
    ok.
%% Stops the local vhost supervision tree (if running) and deletes the
%% vhost's on-disk storage.
stop_and_delete_vhost(VHost) ->
    StopResult =
        case lookup_vhost_sup_record(VHost) of
            not_found -> ok;
            #vhost_sup{wrapper_pid = WrapperPid,
                       vhost_sup_pid = VHostSupPid} ->
                case is_process_alive(WrapperPid) of
                    false -> ok;
                    true ->
                        rabbit_log:info("Stopping vhost supervisor ~p"
                                        " for vhost '~s'~n",
                                        [VHostSupPid, VHost]),
                        case supervisor2:terminate_child(?MODULE, WrapperPid) of
                            ok ->
                                true = ets:delete(?MODULE, VHost),
                                ok;
                            Other ->
                                Other
                        end
                end
        end,
    ok = rabbit_vhost:delete_storage(VHost),
    StopResult.

%% We take an optimistic approach when stopping a remote VHost supervisor.
stop_and_delete_vhost(VHost, Node) when Node == node(self()) ->
    stop_and_delete_vhost(VHost);
stop_and_delete_vhost(VHost, Node) ->
    case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup,
                              stop_and_delete_vhost, [VHost]) of
        ok -> ok;
        {badrpc, RpcErr} ->
            rabbit_log:error("Failed to stop and delete a vhost ~p"
                             " on node ~p."
                             " Reason: ~p",
                             [VHost, Node, RpcErr]),
            {error, RpcErr}
    end.

%% Starts the local vhost tree, tolerating an already-started tree; on
%% other failures the outcome depends on the restart strategy.
-spec init_vhost(rabbit_types:vhost()) ->
          ok | {error, {no_such_vhost, rabbit_types:vhost()}}.
init_vhost(VHost) ->
    case start_vhost(VHost) of
        {ok, _} -> ok;
        {error, {already_started, _}} ->
            rabbit_log:warning(
              "Attempting to start an already started vhost '~s'.",
              [VHost]),
            ok;
        {error, {no_such_vhost, VHost}} ->
            {error, {no_such_vhost, VHost}};
        {error, Reason} ->
            case vhost_restart_strategy() of
                permanent ->
                    rabbit_log:error(
                      "Unable to initialize vhost data store for vhost '~s'."
                      " Reason: ~p",
                      [VHost, Reason]),
                    throw({error, Reason});
                transient ->
                    rabbit_log:warning(
                      "Unable to initialize vhost data store for vhost '~s'."
                      " The vhost will be stopped for this node. "
                      " Reason: ~p",
                      [VHost, Reason]),
                    ok
            end
    end.

-type vhost_error() :: {no_such_vhost, rabbit_types:vhost()} |
                       {vhost_supervisor_not_running, rabbit_types:vhost()}.
-spec get_vhost_sup(rabbit_types:vhost(), node()) ->
          {ok, pid()} | {error, vhost_error() | term()}.
get_vhost_sup(VHost, Node) ->
    case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup,
                              get_vhost_sup, [VHost]) of
        {ok, Pid} when is_pid(Pid) -> {ok, Pid};
        {error, Err}               -> {error, Err};
        {badrpc, RpcErr}           -> {error, RpcErr}
    end.

-spec get_vhost_sup(rabbit_types:vhost()) -> {ok, pid()} | {error, vhost_error()}.
get_vhost_sup(VHost) ->
    case rabbit_vhost:exists(VHost) of
        false ->
            {error, {no_such_vhost, VHost}};
        true ->
            case vhost_sup_pid(VHost) of
                no_pid ->
                    {error, {vhost_supervisor_not_running, VHost}};
                {ok, Pid} when is_pid(Pid) ->
                    {ok, Pid}
            end
    end.

-spec start_vhost(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, term()}.
start_vhost(VHost, Node) ->
    case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup,
                              start_vhost, [VHost]) of
        {ok, Pid}        -> {ok, Pid};
        {error, Err}     -> {error, Err};
        {badrpc, RpcErr} -> {error, RpcErr}
    end.

-spec start_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}.
start_vhost(VHost) ->
    case rabbit_vhost:exists(VHost) of
        false -> {error, {no_such_vhost, VHost}};
        true ->
            case whereis(?MODULE) of
                Pid when is_pid(Pid) ->
                    supervisor2:start_child(?MODULE, [VHost]);
                undefined ->
                    {error, rabbit_vhost_sup_sup_not_running}
            end
    end.

%% A vhost is considered alive if its supervision tree is alive and
%% saved in the ETS table.
-spec is_vhost_alive(rabbit_types:vhost()) -> boolean().
is_vhost_alive(VHost) ->
    case lookup_vhost_sup_record(VHost) of
        #vhost_sup{wrapper_pid = WrapperPid,
                   vhost_sup_pid = VHostSupPid,
                   vhost_process_pid = VHostProcessPid}
          when is_pid(WrapperPid),
               is_pid(VHostSupPid),
               is_pid(VHostProcessPid) ->
            is_process_alive(WrapperPid)
                andalso is_process_alive(VHostSupPid)
                andalso is_process_alive(VHostProcessPid);
        _ -> false
    end.

-spec save_vhost_sup(rabbit_types:vhost(), pid(), pid()) -> ok.
save_vhost_sup(VHost, WrapperPid, VHostPid) ->
    true = ets:insert(?MODULE, #vhost_sup{vhost = VHost,
                                          vhost_sup_pid = VHostPid,
                                          wrapper_pid = WrapperPid}),
    ok.

-spec save_vhost_process(rabbit_types:vhost(), pid()) -> ok.
save_vhost_process(VHost, VHostProcessPid) ->
    true = ets:update_element(?MODULE, VHost,
                              {#vhost_sup.vhost_process_pid, VHostProcessPid}),
    ok.

-spec lookup_vhost_sup_record(rabbit_types:vhost()) -> #vhost_sup{} | not_found.
lookup_vhost_sup_record(VHost) ->
    case ets:info(?MODULE, name) of
        ?MODULE ->
            case ets:lookup(?MODULE, VHost) of
                []                        -> not_found;
                [#vhost_sup{} = VHostSup] -> VHostSup
            end;
        undefined -> not_found
    end.

%% Returns the vhost supervisor pid only if it is still alive; prunes a
%% stale ETS entry otherwise.
-spec vhost_sup_pid(rabbit_types:vhost()) -> no_pid | {ok, pid()}.
vhost_sup_pid(VHost) ->
    case lookup_vhost_sup_record(VHost) of
        not_found ->
            no_pid;
        #vhost_sup{vhost_sup_pid = Pid} = VHostSup ->
            case erlang:is_process_alive(Pid) of
                true -> {ok, Pid};
                false ->
                    ets:delete_object(?MODULE, VHostSup),
                    no_pid
            end
    end.

%% This assumes that a single vhost termination should not shut down
%% nodes unless the operator opts in.
vhost_restart_strategy() ->
    case application:get_env(rabbit, vhost_restart_strategy, continue) of
        continue  -> transient;
        stop_node -> permanent;
        transient -> transient;
        permanent -> permanent
    end.

%% Returns the vhosts whose supervision tree or message stores look
%% unhealthy on this node.
check() ->
    VHosts = rabbit_vhost:list_names(),
    lists:filter(
      fun (V) ->
              case rabbit_vhost_sup_sup:get_vhost_sup(V) of
                  {ok, Sup} ->
                      MsgStores = [Pid || {Name, Pid, _, _}
                                              <- supervisor:which_children(Sup),
                                          lists:member(Name,
                                                       [msg_store_persistent,
                                                        msg_store_transient])],
                      not is_vhost_alive(V)
                          orelse (not lists:all(fun (P) ->
                                                        erlang:is_process_alive(P)
                                                end, MsgStores));
                  {error, _} ->
                      true
              end
      end, VHosts).
diff --git a/deps/rabbit/src/rabbit_vhost_sup_wrapper.erl b/deps/rabbit/src/rabbit_vhost_sup_wrapper.erl new file mode 100644 index 0000000000..ed239ade69 --- /dev/null +++ b/deps/rabbit/src/rabbit_vhost_sup_wrapper.erl @@ -0,0 +1,57 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2017-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +%% This module is a wrapper around vhost supervisor to +%% provide exactly once restart semantics. + +-module(rabbit_vhost_sup_wrapper). + +-include("rabbit.hrl"). + +-behaviour(supervisor2). +-export([init/1]). +-export([start_link/1]). +-export([start_vhost_sup/1]). + +start_link(VHost) -> + %% Using supervisor, because supervisor2 does not stop a started child when + %% another one fails to start. Bug? + case rabbit_vhost_sup_sup:get_vhost_sup(VHost) of + {ok, Pid} -> + {error, {already_started, Pid}}; + {error, _} -> + supervisor:start_link(?MODULE, [VHost]) + end. + +init([VHost]) -> + %% 2 restarts in 5 minutes. One per message store. + {ok, {{one_for_all, 2, 300}, + [ + %% rabbit_vhost_sup is an empty supervisor container for + %% all data processes. + {rabbit_vhost_sup, + {rabbit_vhost_sup_wrapper, start_vhost_sup, [VHost]}, + permanent, infinity, supervisor, + [rabbit_vhost_sup]}, + %% rabbit_vhost_process is a vhost identity process, which + %% is responsible for data recovery and vhost aliveness status. + %% See the module comments for more info. + {rabbit_vhost_process, + {rabbit_vhost_process, start_link, [VHost]}, + permanent, ?WORKER_WAIT, worker, + [rabbit_vhost_process]}]}}. + + +start_vhost_sup(VHost) -> + case rabbit_vhost_sup:start_link(VHost) of + {ok, Pid} -> + %% Save vhost sup record with wrapper pid and vhost sup pid. 
+ ok = rabbit_vhost_sup_sup:save_vhost_sup(VHost, self(), Pid), + {ok, Pid}; + Other -> + Other + end. diff --git a/deps/rabbit/src/rabbit_vm.erl b/deps/rabbit/src/rabbit_vm.erl new file mode 100644 index 0000000000..b014e090c5 --- /dev/null +++ b/deps/rabbit/src/rabbit_vm.erl @@ -0,0 +1,427 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(rabbit_vm). + +-export([memory/0, binary/0, ets_tables_memory/1]). + +-define(MAGIC_PLUGINS, ["cowboy", "ranch", "sockjs"]). + +%%---------------------------------------------------------------------------- + +-spec memory() -> rabbit_types:infos(). + +memory() -> + All = interesting_sups(), + {Sums, _Other} = sum_processes( + lists:append(All), distinguishers(), [memory]), + + [Qs, QsSlave, Qqs, Ssqs, Srqs, SCoor, ConnsReader, ConnsWriter, ConnsChannel, + ConnsOther, MsgIndexProc, MgmtDbProc, Plugins] = + [aggregate(Names, Sums, memory, fun (X) -> X end) + || Names <- distinguished_interesting_sups()], + + MnesiaETS = mnesia_memory(), + MsgIndexETS = ets_memory(msg_stores()), + MetricsETS = ets_memory([rabbit_metrics]), + QuorumETS = ets_memory([ra_log_ets]), + MetricsProc = try + [{_, M}] = process_info(whereis(rabbit_metrics), [memory]), + M + catch + error:badarg -> + 0 + end, + MgmtDbETS = ets_memory([rabbit_mgmt_storage]), + [{total, ErlangTotal}, + {processes, Processes}, + {ets, ETS}, + {atom, Atom}, + {binary, Bin}, + {code, Code}, + {system, System}] = + erlang:memory([total, processes, ets, atom, binary, code, system]), + + Strategy = vm_memory_monitor:get_memory_calculation_strategy(), + Allocated = recon_alloc:memory(allocated), + Rss = vm_memory_monitor:get_rss_memory(), + + AllocatedUnused = max(Allocated - ErlangTotal, 0), + OSReserved = max(Rss - Allocated, 
0), + + OtherProc = Processes + - ConnsReader - ConnsWriter - ConnsChannel - ConnsOther + - Qs - QsSlave - Qqs - Ssqs - Srqs - SCoor - MsgIndexProc - Plugins + - MgmtDbProc - MetricsProc, + + [ + %% Connections + {connection_readers, ConnsReader}, + {connection_writers, ConnsWriter}, + {connection_channels, ConnsChannel}, + {connection_other, ConnsOther}, + + %% Queues + {queue_procs, Qs}, + {queue_slave_procs, QsSlave}, + {quorum_queue_procs, Qqs}, + {stream_queue_procs, Ssqs}, + {stream_queue_replica_reader_procs, Srqs}, + {stream_queue_coordinator_procs, SCoor}, + + %% Processes + {plugins, Plugins}, + {other_proc, lists:max([0, OtherProc])}, %% [1] + + %% Metrics + {metrics, MetricsETS + MetricsProc}, + {mgmt_db, MgmtDbETS + MgmtDbProc}, + + %% ETS + {mnesia, MnesiaETS}, + {quorum_ets, QuorumETS}, + {other_ets, ETS - MnesiaETS - MetricsETS - MgmtDbETS - MsgIndexETS - QuorumETS}, + + %% Messages (mostly, some binaries are not messages) + {binary, Bin}, + {msg_index, MsgIndexETS + MsgIndexProc}, + + %% System + {code, Code}, + {atom, Atom}, + {other_system, System - ETS - Bin - Code - Atom}, + {allocated_unused, AllocatedUnused}, + {reserved_unallocated, OSReserved}, + {strategy, Strategy}, + {total, [{erlang, ErlangTotal}, + {rss, Rss}, + {allocated, Allocated}]} + ]. +%% [1] - erlang:memory(processes) can be less than the sum of its +%% parts. Rather than display something nonsensical, just silence any +%% claims about negative memory. See +%% http://erlang.org/pipermail/erlang-questions/2012-September/069320.html + +-spec binary() -> rabbit_types:infos(). 
+ +binary() -> + All = interesting_sups(), + {Sums, Rest} = + sum_processes( + lists:append(All), + fun (binary, Info, Acc) -> + lists:foldl(fun ({Ptr, Sz, _RefCnt}, Acc0) -> + sets:add_element({Ptr, Sz}, Acc0) + end, Acc, Info) + end, distinguishers(), [{binary, sets:new()}]), + [Other, Qs, QsSlave, Qqs, Ssqs, Srqs, Scoor, ConnsReader, ConnsWriter, + ConnsChannel, ConnsOther, MsgIndexProc, MgmtDbProc, Plugins] = + [aggregate(Names, [{other, Rest} | Sums], binary, fun sum_binary/1) + || Names <- [[other] | distinguished_interesting_sups()]], + [{connection_readers, ConnsReader}, + {connection_writers, ConnsWriter}, + {connection_channels, ConnsChannel}, + {connection_other, ConnsOther}, + {queue_procs, Qs}, + {queue_slave_procs, QsSlave}, + {quorum_queue_procs, Qqs}, + {stream_queue_procs, Ssqs}, + {stream_queue_replica_reader_procs, Srqs}, + {stream_queue_coordinator_procs, Scoor}, + {plugins, Plugins}, + {mgmt_db, MgmtDbProc}, + {msg_index, MsgIndexProc}, + {other, Other}]. + +%%---------------------------------------------------------------------------- + +mnesia_memory() -> + case mnesia:system_info(is_running) of + yes -> lists:sum([bytes(mnesia:table_info(Tab, memory)) || + Tab <- mnesia:system_info(tables)]); + _ -> 0 + end. + +ets_memory(Owners) -> + lists:sum([V || {_K, V} <- ets_tables_memory(Owners)]). + +-spec ets_tables_memory(Owners) -> rabbit_types:infos() + when Owners :: all | OwnerProcessName | [OwnerProcessName], + OwnerProcessName :: atom(). + +ets_tables_memory(all) -> + [{ets:info(T, name), bytes(ets:info(T, memory))} + || T <- ets:all(), + is_atom(T)]; +ets_tables_memory(OwnerName) when is_atom(OwnerName) -> + ets_tables_memory([OwnerName]); +ets_tables_memory(Owners) when is_list(Owners) -> + OwnerPids = lists:map(fun(O) when is_pid(O) -> O; + (O) when is_atom(O) -> whereis(O) + end, + Owners), + [{ets:info(T, name), bytes(ets:info(T, memory))} + || T <- ets:all(), + lists:member(ets:info(T, owner), OwnerPids)]. 
+ +bytes(Words) -> try + Words * erlang:system_info(wordsize) + catch + _:_ -> 0 + end. + +interesting_sups() -> + [queue_sups(), quorum_sups(), stream_server_sups(), stream_reader_sups(), + conn_sups() | interesting_sups0()]. + +queue_sups() -> + all_vhosts_children(rabbit_amqqueue_sup_sup). + +quorum_sups() -> + %% TODO: in the future not all ra servers may be queues and we needs + %% some way to filter this + case whereis(ra_server_sup_sup) of + undefined -> + []; + _ -> + [Pid || {_, Pid, _, _} <- + supervisor:which_children(ra_server_sup_sup)] + end. + +stream_server_sups() -> [osiris_server_sup]. +stream_reader_sups() -> [osiris_replica_reader_sup]. + +msg_stores() -> + all_vhosts_children(msg_store_transient) + ++ + all_vhosts_children(msg_store_persistent). + +all_vhosts_children(Name) -> + case whereis(rabbit_vhost_sup_sup) of + undefined -> []; + Pid when is_pid(Pid) -> + lists:filtermap( + fun({_, VHostSupWrapper, _, _}) -> + case supervisor2:find_child(VHostSupWrapper, + rabbit_vhost_sup) of + [] -> false; + [VHostSup] -> + case supervisor2:find_child(VHostSup, Name) of + [QSup] -> {true, QSup}; + [] -> false + end + end + end, + supervisor:which_children(rabbit_vhost_sup_sup)) + end. + +interesting_sups0() -> + MsgIndexProcs = msg_stores(), + MgmtDbProcs = [rabbit_mgmt_sup_sup], + PluginProcs = plugin_sups(), + [MsgIndexProcs, MgmtDbProcs, PluginProcs]. + +conn_sups() -> + Ranches = lists:flatten(ranch_server_sups()), + [amqp_sup|Ranches]. + +ranch_server_sups() -> + try + ets:match(ranch_server, {{conns_sup, '_'}, '$1'}) + catch + %% Ranch ETS table doesn't exist yet + error:badarg -> [] + end. + +with(Sups, With) -> [{Sup, With} || Sup <- Sups]. + +distinguishers() -> with(queue_sups(), fun queue_type/1) ++ + with(conn_sups(), fun conn_type/1) ++ + with(quorum_sups(), fun ra_type/1). 
+ +distinguished_interesting_sups() -> + [ + with(queue_sups(), master), + with(queue_sups(), slave), + with(quorum_sups(), quorum), + stream_server_sups(), + stream_reader_sups(), + with(quorum_sups(), stream), + with(conn_sups(), reader), + with(conn_sups(), writer), + with(conn_sups(), channel), + with(conn_sups(), other)] + ++ interesting_sups0(). + +plugin_sups() -> + lists:append([plugin_sup(App) || + {App, _, _} <- rabbit_misc:which_applications(), + is_plugin(atom_to_list(App))]). + +plugin_sup(App) -> + case application_controller:get_master(App) of + undefined -> []; + Master -> case application_master:get_child(Master) of + {Pid, _} when is_pid(Pid) -> [process_name(Pid)]; + Pid when is_pid(Pid) -> [process_name(Pid)]; + _ -> [] + end + end. + +process_name(Pid) -> + case process_info(Pid, registered_name) of + {registered_name, Name} -> Name; + _ -> Pid + end. + +is_plugin("rabbitmq_" ++ _) -> true; +is_plugin(App) -> lists:member(App, ?MAGIC_PLUGINS). + +aggregate(Names, Sums, Key, Fun) -> + lists:sum([extract(Name, Sums, Key, Fun) || Name <- Names]). + +extract(Name, Sums, Key, Fun) -> + case keyfind(Name, Sums) of + {value, Accs} -> Fun(keyfetch(Key, Accs)); + false -> 0 + end. + +sum_binary(Set) -> + sets:fold(fun({_Pt, Sz}, Acc) -> Acc + Sz end, 0, Set). + +queue_type(PDict) -> + case keyfind(process_name, PDict) of + {value, {rabbit_mirror_queue_slave, _}} -> slave; + _ -> master + end. + +conn_type(PDict) -> + case keyfind(process_name, PDict) of + {value, {rabbit_reader, _}} -> reader; + {value, {rabbit_writer, _}} -> writer; + {value, {rabbit_channel, _}} -> channel; + _ -> other + end. + +ra_type(PDict) -> + case keyfind('$rabbit_vm_category', PDict) of + {value, rabbit_stream_coordinator} -> stream; + _ -> quorum + end. + +%%---------------------------------------------------------------------------- + +%% NB: this code is non-rabbit specific. + +-type process() :: pid() | atom(). +-type info_key() :: atom(). +-type info_value() :: any(). 
+-type info_item() :: {info_key(), info_value()}. +-type accumulate() :: fun ((info_key(), info_value(), info_value()) -> + info_value()). +-type distinguisher() :: fun (([{term(), term()}]) -> atom()). +-type distinguishers() :: [{info_key(), distinguisher()}]. +-spec sum_processes([process()], distinguishers(), [info_key()]) -> + {[{process(), [info_item()]}], [info_item()]}. +-spec sum_processes([process()], accumulate(), distinguishers(), + [info_item()]) -> + {[{process(), [info_item()]}], [info_item()]}. + +sum_processes(Names, Distinguishers, Items) -> + sum_processes(Names, fun (_, X, Y) -> X + Y end, Distinguishers, + [{Item, 0} || Item <- Items]). + +%% summarize the process_info of all processes based on their +%% '$ancestor' hierarchy, recorded in their process dictionary. +%% +%% The function takes +%% +%% 1) a list of names/pids of processes that are accumulation points +%% in the hierarchy. +%% +%% 2) a function that aggregates individual info items -taking the +%% info item key, value and accumulated value as the input and +%% producing a new accumulated value. +%% +%% 3) a list of info item key / initial accumulator value pairs. +%% +%% The process_info of a process is accumulated at the nearest of its +%% ancestors that is mentioned in the first argument, or, if no such +%% ancestor exists or the ancestor information is absent, in a special +%% 'other' bucket. +%% +%% The result is a pair consisting of +%% +%% 1) a k/v list, containing for each of the accumulation names/pids a +%% list of info items, containing the accumulated data, and +%% +%% 2) the 'other' bucket - a list of info items containing the +%% accumulated data of all processes with no matching ancestors +%% +%% Note that this function operates on names as well as pids, but +%% these must match whatever is contained in the '$ancestor' process +%% dictionary entry. Generally that means for all registered processes +%% the name should be used. 
+sum_processes(Names, Fun, Distinguishers, Acc0) -> + Items = [Item || {Item, _Blank0} <- Acc0], + {NameAccs, OtherAcc} = + lists:foldl( + fun (Pid, Acc) -> + InfoItems = [registered_name, dictionary | Items], + case process_info(Pid, InfoItems) of + undefined -> + Acc; + [{registered_name, RegName}, {dictionary, D} | Vals] -> + %% see docs for process_info/2 for the + %% special handling of 'registered_name' + %% info items + Extra = case RegName of + [] -> []; + N -> [N] + end, + Name0 = find_ancestor(Extra, D, Names), + Name = case keyfind(Name0, Distinguishers) of + {value, DistFun} -> {Name0, DistFun(D)}; + false -> Name0 + end, + accumulate( + Name, Fun, orddict:from_list(Vals), Acc, Acc0) + end + end, {orddict:new(), Acc0}, processes()), + %% these conversions aren't strictly necessary; we do them simply + %% for the sake of encapsulating the representation. + {[{Name, orddict:to_list(Accs)} || + {Name, Accs} <- orddict:to_list(NameAccs)], + orddict:to_list(OtherAcc)}. + +find_ancestor(Extra, D, Names) -> + Ancestors = case keyfind('$ancestors', D) of + {value, Ancs} -> Ancs; + false -> [] + end, + case lists:splitwith(fun (A) -> not lists:member(A, Names) end, + Extra ++ Ancestors) of + {_, []} -> undefined; + {_, [Name | _]} -> Name + end. + +accumulate(undefined, Fun, ValsDict, {NameAccs, OtherAcc}, _Acc0) -> + {NameAccs, orddict:merge(Fun, ValsDict, OtherAcc)}; +accumulate(Name, Fun, ValsDict, {NameAccs, OtherAcc}, Acc0) -> + F = fun (NameAcc) -> orddict:merge(Fun, ValsDict, NameAcc) end, + {case orddict:is_key(Name, NameAccs) of + true -> orddict:update(Name, F, NameAccs); + false -> orddict:store( Name, F(Acc0), NameAccs) + end, OtherAcc}. + +keyfetch(K, L) -> {value, {_, V}} = lists:keysearch(K, 1, L), + V. + +keyfind(K, L) -> case lists:keysearch(K, 1, L) of + {value, {_, V}} -> {value, V}; + false -> false + end. 
diff --git a/deps/rabbit/src/supervised_lifecycle.erl b/deps/rabbit/src/supervised_lifecycle.erl
new file mode 100644
index 0000000000..0e1bb9b5c8
--- /dev/null
+++ b/deps/rabbit/src/supervised_lifecycle.erl
@@ -0,0 +1,53 @@
+%% This Source Code Form is subject to the terms of the Mozilla Public
+%% License, v. 2.0. If a copy of the MPL was not distributed with this
+%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
+%%
+%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved.
+%%
+
+%% Invoke callbacks on startup and termination.
+%%
+%% Simply hook this process into a supervision hierarchy, to have the
+%% callbacks invoked at a precise point during the establishment and
+%% teardown of that hierarchy, respectively.
+%%
+%% Or launch the process independently, and link to it, to have the
+%% callbacks invoked on startup and when the linked process
+%% terminates, respectively.
+
+-module(supervised_lifecycle).
+
+-behavior(gen_server).
+
+-export([start_link/3]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+         code_change/3]).
+
+%%----------------------------------------------------------------------------
+
+-spec start_link(atom(), rabbit_types:mfargs(), rabbit_types:mfargs()) ->
+          rabbit_types:ok_pid_or_error().
+
+%% Name: registered name for this process. StartMFA runs on startup,
+%% StopMFA on termination; each is an {M, F, A} triple.
+start_link(Name, StartMFA, StopMFA) ->
+    gen_server:start_link({local, Name}, ?MODULE, [StartMFA, StopMFA], []).
+
+%%----------------------------------------------------------------------------
+
+%% The start MFA runs synchronously here, so the supervisor's start of
+%% this child blocks until it completes. trap_exit ensures terminate/2
+%% (and thus the stop MFA) also runs on a supervisor-initiated shutdown
+%% or a linked-process exit. The only retained state is the stop MFA.
+init([{M, F, A}, StopMFA]) ->
+    process_flag(trap_exit, true),
+    apply(M, F, A),
+    {ok, StopMFA}.
+
+%% No calls are expected; deliberately never replying means any caller
+%% would time out rather than receive a bogus answer.
+handle_call(_Request, _From, State) -> {noreply, State}.
+
+handle_cast(_Msg, State) -> {noreply, State}.
+
+handle_info(_Info, State) -> {noreply, State}.
+
+%% Invoke the stop MFA (the stored state) exactly once on termination.
+terminate(_Reason, {M, F, A}) ->
+    apply(M, F, A),
+    ok.
+
+code_change(_OldVsn, State, _Extra) -> {ok, State}.
diff --git a/deps/rabbit/src/tcp_listener.erl b/deps/rabbit/src/tcp_listener.erl new file mode 100644 index 0000000000..93c24ab397 --- /dev/null +++ b/deps/rabbit/src/tcp_listener.erl @@ -0,0 +1,90 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(tcp_listener). + +%% Represents a running TCP listener (a process that listens for inbound +%% TCP or TLS connections). Every protocol supported typically has one +%% or two listeners, plain TCP and (optionally) TLS, but there can +%% be more, e.g. when multiple network interfaces are involved. +%% +%% A listener has 6 properties (is a tuple of 6): +%% +%% * IP address +%% * Port +%% * Node +%% * Label (human-friendly name, e.g. AMQP 0-9-1) +%% * Startup callback +%% * Shutdown callback +%% +%% Listeners use Ranch in embedded mode to accept and "bridge" client +%% connections with protocol entry points such as rabbit_reader. +%% +%% Listeners are tracked in a Mnesia table so that they can be +%% +%% * Shut down +%% * Listed (e.g. in the management UI) +%% +%% Every tcp_listener process has callbacks that are executed on start +%% and termination. Those must take care of listener registration +%% among other things. +%% +%% Listeners are supervised by tcp_listener_sup (one supervisor per protocol). +%% +%% See also rabbit_networking and tcp_listener_sup. + +-behaviour(gen_server). + +-export([start_link/5]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {on_startup, on_shutdown, label, ip, port}). + +%%---------------------------------------------------------------------------- + +-type mfargs() :: {atom(), atom(), [any()]}. 
+ +-spec start_link + (inet:ip_address(), inet:port_number(), + mfargs(), mfargs(), string()) -> + rabbit_types:ok_pid_or_error(). + +start_link(IPAddress, Port, + OnStartup, OnShutdown, Label) -> + gen_server:start_link( + ?MODULE, {IPAddress, Port, + OnStartup, OnShutdown, Label}, []). + +%%-------------------------------------------------------------------- + +init({IPAddress, Port, {M,F,A} = OnStartup, OnShutdown, Label}) -> + process_flag(trap_exit, true), + error_logger:info_msg( + "started ~s on ~s:~p~n", + [Label, rabbit_misc:ntoab(IPAddress), Port]), + apply(M, F, A ++ [IPAddress, Port]), + {ok, #state{on_startup = OnStartup, on_shutdown = OnShutdown, + label = Label, ip=IPAddress, port=Port}}. + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, #state{on_shutdown = {M,F,A}, label=Label, ip=IPAddress, port=Port}) -> + error_logger:info_msg("stopped ~s on ~s:~p~n", + [Label, rabbit_misc:ntoab(IPAddress), Port]), + apply(M, F, A ++ [IPAddress, Port]). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/deps/rabbit/src/tcp_listener_sup.erl b/deps/rabbit/src/tcp_listener_sup.erl new file mode 100644 index 0000000000..82128bb2af --- /dev/null +++ b/deps/rabbit/src/tcp_listener_sup.erl @@ -0,0 +1,54 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2007-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(tcp_listener_sup). + +%% Supervises TCP listeners. There is a separate supervisor for every +%% protocol. In case of AMQP 0-9-1, it resides under rabbit_sup. Plugins +%% that provide protocol support (e.g. STOMP) have an instance of this supervisor in their +%% app supervision tree. 
+%% +%% See also rabbit_networking and tcp_listener. + +-behaviour(supervisor). + +-export([start_link/10]). +-export([init/1]). + +-type mfargs() :: {atom(), atom(), [any()]}. + +-spec start_link + (inet:ip_address(), inet:port_number(), module(), [gen_tcp:listen_option()], + module(), any(), mfargs(), mfargs(), integer(), string()) -> + rabbit_types:ok_pid_or_error(). + +start_link(IPAddress, Port, Transport, SocketOpts, ProtoSup, ProtoOpts, OnStartup, OnShutdown, + ConcurrentAcceptorCount, Label) -> + supervisor:start_link( + ?MODULE, {IPAddress, Port, Transport, SocketOpts, ProtoSup, ProtoOpts, OnStartup, OnShutdown, + ConcurrentAcceptorCount, Label}). + +init({IPAddress, Port, Transport, SocketOpts, ProtoSup, ProtoOpts, OnStartup, OnShutdown, + ConcurrentAcceptorCount, Label}) -> + {ok, AckTimeout} = application:get_env(rabbit, ssl_handshake_timeout), + MaxConnections = rabbit_misc:get_env(rabbit, connection_max, infinity), + RanchListenerOpts = #{ + num_acceptors => ConcurrentAcceptorCount, + max_connections => MaxConnections, + handshake_timeout => AckTimeout, + connection_type => supervisor, + socket_opts => [{ip, IPAddress}, + {port, Port} | + SocketOpts] + }, + Flags = {one_for_all, 10, 10}, + OurChildSpecStart = {tcp_listener, start_link, [IPAddress, Port, OnStartup, OnShutdown, Label]}, + OurChildSpec = {tcp_listener, OurChildSpecStart, transient, 16#ffffffff, worker, [tcp_listener]}, + RanchChildSpec = ranch:child_spec(rabbit_networking:ranch_ref(IPAddress, Port), + Transport, RanchListenerOpts, + ProtoSup, ProtoOpts), + {ok, {Flags, [RanchChildSpec, OurChildSpec]}}. diff --git a/deps/rabbit/src/term_to_binary_compat.erl b/deps/rabbit/src/term_to_binary_compat.erl new file mode 100644 index 0000000000..327a846d1f --- /dev/null +++ b/deps/rabbit/src/term_to_binary_compat.erl @@ -0,0 +1,15 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. 
If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2017-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(term_to_binary_compat). + +-include("rabbit.hrl"). + +-export([term_to_binary_1/1]). + +term_to_binary_1(Term) -> + term_to_binary(Term, [{minor_version, 1}]). diff --git a/deps/rabbit/src/vhost.erl b/deps/rabbit/src/vhost.erl new file mode 100644 index 0000000000..ca704183a0 --- /dev/null +++ b/deps/rabbit/src/vhost.erl @@ -0,0 +1,172 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(vhost). + +-include_lib("rabbit_common/include/rabbit.hrl"). +-include("vhost.hrl"). + +-export([ + new/2, + new/3, + fields/0, + fields/1, + info_keys/0, + record_version_to_use/0, + upgrade/1, + upgrade_to/2, + pattern_match_all/0, + get_name/1, + get_limits/1, + get_metadata/1, + get_description/1, + get_tags/1, + set_limits/2 +]). + +-define(record_version, vhost_v2). + +-type(name() :: binary()). + +-type(metadata_key() :: atom()). + +-type(metadata() :: #{description => binary(), + tags => [atom()], + metadata_key() => any()} | undefined). + +-type vhost() :: vhost_v1:vhost_v1() | vhost_v2(). + +-record(vhost, { + %% name as a binary + virtual_host :: name() | '_', + %% proplist of limits configured, if any + limits :: list() | '_', + metadata :: metadata() | '_' +}). + +-type vhost_v2() :: #vhost{ + virtual_host :: name(), + limits :: list(), + metadata :: metadata() + }. + +-type vhost_pattern() :: vhost_v1:vhost_v1_pattern() | + vhost_v2_pattern(). +-type vhost_v2_pattern() :: #vhost{ + virtual_host :: name() | '_', + limits :: '_', + metadata :: '_' + }. 
+ +-export_type([name/0, + metadata_key/0, + metadata/0, + vhost/0, + vhost_v2/0, + vhost_pattern/0, + vhost_v2_pattern/0]). + +-spec new(name(), list()) -> vhost(). +new(Name, Limits) -> + case record_version_to_use() of + ?record_version -> + #vhost{virtual_host = Name, limits = Limits}; + _ -> + vhost_v1:new(Name, Limits) + end. + +-spec new(name(), list(), map()) -> vhost(). +new(Name, Limits, Metadata) -> + case record_version_to_use() of + ?record_version -> + #vhost{virtual_host = Name, limits = Limits, metadata = Metadata}; + _ -> + vhost_v1:new(Name, Limits) + end. + +-spec record_version_to_use() -> vhost_v1 | vhost_v2. + +record_version_to_use() -> + case rabbit_feature_flags:is_enabled(virtual_host_metadata) of + true -> ?record_version; + false -> vhost_v1:record_version_to_use() + end. + +-spec upgrade(vhost()) -> vhost(). + +upgrade(#vhost{} = VHost) -> VHost; +upgrade(OldVHost) -> upgrade_to(record_version_to_use(), OldVHost). + +-spec upgrade_to +(vhost_v2, vhost()) -> vhost_v2(); +(vhost_v1, vhost_v1:vhost_v1()) -> vhost_v1:vhost_v1(). + +upgrade_to(?record_version, #vhost{} = VHost) -> + VHost; +upgrade_to(?record_version, OldVHost) -> + Fields = erlang:tuple_to_list(OldVHost) ++ [#{description => <<"">>, tags => []}], + #vhost{} = erlang:list_to_tuple(Fields); +upgrade_to(Version, OldVHost) -> + vhost_v1:upgrade_to(Version, OldVHost). + + +fields() -> + case record_version_to_use() of + ?record_version -> fields(?record_version); + _ -> vhost_v1:fields() + end. + +fields(?record_version) -> record_info(fields, vhost); +fields(Version) -> vhost_v1:fields(Version). + +info_keys() -> + case record_version_to_use() of + %% note: this reports description and tags separately even though + %% they are stored in the metadata map. MK. + ?record_version -> [name, description, tags, metadata, tracing, cluster_state]; + _ -> vhost_v1:info_keys() + end. + +-spec pattern_match_all() -> vhost_pattern(). 
+ +pattern_match_all() -> + case record_version_to_use() of + ?record_version -> #vhost{_ = '_'}; + _ -> vhost_v1:pattern_match_all() + end. + +-spec get_name(vhost()) -> name(). +get_name(#vhost{virtual_host = Value}) -> Value; +get_name(VHost) -> vhost_v1:get_name(VHost). + +-spec get_limits(vhost()) -> list(). +get_limits(#vhost{limits = Value}) -> Value; +get_limits(VHost) -> vhost_v1:get_limits(VHost). + +-spec get_metadata(vhost()) -> metadata(). +get_metadata(#vhost{metadata = Value}) -> Value; +get_metadata(VHost) -> vhost_v1:get_metadata(VHost). + +-spec get_description(vhost()) -> binary(). +get_description(#vhost{} = VHost) -> + maps:get(description, get_metadata(VHost), undefined); +get_description(VHost) -> + vhost_v1:get_description(VHost). + +-spec get_tags(vhost()) -> [atom()]. +get_tags(#vhost{} = VHost) -> + maps:get(tags, get_metadata(VHost), undefined); +get_tags(VHost) -> + vhost_v1:get_tags(VHost). + +set_limits(VHost, Value) -> + case record_version_to_use() of + ?record_version -> + VHost#vhost{limits = Value}; + _ -> + vhost_v1:set_limits(VHost, Value) + end. diff --git a/deps/rabbit/src/vhost_v1.erl b/deps/rabbit/src/vhost_v1.erl new file mode 100644 index 0000000000..5b53eb148a --- /dev/null +++ b/deps/rabbit/src/vhost_v1.erl @@ -0,0 +1,106 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2018-2020 VMware, Inc. or its affiliates. All rights reserved. +%% + +-module(vhost_v1). + +-include("vhost.hrl"). + +-export([new/2, + new/3, + upgrade/1, + upgrade_to/2, + fields/0, + fields/1, + info_keys/0, + field_name/0, + record_version_to_use/0, + pattern_match_all/0, + get_name/1, + get_limits/1, + get_metadata/1, + get_description/1, + get_tags/1, + set_limits/2 +]). + +-define(record_version, ?MODULE). + +%% Represents a vhost. 
+%%
+%% Historically this record had 2 arguments although the 2nd
+%% was never used (`dummy`, always undefined). This is because
+%% single field records were/are illegal in OTP.
+%%
+%% As of 3.6.x, the second argument is vhost limits,
+%% which is actually used and has the same default.
+%% Nonetheless, this required a migration, see rabbit_upgrade_functions.
+
+-record(vhost, {
+          %% name as a binary
+          virtual_host :: vhost:name() | '_',
+          %% proplist of limits configured, if any
+          limits :: list() | '_'}).
+
+-type vhost() :: vhost_v1().
+-type vhost_v1() :: #vhost{
+                          virtual_host :: vhost:name(),
+                          limits :: list()
+                         }.
+
+-export_type([vhost/0,
+              vhost_v1/0,
+              vhost_pattern/0,
+              vhost_v1_pattern/0]).
+
+
+-spec new(vhost:name(), list()) -> vhost().
+new(Name, Limits) ->
+    #vhost{virtual_host = Name, limits = Limits}.
+
+%% Metadata is silently dropped: the v1 record has no field for it
+%% (metadata only exists in the vhost_v2 record).
+-spec new(vhost:name(), list(), map()) -> vhost().
+new(Name, Limits, _Metadata) ->
+    #vhost{virtual_host = Name, limits = Limits}.
+
+
+-spec record_version_to_use() -> vhost_v1.
+record_version_to_use() ->
+    ?record_version.
+
+%% v1 is the oldest version; upgrading a v1 record is a no-op.
+-spec upgrade(vhost()) -> vhost().
+upgrade(#vhost{} = VHost) -> VHost.
+
+-spec upgrade_to(vhost_v1, vhost()) -> vhost().
+upgrade_to(?record_version, #vhost{} = VHost) ->
+    VHost.
+
+fields() -> fields(?record_version).
+
+fields(?record_version) -> record_info(fields, vhost).
+
+%% Index of the record key field, e.g. for Mnesia table definitions.
+field_name() -> #vhost.virtual_host.
+
+info_keys() -> [name, tracing, cluster_state].
+
+-type vhost_pattern() :: vhost_v1_pattern().
+-type vhost_v1_pattern() :: #vhost{
+                                  virtual_host :: vhost:name() | '_',
+                                  limits :: '_'
+                                 }.
+
+-spec pattern_match_all() -> vhost_pattern().
+
+%% Match pattern that matches every vhost record (all fields wildcarded).
+pattern_match_all() -> #vhost{_ = '_'}.
+
+get_name(#vhost{virtual_host = Value}) -> Value.
+get_limits(#vhost{limits = Value}) -> Value.
+
+%% The v1 record carries no metadata, so these accessors return
+%% 'undefined'; the vhost module delegates here for old-format records.
+get_metadata(_VHost) -> undefined.
+get_description(_VHost) -> undefined.
+get_tags(_VHost) -> undefined.
+
+set_limits(VHost, Value) ->
+    VHost#vhost{limits = Value}.