diff options
| -rw-r--r-- | src/rabbit_feature_flags.erl | 183 | ||||
| -rw-r--r-- | src/rabbit_ff_registry.erl | 13 | ||||
| -rw-r--r-- | test/feature_flags_SUITE.erl | 121 |
3 files changed, 272 insertions, 45 deletions
diff --git a/src/rabbit_feature_flags.erl b/src/rabbit_feature_flags.erl index de9dae2c9f..cf2715a724 100644 --- a/src/rabbit_feature_flags.erl +++ b/src/rabbit_feature_flags.erl @@ -133,7 +133,8 @@ initialize_registry/3, query_supported_feature_flags/0, mark_as_enabled_remotely/2, - mark_as_enabled_remotely/4]). + mark_as_enabled_remotely/4, + registry_loading_lock/0]). -endif. %% Default timeout for operations on remote nodes. @@ -235,6 +236,8 @@ -type migration_fun_context() :: enable | is_enabled. +-type registry_vsn() :: term(). + -export_type([feature_flag_modattr/0, feature_props/0, feature_name/0, @@ -831,6 +834,26 @@ list_of_enabled_feature_flags_to_feature_states(FeatureNames) -> initialize_registry(NewSupportedFeatureFlags, NewFeatureStates, WrittenToDisk) -> + Ret = maybe_initialize_registry(NewSupportedFeatureFlags, + NewFeatureStates, + WrittenToDisk), + case Ret of + ok -> ok; + restart -> initialize_registry(NewSupportedFeatureFlags, + NewFeatureStates, + WrittenToDisk); + Error -> Error + end. + +maybe_initialize_registry(NewSupportedFeatureFlags, + NewFeatureStates, + WrittenToDisk) -> + %% We save the version of the current registry before computing + %% the new one. This is used when we do the actual reload: if the + %% current registry was reloaded in the meantime, we need to restart + %% the computation to make sure we don't loose data. + RegistryVsn = registry_vsn(), + %% We take the feature flags already registered. RegistryInitialized = rabbit_ff_registry:is_registry_initialized(), KnownFeatureFlags1 = case RegistryInitialized of @@ -884,10 +907,18 @@ initialize_registry(NewSupportedFeatureFlags, case Proceed of true -> rabbit_log_feature_flags:debug( - "Feature flags: (re)initialize registry"), - do_initialize_registry(AllFeatureFlags, - FeatureStates, - WrittenToDisk); + "Feature flags: (re)initialize registry (~p)", + [self()]), + T0 = erlang:timestamp(), + Ret = do_initialize_registry(RegistryVsn, + AllFeatureFlags, + FeatureStates, + WrittenToDisk), + T1 = erlang:timestamp(), + rabbit_log_feature_flags:debug( + "Feature flags: time to regen registry: ~p µs", + [timer:now_diff(T1, T0)]), + Ret; false -> rabbit_log_feature_flags:debug( "Feature flags: registry already up-to-date, skipping init"), @@ -909,22 +940,46 @@ does_registry_need_refresh(AllFeatureFlags, %% changes. CurrentAllFeatureFlags = rabbit_ff_registry:list(all), CurrentFeatureStates = rabbit_ff_registry:states(), - CurrentWrittenToDisk = rabbit_ff_registry:is_registry_written_to_disk(), + CurrentWrittenToDisk = + rabbit_ff_registry:is_registry_written_to_disk(), - AllFeatureFlags =/= CurrentAllFeatureFlags orelse - FeatureStates =/= CurrentFeatureStates orelse - WrittenToDisk =/= CurrentWrittenToDisk; + if + AllFeatureFlags =/= CurrentAllFeatureFlags -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: " + "yes, list of feature flags differs"), + true; + FeatureStates =/= CurrentFeatureStates -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: " + "yes, feature flag states differ"), + true; + WrittenToDisk =/= CurrentWrittenToDisk -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: " + "yes, \"written to disk\" state changed"), + true; + true -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: no"), + false + end; false -> + rabbit_log_feature_flags:debug( + "Feature flags: registry refresh needed: " + "yes, first-time initialization"), true end. --spec do_initialize_registry(feature_flags(), +-spec do_initialize_registry(registry_vsn(), + feature_flags(), feature_states(), boolean()) -> ok | {error, any()} | no_return(). %% @private -do_initialize_registry(AllFeatureFlags, +do_initialize_registry(RegistryVsn, + AllFeatureFlags, FeatureStates, WrittenToDisk) -> %% We log the state of those feature flags. @@ -954,15 +1009,10 @@ do_initialize_registry(AllFeatureFlags, %% We request the registry to be regenerated and reloaded with the %% new state. - T0 = erlang:timestamp(), - Ret = regen_registry_mod(AllFeatureFlags, - FeatureStates, - WrittenToDisk), - T1 = erlang:timestamp(), - rabbit_log_feature_flags:debug( - "Feature flags: time to regen registry: ~p µs", - [timer:now_diff(T1, T0)]), - Ret. + regen_registry_mod(RegistryVsn, + AllFeatureFlags, + FeatureStates, + WrittenToDisk). -spec query_supported_feature_flags() -> feature_flags(). %% @private @@ -1040,12 +1090,15 @@ merge_new_feature_flags(AllFeatureFlags, App, FeatureName, FeatureProps) maps:merge(AllFeatureFlags, #{FeatureName => FeatureProps1}). --spec regen_registry_mod(feature_flags(), +-spec regen_registry_mod(registry_vsn(), + feature_flags(), feature_states(), - boolean()) -> ok | {error, any()} | no_return(). + boolean()) -> + ok | restart | {error, any()} | no_return(). %% @private -regen_registry_mod(AllFeatureFlags, +regen_registry_mod(RegistryVsn, + AllFeatureFlags, FeatureStates, WrittenToDisk) -> %% Here, we recreate the source code of the `rabbit_ff_registry` @@ -1222,7 +1275,7 @@ regen_registry_mod(AllFeatureFlags, return_warnings], case compile:forms(Forms, CompileOpts) of {ok, Mod, Bin, _} -> - load_registry_mod(Mod, Bin); + load_registry_mod(RegistryVsn, Mod, Bin); {error, Errors, Warnings} -> rabbit_log_feature_flags:error( "Feature flags: registry compilation:~n" @@ -1244,26 +1297,60 @@ maybe_log_registry_source_code(Forms) -> ok end. --spec load_registry_mod(atom(), binary()) -> - ok | {error, any()} | no_return(). +-ifdef(TEST). +registry_loading_lock() -> ?FF_REGISTRY_LOADING_LOCK. +-endif. + +-spec load_registry_mod(registry_vsn(), atom(), binary()) -> + ok | restart | no_return(). %% @private -load_registry_mod(Mod, Bin) -> +load_registry_mod(RegistryVsn, Mod, Bin) -> rabbit_log_feature_flags:debug( - "Feature flags: registry module ready, loading it..."), - FakeFilename = "Compiled and loaded by " ++ ?MODULE_STRING, + "Feature flags: registry module ready, loading it (~p)...", + [self()]), + FakeFilename = "Compiled and loaded by " ?MODULE_STRING, %% Time to load the new registry, replacing the old one. We use a %% lock here to synchronize concurrent reloads. global:set_lock(?FF_REGISTRY_LOADING_LOCK, [node()]), + rabbit_log_feature_flags:debug( + "Feature flags: acquired lock before reloading registry module (~p)", + [self()]), + %% We want to make sure that the old registry (not the one being + %% currently in use) is purged by the code server. It means no + %% process lingers on that old code. + %% + %% We use code:soft_purge() for that (meaning no process is killed) + %% and we wait in an infinite loop for that to succeed. ok = purge_old_registry(Mod), - true = code:delete(Mod), - Ret = code:load_binary(Mod, FakeFilename, Bin), + %% Now we can replace the currently loaded registry by the new one. + %% The code server takes care of marking the current registry as old + %% and load the new module in an atomic operation. + %% + %% Therefore there is no chance of a window where there is no + %% registry module available, causing the one on disk to be + %% reloaded. + Ret = case registry_vsn() of + RegistryVsn -> code:load_binary(Mod, FakeFilename, Bin); + OtherVsn -> {error, {restart, RegistryVsn, OtherVsn}} + end, + rabbit_log_feature_flags:debug( + "Feature flags: releasing lock after reloading registry module (~p)", + [self()]), global:del_lock(?FF_REGISTRY_LOADING_LOCK, [node()]), case Ret of {module, _} -> rabbit_log_feature_flags:debug( - "Feature flags: registry module loaded"), + "Feature flags: registry module loaded (vsn: ~p -> ~p)", + [RegistryVsn, registry_vsn()]), ok; + {error, {restart, Expected, Current}} -> + rabbit_log_feature_flags:error( + "Feature flags: another registry module was loaded in the " + "meantime (expected old vsn: ~p, current vsn: ~p); " + "restarting the regen", + [Expected, Current]), + restart; {error, Reason} -> rabbit_log_feature_flags:error( "Feature flags: failed to load registry module: ~p", @@ -1271,6 +1358,13 @@ load_registry_mod(Mod, Bin) -> throw({feature_flag_registry_reload_failure, Reason}) end. +-spec registry_vsn() -> registry_vsn(). +%% @private + +registry_vsn() -> + Attrs = rabbit_ff_registry:module_info(attributes), + proplists:get_value(vsn, Attrs, undefined). + purge_old_registry(Mod) -> case code:is_loaded(Mod) of {file, _} -> do_purge_old_registry(Mod); @@ -1353,6 +1447,8 @@ try_to_write_enabled_feature_flags_list(FeatureNames) -> %% are unknown feature flags in that file, we want to keep their %% state, even though they are unsupported at this time. It could be %% that a plugin was disabled in the meantime. + %% + %% FIXME: Lock this code to fix concurrent read/modify/write. PreviouslyEnabled = case try_to_read_enabled_feature_flags_list() of {error, _} -> []; List -> List @@ -1733,14 +1829,14 @@ check_node_compatibility(Node, Timeout) -> %% The goal is that such feature flags are not blocking the %% communication between nodes because the code (which would %% break) is missing on those nodes. Therefore they should not be - %% considered when determinig compatibility. + %% considered when determining compatibility. exchange_feature_flags_from_unknown_apps(Node, Timeout), - %% FIXME FIXME FIXME - %% Quand on tente de mettre deux nœuds en cluster, on a : + %% FIXME: + %% When we try to cluster two nodes, we get: %% Feature flags: starting an unclustered node: all feature flags %% will be enabled by default - %% Ça ne devrait sans doute pas être le cas... + %% It should probably not be the case... %% We can now proceed with the actual compatibility check. rabbit_log_feature_flags:debug( @@ -1921,11 +2017,16 @@ merge_feature_flags_from_unknown_apps(FeatureFlags) end, #{}, FeatureFlags), - rabbit_log_feature_flags:debug( - "Feature flags: register feature flags provided by applications " - "unknown locally: ~p", - [maps:keys(FeatureFlagsFromUnknownApps)]), - initialize_registry(FeatureFlagsFromUnknownApps). + case maps:keys(FeatureFlagsFromUnknownApps) of + [] -> + ok; + _ -> + rabbit_log_feature_flags:debug( + "Feature flags: register feature flags provided by applications " + "unknown locally: ~p", + [maps:keys(FeatureFlagsFromUnknownApps)]), + initialize_registry(FeatureFlagsFromUnknownApps) + end. exchange_feature_flags_from_unknown_apps(Node, Timeout) -> %% The first step is to fetch feature flags from Erlang applications diff --git a/src/rabbit_ff_registry.erl b/src/rabbit_ff_registry.erl index ea80804905..70a86db23b 100644 --- a/src/rabbit_ff_registry.erl +++ b/src/rabbit_ff_registry.erl @@ -37,6 +37,10 @@ is_registry_initialized/0, is_registry_written_to_disk/0]). +-ifdef(TEST). +-on_load(on_load/0). +-endif. + -spec get(rabbit_feature_flags:feature_name()) -> rabbit_feature_flags:feature_props() | undefined. %% @doc @@ -183,3 +187,12 @@ always_return_true() -> always_return_false() -> not always_return_true(). + +-ifdef(TEST). +on_load() -> + _ = (catch rabbit_log_feature_flags:debug( + "Feature flags: Loading initial (uninitialized) registry " + "module (~p)", + [self()])), + ok. +-endif. diff --git a/test/feature_flags_SUITE.erl b/test/feature_flags_SUITE.erl index 443be46d3d..7cfda838ed 100644 --- a/test/feature_flags_SUITE.erl +++ b/test/feature_flags_SUITE.erl @@ -29,7 +29,8 @@ init_per_testcase/2, end_per_testcase/2, - registry/1, + registry_general_usage/1, + registry_concurrent_reloads/1, enable_feature_flag_in_a_healthy_situation/1, enable_unsupported_feature_flag_in_a_healthy_situation/1, enable_feature_flag_when_ff_file_is_unwritable/1, @@ -63,7 +64,8 @@ groups() -> [ {registry, [], [ - registry + registry_general_usage, + registry_concurrent_reloads ]}, {enabling_on_single_node, [], [ @@ -149,6 +151,7 @@ init_per_testcase(Testcase, Config) -> TestNumber = rabbit_ct_helpers:testcase_number(Config, ?MODULE, Testcase), case ?config(tc_group_properties, Config) of [{name, registry} | _] -> + application:set_env(lager, colored, true), application:set_env( lager, handlers, [{lager_console_backend, [{level, debug}]}]), @@ -157,6 +160,9 @@ init_per_testcase(Testcase, Config) -> extra_sinks, [{rabbit_log_lager_event, [{handlers, [{lager_console_backend, [{level, debug}]}]}] + }, + {rabbit_log_feature_flags_lager_event, + [{handlers, [{lager_console_backend, [{level, debug}]}]}] }]), lager:start(), FeatureFlagsFile = filename:join(?config(priv_dir, Config), @@ -259,7 +265,7 @@ end_per_testcase(Testcase, Config) -> -define(list_ff(Which), lists:sort(maps:keys(rabbit_ff_registry:list(Which)))). -registry(_Config) -> +registry_general_usage(_Config) -> %% At first, the registry must be uninitialized. ?assertNot(rabbit_ff_registry:is_registry_initialized()), @@ -269,7 +275,8 @@ registry(_Config) -> ff_b => #{desc => "Feature flag B", stability => stable}}, - rabbit_feature_flags:inject_test_feature_flags(feature_flags_to_app_attrs(FeatureFlags)), + rabbit_feature_flags:inject_test_feature_flags( + feature_flags_to_app_attrs(FeatureFlags)), %% After initialization, it must know about the feature flags %% declared in this testsuite. They must be disabled however. @@ -383,6 +390,112 @@ registry(_Config) -> ?assertNot(rabbit_ff_registry:is_enabled(ff_c)), ?assertNot(rabbit_ff_registry:is_enabled(ff_d)). +registry_concurrent_reloads(_Config) -> + case rabbit_ff_registry:is_registry_initialized() of + true -> ok; + false -> rabbit_feature_flags:initialize_registry() + end, + ?assert(rabbit_ff_registry:is_registry_initialized()), + + Parent = self(), + + MakeName = fun(I) -> + list_to_atom(rabbit_misc:format("ff_~2..0b", [I])) + end, + + ProcIs = lists:seq(1, 10), + Fun = fun(I) -> + %% Each process will declare its own feature flag to + %% make sure that each generated registry module is + %% different, and we don't loose previously declared + %% feature flags. + Name = MakeName(I), + Desc = rabbit_misc:format("Feature flag ~b", [I]), + NewFF = #{Name => + #{desc => Desc, + stability => stable}}, + rabbit_feature_flags:initialize_registry(NewFF), + unlink(Parent) + end, + + %% Prepare feature flags which the spammer process should get at + %% some point. + FeatureFlags = #{ff_a => + #{desc => "Feature flag A", + stability => stable}, + ff_b => + #{desc => "Feature flag B", + stability => stable}}, + rabbit_feature_flags:inject_test_feature_flags( + feature_flags_to_app_attrs(FeatureFlags)), + + %% Spawn a process which heavily uses the registry. + FinalFFList = lists:sort( + maps:keys(FeatureFlags) ++ + [MakeName(I) || I <- ProcIs]), + Spammer = spawn_link(fun() -> registry_spammer([], FinalFFList) end), + rabbit_log_feature_flags:info( + ?MODULE_STRING ": Started registry spammer (~p)", + [self()]), + + %% We acquire the lock from the main process to synchronize the test + %% processes we are about to spawn. + Lock = rabbit_feature_flags:registry_loading_lock(), + ThisNode = [node()], + rabbit_log_feature_flags:info( + ?MODULE_STRING ": Acquiring registry load lock"), + global:set_lock(Lock, ThisNode), + + Pids = [begin + Pid = spawn_link(fun() -> Fun(I) end), + _ = erlang:monitor(process, Pid), + Pid + end + || I <- ProcIs], + + %% We wait for one second to make sure all processes were started + %% and already sleep on the lock. Not really "make sure" because + %% we don't have a way to verify this fact, but it must be enough, + %% right? + timer:sleep(1000), + rabbit_log_feature_flags:info( + ?MODULE_STRING ": Releasing registry load lock"), + global:del_lock(Lock, ThisNode), + + rabbit_log_feature_flags:info( + ?MODULE_STRING ": Wait for test processes to finish"), + lists:foreach( + fun(Pid) -> + receive {'DOWN', _, process, Pid, normal} -> ok end + end, + Pids), + + %% We wait for one more second to make sure the spammer sees + %% all added feature flags. + timer:sleep(1000), + + unlink(Spammer), + exit(Spammer, normal). + +registry_spammer(CurrentFeatureNames, FinalFeatureNames) -> + %% Infinite loop. + case ?list_ff(all) of + CurrentFeatureNames -> + registry_spammer(CurrentFeatureNames, FinalFeatureNames); + FinalFeatureNames -> + rabbit_log_feature_flags:info( + ?MODULE_STRING ": Registry spammer: all feature flags " + "appeared"), + registry_spammer1(FinalFeatureNames); + NewFeatureNames + when length(NewFeatureNames) > length(CurrentFeatureNames) -> + registry_spammer(NewFeatureNames, FinalFeatureNames) + end. + +registry_spammer1(FeatureNames) -> + ?assertEqual(FeatureNames, ?list_ff(all)), + registry_spammer1(FeatureNames). + enable_feature_flag_in_a_healthy_situation(Config) -> FeatureName = ff_from_testsuite, ClusterSize = ?config(rmq_nodes_count, Config), |
