summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/rabbit_feature_flags.erl183
-rw-r--r--src/rabbit_ff_registry.erl13
-rw-r--r--test/feature_flags_SUITE.erl121
3 files changed, 272 insertions, 45 deletions
diff --git a/src/rabbit_feature_flags.erl b/src/rabbit_feature_flags.erl
index de9dae2c9f..cf2715a724 100644
--- a/src/rabbit_feature_flags.erl
+++ b/src/rabbit_feature_flags.erl
@@ -133,7 +133,8 @@
initialize_registry/3,
query_supported_feature_flags/0,
mark_as_enabled_remotely/2,
- mark_as_enabled_remotely/4]).
+ mark_as_enabled_remotely/4,
+ registry_loading_lock/0]).
-endif.
%% Default timeout for operations on remote nodes.
@@ -235,6 +236,8 @@
-type migration_fun_context() :: enable | is_enabled.
+-type registry_vsn() :: term().
+
-export_type([feature_flag_modattr/0,
feature_props/0,
feature_name/0,
@@ -831,6 +834,26 @@ list_of_enabled_feature_flags_to_feature_states(FeatureNames) ->
initialize_registry(NewSupportedFeatureFlags,
NewFeatureStates,
WrittenToDisk) ->
+ Ret = maybe_initialize_registry(NewSupportedFeatureFlags,
+ NewFeatureStates,
+ WrittenToDisk),
+ case Ret of
+ ok -> ok;
+ restart -> initialize_registry(NewSupportedFeatureFlags,
+ NewFeatureStates,
+ WrittenToDisk);
+ Error -> Error
+ end.
+
+maybe_initialize_registry(NewSupportedFeatureFlags,
+ NewFeatureStates,
+ WrittenToDisk) ->
+ %% We save the version of the current registry before computing
+ %% the new one. This is used when we do the actual reload: if the
+ %% current registry was reloaded in the meantime, we need to restart
+ %% the computation to make sure we don't loose data.
+ RegistryVsn = registry_vsn(),
+
%% We take the feature flags already registered.
RegistryInitialized = rabbit_ff_registry:is_registry_initialized(),
KnownFeatureFlags1 = case RegistryInitialized of
@@ -884,10 +907,18 @@ initialize_registry(NewSupportedFeatureFlags,
case Proceed of
true ->
rabbit_log_feature_flags:debug(
- "Feature flags: (re)initialize registry"),
- do_initialize_registry(AllFeatureFlags,
- FeatureStates,
- WrittenToDisk);
+ "Feature flags: (re)initialize registry (~p)",
+ [self()]),
+ T0 = erlang:timestamp(),
+ Ret = do_initialize_registry(RegistryVsn,
+ AllFeatureFlags,
+ FeatureStates,
+ WrittenToDisk),
+ T1 = erlang:timestamp(),
+ rabbit_log_feature_flags:debug(
+ "Feature flags: time to regen registry: ~p µs",
+ [timer:now_diff(T1, T0)]),
+ Ret;
false ->
rabbit_log_feature_flags:debug(
"Feature flags: registry already up-to-date, skipping init"),
@@ -909,22 +940,46 @@ does_registry_need_refresh(AllFeatureFlags,
%% changes.
CurrentAllFeatureFlags = rabbit_ff_registry:list(all),
CurrentFeatureStates = rabbit_ff_registry:states(),
- CurrentWrittenToDisk = rabbit_ff_registry:is_registry_written_to_disk(),
+ CurrentWrittenToDisk =
+ rabbit_ff_registry:is_registry_written_to_disk(),
- AllFeatureFlags =/= CurrentAllFeatureFlags orelse
- FeatureStates =/= CurrentFeatureStates orelse
- WrittenToDisk =/= CurrentWrittenToDisk;
+ if
+ AllFeatureFlags =/= CurrentAllFeatureFlags ->
+ rabbit_log_feature_flags:debug(
+ "Feature flags: registry refresh needed: "
+ "yes, list of feature flags differs"),
+ true;
+ FeatureStates =/= CurrentFeatureStates ->
+ rabbit_log_feature_flags:debug(
+ "Feature flags: registry refresh needed: "
+ "yes, feature flag states differ"),
+ true;
+ WrittenToDisk =/= CurrentWrittenToDisk ->
+ rabbit_log_feature_flags:debug(
+ "Feature flags: registry refresh needed: "
+ "yes, \"written to disk\" state changed"),
+ true;
+ true ->
+ rabbit_log_feature_flags:debug(
+ "Feature flags: registry refresh needed: no"),
+ false
+ end;
false ->
+ rabbit_log_feature_flags:debug(
+ "Feature flags: registry refresh needed: "
+ "yes, first-time initialization"),
true
end.
--spec do_initialize_registry(feature_flags(),
+-spec do_initialize_registry(registry_vsn(),
+ feature_flags(),
feature_states(),
boolean()) ->
ok | {error, any()} | no_return().
%% @private
-do_initialize_registry(AllFeatureFlags,
+do_initialize_registry(RegistryVsn,
+ AllFeatureFlags,
FeatureStates,
WrittenToDisk) ->
%% We log the state of those feature flags.
@@ -954,15 +1009,10 @@ do_initialize_registry(AllFeatureFlags,
%% We request the registry to be regenerated and reloaded with the
%% new state.
- T0 = erlang:timestamp(),
- Ret = regen_registry_mod(AllFeatureFlags,
- FeatureStates,
- WrittenToDisk),
- T1 = erlang:timestamp(),
- rabbit_log_feature_flags:debug(
- "Feature flags: time to regen registry: ~p µs",
- [timer:now_diff(T1, T0)]),
- Ret.
+ regen_registry_mod(RegistryVsn,
+ AllFeatureFlags,
+ FeatureStates,
+ WrittenToDisk).
-spec query_supported_feature_flags() -> feature_flags().
%% @private
@@ -1040,12 +1090,15 @@ merge_new_feature_flags(AllFeatureFlags, App, FeatureName, FeatureProps)
maps:merge(AllFeatureFlags,
#{FeatureName => FeatureProps1}).
--spec regen_registry_mod(feature_flags(),
+-spec regen_registry_mod(registry_vsn(),
+ feature_flags(),
feature_states(),
- boolean()) -> ok | {error, any()} | no_return().
+ boolean()) ->
+ ok | restart | {error, any()} | no_return().
%% @private
-regen_registry_mod(AllFeatureFlags,
+regen_registry_mod(RegistryVsn,
+ AllFeatureFlags,
FeatureStates,
WrittenToDisk) ->
%% Here, we recreate the source code of the `rabbit_ff_registry`
@@ -1222,7 +1275,7 @@ regen_registry_mod(AllFeatureFlags,
return_warnings],
case compile:forms(Forms, CompileOpts) of
{ok, Mod, Bin, _} ->
- load_registry_mod(Mod, Bin);
+ load_registry_mod(RegistryVsn, Mod, Bin);
{error, Errors, Warnings} ->
rabbit_log_feature_flags:error(
"Feature flags: registry compilation:~n"
@@ -1244,26 +1297,60 @@ maybe_log_registry_source_code(Forms) ->
ok
end.
--spec load_registry_mod(atom(), binary()) ->
- ok | {error, any()} | no_return().
+-ifdef(TEST).
+registry_loading_lock() -> ?FF_REGISTRY_LOADING_LOCK.
+-endif.
+
+-spec load_registry_mod(registry_vsn(), atom(), binary()) ->
+ ok | restart | no_return().
%% @private
-load_registry_mod(Mod, Bin) ->
+load_registry_mod(RegistryVsn, Mod, Bin) ->
rabbit_log_feature_flags:debug(
- "Feature flags: registry module ready, loading it..."),
- FakeFilename = "Compiled and loaded by " ++ ?MODULE_STRING,
+ "Feature flags: registry module ready, loading it (~p)...",
+ [self()]),
+ FakeFilename = "Compiled and loaded by " ?MODULE_STRING,
%% Time to load the new registry, replacing the old one. We use a
%% lock here to synchronize concurrent reloads.
global:set_lock(?FF_REGISTRY_LOADING_LOCK, [node()]),
+ rabbit_log_feature_flags:debug(
+ "Feature flags: acquired lock before reloading registry module (~p)",
+ [self()]),
+ %% We want to make sure that the old registry (not the one being
+ %% currently in use) is purged by the code server. It means no
+ %% process lingers on that old code.
+ %%
+ %% We use code:soft_purge() for that (meaning no process is killed)
+ %% and we wait in an infinite loop for that to succeed.
ok = purge_old_registry(Mod),
- true = code:delete(Mod),
- Ret = code:load_binary(Mod, FakeFilename, Bin),
+ %% Now we can replace the currently loaded registry by the new one.
+ %% The code server takes care of marking the current registry as old
+ %% and load the new module in an atomic operation.
+ %%
+ %% Therefore there is no chance of a window where there is no
+ %% registry module available, causing the one on disk to be
+ %% reloaded.
+ Ret = case registry_vsn() of
+ RegistryVsn -> code:load_binary(Mod, FakeFilename, Bin);
+ OtherVsn -> {error, {restart, RegistryVsn, OtherVsn}}
+ end,
+ rabbit_log_feature_flags:debug(
+ "Feature flags: releasing lock after reloading registry module (~p)",
+ [self()]),
global:del_lock(?FF_REGISTRY_LOADING_LOCK, [node()]),
case Ret of
{module, _} ->
rabbit_log_feature_flags:debug(
- "Feature flags: registry module loaded"),
+ "Feature flags: registry module loaded (vsn: ~p -> ~p)",
+ [RegistryVsn, registry_vsn()]),
ok;
+ {error, {restart, Expected, Current}} ->
+ rabbit_log_feature_flags:error(
+ "Feature flags: another registry module was loaded in the "
+ "meantime (expected old vsn: ~p, current vsn: ~p); "
+ "restarting the regen",
+ [Expected, Current]),
+ restart;
{error, Reason} ->
rabbit_log_feature_flags:error(
"Feature flags: failed to load registry module: ~p",
@@ -1271,6 +1358,13 @@ load_registry_mod(Mod, Bin) ->
throw({feature_flag_registry_reload_failure, Reason})
end.
+-spec registry_vsn() -> registry_vsn().
+%% @private
+
+registry_vsn() ->
+ Attrs = rabbit_ff_registry:module_info(attributes),
+ proplists:get_value(vsn, Attrs, undefined).
+
purge_old_registry(Mod) ->
case code:is_loaded(Mod) of
{file, _} -> do_purge_old_registry(Mod);
@@ -1353,6 +1447,8 @@ try_to_write_enabled_feature_flags_list(FeatureNames) ->
%% are unknown feature flags in that file, we want to keep their
%% state, even though they are unsupported at this time. It could be
%% that a plugin was disabled in the meantime.
+ %%
+ %% FIXME: Lock this code to fix concurrent read/modify/write.
PreviouslyEnabled = case try_to_read_enabled_feature_flags_list() of
{error, _} -> [];
List -> List
@@ -1733,14 +1829,14 @@ check_node_compatibility(Node, Timeout) ->
%% The goal is that such feature flags are not blocking the
%% communication between nodes because the code (which would
%% break) is missing on those nodes. Therefore they should not be
- %% considered when determinig compatibility.
+ %% considered when determining compatibility.
exchange_feature_flags_from_unknown_apps(Node, Timeout),
- %% FIXME FIXME FIXME
- %% Quand on tente de mettre deux nœuds en cluster, on a :
+ %% FIXME:
+ %% When we try to cluster two nodes, we get:
%% Feature flags: starting an unclustered node: all feature flags
%% will be enabled by default
- %% Ça ne devrait sans doute pas être le cas...
+ %% It should probably not be the case...
%% We can now proceed with the actual compatibility check.
rabbit_log_feature_flags:debug(
@@ -1921,11 +2017,16 @@ merge_feature_flags_from_unknown_apps(FeatureFlags)
end,
#{},
FeatureFlags),
- rabbit_log_feature_flags:debug(
- "Feature flags: register feature flags provided by applications "
- "unknown locally: ~p",
- [maps:keys(FeatureFlagsFromUnknownApps)]),
- initialize_registry(FeatureFlagsFromUnknownApps).
+ case maps:keys(FeatureFlagsFromUnknownApps) of
+ [] ->
+ ok;
+ _ ->
+ rabbit_log_feature_flags:debug(
+ "Feature flags: register feature flags provided by applications "
+ "unknown locally: ~p",
+ [maps:keys(FeatureFlagsFromUnknownApps)]),
+ initialize_registry(FeatureFlagsFromUnknownApps)
+ end.
exchange_feature_flags_from_unknown_apps(Node, Timeout) ->
%% The first step is to fetch feature flags from Erlang applications
diff --git a/src/rabbit_ff_registry.erl b/src/rabbit_ff_registry.erl
index ea80804905..70a86db23b 100644
--- a/src/rabbit_ff_registry.erl
+++ b/src/rabbit_ff_registry.erl
@@ -37,6 +37,10 @@
is_registry_initialized/0,
is_registry_written_to_disk/0]).
+-ifdef(TEST).
+-on_load(on_load/0).
+-endif.
+
-spec get(rabbit_feature_flags:feature_name()) ->
rabbit_feature_flags:feature_props() | undefined.
%% @doc
@@ -183,3 +187,12 @@ always_return_true() ->
always_return_false() ->
not always_return_true().
+
+-ifdef(TEST).
+on_load() ->
+ _ = (catch rabbit_log_feature_flags:debug(
+ "Feature flags: Loading initial (uninitialized) registry "
+ "module (~p)",
+ [self()])),
+ ok.
+-endif.
diff --git a/test/feature_flags_SUITE.erl b/test/feature_flags_SUITE.erl
index 443be46d3d..7cfda838ed 100644
--- a/test/feature_flags_SUITE.erl
+++ b/test/feature_flags_SUITE.erl
@@ -29,7 +29,8 @@
init_per_testcase/2,
end_per_testcase/2,
- registry/1,
+ registry_general_usage/1,
+ registry_concurrent_reloads/1,
enable_feature_flag_in_a_healthy_situation/1,
enable_unsupported_feature_flag_in_a_healthy_situation/1,
enable_feature_flag_when_ff_file_is_unwritable/1,
@@ -63,7 +64,8 @@ groups() ->
[
{registry, [],
[
- registry
+ registry_general_usage,
+ registry_concurrent_reloads
]},
{enabling_on_single_node, [],
[
@@ -149,6 +151,7 @@ init_per_testcase(Testcase, Config) ->
TestNumber = rabbit_ct_helpers:testcase_number(Config, ?MODULE, Testcase),
case ?config(tc_group_properties, Config) of
[{name, registry} | _] ->
+ application:set_env(lager, colored, true),
application:set_env(
lager,
handlers, [{lager_console_backend, [{level, debug}]}]),
@@ -157,6 +160,9 @@ init_per_testcase(Testcase, Config) ->
extra_sinks,
[{rabbit_log_lager_event,
[{handlers, [{lager_console_backend, [{level, debug}]}]}]
+ },
+ {rabbit_log_feature_flags_lager_event,
+ [{handlers, [{lager_console_backend, [{level, debug}]}]}]
}]),
lager:start(),
FeatureFlagsFile = filename:join(?config(priv_dir, Config),
@@ -259,7 +265,7 @@ end_per_testcase(Testcase, Config) ->
-define(list_ff(Which),
lists:sort(maps:keys(rabbit_ff_registry:list(Which)))).
-registry(_Config) ->
+registry_general_usage(_Config) ->
%% At first, the registry must be uninitialized.
?assertNot(rabbit_ff_registry:is_registry_initialized()),
@@ -269,7 +275,8 @@ registry(_Config) ->
ff_b =>
#{desc => "Feature flag B",
stability => stable}},
- rabbit_feature_flags:inject_test_feature_flags(feature_flags_to_app_attrs(FeatureFlags)),
+ rabbit_feature_flags:inject_test_feature_flags(
+ feature_flags_to_app_attrs(FeatureFlags)),
%% After initialization, it must know about the feature flags
%% declared in this testsuite. They must be disabled however.
@@ -383,6 +390,112 @@ registry(_Config) ->
?assertNot(rabbit_ff_registry:is_enabled(ff_c)),
?assertNot(rabbit_ff_registry:is_enabled(ff_d)).
+registry_concurrent_reloads(_Config) ->
+ case rabbit_ff_registry:is_registry_initialized() of
+ true -> ok;
+ false -> rabbit_feature_flags:initialize_registry()
+ end,
+ ?assert(rabbit_ff_registry:is_registry_initialized()),
+
+ Parent = self(),
+
+ MakeName = fun(I) ->
+ list_to_atom(rabbit_misc:format("ff_~2..0b", [I]))
+ end,
+
+ ProcIs = lists:seq(1, 10),
+ Fun = fun(I) ->
+ %% Each process will declare its own feature flag to
+ %% make sure that each generated registry module is
+ %% different, and we don't loose previously declared
+ %% feature flags.
+ Name = MakeName(I),
+ Desc = rabbit_misc:format("Feature flag ~b", [I]),
+ NewFF = #{Name =>
+ #{desc => Desc,
+ stability => stable}},
+ rabbit_feature_flags:initialize_registry(NewFF),
+ unlink(Parent)
+ end,
+
+ %% Prepare feature flags which the spammer process should get at
+ %% some point.
+ FeatureFlags = #{ff_a =>
+ #{desc => "Feature flag A",
+ stability => stable},
+ ff_b =>
+ #{desc => "Feature flag B",
+ stability => stable}},
+ rabbit_feature_flags:inject_test_feature_flags(
+ feature_flags_to_app_attrs(FeatureFlags)),
+
+ %% Spawn a process which heavily uses the registry.
+ FinalFFList = lists:sort(
+ maps:keys(FeatureFlags) ++
+ [MakeName(I) || I <- ProcIs]),
+ Spammer = spawn_link(fun() -> registry_spammer([], FinalFFList) end),
+ rabbit_log_feature_flags:info(
+ ?MODULE_STRING ": Started registry spammer (~p)",
+ [self()]),
+
+ %% We acquire the lock from the main process to synchronize the test
+ %% processes we are about to spawn.
+ Lock = rabbit_feature_flags:registry_loading_lock(),
+ ThisNode = [node()],
+ rabbit_log_feature_flags:info(
+ ?MODULE_STRING ": Acquiring registry load lock"),
+ global:set_lock(Lock, ThisNode),
+
+ Pids = [begin
+ Pid = spawn_link(fun() -> Fun(I) end),
+ _ = erlang:monitor(process, Pid),
+ Pid
+ end
+ || I <- ProcIs],
+
+ %% We wait for one second to make sure all processes were started
+ %% and already sleep on the lock. Not really "make sure" because
+ %% we don't have a way to verify this fact, but it must be enough,
+ %% right?
+ timer:sleep(1000),
+ rabbit_log_feature_flags:info(
+ ?MODULE_STRING ": Releasing registry load lock"),
+ global:del_lock(Lock, ThisNode),
+
+ rabbit_log_feature_flags:info(
+ ?MODULE_STRING ": Wait for test processes to finish"),
+ lists:foreach(
+ fun(Pid) ->
+ receive {'DOWN', _, process, Pid, normal} -> ok end
+ end,
+ Pids),
+
+ %% We wait for one more second to make sure the spammer sees
+ %% all added feature flags.
+ timer:sleep(1000),
+
+ unlink(Spammer),
+ exit(Spammer, normal).
+
+registry_spammer(CurrentFeatureNames, FinalFeatureNames) ->
+ %% Infinite loop.
+ case ?list_ff(all) of
+ CurrentFeatureNames ->
+ registry_spammer(CurrentFeatureNames, FinalFeatureNames);
+ FinalFeatureNames ->
+ rabbit_log_feature_flags:info(
+ ?MODULE_STRING ": Registry spammer: all feature flags "
+ "appeared"),
+ registry_spammer1(FinalFeatureNames);
+ NewFeatureNames
+ when length(NewFeatureNames) > length(CurrentFeatureNames) ->
+ registry_spammer(NewFeatureNames, FinalFeatureNames)
+ end.
+
+registry_spammer1(FeatureNames) ->
+ ?assertEqual(FeatureNames, ?list_ff(all)),
+ registry_spammer1(FeatureNames).
+
enable_feature_flag_in_a_healthy_situation(Config) ->
FeatureName = ff_from_testsuite,
ClusterSize = ?config(rmq_nodes_count, Config),