diff options
| author | Daniil Fedotov <dfedotov@pivotal.io> | 2017-07-20 11:42:27 +0100 |
|---|---|---|
| committer | Daniil Fedotov <dfedotov@pivotal.io> | 2017-07-20 11:42:27 +0100 |
| commit | 0b8449036fa4340be769a988c107b67a55c9ad3c (patch) | |
| tree | 5da1812114709834880ed7a549ea698d10cb854b /src | |
| parent | 049c9023a24ae70e9489ae081965430d8c6aebab (diff) | |
| download | rabbitmq-server-git-0b8449036fa4340be769a988c107b67a55c9ad3c.tar.gz | |
Do not crash on vhost recovery failure if vhost strategy is `continue`
When a node is starting or restarting it can fail to recover a vhost.
The `continue` vhost recovery strategy means that vhosts can be down if they cannot
be recovered. This behaviour should also work for node startup and it should
skip failing vhosts.
Diffstat (limited to 'src')
| -rw-r--r-- | src/rabbit_vhost.erl | 5 | ||||
| -rw-r--r-- | src/rabbit_vhost_sup_sup.erl | 80 |
2 files changed, 66 insertions, 19 deletions
diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index 85a967816d..df4812da49 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -55,8 +55,9 @@ recover() -> %% rabbit_vhost_sup_sup will start the actual recovery. %% So recovery will be run every time a vhost supervisor is restarted. ok = rabbit_vhost_sup_sup:start(), - [{ok, _} = rabbit_vhost_sup_sup:vhost_sup(VHost) - || VHost <- rabbit_vhost:list()], + + [ ok = rabbit_vhost_sup_sup:init_vhost(VHost) + || VHost <- rabbit_vhost:list()], ok. recover(VHost) -> diff --git a/src/rabbit_vhost_sup_sup.erl b/src/rabbit_vhost_sup_sup.erl index 3d33b872ab..5f23218f9a 100644 --- a/src/rabbit_vhost_sup_sup.erl +++ b/src/rabbit_vhost_sup_sup.erl @@ -23,7 +23,7 @@ -export([init/1]). -export([start_link/0, start/0]). --export([vhost_sup/1, vhost_sup/2, save_vhost_sup/3]). +-export([init_vhost/1, vhost_sup/1, vhost_sup/2, save_vhost_sup/3]). -export([delete_on_all_nodes/1]). -export([start_on_all_nodes/1]). @@ -31,7 +31,7 @@ -export([is_vhost_alive/1]). %% Internal --export([stop_and_delete_vhost/1]). +-export([stop_and_delete_vhost/1, start_vhost/1]). -record(vhost_sup, {vhost, vhost_sup_pid, wrapper_pid, vhost_process_pid}). @@ -59,8 +59,13 @@ init([]) -> [rabbit_vhost_sup_wrapper, rabbit_vhost_sup]}]}}. start_on_all_nodes(VHost) -> - [ {ok, _} = vhost_sup(VHost, Node) || Node <- rabbit_nodes:all_running() ], - ok. + NodesStart = [ {Node, start_vhost(VHost, Node)} + || Node <- rabbit_nodes:all_running() ], + Failures = lists:filter(fun({_, ok}) -> false; (_) -> true end, NodesStart), + case Failures of + [] -> ok; + Errors -> {error, {failed_to_start_vhost_on_nodes, Errors}} + end. delete_on_all_nodes(VHost) -> [ stop_and_delete_vhost(VHost, Node) || Node <- rabbit_nodes:all_running() ], @@ -101,9 +106,31 @@ stop_and_delete_vhost(VHost, Node) -> {error, RpcErr} end. +-spec init_vhost(rabbit_types:vhost()) -> ok. 
+init_vhost(VHost) -> + case start_vhost(VHost) of + {ok, _} -> ok; + {error, {no_such_vhost, VHost}} -> + {error, {no_such_vhost, VHost}}; + {error, Reason} -> + case vhost_restart_strategy() of + permanent -> + rabbit_log:error( + "Unable to initialize vhost data store for vhost '~s'." + " Reason: ~p", + [VHost, Reason]), + throw({error, Reason}); + transient -> + rabbit_log:warning( + "Unable to initialize vhost data store for vhost '~s'." + " The vhost will be stopped for this node. " + " Reason: ~p", + [VHost, Reason]), + ok + end + end. + -spec vhost_sup(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, {no_such_vhost, rabbit_types:vhost()} | term()}. -vhost_sup(VHost, Local) when Local == node(self()) -> - vhost_sup(VHost); vhost_sup(VHost, Node) -> case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, vhost_sup, [VHost]) of {ok, Pid} when is_pid(Pid) -> @@ -114,23 +141,42 @@ vhost_sup(VHost, Node) -> -spec vhost_sup(rabbit_types:vhost()) -> {ok, pid()} | {error, {no_such_vhost, rabbit_types:vhost()}}. vhost_sup(VHost) -> + case vhost_sup_pid(VHost) of + no_pid -> + case start_vhost(VHost) of + ok -> + true = is_vhost_alive(VHost), + ok; + {error, {no_such_vhost, VHost}} -> + {error, {no_such_vhost, VHost}}; + Error -> + throw(Error) + end; + {ok, Pid} when is_pid(Pid) -> + {ok, Pid} + end. + +-spec start_vhost(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, term()}. +start_vhost(VHost, Node) -> + case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, start_vhost, [VHost]) of + {ok, Pid} when is_pid(Pid) -> + {ok, Pid}; + {badrpc, RpcErr} -> + {error, RpcErr} + end. + +-spec start_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}. 
+start_vhost(VHost) -> case rabbit_vhost:exists(VHost) of false -> {error, {no_such_vhost, VHost}}; true -> - case vhost_sup_pid(VHost) of - no_pid -> - case supervisor2:start_child(?MODULE, [VHost]) of - {ok, _} -> ok; - {error, {already_started, _}} -> ok; - Error -> throw(Error) - end, - {ok, _} = vhost_sup_pid(VHost); - {ok, Pid} when is_pid(Pid) -> - {ok, Pid} + case supervisor2:start_child(?MODULE, [VHost]) of + {ok, Pid} -> {ok, Pid}; + {error, {already_started, Pid}} -> {ok, Pid}; + {error, Err} -> {error, Err} end end. - -spec is_vhost_alive(rabbit_types:vhost()) -> boolean(). is_vhost_alive(VHost) -> %% A vhost is considered alive if it's supervision tree is alive and |
