summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDaniil Fedotov <dfedotov@pivotal.io>2017-07-20 11:42:27 +0100
committerDaniil Fedotov <dfedotov@pivotal.io>2017-07-20 11:42:27 +0100
commit0b8449036fa4340be769a988c107b67a55c9ad3c (patch)
tree5da1812114709834880ed7a549ea698d10cb854b /src
parent049c9023a24ae70e9489ae081965430d8c6aebab (diff)
downloadrabbitmq-server-git-0b8449036fa4340be769a988c107b67a55c9ad3c.tar.gz
Do not crash on vhost recovery failure if vhost strategy is `continue`
When a node is starting or restarting, it can fail to recover a vhost. The `continue` vhost recovery strategy means that a vhost may remain down if it cannot be recovered. This behaviour should also apply during node startup, which should skip failing vhosts rather than crash.
Diffstat (limited to 'src')
-rw-r--r--src/rabbit_vhost.erl5
-rw-r--r--src/rabbit_vhost_sup_sup.erl80
2 files changed, 66 insertions, 19 deletions
diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl
index 85a967816d..df4812da49 100644
--- a/src/rabbit_vhost.erl
+++ b/src/rabbit_vhost.erl
@@ -55,8 +55,9 @@ recover() ->
%% rabbit_vhost_sup_sup will start the actual recovery.
%% So recovery will be run every time a vhost supervisor is restarted.
ok = rabbit_vhost_sup_sup:start(),
- [{ok, _} = rabbit_vhost_sup_sup:vhost_sup(VHost)
- || VHost <- rabbit_vhost:list()],
+
+ [ ok = rabbit_vhost_sup_sup:init_vhost(VHost)
+ || VHost <- rabbit_vhost:list()],
ok.
recover(VHost) ->
diff --git a/src/rabbit_vhost_sup_sup.erl b/src/rabbit_vhost_sup_sup.erl
index 3d33b872ab..5f23218f9a 100644
--- a/src/rabbit_vhost_sup_sup.erl
+++ b/src/rabbit_vhost_sup_sup.erl
@@ -23,7 +23,7 @@
-export([init/1]).
-export([start_link/0, start/0]).
--export([vhost_sup/1, vhost_sup/2, save_vhost_sup/3]).
+-export([init_vhost/1, vhost_sup/1, vhost_sup/2, save_vhost_sup/3]).
-export([delete_on_all_nodes/1]).
-export([start_on_all_nodes/1]).
@@ -31,7 +31,7 @@
-export([is_vhost_alive/1]).
%% Internal
--export([stop_and_delete_vhost/1]).
+-export([stop_and_delete_vhost/1, start_vhost/1]).
-record(vhost_sup, {vhost, vhost_sup_pid, wrapper_pid, vhost_process_pid}).
@@ -59,8 +59,13 @@ init([]) ->
[rabbit_vhost_sup_wrapper, rabbit_vhost_sup]}]}}.
start_on_all_nodes(VHost) ->
- [ {ok, _} = vhost_sup(VHost, Node) || Node <- rabbit_nodes:all_running() ],
- ok.
+ NodesStart = [ {Node, start_vhost(VHost, Node)}
+ || Node <- rabbit_nodes:all_running() ],
+ Failures = lists:filter(fun({_, ok}) -> false; (_) -> true end, NodesStart),
+ case Failures of
+ [] -> ok;
+ Errors -> {error, {failed_to_start_vhost_on_nodes, Errors}}
+ end.
delete_on_all_nodes(VHost) ->
[ stop_and_delete_vhost(VHost, Node) || Node <- rabbit_nodes:all_running() ],
@@ -101,9 +106,31 @@ stop_and_delete_vhost(VHost, Node) ->
{error, RpcErr}
end.
+-spec init_vhost(rabbit_types:vhost()) -> ok.
+init_vhost(VHost) ->
+ case start_vhost(VHost) of
+ {ok, _} -> ok;
+ {error, {no_such_vhost, VHost}} ->
+ {error, {no_such_vhost, VHost}};
+ {error, Reason} ->
+ case vhost_restart_strategy() of
+ permanent ->
+ rabbit_log:error(
+ "Unable to initialize vhost data store for vhost '~s'."
+ " Reason: ~p",
+ [VHost, Reason]),
+ throw({error, Reason});
+ transient ->
+ rabbit_log:warning(
+ "Unable to initialize vhost data store for vhost '~s'."
+ " The vhost will be stopped for this node. "
+ " Reason: ~p",
+ [VHost, Reason]),
+ ok
+ end
+ end.
+
-spec vhost_sup(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, {no_such_vhost, rabbit_types:vhost()} | term()}.
-vhost_sup(VHost, Local) when Local == node(self()) ->
- vhost_sup(VHost);
vhost_sup(VHost, Node) ->
case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, vhost_sup, [VHost]) of
{ok, Pid} when is_pid(Pid) ->
@@ -114,23 +141,42 @@ vhost_sup(VHost, Node) ->
-spec vhost_sup(rabbit_types:vhost()) -> {ok, pid()} | {error, {no_such_vhost, rabbit_types:vhost()}}.
vhost_sup(VHost) ->
+ case vhost_sup_pid(VHost) of
+ no_pid ->
+ case start_vhost(VHost) of
+ ok ->
+ true = is_vhost_alive(VHost),
+ ok;
+ {error, {no_such_vhost, VHost}} ->
+ {error, {no_such_vhost, VHost}};
+ Error ->
+ throw(Error)
+ end;
+ {ok, Pid} when is_pid(Pid) ->
+ {ok, Pid}
+ end.
+
+-spec start_vhost(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, term()}.
+start_vhost(VHost, Node) ->
+ case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, start_vhost, [VHost]) of
+ {ok, Pid} when is_pid(Pid) ->
+ {ok, Pid};
+ {badrpc, RpcErr} ->
+ {error, RpcErr}
+ end.
+
+-spec start_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}.
+start_vhost(VHost) ->
case rabbit_vhost:exists(VHost) of
false -> {error, {no_such_vhost, VHost}};
true ->
- case vhost_sup_pid(VHost) of
- no_pid ->
- case supervisor2:start_child(?MODULE, [VHost]) of
- {ok, _} -> ok;
- {error, {already_started, _}} -> ok;
- Error -> throw(Error)
- end,
- {ok, _} = vhost_sup_pid(VHost);
- {ok, Pid} when is_pid(Pid) ->
- {ok, Pid}
+ case supervisor2:start_child(?MODULE, [VHost]) of
+ {ok, Pid} -> {ok, Pid};
+ {error, {already_started, Pid}} -> {ok, Pid};
+ {error, Err} -> {error, Err}
end
end.
-
-spec is_vhost_alive(rabbit_types:vhost()) -> boolean().
is_vhost_alive(VHost) ->
%% A vhost is considered alive if it's supervision tree is alive and