summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Daniil Fedotov <dfedotov@pivotal.io> 2017-07-20 11:42:27 +0100
committer: Daniil Fedotov <dfedotov@pivotal.io> 2017-07-20 11:42:27 +0100
commit: 0b8449036fa4340be769a988c107b67a55c9ad3c (patch)
tree: 5da1812114709834880ed7a549ea698d10cb854b
parent: 049c9023a24ae70e9489ae081965430d8c6aebab (diff)
download: rabbitmq-server-git-0b8449036fa4340be769a988c107b67a55c9ad3c.tar.gz
Do not crash on vhost recovery failure if vhost strategy is `continue`
When a node is starting or restarting, it can fail to recover a vhost. The `continue` vhost recovery strategy means that a vhost can be down if it cannot be recovered. This behaviour should also apply during node startup: vhosts that fail to recover should be skipped instead of crashing the node.
-rw-r--r-- src/rabbit_vhost.erl 5
-rw-r--r-- src/rabbit_vhost_sup_sup.erl 80
2 files changed, 66 insertions, 19 deletions
diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl
index 85a967816d..df4812da49 100644
--- a/src/rabbit_vhost.erl
+++ b/src/rabbit_vhost.erl
@@ -55,8 +55,9 @@ recover() ->
%% rabbit_vhost_sup_sup will start the actual recovery.
%% So recovery will be run every time a vhost supervisor is restarted.
ok = rabbit_vhost_sup_sup:start(),
- [{ok, _} = rabbit_vhost_sup_sup:vhost_sup(VHost)
- || VHost <- rabbit_vhost:list()],
+
+ [ ok = rabbit_vhost_sup_sup:init_vhost(VHost)
+ || VHost <- rabbit_vhost:list()],
ok.
recover(VHost) ->
diff --git a/src/rabbit_vhost_sup_sup.erl b/src/rabbit_vhost_sup_sup.erl
index 3d33b872ab..5f23218f9a 100644
--- a/src/rabbit_vhost_sup_sup.erl
+++ b/src/rabbit_vhost_sup_sup.erl
@@ -23,7 +23,7 @@
-export([init/1]).
-export([start_link/0, start/0]).
--export([vhost_sup/1, vhost_sup/2, save_vhost_sup/3]).
+-export([init_vhost/1, vhost_sup/1, vhost_sup/2, save_vhost_sup/3]).
-export([delete_on_all_nodes/1]).
-export([start_on_all_nodes/1]).
@@ -31,7 +31,7 @@
-export([is_vhost_alive/1]).
%% Internal
--export([stop_and_delete_vhost/1]).
+-export([stop_and_delete_vhost/1, start_vhost/1]).
-record(vhost_sup, {vhost, vhost_sup_pid, wrapper_pid, vhost_process_pid}).
@@ -59,8 +59,13 @@ init([]) ->
[rabbit_vhost_sup_wrapper, rabbit_vhost_sup]}]}}.
start_on_all_nodes(VHost) ->
- [ {ok, _} = vhost_sup(VHost, Node) || Node <- rabbit_nodes:all_running() ],
- ok.
+ NodesStart = [ {Node, start_vhost(VHost, Node)}
+ || Node <- rabbit_nodes:all_running() ],
+ Failures = lists:filter(fun({_, ok}) -> false; (_) -> true end, NodesStart),
+ case Failures of
+ [] -> ok;
+ Errors -> {error, {failed_to_start_vhost_on_nodes, Errors}}
+ end.
delete_on_all_nodes(VHost) ->
[ stop_and_delete_vhost(VHost, Node) || Node <- rabbit_nodes:all_running() ],
@@ -101,9 +106,31 @@ stop_and_delete_vhost(VHost, Node) ->
{error, RpcErr}
end.
+-spec init_vhost(rabbit_types:vhost()) -> ok.
+init_vhost(VHost) ->
+ case start_vhost(VHost) of
+ {ok, _} -> ok;
+ {error, {no_such_vhost, VHost}} ->
+ {error, {no_such_vhost, VHost}};
+ {error, Reason} ->
+ case vhost_restart_strategy() of
+ permanent ->
+ rabbit_log:error(
+ "Unable to initialize vhost data store for vhost '~s'."
+ " Reason: ~p",
+ [VHost, Reason]),
+ throw({error, Reason});
+ transient ->
+ rabbit_log:warning(
+ "Unable to initialize vhost data store for vhost '~s'."
+ " The vhost will be stopped for this node. "
+ " Reason: ~p",
+ [VHost, Reason]),
+ ok
+ end
+ end.
+
-spec vhost_sup(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, {no_such_vhost, rabbit_types:vhost()} | term()}.
-vhost_sup(VHost, Local) when Local == node(self()) ->
- vhost_sup(VHost);
vhost_sup(VHost, Node) ->
case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, vhost_sup, [VHost]) of
{ok, Pid} when is_pid(Pid) ->
@@ -114,23 +141,42 @@ vhost_sup(VHost, Node) ->
-spec vhost_sup(rabbit_types:vhost()) -> {ok, pid()} | {error, {no_such_vhost, rabbit_types:vhost()}}.
vhost_sup(VHost) ->
+ case vhost_sup_pid(VHost) of
+ no_pid ->
+ case start_vhost(VHost) of
+ ok ->
+ true = is_vhost_alive(VHost),
+ ok;
+ {error, {no_such_vhost, VHost}} ->
+ {error, {no_such_vhost, VHost}};
+ Error ->
+ throw(Error)
+ end;
+ {ok, Pid} when is_pid(Pid) ->
+ {ok, Pid}
+ end.
+
+-spec start_vhost(rabbit_types:vhost(), node()) -> {ok, pid()} | {error, term()}.
+start_vhost(VHost, Node) ->
+ case rabbit_misc:rpc_call(Node, rabbit_vhost_sup_sup, start_vhost, [VHost]) of
+ {ok, Pid} when is_pid(Pid) ->
+ {ok, Pid};
+ {badrpc, RpcErr} ->
+ {error, RpcErr}
+ end.
+
+-spec start_vhost(rabbit_types:vhost()) -> {ok, pid()} | {error, term()}.
+start_vhost(VHost) ->
case rabbit_vhost:exists(VHost) of
false -> {error, {no_such_vhost, VHost}};
true ->
- case vhost_sup_pid(VHost) of
- no_pid ->
- case supervisor2:start_child(?MODULE, [VHost]) of
- {ok, _} -> ok;
- {error, {already_started, _}} -> ok;
- Error -> throw(Error)
- end,
- {ok, _} = vhost_sup_pid(VHost);
- {ok, Pid} when is_pid(Pid) ->
- {ok, Pid}
+ case supervisor2:start_child(?MODULE, [VHost]) of
+ {ok, Pid} -> {ok, Pid};
+ {error, {already_started, Pid}} -> {ok, Pid};
+ {error, Err} -> {error, Err}
end
end.
-
-spec is_vhost_alive(rabbit_types:vhost()) -> boolean().
is_vhost_alive(VHost) ->
%% A vhost is considered alive if it's supervision tree is alive and