Check rabbit_node_monitor during health-check

Tests + comment outlining the problem. The check itself is in a separate commit to `rabbitmq-common`.
author: Alexey Lebedeff <alebedev@mirantis.com> 2016-08-10 14:19:42 +0300
committer: Alexey Lebedeff <alebedev@mirantis.com> 2016-08-10 14:19:42 +0300
commit: 0e35bc61d33b39b2181b0df619274889e6a9b87d (patch)
tree: 9306635c2d852e310e64afce471cd2861c70a4a9 /src
parent: ca4c0c011f963b4511da111261bb48bdd3dabad2 (diff)
download: rabbitmq-server-git-0e35bc61d33b39b2181b0df619274889e6a9b87d.tar.gz
1 files changed, 11 insertions, 0 deletions
diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl
index 5865ba8227..db4d41221e 100644
--- a/src/rabbit_autoheal.erl
+++ b/src/rabbit_autoheal.erl
@@ -297,6 +297,17 @@ winner_finish(Notify) ->
     send(leader(), {autoheal_finished, node()}),
     not_healing.
 
+%% XXX This can enter an infinite loop if mnesia was somehow restarted
+%% outside of our control - i.e. somebody started the app back by hand or
+%% completely restarted the node. One possible solution would be something
+%% like this (but it needs some more pondering and is left for some
+%% other patch):
+%% - monitor top-level mnesia supervisors of all losers
+%% - notify losers about the fact that they are indeed losers
+%% - wait for all monitors to go 'DOWN' (+ maybe some timeout on the whole process)
+%% - do one round of parallel rpc calls to check whether mnesia is still stopped on all
+%%   losers
+%% - If everything is still stopped, continue the autoheal process. Otherwise, cancel it.
 wait_for_mnesia_shutdown([Node | Rest] = AllNodes) ->
     case rpc:call(Node, mnesia, system_info, [is_running]) of
         no ->
author	Alexey Lebedeff <alebedev@mirantis.com>	2016-08-10 14:19:42 +0300
committer	Alexey Lebedeff <alebedev@mirantis.com>	2016-08-10 14:19:42 +0300
commit	0e35bc61d33b39b2181b0df619274889e6a9b87d (patch)
tree	9306635c2d852e310e64afce471cd2861c70a4a9 /src
parent	ca4c0c011f963b4511da111261bb48bdd3dabad2 (diff)
download	rabbitmq-server-git-0e35bc61d33b39b2181b0df619274889e6a9b87d.tar.gz