summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichael Klishin <michael@clojurewerkz.org>2016-08-16 14:02:36 +0300
committerMichael Klishin <michael@clojurewerkz.org>2016-08-16 14:02:36 +0300
commit4d4144eb920464c60d57896dd2fdeb034714913b (patch)
tree8d7e5bea50134c1a17d61d88db150de333d9ecf5 /src
parent9d4f0aaa22653130aebc7408729b3164bbe63227 (diff)
parent741e195437f1c958fefbc42768031394d5d53518 (diff)
downloadrabbitmq-server-git-4d4144eb920464c60d57896dd2fdeb034714913b.tar.gz
Merge branch 'binarin-rabbitmq-server-health-check-node-monitor' into stable
Diffstat (limited to 'src')
-rw-r--r--src/rabbit_autoheal.erl11
1 files changed, 11 insertions, 0 deletions
diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl
index 5865ba8227..db4d41221e 100644
--- a/src/rabbit_autoheal.erl
+++ b/src/rabbit_autoheal.erl
@@ -297,6 +297,17 @@ winner_finish(Notify) ->
send(leader(), {autoheal_finished, node()}),
not_healing.
+%% XXX This can enter an infinite loop if mnesia was somehow restarted
+%% outside of our control - i.e. somebody started the app back by hand or
+%% completely restarted the node. One possible solution would be something
+%% like this (but it needs some more pondering and is left for some
+%% other patch):
+%% - monitor top-level mnesia supervisors of all losers
+%% - notify losers about the fact that they are indeed losers
+%% - wait for all monitors to go 'DOWN' (+ maybe some timeout on the whole process)
+%% - do one round of parallel rpc calls to check whether mnesia is still stopped on all
+%% losers
+%% - If everything is still stopped, continue the autoheal process; otherwise cancel it.
wait_for_mnesia_shutdown([Node | Rest] = AllNodes) ->
case rpc:call(Node, mnesia, system_info, [is_running]) of
no ->