diff options
| author | Jean-Sébastien Pédron <jean-sebastien@rabbitmq.com> | 2015-07-30 18:06:24 +0200 |
|---|---|---|
| committer | Jean-Sébastien Pédron <jean-sebastien@rabbitmq.com> | 2015-08-05 16:16:38 +0200 |
| commit | 78bf4ce6ef185669066213dca38bc641138108a8 (patch) | |
| tree | 8b4d08d8de24a29072d922334885d1394da79e95 /src | |
| parent | ae53a636074e19b371be04ebdb80c659720df9d7 (diff) | |
| download | rabbitmq-server-git-78bf4ce6ef185669066213dca38bc641138108a8.tar.gz | |
rabbit_node_monitor: Rework `global` hang workaround
Up to and including Erlang 17, `global` would use `erlang:now()` to tag
messages in the process dictionary. In Erlang 18.0, it uses a unique
integer instead, so we can no longer use this value to determine how long
messages have been stuck in the dictionary.
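We now take two snapshots of the dictionary, 10 seconds apart, and compare them: a `sync_tag_his` entry present in both snapshots is considered stuck, and the corresponding peer is unblocked.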
References #233.
Diffstat (limited to 'src')
| -rw-r--r-- | src/rabbit_node_monitor.erl | 26 |
1 files changed, 15 insertions, 11 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index e3960c5c8a..43db5431e0 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -288,24 +288,28 @@ workaround_global_hang() -> receive global_sync_done -> ok - after 15000 -> + after 10000 -> find_blocked_global_peers() end. find_blocked_global_peers() -> + Snapshot1 = snapshot_global_dict(), + timer:sleep(10000), + Snapshot2 = snapshot_global_dict(), + find_blocked_global_peers1(Snapshot2, Snapshot1). + +snapshot_global_dict() -> {status, _, _, [Dict | _]} = sys:get_status(global_name_server), - find_blocked_global_peers1(Dict). + [E || {{sync_tag_his, _}, _} = E <- Dict]. -find_blocked_global_peers1([{{sync_tag_his, Peer}, Timestamp} | Rest]) -> - Diff = timer:now_diff(erlang:now(), Timestamp), - if - Diff >= 10000 -> unblock_global_peer(Peer); - true -> ok +find_blocked_global_peers1([{{sync_tag_his, Peer}, _} = Item | Rest], + OlderSnapshot) -> + case lists:member(Item, OlderSnapshot) of + true -> unblock_global_peer(Peer); + false -> ok end, - find_blocked_global_peers1(Rest); -find_blocked_global_peers1([_ | Rest]) -> - find_blocked_global_peers1(Rest); -find_blocked_global_peers1([]) -> + find_blocked_global_peers1(Rest, OlderSnapshot); +find_blocked_global_peers1([], _) -> ok. unblock_global_peer(PeerNode) -> |
