summaryrefslogtreecommitdiff
path: root/src/osd/OSD.h
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-05-29 13:26:45 -0700
committerSage Weil <sage@inktank.com>2013-05-29 22:43:50 -0700
commit0c0595514db6590d5f89b5deac2d8bdf11d0b530 (patch)
tree6f18cb4d140188a37b5a904f86e356cd54d40f9c /src/osd/OSD.h
parent04aa2b5edf72eb59a5dc688475df59dda25a3cac (diff)
downloadceph-0c0595514db6590d5f89b5deac2d8bdf11d0b530.tar.gz
osd: wait for healthy pings from peers in waiting-for-healthy state
If we are (wrongly) marked down, we need to go into the waiting-for-healthy state and verify that our network interfaces are working before trying to rejoin the cluster.

- make the _is_healthy() check require positive proof that pings are working
- do heartbeat checks and updates in this state
- reset the random peers every heartbeat_interval, in case we keep picking bad ones

Signed-off-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'src/osd/OSD.h')
-rw-r--r--src/osd/OSD.h8
1 files changed, 8 insertions, 0 deletions
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 50f7c9c073d..effbb5e3533 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -734,6 +734,7 @@ private:
Messenger *hbclient_messenger;
Messenger *hb_front_server_messenger;
Messenger *hb_back_server_messenger;
+ utime_t last_heartbeat_resample; ///< last time we chose random peers in waiting-for-healthy state
void _add_heartbeat_peer(int p);
void _remove_heartbeat_peer(int p);
@@ -745,6 +746,11 @@ private:
void heartbeat_entry();
void need_heartbeat_peer_update();
+ void heartbeat_kick() {  // signal heartbeat_cond while holding heartbeat_lock
+ Mutex::Locker l(heartbeat_lock);  // NOTE(review): presumably a scoped guard that drops heartbeat_lock when `l` is destroyed -- confirm against Mutex::Locker
+ heartbeat_cond.Signal();  // NOTE(review): presumably wakes the thread running heartbeat_entry(); the waiter is not visible here -- verify
+ }
+
struct T_Heartbeat : public Thread {
OSD *osd;
T_Heartbeat(OSD *o) : osd(o) {}
@@ -1121,6 +1127,8 @@ protected:
void start_boot();
void _maybe_boot(epoch_t oldest, epoch_t newest);
void _send_boot();
+
+ void start_waiting_for_healthy();
bool _is_healthy();
friend class C_OSD_GetVersion;