summaryrefslogtreecommitdiff
path: root/src/osd/OSD.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/osd/OSD.cc')
-rw-r--r--src/osd/OSD.cc75
1 files changed, 59 insertions, 16 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 4bbcd06243e..cf08418262b 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2292,6 +2292,8 @@ void OSD::maybe_update_heartbeat_peers()
return;
heartbeat_need_update = false;
+ dout(10) << "maybe_update_heartbeat_peers updating" << dendl;
+
heartbeat_epoch = osdmap->get_epoch();
// build heartbeat from set
@@ -2314,25 +2316,66 @@ void OSD::maybe_update_heartbeat_peers()
pg->heartbeat_peer_lock.Unlock();
}
- map<int,HeartbeatInfo>::iterator p = heartbeat_peers.begin();
- while (p != heartbeat_peers.end()) {
+ // include next and previous up osds to ensure we have a fully-connected set
+ set<int> want, extras;
+ int next = osdmap->get_next_up_osd_after(whoami);
+ if (next >= 0)
+ want.insert(next);
+ int prev = osdmap->get_previous_up_osd_before(whoami);
+ if (prev >= 0)
+ want.insert(prev);
+
+ for (set<int>::iterator p = want.begin(); p != want.end(); ++p) {
+ dout(10) << " adding neighbor peer osd." << *p << dendl;
+ extras.insert(*p);
+ _add_heartbeat_peer(*p);
+ }
+
+ // identify extras
+ for (map<int,HeartbeatInfo>::iterator p = heartbeat_peers.begin();
+ p != heartbeat_peers.end();
+ ++p) {
if (p->second.epoch < osdmap->get_epoch()) {
- dout(20) << " removing heartbeat peer osd." << p->first
- << " " << p->second.con_back->get_peer_addr()
- << " " << (p->second.con_front ? p->second.con_front->get_peer_addr() : entity_addr_t())
- << dendl;
- hbclient_messenger->mark_down(p->second.con_back);
- p->second.con_back->put();
- if (p->second.con_front) {
- hbclient_messenger->mark_down(p->second.con_front);
- p->second.con_front->put();
- }
- heartbeat_peers.erase(p++);
- } else {
- ++p;
+ extras.insert(p->first);
+ }
+ }
+
+ // too few?
+ int start = osdmap->get_next_up_osd_after(whoami);
+ for (int n = start; n >= 0; ) {
+ if ((int)heartbeat_peers.size() >= g_conf->osd_heartbeat_min_peers)
+ break;
+ if (!extras.count(n) && !want.count(n) && n != whoami) {
+ dout(10) << " adding random peer osd." << n << dendl;
+ extras.insert(n);
+ _add_heartbeat_peer(n);
+ }
+ n = osdmap->get_next_up_osd_after(n);
+ if (n == start)
+ break; // came full circle; stop
+ }
+
+ // too many?
+ for (set<int>::iterator p = extras.begin();
+ (int)heartbeat_peers.size() > g_conf->osd_heartbeat_min_peers && p != extras.end();
+ ++p) {
+ if (want.count(*p))
+ continue;
+ map<int,HeartbeatInfo>::iterator q = heartbeat_peers.find(*p);
+ dout(20) << " removing heartbeat peer osd." << q->first
+ << " " << q->second.con_back->get_peer_addr()
+ << " " << (q->second.con_front ? q->second.con_front->get_peer_addr() : entity_addr_t())
+ << dendl;
+ hbclient_messenger->mark_down(q->second.con_back);
+ q->second.con_back->put();
+ if (q->second.con_front) {
+ hbclient_messenger->mark_down(q->second.con_front);
+ q->second.con_front->put();
}
+ heartbeat_peers.erase(q);
}
- dout(10) << "maybe_update_heartbeat_peers " << heartbeat_peers.size() << " peers" << dendl;
+
+ dout(10) << "maybe_update_heartbeat_peers " << heartbeat_peers.size() << " peers, extras " << extras << dendl;
}
void OSD::reset_heartbeat_peers()