summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2012-11-29 14:16:16 -0800
committerSage Weil <sage@inktank.com>2012-11-29 15:15:31 -0800
commit774d5bf14c4dbc64d3acdffa24df9bca076d8d97 (patch)
tree72d42a707d8cc584700768afee8398660674a51c
parentb31a99abda75b9170a5805b02944a0c0c78245b7 (diff)
downloadceph-774d5bf14c4dbc64d3acdffa24df9bca076d8d97.tar.gz
osd: move next_osdmap under separate lock
It doesn't actually interfere with publish_lock or the current osdmap ref. Document what is going on. Always precede publish_map() with one or more pre_publish_map() calls. Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/osd/OSD.cc8
-rw-r--r--src/osd/OSD.h34
2 files changed, 29 insertions, 13 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 2f4eb0f56ec..a252265fe9f 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -158,6 +158,7 @@ OSDService::OSDService(OSD *osd) :
rep_scrub_wq(osd->rep_scrub_wq),
class_handler(osd->class_handler),
publish_lock("OSDService::publish_lock"),
+ pre_publish_lock("OSDService::pre_publish_lock"),
sched_scrub_lock("OSDService::sched_scrub_lock"), scrubs_pending(0),
scrubs_active(0),
watch_lock("OSD::watch_lock"),
@@ -2498,7 +2499,7 @@ void OSD::send_alive()
void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epoch)
{
- Mutex::Locker l(publish_lock);
+ Mutex::Locker l(pre_publish_lock);
// service map is always newer/newest
assert(from_epoch <= next_osdmap->get_epoch());
@@ -2513,7 +2514,7 @@ void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epo
Connection *OSDService::get_con_osd_cluster(int peer, epoch_t from_epoch)
{
- Mutex::Locker l(publish_lock);
+ Mutex::Locker l(pre_publish_lock);
// service map is always newer/newest
assert(from_epoch <= next_osdmap->get_epoch());
@@ -2527,7 +2528,7 @@ Connection *OSDService::get_con_osd_cluster(int peer, epoch_t from_epoch)
Connection *OSDService::get_con_osd_hb(int peer, epoch_t from_epoch)
{
- Mutex::Locker l(publish_lock);
+ Mutex::Locker l(pre_publish_lock);
// service map is always newer/newest
assert(from_epoch <= next_osdmap->get_epoch());
@@ -3995,6 +3996,7 @@ void OSD::activate_map()
}
to_remove.clear();
+ service.pre_publish_map(osdmap);
service.publish_map(osdmap);
// scan pg's
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index e5258b0c99f..2b623efa339 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -183,7 +183,7 @@ public:
ClassHandler *&class_handler;
// -- superblock --
- Mutex publish_lock;
+ Mutex publish_lock, pre_publish_lock;
OSDSuperblock superblock;
OSDSuperblock get_superblock() {
Mutex::Locker l(publish_lock);
@@ -193,24 +193,38 @@ public:
Mutex::Locker l(publish_lock);
superblock = block;
}
- OSDMapRef osdmap, next_osdmap;
+
+ int get_nodeid() const { return whoami; }
+
+ OSDMapRef osdmap;
OSDMapRef get_osdmap() {
Mutex::Locker l(publish_lock);
return osdmap;
}
- void pre_publish_map(OSDMapRef map) {
- Mutex::Locker l(publish_lock);
- next_osdmap = map;
- }
void publish_map(OSDMapRef map) {
Mutex::Locker l(publish_lock);
osdmap = map;
- next_osdmap = map;
}
- int get_nodeid() const { return whoami; }
-
- // -- message helpers --
+ /*
+ * osdmap - current published map
+ * next_osdmap - pre_published map that is about to be published.
+ *
+ * We use the next_osdmap to send messages and initiate connections,
+ * but only if the target is the same instance as the one in the map
+ * epoch the current user is working from (i.e., the result is
+ * equivalent to what is in next_osdmap).
+ *
+ * This allows the helpers to start ignoring osds that are about to
+ * go down, and let OSD::handle_osd_map()/note_down_osd() mark them
+ * down, without worrying about reopening connections from threads
+ * working from old maps.
+ */
+ OSDMapRef next_osdmap;
+ void pre_publish_map(OSDMapRef map) {
+ Mutex::Locker l(pre_publish_lock);
+ next_osdmap = map;
+ }
Connection *get_con_osd_cluster(int peer, epoch_t from_epoch);
Connection *get_con_osd_hb(int peer, epoch_t from_epoch);
void send_message_osd_cluster(int peer, Message *m, epoch_t from_epoch);