diff options
author | Sage Weil <sage@inktank.com> | 2012-11-29 14:16:16 -0800 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2012-11-29 15:15:31 -0800 |
commit | 774d5bf14c4dbc64d3acdffa24df9bca076d8d97 (patch) | |
tree | 72d42a707d8cc584700768afee8398660674a51c | |
parent | b31a99abda75b9170a5805b02944a0c0c78245b7 (diff) | |
download | ceph-774d5bf14c4dbc64d3acdffa24df9bca076d8d97.tar.gz |
osd: move next_osdmap under separate lock
It doesn't actually interfere with publish_lock or the current osdmap ref.
Document what is going on.
Always precede publish_map() with one or more pre_publish_map() calls.
Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | src/osd/OSD.cc | 8 | ||||
-rw-r--r-- | src/osd/OSD.h | 34 |
2 files changed, 29 insertions, 13 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 2f4eb0f56ec..a252265fe9f 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -158,6 +158,7 @@ OSDService::OSDService(OSD *osd) : rep_scrub_wq(osd->rep_scrub_wq), class_handler(osd->class_handler), publish_lock("OSDService::publish_lock"), + pre_publish_lock("OSDService::pre_publish_lock"), sched_scrub_lock("OSDService::sched_scrub_lock"), scrubs_pending(0), scrubs_active(0), watch_lock("OSD::watch_lock"), @@ -2498,7 +2499,7 @@ void OSD::send_alive() void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epoch) { - Mutex::Locker l(publish_lock); + Mutex::Locker l(pre_publish_lock); // service map is always newer/newest assert(from_epoch <= next_osdmap->get_epoch()); @@ -2513,7 +2514,7 @@ void OSDService::send_message_osd_cluster(int peer, Message *m, epoch_t from_epo Connection *OSDService::get_con_osd_cluster(int peer, epoch_t from_epoch) { - Mutex::Locker l(publish_lock); + Mutex::Locker l(pre_publish_lock); // service map is always newer/newest assert(from_epoch <= next_osdmap->get_epoch()); @@ -2527,7 +2528,7 @@ Connection *OSDService::get_con_osd_cluster(int peer, epoch_t from_epoch) Connection *OSDService::get_con_osd_hb(int peer, epoch_t from_epoch) { - Mutex::Locker l(publish_lock); + Mutex::Locker l(pre_publish_lock); // service map is always newer/newest assert(from_epoch <= next_osdmap->get_epoch()); @@ -3995,6 +3996,7 @@ void OSD::activate_map() } to_remove.clear(); + service.pre_publish_map(osdmap); service.publish_map(osdmap); // scan pg's diff --git a/src/osd/OSD.h b/src/osd/OSD.h index e5258b0c99f..2b623efa339 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -183,7 +183,7 @@ public: ClassHandler *&class_handler; // -- superblock -- - Mutex publish_lock; + Mutex publish_lock, pre_publish_lock; OSDSuperblock superblock; OSDSuperblock get_superblock() { Mutex::Locker l(publish_lock); @@ -193,24 +193,38 @@ public: Mutex::Locker l(publish_lock); superblock = block; } - OSDMapRef 
osdmap, next_osdmap; + + int get_nodeid() const { return whoami; } + + OSDMapRef osdmap; OSDMapRef get_osdmap() { Mutex::Locker l(publish_lock); return osdmap; } - void pre_publish_map(OSDMapRef map) { - Mutex::Locker l(publish_lock); - next_osdmap = map; - } void publish_map(OSDMapRef map) { Mutex::Locker l(publish_lock); osdmap = map; - next_osdmap = map; } - int get_nodeid() const { return whoami; } - - // -- message helpers -- + /* + * osdmap - current published amp + * next_osdmap - pre_published map that is about to be published. + * + * We use the next_osdmap to send messages and initiate connections, + * but only if the target is the same instance as the one in the map + * epoch the current user is working from (i.e., the result is + * equivalent to what is in next_osdmap). + * + * This allows the helpers to start ignoring osds that are about to + * go down, and let OSD::handle_osd_map()/note_down_osd() mark them + * down, without worrying about reopening connections from threads + * working from old maps. + */ + OSDMapRef next_osdmap; + void pre_publish_map(OSDMapRef map) { + Mutex::Locker l(pre_publish_lock); + next_osdmap = map; + } Connection *get_con_osd_cluster(int peer, epoch_t from_epoch); Connection *get_con_osd_hb(int peer, epoch_t from_epoch); void send_message_osd_cluster(int peer, Message *m, epoch_t from_epoch); |