summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage.weil@dreamhost.com>2012-04-29 08:17:06 -0700
committerSage Weil <sage.weil@dreamhost.com>2012-04-29 09:03:24 -0700
commitd64e1b974023e06b8d96d2c620510697f0f4957f (patch)
treea78008ee1bf7e8fae42383d9be2644167a72656f
parent4ae857cb121fc89f1fc6c58d8a55b94e3b8b93dc (diff)
downloadceph-d64e1b974023e06b8d96d2c620510697f0f4957f.tar.gz
osd: keep pgs locked during handle_osd_map dance
Currently we drop and retake locks during handle_osd_map calls to advance_map and activate_map. Instead, take them all once, and hold them. This avoids leaving dirty in-core state in the PG without the lock held. This will clearly go away as soon as the map threading stuff is redone. Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
-rw-r--r--src/osd/OSD.cc44
-rw-r--r--src/osd/PG.cc29
-rw-r--r--src/osd/PG.h9
3 files changed, 61 insertions, 21 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 0b93793ea47..b5ae763f2b5 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -3254,6 +3254,14 @@ void OSD::handle_osd_map(MOSDMap *m)
C_Contexts *fin = new C_Contexts(g_ceph_context);
+ // lock all pgs
+ for (hash_map<pg_t,PG*>::iterator i = pg_map.begin();
+ i != pg_map.end();
+ i++) {
+ PG *pg = i->second;
+ pg->lock_with_map_lock_held(true);
+ }
+
// advance through the new maps
for (epoch_t cur = start; cur <= superblock.newest_map; cur++) {
dout(10) << " advance to epoch " << cur << " (<= newest " << superblock.newest_map << ")" << dendl;
@@ -3288,16 +3296,16 @@ void OSD::handle_osd_map(MOSDMap *m)
// yay!
activate_map(t, fin->contexts);
- } else {
- // write updated pg state to store
- for (hash_map<pg_t,PG*>::iterator i = pg_map.begin();
- i != pg_map.end();
- i++) {
- PG *pg = i->second;
- pg->lock_with_map_lock_held();
- pg->write_if_dirty(t);
- pg->unlock();
- }
+ }
+
+ // write and unlock pgs
+ for (hash_map<pg_t,PG*>::iterator i = pg_map.begin();
+ i != pg_map.end();
+ i++) {
+ PG *pg = i->second;
+ //pg->lock_with_map_lock_held();
+ pg->write_if_dirty(t);
+ pg->unlock();
}
bool do_shutdown = false;
@@ -3544,10 +3552,14 @@ void OSD::advance_map(ObjectStore::Transaction& t, C_Contexts *tfin)
vector<int> newup, newacting;
osdmap->pg_to_up_acting_osds(pg->info.pgid, newup, newacting);
- pg->lock_with_map_lock_held();
+ //pg->lock_with_map_lock_held();
+
+ // update pg's osdmap ref, assert lock is held
+ pg->reassert_lock_with_map_lock_held();
+
dout(10) << "Scanning pg " << *pg << dendl;
pg->handle_advance_map(osdmap, lastmap, newup, newacting, 0);
- pg->unlock();
+ //pg->unlock();
}
// scan pgs with waiters
@@ -3590,7 +3602,7 @@ void OSD::activate_map(ObjectStore::Transaction& t, list<Context*>& tfin)
it != pg_map.end();
it++) {
PG *pg = it->second;
- pg->lock_with_map_lock_held();
+ //pg->lock_with_map_lock_held();
if (pg->is_primary())
num_pg_primary++;
@@ -3605,16 +3617,16 @@ void OSD::activate_map(ObjectStore::Transaction& t, list<Context*>& tfin)
if (!osdmap->have_pg_pool(pg->info.pgid.pool())) {
//pool is deleted!
queue_pg_for_deletion(pg);
- pg->unlock();
+ //pg->unlock();
continue;
}
PG::RecoveryCtx rctx(&query_map, &info_map, &notify_list, &tfin, &t);
pg->handle_activate_map(&rctx);
- pg->write_if_dirty(t);
+ //pg->write_if_dirty(t);
- pg->unlock();
+ //pg->unlock();
}
do_notifies(notify_list, osdmap->get_epoch()); // notify? (residual|replica)
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 7bcc642f351..8e1e4b232fa 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -48,6 +48,12 @@ void PG::lock(bool no_lockdep)
osd->map_lock.put_read();
_lock.Lock(no_lockdep);
osdmap_ref.swap(map);
+
+ // if we have unrecorded dirty state with the lock dropped, there is a bug
+ assert(!dirty_info);
+ assert(!dirty_log);
+
+ dout(30) << "lock" << dendl;
}
/*
@@ -58,6 +64,29 @@ void PG::lock_with_map_lock_held(bool no_lockdep)
{
_lock.Lock(no_lockdep);
osdmap_ref = osd->osdmap;
+
+ // if we have unrecorded dirty state with the lock dropped, there is a bug
+ assert(!dirty_info);
+ assert(!dirty_log);
+
+ dout(30) << "lock_with_map_lock_held" << dendl;
+}
+
+void PG::reassert_lock_with_map_lock_held()
+{
+ assert(_lock.is_locked());
+ osdmap_ref = osd->osdmap;
+
+ dout(30) << "reassert_lock_with_map_lock_held" << dendl;
+}
+
+void PG::unlock()
+{
+ dout(30) << "unlock" << dendl;
+ assert(!dirty_info);
+ assert(!dirty_log);
+ osdmap_ref.reset();
+ _lock.Unlock();
}
std::string PG::gen_prefix() const
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 6b107bdb550..bb749119986 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -358,16 +358,15 @@ public:
bool deleting; // true while RemoveWQ should be chewing on us
void lock(bool no_lockdep = false);
- void unlock() {
- //generic_dout(0) << this << " " << info.pgid << " unlock" << dendl;
- osdmap_ref.reset();
- _lock.Unlock();
- }
+ void unlock();
/* During handle_osd_map, the osd holds a write lock to the osdmap.
* *_with_map_lock_held assume that the map_lock is already held */
void lock_with_map_lock_held(bool no_lockdep = false);
+ // assert we still have lock held, and update our map ref
+ void reassert_lock_with_map_lock_held();
+
void assert_locked() {
assert(_lock.is_locked());
}