diff options
author | Sage Weil <sage@newdream.net> | 2012-02-27 09:56:21 -0800 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2012-02-27 09:56:21 -0800 |
commit | 91b119a064eb9203c0cdc3d87fb4c80cb5b90917 (patch) | |
tree | 60706812c7eeecc41915dbb8bd69e0c3d15927f6 | |
parent | 402ece5e311bf352aba5209c605e857b18cf190a (diff) | |
download | ceph-91b119a064eb9203c0cdc3d87fb4c80cb5b90917.tar.gz |
osd: fix recursive map_lock via check_replay_queue()
Also drop activate_pg() helper while we're at it, so it's clear that we
are the only user.
recursive lock of OSD::map_lock (33)
ceph version 0.42-146-g7ad35ce (commit:7ad35ce489cc5f9169eb838e1196fa2ca4d6e985)
2012-02-24 12:30:16.541416 1: (PG::lock(bool)+0x2a) [0xa09348]
2012-02-24 12:30:16.541424 2: (OSD::_lookup_lock_pg(pg_t)+0xbd) [0x84b8df]
2012-02-24 12:30:16.541431 3: (OSD::activate_pg(pg_t, utime_t)+0x9f) [0x87463b]
2012-02-24 12:30:16.541442 4: (OSD::check_replay_queue()+0x12f) [0x87452d]
2012-02-24 12:30:16.541450 5: (OSD::tick()+0x23c) [0x8535ea]
2012-02-24 12:30:16.541456 6: (OSD::C_Tick::finish(int)+0x1f) [0x881671]
2012-02-24 12:30:16.541462 7: (SafeTimer::timer_thread()+0x2d5) [0x8f8211]
2012-02-24 12:30:16.541468 8: (SafeTimerThread::entry()+0x1c) [0x8f923c]
2012-02-24 12:30:16.541475 9: (Thread::_entry_func(void*)+0x23) [0x9c8109]
2012-02-24 12:30:16.541485 10: (()+0x68ba) [0x7f9dbed838ba]
2012-02-24 12:30:16.541491 11: (clone()+0x6d) [0x7f9dbd66f02d]
2012-02-24 12:30:16.541495 common/lockdep.cc: In function 'int lockdep_will_lock(const char*, int)' thread 7f9db9d98700 time 2012-02-24 12:30:16.541504
Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
Reviewed-by: Sam Just <samuel.just@dreamhost.com>
-rw-r--r-- | src/osd/OSD.cc | 50 | ||||
-rw-r--r-- | src/osd/OSD.h | 2 |
2 files changed, 29 insertions, 23 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 4637277d830..99d41bba5d8 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1166,6 +1166,15 @@ PG *OSD::_lookup_lock_pg(pg_t pgid) return pg; } +PG *OSD::_lookup_lock_pg_with_map_lock_held(pg_t pgid) +{ + assert(osd_lock.is_locked()); + assert(pg_map.count(pgid)); + PG *pg = pg_map[pgid]; + pg->lock_with_map_lock_held(); + return pg; +} + PG *OSD::lookup_lock_raw_pg(pg_t pgid) { Mutex::Locker l(osd_lock); @@ -5019,8 +5028,13 @@ void OSD::_remove_pg(PG *pg) // ========================================================= // RECOVERY +/* + * caller holds osd_lock + */ void OSD::check_replay_queue() { + assert(osd_lock.is_locked()); + utime_t now = ceph_clock_now(g_ceph_context); list< pair<pg_t,utime_t> > pgids; replay_queue_lock.Lock(); @@ -5031,29 +5045,21 @@ void OSD::check_replay_queue() } replay_queue_lock.Unlock(); - for (list< pair<pg_t,utime_t> >::iterator p = pgids.begin(); p != pgids.end(); p++) - activate_pg(p->first, p->second); -} - -/* - * NOTE: this is called from SafeTimer, so caller holds osd_lock - */ -void OSD::activate_pg(pg_t pgid, utime_t activate_at) -{ - assert(osd_lock.is_locked()); - - if (pg_map.count(pgid)) { - PG *pg = _lookup_lock_pg(pgid); - dout(10) << "activate_pg " << *pg << dendl; - if (pg->is_active() && - pg->is_replay() && - pg->get_role() == 0 && - pg->replay_until == activate_at) { - pg->replay_queued_ops(); + for (list< pair<pg_t,utime_t> >::iterator p = pgids.begin(); p != pgids.end(); p++) { + pg_t pgid = p->first; + if (pg_map.count(pgid)) { + PG *pg = _lookup_lock_pg_with_map_lock_held(pgid); + dout(10) << "check_replay_queue " << *pg << dendl; + if (pg->is_active() && + pg->is_replay() && + pg->get_role() == 0 && + pg->replay_until == p->second) { + pg->replay_queued_ops(); + } + pg->unlock(); + } else { + dout(10) << "check_replay_queue pgid " << pgid << " (not found)" << dendl; } - pg->unlock(); - } else { - dout(10) << "activate_pg pgid " << pgid << " (not found)" << dendl; } // wake up _all_ pg waiters; raw pg -> actual pg mapping may have shifted diff --git a/src/osd/OSD.h b/src/osd/OSD.h index f80b725a477..ad6ff7e3c92 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -446,6 +446,7 @@ protected: bool _have_pg(pg_t pgid); PG *_lookup_lock_pg(pg_t pgid); + PG *_lookup_lock_pg_with_map_lock_held(pg_t pgid); PG *_open_lock_pg(pg_t pg, bool no_lockdep_check=false); // create new PG (in memory) PG *_create_lock_pg(pg_t pgid, bool newly_created, int role, vector<int>& up, vector<int>& acting, pg_history_t history, @@ -760,7 +761,6 @@ protected: list< pair<pg_t, utime_t > > replay_queue; void check_replay_queue(); - void activate_pg(pg_t pgid, utime_t activate_at); // -- snap trimming -- |