summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@newdream.net>2012-02-27 09:56:21 -0800
committerSage Weil <sage@newdream.net>2012-02-27 09:56:21 -0800
commit91b119a064eb9203c0cdc3d87fb4c80cb5b90917 (patch)
tree60706812c7eeecc41915dbb8bd69e0c3d15927f6
parent402ece5e311bf352aba5209c605e857b18cf190a (diff)
downloadceph-91b119a064eb9203c0cdc3d87fb4c80cb5b90917.tar.gz
osd: fix recursive map_lock via check_replay_queue()
Also drop activate_pg() helper while we're at it, so it's clear that we are the only user. recursive lock of OSD::map_lock (33) ceph version 0.42-146-g7ad35ce (commit:7ad35ce489cc5f9169eb838e1196fa2ca4d6e985) 2012-02-24 12:30:16.541416 1: (PG::lock(bool)+0x2a) [0xa09348] 2012-02-24 12:30:16.541424 2: (OSD::_lookup_lock_pg(pg_t)+0xbd) [0x84b8df] 2012-02-24 12:30:16.541431 3: (OSD::activate_pg(pg_t, utime_t)+0x9f) [0x87463b] 2012-02-24 12:30:16.541442 4: (OSD::check_replay_queue()+0x12f) [0x87452d] 2012-02-24 12:30:16.541450 5: (OSD::tick()+0x23c) [0x8535ea] 2012-02-24 12:30:16.541456 6: (OSD::C_Tick::finish(int)+0x1f) [0x881671] 2012-02-24 12:30:16.541462 7: (SafeTimer::timer_thread()+0x2d5) [0x8f8211] 2012-02-24 12:30:16.541468 8: (SafeTimerThread::entry()+0x1c) [0x8f923c] 2012-02-24 12:30:16.541475 9: (Thread::_entry_func(void*)+0x23) [0x9c8109] 2012-02-24 12:30:16.541485 10: (()+0x68ba) [0x7f9dbed838ba] 2012-02-24 12:30:16.541491 11: (clone()+0x6d) [0x7f9dbd66f02d] 2012-02-24 12:30:16.541495 common/lockdep.cc: In function 'int lockdep_will_lock(const char*, int)' thread 7f9db9d98700 time 2012-02-24 12:30:16.541504 Signed-off-by: Sage Weil <sage.weil@dreamhost.com> Reviewed-by: Sam Just <samuel.just@dreamhost.com>
-rw-r--r--src/osd/OSD.cc50
-rw-r--r--src/osd/OSD.h2
2 files changed, 29 insertions, 23 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 4637277d830..99d41bba5d8 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1166,6 +1166,15 @@ PG *OSD::_lookup_lock_pg(pg_t pgid)
return pg;
}
+PG *OSD::_lookup_lock_pg_with_map_lock_held(pg_t pgid)
+{
+ assert(osd_lock.is_locked());
+ assert(pg_map.count(pgid));
+ PG *pg = pg_map[pgid];
+ pg->lock_with_map_lock_held();
+ return pg;
+}
+
PG *OSD::lookup_lock_raw_pg(pg_t pgid)
{
Mutex::Locker l(osd_lock);
@@ -5019,8 +5028,13 @@ void OSD::_remove_pg(PG *pg)
// =========================================================
// RECOVERY
+/*
+ * caller holds osd_lock
+ */
void OSD::check_replay_queue()
{
+ assert(osd_lock.is_locked());
+
utime_t now = ceph_clock_now(g_ceph_context);
list< pair<pg_t,utime_t> > pgids;
replay_queue_lock.Lock();
@@ -5031,29 +5045,21 @@ void OSD::check_replay_queue()
}
replay_queue_lock.Unlock();
- for (list< pair<pg_t,utime_t> >::iterator p = pgids.begin(); p != pgids.end(); p++)
- activate_pg(p->first, p->second);
-}
-
-/*
- * NOTE: this is called from SafeTimer, so caller holds osd_lock
- */
-void OSD::activate_pg(pg_t pgid, utime_t activate_at)
-{
- assert(osd_lock.is_locked());
-
- if (pg_map.count(pgid)) {
- PG *pg = _lookup_lock_pg(pgid);
- dout(10) << "activate_pg " << *pg << dendl;
- if (pg->is_active() &&
- pg->is_replay() &&
- pg->get_role() == 0 &&
- pg->replay_until == activate_at) {
- pg->replay_queued_ops();
+ for (list< pair<pg_t,utime_t> >::iterator p = pgids.begin(); p != pgids.end(); p++) {
+ pg_t pgid = p->first;
+ if (pg_map.count(pgid)) {
+ PG *pg = _lookup_lock_pg_with_map_lock_held(pgid);
+ dout(10) << "check_replay_queue " << *pg << dendl;
+ if (pg->is_active() &&
+ pg->is_replay() &&
+ pg->get_role() == 0 &&
+ pg->replay_until == p->second) {
+ pg->replay_queued_ops();
+ }
+ pg->unlock();
+ } else {
+ dout(10) << "check_replay_queue pgid " << pgid << " (not found)" << dendl;
}
- pg->unlock();
- } else {
- dout(10) << "activate_pg pgid " << pgid << " (not found)" << dendl;
}
// wake up _all_ pg waiters; raw pg -> actual pg mapping may have shifted
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index f80b725a477..ad6ff7e3c92 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -446,6 +446,7 @@ protected:
bool _have_pg(pg_t pgid);
PG *_lookup_lock_pg(pg_t pgid);
+ PG *_lookup_lock_pg_with_map_lock_held(pg_t pgid);
PG *_open_lock_pg(pg_t pg, bool no_lockdep_check=false); // create new PG (in memory)
PG *_create_lock_pg(pg_t pgid, bool newly_created,
int role, vector<int>& up, vector<int>& acting, pg_history_t history,
@@ -760,7 +761,6 @@ protected:
list< pair<pg_t, utime_t > > replay_queue;
void check_replay_queue();
- void activate_pg(pg_t pgid, utime_t activate_at);
// -- snap trimming --