diff options
author | Josh Durgin <josh.durgin@inktank.com> | 2013-04-29 13:58:07 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-04-30 13:47:47 -0700 |
commit | c2bcc2a60c2c1f66c757c01ed6bcc6778821f81d (patch) | |
tree | 631c6829622003a2d8dbe760d2495695517cfea2 | |
parent | 17612a407ab7bd1d08945d663e2088c5c81cee33 (diff) | |
download | ceph-c2bcc2a60c2c1f66c757c01ed6bcc6778821f81d.tar.gz |
ObjectCacher: wait for all reads when stopping flusher
Stopping the flusher is essentially the shutdown step for the
ObjectCacher - the next thing is actually destroying it.
If we leave any reads outstanding, when they complete they will
attempt to use the now-destroyed ObjectCacher. This is particularly a
problem with rbd images, since an -ENOENT can instantly complete many
readers, so the upper layers don't wait for the other rados-level
reads of that object to finish before trying to shutdown the cache.
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | src/osdc/ObjectCacher.cc | 22 | ||||
-rw-r--r-- | src/osdc/ObjectCacher.h | 3 |
2 files changed, 23 insertions, 2 deletions
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc index 21370f0f982..56c56d640f1 100644 --- a/src/osdc/ObjectCacher.cc +++ b/src/osdc/ObjectCacher.cc @@ -502,7 +502,7 @@ ObjectCacher::ObjectCacher(CephContext *cct_, string name, WritebackHandler& wb, flush_set_callback(flush_callback), flush_set_callback_arg(flush_callback_arg), flusher_stop(false), flusher_thread(this), finisher(cct), stat_clean(0), stat_zero(0), stat_dirty(0), stat_rx(0), stat_tx(0), stat_missing(0), - stat_error(0), stat_dirty_waiting(0) + stat_error(0), stat_dirty_waiting(0), reads_outstanding(0) { this->max_dirty_age.set_from_double(max_dirty_age); perf_start(); @@ -592,7 +592,8 @@ void ObjectCacher::close_object(Object *ob) void ObjectCacher::bh_read(BufferHead *bh) { assert(lock.is_locked()); - ldout(cct, 7) << "bh_read on " << *bh << dendl; + ldout(cct, 7) << "bh_read on " << *bh << " outstanding reads " + << reads_outstanding << dendl; mark_rx(bh); @@ -607,6 +608,7 @@ void ObjectCacher::bh_read(BufferHead *bh) bh->start(), bh->length(), bh->ob->get_snap(), &onfinish->bl, oset->truncate_size, oset->truncate_seq, onfinish); + ++reads_outstanding; } void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start, @@ -619,6 +621,7 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start, << " " << start << "~" << length << " (bl is " << bl.length() << ")" << " returned " << r + << " outstanding reads " << reads_outstanding << dendl; if (bl.length() < length) { @@ -768,6 +771,8 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start, ldout(cct, 20) << "finishing waiters " << ls << dendl; finish_contexts(cct, ls, err); + --reads_outstanding; + read_cond.Signal(); } @@ -1433,6 +1438,19 @@ void ObjectCacher::flusher_entry() break; flusher_cond.WaitInterval(cct, lock, utime_t(1,0)); } + + /* Wait for reads to finish. This is only possible if handling + * -ENOENT made some read completions finish before their rados read + * came back. If we don't wait for them, and destroy the cache, when + * the rados reads do come back their callback will try to access the + * no-longer-valid ObjectCacher. + */ + while (reads_outstanding > 0) { + ldout(cct, 10) << "Waiting for all reads to complete. Number left: " + << reads_outstanding << dendl; + read_cond.Wait(lock); + } + lock.Unlock(); ldout(cct, 10) << "flusher finish" << dendl; } diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h index a17046f9126..80a90bd0457 100644 --- a/src/osdc/ObjectCacher.h +++ b/src/osdc/ObjectCacher.h @@ -444,6 +444,9 @@ class ObjectCacher { loff_t release(Object *o); void purge(Object *o); + int64_t reads_outstanding; + Cond read_cond; + int _readx(OSDRead *rd, ObjectSet *oset, Context *onfinish, bool external_call); |