diff options
author | Sage Weil <sage@inktank.com> | 2013-08-20 16:57:46 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-08-20 16:57:46 -0700 |
commit | cf8dbd248b8792781394fe87db141ad5704dc3b3 (patch) | |
tree | b31e58372ca16e5afc826ecff240a93217fa5628 | |
parent | edf2c3449ec96d91d3d7ad01c50f7a79b7b2f7cc (diff) | |
parent | 1c50c446152ab0e571ae5508edb4ad7c7614c310 (diff) | |
download | ceph-cf8dbd248b8792781394fe87db141ad5704dc3b3.tar.gz |

Merge remote-tracking branch 'gh/wip-6004' into next

Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>

-rw-r--r-- | src/osdc/ObjectCacher.cc | 31 | ||||
-rw-r--r-- | src/osdc/ObjectCacher.h | 18 |
2 files changed, 37 insertions, 12 deletions
```diff
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 51fad699555..01eeccc03be 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -30,6 +30,7 @@ ObjectCacher::BufferHead *ObjectCacher::Object::split(BufferHead *left, loff_t o
   // split off right
   ObjectCacher::BufferHead *right = new BufferHead(this);
   right->last_write_tid = left->last_write_tid;
+  right->last_read_tid = left->last_read_tid;
   right->set_state(left->get_state());
   right->snapc = left->snapc;
 
@@ -113,6 +114,10 @@ void ObjectCacher::Object::try_merge_bh(BufferHead *bh)
   assert(oc->lock.is_locked());
   ldout(oc->cct, 10) << "try_merge_bh " << *bh << dendl;
 
+  // do not merge rx buffers; last_read_tid may not match
+  if (bh->is_rx())
+    return;
+
   // to the left?
   map<loff_t,BufferHead*>::iterator p = data.find(bh->start());
   assert(p->second == bh);
@@ -500,6 +505,7 @@ ObjectCacher::ObjectCacher(CephContext *cct_, string name, WritebackHandler& wb,
     max_size(max_bytes), max_objects(max_objects),
     block_writes_upfront(block_writes_upfront),
     flush_set_callback(flush_callback), flush_set_callback_arg(flush_callback_arg),
+    last_read_tid(0),
     flusher_stop(false), flusher_thread(this), finisher(cct),
     stat_clean(0), stat_zero(0), stat_dirty(0), stat_rx(0), stat_tx(0), stat_missing(0),
     stat_error(0), stat_dirty_waiting(0), reads_outstanding(0)
@@ -603,25 +609,29 @@ void ObjectCacher::bh_read(BufferHead *bh)
                 << reads_outstanding << dendl;
 
   mark_rx(bh);
+  bh->last_read_tid = ++last_read_tid;
 
   // finisher
-  C_ReadFinish *onfinish = new C_ReadFinish(this, bh->ob,
+  C_ReadFinish *onfinish = new C_ReadFinish(this, bh->ob, bh->last_read_tid,
                                             bh->start(), bh->length());
   // go
   writeback_handler.read(bh->ob->get_oid(), bh->ob->get_oloc(),
                          bh->start(), bh->length(), bh->ob->get_snap(),
                          &onfinish->bl, bh->ob->truncate_size, bh->ob->truncate_seq,
                          onfinish);
+
   ++reads_outstanding;
 }
 
-void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
-                                  uint64_t length, bufferlist &bl, int r,
+void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, tid_t tid,
+                                  loff_t start, uint64_t length,
+                                  bufferlist &bl, int r,
                                   bool trust_enoent)
 {
   assert(lock.is_locked());
   ldout(cct, 7) << "bh_read_finish "
                 << oid
+                << " tid " << tid
                 << " " << start << "~" << length
                 << " (bl is " << bl.length() << ")"
                 << " returned " << r
@@ -711,7 +721,7 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
 
         BufferHead *bh = p->second;
         ldout(cct, 20) << "checking bh " << *bh << dendl;
-        
+
         // finishers?
         for (map<loff_t, list<Context*> >::iterator it = bh->waitfor_read.begin();
              it != bh->waitfor_read.end();
@@ -720,9 +730,9 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
         bh->waitfor_read.clear();
 
         if (bh->start() > opos) {
-          ldout(cct, 1) << "weirdness: gap when applying read results, "
-                        << opos << "~" << bh->start() - opos
-                        << dendl;
+          ldout(cct, 1) << "bh_read_finish skipping gap "
+                        << opos << "~" << bh->start() - opos
+                        << dendl;
           opos = bh->start();
           continue;
         }
@@ -733,6 +743,13 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
           continue;
         }
 
+        if (bh->last_read_tid != tid) {
+          ldout(cct, 10) << "bh_read_finish bh->last_read_tid " << bh->last_read_tid
+                         << " != tid " << tid << ", skipping" << dendl;
+          opos = bh->end();
+          continue;
+        }
+
         assert(opos >= bh->start());
         assert(bh->start() == opos);   // we don't merge rx bh's... yet!
         assert(bh->length() <= start+(loff_t)length-opos);
diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h
index 7d5ce6fad04..a62a41fd8ce 100644
--- a/src/osdc/ObjectCacher.h
+++ b/src/osdc/ObjectCacher.h
@@ -104,6 +104,7 @@ class ObjectCacher {
     Object *ob;
     bufferlist bl;
     tid_t last_write_tid;  // version of bh (if non-zero)
+    tid_t last_read_tid;   // tid of last read op (if any)
     utime_t last_write;
     SnapContext snapc;
     int error; // holds return value for failed reads
@@ -116,6 +117,7 @@ class ObjectCacher {
       ref(0),
       ob(o),
       last_write_tid(0),
+      last_read_tid(0),
       error(0) {
       ex.start = ex.length = 0;
     }
@@ -339,6 +341,8 @@ class ObjectCacher {
 
   vector<hash_map<sobject_t, Object*> > objects; // indexed by pool_id
 
+  tid_t last_read_tid;
+
   set<BufferHead*> dirty_bh;
   LRU bh_lru_dirty, bh_lru_rest;
   LRU ob_lru;
@@ -455,8 +459,9 @@ class ObjectCacher {
                 bool external_call);
 
  public:
-  void bh_read_finish(int64_t poolid, sobject_t oid, loff_t offset,
-                      uint64_t length, bufferlist &bl, int r,
+  void bh_read_finish(int64_t poolid, sobject_t oid, tid_t tid,
+                      loff_t offset, uint64_t length,
+                      bufferlist &bl, int r,
                       bool trust_enoent);
   void bh_write_commit(int64_t poolid, sobject_t oid, loff_t offset,
                        uint64_t length, tid_t t, int r);
@@ -469,17 +474,20 @@ class ObjectCacher {
     uint64_t length;
     xlist<C_ReadFinish*>::item set_item;
     bool trust_enoent;
+    tid_t tid;
 
   public:
     bufferlist bl;
-    C_ReadFinish(ObjectCacher *c, Object *ob, loff_t s, uint64_t l) :
+    C_ReadFinish(ObjectCacher *c, Object *ob, tid_t t, loff_t s, uint64_t l) :
       oc(c), poolid(ob->oloc.pool), oid(ob->get_soid()), start(s), length(l),
-      set_item(this), trust_enoent(true) {
+      set_item(this), trust_enoent(true),
+      tid(t) {
       ob->reads.push_back(&set_item);
     }
 
     void finish(int r) {
-      oc->bh_read_finish(poolid, oid, start, length, bl, r, trust_enoent);
+      oc->bh_read_finish(poolid, oid, tid, start, length, bl, r, trust_enoent);
+
       // object destructor clears the list
       if (set_item.is_on_list())
         set_item.remove_myself();
```