summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2013-08-20 16:57:46 -0700
committer Sage Weil <sage@inktank.com> 2013-08-20 16:57:46 -0700
commit cf8dbd248b8792781394fe87db141ad5704dc3b3 (patch)
tree b31e58372ca16e5afc826ecff240a93217fa5628
parent edf2c3449ec96d91d3d7ad01c50f7a79b7b2f7cc (diff)
parent 1c50c446152ab0e571ae5508edb4ad7c7614c310 (diff)
download ceph-cf8dbd248b8792781394fe87db141ad5704dc3b3.tar.gz
Merge remote-tracking branch 'gh/wip-6004' into next
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
-rw-r--r-- src/osdc/ObjectCacher.cc 31
-rw-r--r-- src/osdc/ObjectCacher.h 18
2 files changed, 37 insertions, 12 deletions
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 51fad699555..01eeccc03be 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -30,6 +30,7 @@ ObjectCacher::BufferHead *ObjectCacher::Object::split(BufferHead *left, loff_t o
// split off right
ObjectCacher::BufferHead *right = new BufferHead(this);
right->last_write_tid = left->last_write_tid;
+ right->last_read_tid = left->last_read_tid;
right->set_state(left->get_state());
right->snapc = left->snapc;
@@ -113,6 +114,10 @@ void ObjectCacher::Object::try_merge_bh(BufferHead *bh)
assert(oc->lock.is_locked());
ldout(oc->cct, 10) << "try_merge_bh " << *bh << dendl;
+ // do not merge rx buffers; last_read_tid may not match
+ if (bh->is_rx())
+ return;
+
// to the left?
map<loff_t,BufferHead*>::iterator p = data.find(bh->start());
assert(p->second == bh);
@@ -500,6 +505,7 @@ ObjectCacher::ObjectCacher(CephContext *cct_, string name, WritebackHandler& wb,
max_size(max_bytes), max_objects(max_objects),
block_writes_upfront(block_writes_upfront),
flush_set_callback(flush_callback), flush_set_callback_arg(flush_callback_arg),
+ last_read_tid(0),
flusher_stop(false), flusher_thread(this), finisher(cct),
stat_clean(0), stat_zero(0), stat_dirty(0), stat_rx(0), stat_tx(0), stat_missing(0),
stat_error(0), stat_dirty_waiting(0), reads_outstanding(0)
@@ -603,25 +609,29 @@ void ObjectCacher::bh_read(BufferHead *bh)
<< reads_outstanding << dendl;
mark_rx(bh);
+ bh->last_read_tid = ++last_read_tid;
// finisher
- C_ReadFinish *onfinish = new C_ReadFinish(this, bh->ob,
+ C_ReadFinish *onfinish = new C_ReadFinish(this, bh->ob, bh->last_read_tid,
bh->start(), bh->length());
// go
writeback_handler.read(bh->ob->get_oid(), bh->ob->get_oloc(),
bh->start(), bh->length(), bh->ob->get_snap(),
&onfinish->bl, bh->ob->truncate_size, bh->ob->truncate_seq,
onfinish);
+
++reads_outstanding;
}
-void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
- uint64_t length, bufferlist &bl, int r,
+void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, tid_t tid,
+ loff_t start, uint64_t length,
+ bufferlist &bl, int r,
bool trust_enoent)
{
assert(lock.is_locked());
ldout(cct, 7) << "bh_read_finish "
<< oid
+ << " tid " << tid
<< " " << start << "~" << length
<< " (bl is " << bl.length() << ")"
<< " returned " << r
@@ -711,7 +721,7 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
BufferHead *bh = p->second;
ldout(cct, 20) << "checking bh " << *bh << dendl;
-
+
// finishers?
for (map<loff_t, list<Context*> >::iterator it = bh->waitfor_read.begin();
it != bh->waitfor_read.end();
@@ -720,9 +730,9 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
bh->waitfor_read.clear();
if (bh->start() > opos) {
- ldout(cct, 1) << "weirdness: gap when applying read results, "
- << opos << "~" << bh->start() - opos
- << dendl;
+ ldout(cct, 1) << "bh_read_finish skipping gap "
+ << opos << "~" << bh->start() - opos
+ << dendl;
opos = bh->start();
continue;
}
@@ -733,6 +743,13 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, loff_t start,
continue;
}
+ if (bh->last_read_tid != tid) {
+ ldout(cct, 10) << "bh_read_finish bh->last_read_tid " << bh->last_read_tid
+ << " != tid " << tid << ", skipping" << dendl;
+ opos = bh->end();
+ continue;
+ }
+
assert(opos >= bh->start());
assert(bh->start() == opos); // we don't merge rx bh's... yet!
assert(bh->length() <= start+(loff_t)length-opos);
diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h
index 7d5ce6fad04..a62a41fd8ce 100644
--- a/src/osdc/ObjectCacher.h
+++ b/src/osdc/ObjectCacher.h
@@ -104,6 +104,7 @@ class ObjectCacher {
Object *ob;
bufferlist bl;
tid_t last_write_tid; // version of bh (if non-zero)
+ tid_t last_read_tid; // tid of last read op (if any)
utime_t last_write;
SnapContext snapc;
int error; // holds return value for failed reads
@@ -116,6 +117,7 @@ class ObjectCacher {
ref(0),
ob(o),
last_write_tid(0),
+ last_read_tid(0),
error(0) {
ex.start = ex.length = 0;
}
@@ -339,6 +341,8 @@ class ObjectCacher {
vector<hash_map<sobject_t, Object*> > objects; // indexed by pool_id
+ tid_t last_read_tid;
+
set<BufferHead*> dirty_bh;
LRU bh_lru_dirty, bh_lru_rest;
LRU ob_lru;
@@ -455,8 +459,9 @@ class ObjectCacher {
bool external_call);
public:
- void bh_read_finish(int64_t poolid, sobject_t oid, loff_t offset,
- uint64_t length, bufferlist &bl, int r,
+ void bh_read_finish(int64_t poolid, sobject_t oid, tid_t tid,
+ loff_t offset, uint64_t length,
+ bufferlist &bl, int r,
bool trust_enoent);
void bh_write_commit(int64_t poolid, sobject_t oid, loff_t offset,
uint64_t length, tid_t t, int r);
@@ -469,17 +474,20 @@ class ObjectCacher {
uint64_t length;
xlist<C_ReadFinish*>::item set_item;
bool trust_enoent;
+ tid_t tid;
public:
bufferlist bl;
- C_ReadFinish(ObjectCacher *c, Object *ob, loff_t s, uint64_t l) :
+ C_ReadFinish(ObjectCacher *c, Object *ob, tid_t t, loff_t s, uint64_t l) :
oc(c), poolid(ob->oloc.pool), oid(ob->get_soid()), start(s), length(l),
- set_item(this), trust_enoent(true) {
+ set_item(this), trust_enoent(true),
+ tid(t) {
ob->reads.push_back(&set_item);
}
void finish(int r) {
- oc->bh_read_finish(poolid, oid, start, length, bl, r, trust_enoent);
+ oc->bh_read_finish(poolid, oid, tid, start, length, bl, r, trust_enoent);
+
// object destructor clears the list
if (set_item.is_on_list())
set_item.remove_myself();