diff options
author | Dan Mick <dan.mick@inktank.com> | 2012-12-14 16:36:27 -0800 |
---|---|---|
committer | Dan Mick <dan.mick@inktank.com> | 2012-12-14 16:36:27 -0800 |
commit | 4e39c37d1130b68694db01c7ff886ee3b6a93501 (patch) | |
tree | a5b391a46b61fe947fbb296df0768cc0acae65d7 | |
parent | 8cf367cb79046b08cc593b14f77526eef2758ee6 (diff) | |
download | ceph-4e39c37d1130b68694db01c7ff886ee3b6a93501.tar.gz |
Striper: permit sparse reads to propagate through assemble_results
Striping was always zero-filling buffers that could be sparse;
noticed by customers in librbd read_iterate(). This allows
buffer results to come back with null buffers, so that librbd
and rbd can preserve sparseness.
Also, copious comments and more logging added.
Signed-off-by: Dan Mick <dan.mick@inktank.com>
-rw-r--r-- | src/osdc/Striper.cc | 29 |
1 files changed, 27 insertions, 2 deletions
diff --git a/src/osdc/Striper.cc b/src/osdc/Striper.cc index 7b5a402893f..40da177f254 100644 --- a/src/osdc/Striper.cc +++ b/src/osdc/Striper.cc @@ -272,26 +272,51 @@ void Striper::StripedReadResult::assemble_result(CephContext *cct, bufferlist& b if (p == partial.rend()) return; + // partial is a map of object offset to (bufferlist, desired object length) + // end = objoff + objlen, or 1 past end in object uint64_t end = p->first + p->second.second; while (p != partial.rend()) { // sanity check - ldout(cct, 20) << "assemble_result(" << this << ") " << p->first << "~" << p->second.second + ldout(cct, 20) << "assemble_result(" << this << ") " + << p->first << "~" << p->second.second << " " << p->second.first.length() << " bytes" << dendl; + // require offset of this chunk to be "current position less + // length of this chunk"; that is, require contiguity assert(p->first == end - p->second.second); + + // new position is offset of this chunk end = p->first; + // len is actual data length size_t len = p->second.first.length(); - if (len < p->second.second) { + + // if there's data, but less than required: + if (len && (len < p->second.second)) { + // if zero_tail mode or we've already accumulated some data in bl if (zero_tail || bl.length()) { + // create a zero end-of-chunk, put it on front of the output bl, + // and suck any data we *did* have off the partial entry into output bl + ldout(cct, 20) << "assemble_result(" << this << ")" + << " adding " << p->second.first.length() + << " data, padded with " << p->second.second - len + << " zeros" << dendl; bufferptr bp(p->second.second - len); bp.zero(); bl.push_front(bp); bl.claim_prepend(p->second.first); } else { + // if there's less, but we don't have any yet or not zero-tail, + // suck everything we have onto the output list (partial end of req) + ldout(cct, 20) << "assemble_result(" << this << ") adding partial " + << "buffer len " << p->second.first.length() << dendl; bl.claim_prepend(p->second.first); } } else { 
+ // if we have all the data, or there is none at all, grab a full + // or empty buffer and stick it on the front here + ldout(cct, 20) << "assemble_result(" << this << ") adding entire " + << "buffer len " << p->second.first.length() << dendl; bl.claim_prepend(p->second.first); } p++; |