summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Mick <dan.mick@inktank.com>2012-12-14 16:36:27 -0800
committerDan Mick <dan.mick@inktank.com>2012-12-14 16:36:27 -0800
commit4e39c37d1130b68694db01c7ff886ee3b6a93501 (patch)
treea5b391a46b61fe947fbb296df0768cc0acae65d7
parent8cf367cb79046b08cc593b14f77526eef2758ee6 (diff)
downloadceph-4e39c37d1130b68694db01c7ff886ee3b6a93501.tar.gz
Striper: permit sparse reads to propagate through assemble_results
Striping was always zero-filling buffers that could be sparse; noticed by customers in librbd read_iterate(). This allows buffer results to come back with null buffers, so that librbd and rbd can preserve sparseness. Also, copious comments and more logging added. Signed-off-by: Dan Mick <dan.mick@inktank.com>
-rw-r--r--src/osdc/Striper.cc29
1 files changed, 27 insertions, 2 deletions
diff --git a/src/osdc/Striper.cc b/src/osdc/Striper.cc
index 7b5a402893f..40da177f254 100644
--- a/src/osdc/Striper.cc
+++ b/src/osdc/Striper.cc
@@ -272,26 +272,51 @@ void Striper::StripedReadResult::assemble_result(CephContext *cct, bufferlist& b
if (p == partial.rend())
return;
+ // partial is a map of object offset to (bufferlist, desired object length)
+ // end = objoff + objlen, or 1 past end in object
uint64_t end = p->first + p->second.second;
while (p != partial.rend()) {
// sanity check
- ldout(cct, 20) << "assemble_result(" << this << ") " << p->first << "~" << p->second.second
+ ldout(cct, 20) << "assemble_result(" << this << ") "
+ << p->first << "~" << p->second.second
<< " " << p->second.first.length() << " bytes"
<< dendl;
+ // require offset of this chunk to be "current position less
+ // length of this chunk"; that is, require contiguity
assert(p->first == end - p->second.second);
+
+ // new position is offset of this chunk
end = p->first;
+ // len is actual data length
size_t len = p->second.first.length();
- if (len < p->second.second) {
+
+ // if there's data, but less than required:
+ if (len && (len < p->second.second)) {
+ // if zero_tail mode or we've already accumulated some data in bl
if (zero_tail || bl.length()) {
+ // create a zero end-of-chunk, put it on front of the output bl,
+ // and suck any data we *did* have off the partial entry into output bl
+ ldout(cct, 20) << "assemble_result(" << this << ")"
+ << " adding " << p->second.first.length()
+ << " data, padded with " << p->second.second - len
+ << " zeros" << dendl;
bufferptr bp(p->second.second - len);
bp.zero();
bl.push_front(bp);
bl.claim_prepend(p->second.first);
} else {
+ // if there's less, but we don't have any yet or not zero-tail,
+ // suck everything we have onto the output list (partial end of req)
+ ldout(cct, 20) << "assemble_result(" << this << ") adding partial "
+ << "buffer len " << p->second.first.length() << dendl;
bl.claim_prepend(p->second.first);
}
} else {
+ // if we have all the data, or there is none at all, grab a full
+ // or empty buffer and stick it on the front here
+ ldout(cct, 20) << "assemble_result(" << this << ") adding entire "
+ << "buffer len " << p->second.first.length() << dendl;
bl.claim_prepend(p->second.first);
}
p++;