Striper: permit sparse reads to propagate through assemble_result

Striping was always zero-filling buffers that could be sparse; noticed by customers in librbd read_iterate(). This allows buffer results to come back with null buffers, so that librbd and rbd can preserve sparseness. Also, copious comments and more logging added. Signed-off-by: Dan Mick <dan.mick@inktank.com>
author: Dan Mick <dan.mick@inktank.com> 2012-12-14 16:36:27 -0800
committer: Dan Mick <dan.mick@inktank.com> 2012-12-14 16:36:27 -0800
commit: 4e39c37d1130b68694db01c7ff886ee3b6a93501 (patch)
tree: a5b391a46b61fe947fbb296df0768cc0acae65d7
parent: 8cf367cb79046b08cc593b14f77526eef2758ee6 (diff)
download: ceph-4e39c37d1130b68694db01c7ff886ee3b6a93501.tar.gz
1 files changed, 27 insertions, 2 deletions
diff --git a/src/osdc/Striper.cc b/src/osdc/Striper.cc
index 7b5a402893f..40da177f254 100644
--- a/src/osdc/Striper.cc
+++ b/src/osdc/Striper.cc
@@ -272,26 +272,51 @@ void Striper::StripedReadResult::assemble_result(CephContext *cct, bufferlist& b
   if (p == partial.rend())
     return;
 
+  // partial is a map of object offset to (bufferlist, desired object length)
+  // end = objoff + objlen, or 1 past end in object
   uint64_t end = p->first + p->second.second;
   while (p != partial.rend()) {
     // sanity check
-    ldout(cct, 20) << "assemble_result(" << this << ") " << p->first << "~" << p->second.second
+    ldout(cct, 20) << "assemble_result(" << this << ") "
+		   << p->first << "~" << p->second.second
 		   << " " << p->second.first.length() << " bytes"
 		   << dendl;
+    // require offset of this chunk to be "current position less
+    // length of this chunk"; that is, require contiguity
     assert(p->first == end - p->second.second);
+
+    // new position is offset of this chunk
     end = p->first;
 
+    // len is actual data length
     size_t len = p->second.first.length();
-    if (len < p->second.second) {
+
+    // if there's data, but less than required:
+    if (len && (len < p->second.second)) {
+      // if zero_tail mode or we've already accumulated some data in bl
       if (zero_tail || bl.length()) {
+	// create a zero end-of-chunk, put it on front of the output bl,
+	// and suck any data we *did* have off the partial entry into output bl
+	ldout(cct, 20) << "assemble_result(" << this << ")"
+		       << " adding " << p->second.first.length()
+		       << " data, padded with " << p->second.second - len
+		       << " zeros" << dendl;
 	bufferptr bp(p->second.second - len);
 	bp.zero();
 	bl.push_front(bp);
 	bl.claim_prepend(p->second.first);
       } else {
+	// if there's less, but we don't have any yet or not zero-tail,
+	// suck everything we have onto the output list (partial end of req)
+	ldout(cct, 20) << "assemble_result(" << this << ") adding partial "
+		       << "buffer len " << p->second.first.length() << dendl;
 	bl.claim_prepend(p->second.first);
       }
     } else {
+      // if we have all the data, or there is none at all, grab a full
+      // or empty buffer and stick it on the front here
+      ldout(cct, 20) << "assemble_result(" << this << ") adding entire "
+		     << "buffer len " << p->second.first.length() << dendl;
       bl.claim_prepend(p->second.first);
     }
     p++;
author	Dan Mick <dan.mick@inktank.com>	2012-12-14 16:36:27 -0800
committer	Dan Mick <dan.mick@inktank.com>	2012-12-14 16:36:27 -0800
commit	4e39c37d1130b68694db01c7ff886ee3b6a93501 (patch)
tree	a5b391a46b61fe947fbb296df0768cc0acae65d7
parent	8cf367cb79046b08cc593b14f77526eef2758ee6 (diff)
download	ceph-4e39c37d1130b68694db01c7ff886ee3b6a93501.tar.gz