diff options
author | Greg Farnum <greg@inktank.com> | 2013-10-08 14:57:31 -0700 |
---|---|---|
committer | Greg Farnum <greg@inktank.com> | 2013-10-17 13:11:44 -0700 |
commit | 649efe5291de436b49958b98faac42f71db3a4fe (patch) | |
tree | 0902b3f4d4ed9992c76419faad6029d5080db49d | |
parent | 55f1368271b18faefdab0b603f207878b836c02f (diff) | |
download | ceph-649efe5291de436b49958b98faac42f71db3a4fe.tar.gz |
osd: Add a new object_copy_data_t, and use it in the OSD/Objecter
Right now this is very primitive, but we're about to extend it to
deal with request versioning appropriately, and add in some
extra fields.
Sadly we are doing a little extra copying in the Objecter as a result, but
too bad -- being able to do updates will be worth it.
Signed-off-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- | src/osd/ReplicatedPG.cc | 17 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 65 | ||||
-rw-r--r-- | src/osd/osd_types.h | 35 | ||||
-rw-r--r-- | src/osdc/Objecter.h | 32 | ||||
-rw-r--r-- | src/test/encoding/types.h | 1 |
5 files changed, 119 insertions, 31 deletions
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 4020aa8116a..3c7f1103774 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3615,12 +3615,13 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) goto fail; } + object_copy_data_t reply_obj; // size, mtime - ::encode(oi.size, osd_op.outdata); - ::encode(oi.mtime, osd_op.outdata); + reply_obj.size = oi.size; + reply_obj.mtime = oi.mtime; // attrs - map<string,bufferptr> out_attrs; + map<string,bufferptr>& out_attrs = reply_obj.in_attrs; if (!cursor.attr_complete) { result = osd->store->getattrs(coll, soid, out_attrs, true); if (result < 0) @@ -3628,12 +3629,11 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) cursor.attr_complete = true; dout(20) << " got attrs" << dendl; } - ::encode(out_attrs, osd_op.outdata); int64_t left = out_max - osd_op.outdata.length(); // data - bufferlist bl; + bufferlist& bl = reply_obj.data; if (left > 0 && !cursor.data_complete) { if (cursor.data_offset < oi.size) { result = osd->store->read(coll, oi.soid, cursor.data_offset, left, bl); @@ -3648,10 +3648,9 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) dout(20) << " got data" << dendl; } } - ::encode(bl, osd_op.outdata); // omap - std::map<std::string,bufferlist> out_omap; + std::map<std::string,bufferlist>& out_omap = reply_obj.omap; if (left > 0 && !cursor.omap_complete) { ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(coll, oi.soid); assert(iter); @@ -3669,14 +3668,14 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) dout(20) << " got omap" << dendl; } } - ::encode(out_omap, osd_op.outdata); dout(20) << " cursor.is_complete=" << cursor.is_complete() << " " << out_attrs.size() << " attrs" << " " << bl.length() << " bytes" << " " << out_omap.size() << " keys" << dendl; - ::encode(cursor, osd_op.outdata); + reply_obj.cursor = cursor; + ::encode(reply_obj, osd_op.outdata); result = 0; } break; diff --git 
a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 27f7b171677..692ac05ff56 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2522,6 +2522,71 @@ void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& o.back()->omap_complete = true; } +// -- object_copy_data_t -- + +void object_copy_data_t::encode(bufferlist& bl) const +{ + ::encode(size, bl); + ::encode(mtime, bl); + ::encode(in_attrs, bl); + ::encode(data, bl); + ::encode(omap, bl); + ::encode(cursor, bl); +} + +void object_copy_data_t::decode(bufferlist::iterator& bl) +{ + ::decode(size, bl); + ::decode(mtime, bl); + ::decode(attrs, bl); + ::decode(data, bl); + ::decode(omap, bl); + ::decode(cursor, bl); +} + +void object_copy_data_t::generate_test_instances(list<object_copy_data_t*>& o) +{ + o.push_back(new object_copy_data_t()); + + list<object_copy_cursor_t*> cursors; + object_copy_cursor_t::generate_test_instances(cursors); + list<object_copy_cursor_t*>::iterator ci = cursors.begin(); + o.back()->cursor = **(ci++); + + o.push_back(new object_copy_data_t()); + o.back()->cursor = **(ci++); + + o.push_back(new object_copy_data_t()); + o.back()->size = 1234; + o.back()->mtime.set_from_double(1234); + bufferptr bp("there", 5); + o.back()->in_attrs["hello"] = bp; + bufferlist bl; + bl.push_back(bp); + o.back()->attrs["hello"] = bl; + bufferptr bp2("not", 3); + bufferlist bl2; + bl2.push_back(bp2); + o.back()->omap["why"] = bl2; + bufferptr databp("iamsomedatatocontain", 20); + o.back()->data.push_back(databp); +} + +void object_copy_data_t::dump(Formatter *f) const +{ + f->open_object_section("cursor"); + cursor.dump(f); + f->close_section(); // cursor + f->dump_int("size", size); + f->dump_stream("mtime") << mtime; + /* we should really print out the attrs here, but bufferlist + const-correctness prevents that */ + f->dump_int("in_attrs_size", in_attrs.size()); + f->dump_int("attrs_size", attrs.size()); + f->dump_int("omap_size", omap.size()); + f->dump_int("data_length", 
data.length()); +} + // -- pg_create_t -- void pg_create_t::encode(bufferlist &bl) const diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index a54fc65f375..26506fa30f7 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1835,6 +1835,41 @@ struct object_copy_cursor_t { }; WRITE_CLASS_ENCODER(object_copy_cursor_t) +/** + * object_copy_data_t + * + * Return data from a copy request. The semantics are a little strange + * right now to accommodate the implicit encoding that was previously used + * in its place. + * + * In particular, the sender unconditionally fills in the cursor (from what + * it receives and sends), the size, and the mtime, but is responsible for + * figuring out whether it should put any data in the in_attrs, data, or + * omap members (corresponding to xattrs, object data, and the omap entries) + * based on external data (the client includes a max amount to return with + * the copy request). The client then looks into the attrs, data, and/or omap + * based on the contents of the cursor. Note the change from in_attrs to attrs -- + * this is the result of some silly interface differences which were + * previously elided because bufferlists and bufferptrs encode on the wire the + * same way. 
+ */ +struct object_copy_data_t { + object_copy_cursor_t cursor; + uint64_t size; + utime_t mtime; + map<string, bufferptr> in_attrs; + map<string, bufferlist> attrs; + bufferlist data; + map<string, bufferlist> omap; +public: + object_copy_data_t() : size((uint64_t)-1) {} + + static void generate_test_instances(list<object_copy_data_t*>& o); + void encode(bufferlist& bl) const; + void decode(bufferlist::iterator& bl); + void dump(Formatter *f) const; +}; +WRITE_CLASS_ENCODER(object_copy_data_t) /** * pg creation info diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 1196633276d..b48c6bfcc14 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -591,31 +591,19 @@ struct ObjectOperation { return; try { bufferlist::iterator p = bl.begin(); - uint64_t size; - ::decode(size, p); + object_copy_data_t copy_reply; + ::decode(copy_reply, p); if (out_size) - *out_size = size; - utime_t mtime; - ::decode(mtime, p); + *out_size = copy_reply.size; if (out_mtime) - *out_mtime = mtime; - if (out_attrs) { - ::decode_noclear(*out_attrs, p); - } else { - std::map<std::string,bufferlist> t; - ::decode(t, p); - } - bufferlist bl; - ::decode(bl, p); + *out_mtime = copy_reply.mtime; + if (out_attrs) + *out_attrs = copy_reply.attrs; if (out_data) - out_data->claim_append(bl); - if (out_omap) { - ::decode_noclear(*out_omap, p); - } else { - std::map<std::string,bufferlist> t; - ::decode(t, p); - } - ::decode(*cursor, p); + out_data->claim_append(copy_reply.data); + if (out_omap) + *out_omap = copy_reply.omap; + *cursor = copy_reply.cursor; } catch (buffer::error& e) { r = -EIO; } diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index 18ed795c3ef..e446e7d7375 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -64,6 +64,7 @@ TYPE(pg_missing_t::item) TYPE(pg_missing_t) TYPE(pg_ls_response_t) TYPE(object_copy_cursor_t) +TYPE(object_copy_data_t) TYPE(pg_create_t) TYPE(watch_info_t) TYPE(object_info_t) |