diff options
author | David Zafman <david.zafman@inktank.com> | 2013-02-22 15:15:06 -0800 |
---|---|---|
committer | David Zafman <david.zafman@inktank.com> | 2013-03-04 23:16:43 -0800 |
commit | db4ccc0827e4fc4b6755712eee6a3a3a95ac27a0 (patch) | |
tree | 875928ddafe7e1afe693c994a370df2df1cb9679 | |
parent | 3b5933e0a014eb44a1cff4663921db5f29f32b11 (diff) | |
download | ceph-db4ccc0827e4fc4b6755712eee6a3a3a95ac27a0.tar.gz |
osd/librados: add op to list clones/snaps for an object
Return a snap_set_t containing per-clone info,
with clones and their snapshots in ascending order
Add clones with snapshots to obj_list_snap_response_t
New rados_types.hpp with snap_set_t/clone_info_t
Move snap_t to rados_types.hpp
Add generate_test_instances() to the new types and TYPE() entries to test/encoding/types.h
Feature: #4207
Signed-off-by: David Zafman <david.zafman@inktank.com>
-rw-r--r-- | src/Makefile.am | 2 | ||||
-rw-r--r-- | src/include/rados.h | 2 | ||||
-rw-r--r-- | src/include/rados/librados.hpp | 4 | ||||
-rw-r--r-- | src/include/rados/rados_types.hpp | 25 | ||||
-rw-r--r-- | src/librados/librados.cc | 22 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 88 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 2 | ||||
-rw-r--r-- | src/osd/osd_types.h | 109 | ||||
-rw-r--r-- | src/osdc/Objecter.h | 50 | ||||
-rw-r--r-- | src/test/encoding/types.h | 2 |
10 files changed, 305 insertions, 1 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 4a1d7633a38..af4a259efbc 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1178,6 +1178,7 @@ rados_includedir = $(includedir)/rados rados_include_DATA = \ $(srcdir)/include/rados/librados.h \ $(srcdir)/include/rados/rados_types.h \ + $(srcdir)/include/rados/rados_types.hpp \ $(srcdir)/include/rados/librados.hpp \ $(srcdir)/include/buffer.h \ $(srcdir)/include/page.h \ @@ -1642,6 +1643,7 @@ noinst_HEADERS = \ include/xlist.h\ include/rados/librados.h\ include/rados/rados_types.h\ + include/rados/rados_types.hpp\ include/rados/librados.hpp\ include/rados/librgw.h\ include/rados/page.h\ diff --git a/src/include/rados.h b/src/include/rados.h index 093a04baf86..f4f120a8f15 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -179,6 +179,8 @@ enum { CEPH_OSD_OP_LIST_WATCHERS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 9, + CEPH_OSD_OP_LIST_SNAPS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 10, + /* write */ CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1, CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2, diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp index 5bc1495e1c7..1463a34996d 100644 --- a/src/include/rados/librados.hpp +++ b/src/include/rados/librados.hpp @@ -12,6 +12,7 @@ #include "buffer.h" #include "librados.h" +#include "include/rados/rados_types.hpp" namespace librados { @@ -26,7 +27,6 @@ namespace librados class RadosClient; typedef void *list_ctx_t; - typedef uint64_t snap_t; typedef uint64_t auid_t; typedef void *config_t; @@ -328,6 +328,7 @@ namespace librados * @param prval [out] place error code in prval upon completion */ void list_watchers(std::list<obj_watch_t> *out_watchers, int *prval); + void list_snaps(snap_set_t *out_snaps, int *prval); }; @@ -501,6 +502,7 @@ namespace librados int unwatch(const std::string& o, uint64_t handle); int notify(const std::string& o, uint64_t ver, bufferlist& bl); int 
list_watchers(const std::string& o, std::list<obj_watch_t> *out_watchers); + int list_snaps(const std::string& o, snap_set_t *out_snaps); void set_notify_timeout(uint32_t timeout); // assert version for next sync operations diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp new file mode 100644 index 00000000000..eb28d4f8b6e --- /dev/null +++ b/src/include/rados/rados_types.hpp @@ -0,0 +1,25 @@ +#ifndef CEPH_RADOS_TYPES_HPP +#define CEPH_RADOS_TYPES_HPP + +#include <utility> +#include <vector> +#include "include/inttypes.h" + +namespace librados { + +typedef uint64_t snap_t; + +struct clone_info_t { + static const snap_t HEAD = ((snap_t)-1); + snap_t cloneid; + std::vector<snap_t> snaps; // ascending + std::vector< std::pair<uint64_t,uint64_t> > overlap; + uint64_t size; +}; + +struct snap_set_t { + std::vector<clone_info_t> clones; // ascending +}; + +} +#endif diff --git a/src/librados/librados.cc b/src/librados/librados.cc index c03a20f8d12..59ff0d1c3e8 100644 --- a/src/librados/librados.cc +++ b/src/librados/librados.cc @@ -219,6 +219,14 @@ void librados::ObjectReadOperation::list_watchers( o->list_watchers(out_watchers, prval); } +void librados::ObjectReadOperation::list_snaps( + snap_set_t *out_snaps, + int *prval) +{ + ::ObjectOperation *o = (::ObjectOperation *)impl; + o->list_snaps(out_snaps, prval); +} + int librados::IoCtx::omap_get_vals(const std::string& oid, const std::string& start_after, const std::string& filter_prefix, @@ -1040,6 +1048,20 @@ int librados::IoCtx::list_watchers(const std::string& oid, return r; } +int librados::IoCtx::list_snaps(const std::string& oid, + snap_set_t *out_snaps) +{ + ObjectReadOperation op; + int r; + op.list_snaps(out_snaps, &r); + bufferlist bl; + int ret = operate(oid, &op, &bl); + if (ret < 0) + return ret; + + return r; +} + void librados::IoCtx::set_notify_timeout(uint32_t timeout) { io_ctx_impl->set_notify_timeout(timeout); diff --git a/src/osd/ReplicatedPG.cc 
b/src/osd/ReplicatedPG.cc index b1f60d4f0dc..ea71f2b81e8 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -2252,6 +2252,94 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) break; } + case CEPH_OSD_OP_LIST_SNAPS: + { + obj_list_snap_response_t resp; + + if (!ssc) { + ssc = ctx->obc->ssc = get_snapset_context(soid.oid, + soid.get_key(), soid.hash, false); + } + + assert(ssc); + + vector<snapid_t>::reverse_iterator snap_iter = + ssc->snapset.snaps.rbegin(); + + int clonecount = ssc->snapset.clones.size(); + if (ssc->snapset.head_exists) + clonecount++; + resp.clones.reserve(clonecount); + for (vector<snapid_t>::const_iterator clone_iter = ssc->snapset.clones.begin(); + clone_iter != ssc->snapset.clones.end(); ++clone_iter) { + clone_info ci; + + dout(20) << "List clones id=" << *clone_iter << dendl; + + ci.cloneid = *clone_iter; + + for (;snap_iter != ssc->snapset.snaps.rend() + && (*snap_iter <= ci.cloneid); snap_iter++) { + + dout(20) << "List snaps id=" << *snap_iter << dendl; + + assert(*snap_iter != CEPH_NOSNAP); + assert(*snap_iter != CEPH_SNAPDIR); + + ci.snaps.push_back(*snap_iter); + } + + map<snapid_t, interval_set<uint64_t> >::const_iterator coi; + coi = ssc->snapset.clone_overlap.find(ci.cloneid); + if (coi == ssc->snapset.clone_overlap.end()) { + osd->clog.error() << "osd." << osd->whoami << ": inconsistent clone_overlap found for oid " + << soid << " clone " << *clone_iter; + result = EINVAL; + break; + } + const interval_set<uint64_t> &o = coi->second; + ci.overlap.reserve(o.num_intervals()); + for (interval_set<uint64_t>::const_iterator r = o.begin(); + r != o.end(); ++r) { + ci.overlap.push_back(pair<uint64_t,uint64_t>(r.get_start(), r.get_len())); + } + + map<snapid_t, uint64_t>::const_iterator si; + si = ssc->snapset.clone_size.find(ci.cloneid); + if (si == ssc->snapset.clone_size.end()) { + osd->clog.error() << "osd." 
<< osd->whoami << ": inconsistent clone_size found for oid " + << soid << " clone " << *clone_iter; + result = EINVAL; + break; + } + ci.size = si->second; + + resp.clones.push_back(ci); + } + if (ssc->snapset.head_exists) { + clone_info ci; + + assert(obs.exists); + + ci.cloneid = clone_info::HEAD; + + //Put remaining snapshots into head clone + for (;snap_iter != ssc->snapset.snaps.rend(); snap_iter++) + ci.snaps.push_back(*snap_iter); + + //Size for HEAD is oi.size + ci.size = oi.size; + + resp.clones.push_back(ci); + } + + resp.encode(osd_op.outdata); + result = 0; + + ctx->delta_stats.num_rd++; + break; + } + case CEPH_OSD_OP_ASSERT_SRC_VERSION: { uint64_t ver = op.watch.ver; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 219c1bfdec8..9c890397e8d 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -20,6 +20,8 @@ extern "C" { #include "PG.h" #include "OSDMap.h" +const snapid_t clone_info::HEAD((uint64_t)-1); + // -- osd_reqid_t -- void osd_reqid_t::encode(bufferlist &bl) const { diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index fbdc51d8164..3f464852398 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2093,4 +2093,113 @@ struct obj_list_watch_response_t { WRITE_CLASS_ENCODER(obj_list_watch_response_t) +struct clone_info { + static const snapid_t HEAD; + + snapid_t cloneid; + vector<snapid_t> snaps; // ascending + vector< pair<uint64_t,uint64_t> > overlap; + uint64_t size; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(cloneid, bl); + ::encode(snaps, bl); + ::encode(overlap, bl); + ::encode(size, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(cloneid, bl); + ::decode(snaps, bl); + ::decode(overlap, bl); + ::decode(size, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const { + if (cloneid == HEAD) + f->dump_string("cloneid", "HEAD"); + else + f->dump_unsigned("cloneid", cloneid.val); + 
f->open_array_section("snapshots"); + for (vector<snapid_t>::const_iterator p = snaps.begin(); p != snaps.end(); ++p) { + f->open_object_section("snap"); + f->dump_unsigned("id", p->val); + f->close_section(); + } + f->close_section(); + f->open_array_section("overlaps"); + for (vector< pair<uint64_t,uint64_t> >::const_iterator q = overlap.begin(); + q != overlap.end(); ++q) { + f->open_object_section("overlap"); + f->dump_unsigned("offset", q->first); + f->dump_unsigned("length", q->second); + f->close_section(); + } + f->close_section(); + f->dump_unsigned("size", size); + } + static void generate_test_instances(list<clone_info*>& o) { + o.push_back(new clone_info); + o.push_back(new clone_info); + o.back()->cloneid = 1; + o.back()->snaps.push_back(1); + o.back()->overlap.push_back(pair<uint64_t,uint64_t>(0,4096)); + o.back()->overlap.push_back(pair<uint64_t,uint64_t>(8192,4096)); + o.back()->size = 16384; + o.push_back(new clone_info); + o.back()->cloneid = HEAD; + o.back()->size = 32768; + } +}; + +WRITE_CLASS_ENCODER(clone_info) + +/** + * obj list snaps response format + * + */ +struct obj_list_snap_response_t { + vector<clone_info> clones; // ascending + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(clones, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(clones, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const { + f->open_array_section("clones"); + for (vector<clone_info>::const_iterator p = clones.begin(); p != clones.end(); ++p) { + f->open_object_section("clone"); + p->dump(f); + f->close_section(); + } + f->close_section(); + } + static void generate_test_instances(list<obj_list_snap_response_t*>& o) { + o.push_back(new obj_list_snap_response_t); + o.push_back(new obj_list_snap_response_t); + clone_info cl; + cl.cloneid = 1; + cl.snaps.push_back(1); + cl.overlap.push_back(pair<uint64_t,uint64_t>(0,4096)); + 
cl.overlap.push_back(pair<uint64_t,uint64_t>(8192,4096)); + cl.size = 16384; + o.back()->clones.push_back(cl); + cl.cloneid = clone_info::HEAD; + cl.snaps.clear(); + cl.overlap.clear(); + cl.size = 32768; + o.back()->clones.push_back(cl); + } +}; + +WRITE_CLASS_ENCODER(obj_list_snap_response_t) + #endif diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 6cc3cb88426..f9583400c6e 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -25,6 +25,7 @@ #include "common/admin_socket.h" #include "common/Timer.h" #include "include/rados/rados_types.h" +#include "include/rados/rados_types.hpp" #include <list> #include <map> @@ -344,6 +345,43 @@ struct ObjectOperation { } } }; + struct C_ObjectOperation_decodesnaps : public Context { + bufferlist bl; + librados::snap_set_t *psnaps; + int *prval; + C_ObjectOperation_decodesnaps(librados::snap_set_t *ps, int *pr) + : psnaps(ps), prval(pr) {} + void finish(int r) { + if (r >= 0) { + bufferlist::iterator p = bl.begin(); + try { + obj_list_snap_response_t resp; + ::decode(resp, p); + if (psnaps) { + + psnaps->clones.clear(); + vector<clone_info>::iterator ci; + for (ci = resp.clones.begin(); ci != resp.clones.end(); ci++) { + librados::clone_info_t clone; + + clone.cloneid = ci->cloneid; + clone.snaps.reserve(ci->snaps.size()); + clone.snaps.insert(clone.snaps.end(), ci->snaps.begin(), ci->snaps.end()); + clone.overlap = ci->overlap; + clone.size = ci->size; + + psnaps->clones.push_back(clone); + } + } + *prval = 0; + } + catch (buffer::error& e) { + if (prval) + *prval = -EIO; + } + } + } + }; void getxattrs(std::map<std::string,bufferlist> *pattrs, int *prval) { add_op(CEPH_OSD_OP_GETXATTRS); if (pattrs || prval) { @@ -539,6 +577,18 @@ struct ObjectOperation { } } + void list_snaps(librados::snap_set_t *out, int *prval) { + (void)add_op(CEPH_OSD_OP_LIST_SNAPS); + if (prval || out) { + unsigned p = ops.size() - 1; + C_ObjectOperation_decodesnaps *h = + new C_ObjectOperation_decodesnaps(out, prval); + 
out_handler[p] = h; + out_bl[p] = &h->bl; + out_rval[p] = prval; + } + } + void assert_version(uint64_t ver) { bufferlist bl; add_watch(CEPH_OSD_OP_ASSERT_VER, 0, ver, 0, bl); diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index b69bb9fd07c..a554d7e114e 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -63,6 +63,8 @@ TYPE(ObjectRecoveryProgress) TYPE(ScrubMap::object) TYPE(ScrubMap) TYPE(osd_peer_stat_t) +TYPE(clone_info) +TYPE(obj_list_snap_response_t) #include "os/ObjectStore.h" TYPE(ObjectStore::Transaction) |