summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Zafman <david.zafman@inktank.com>2013-02-22 15:15:06 -0800
committerDavid Zafman <david.zafman@inktank.com>2013-03-04 23:16:43 -0800
commitdb4ccc0827e4fc4b6755712eee6a3a3a95ac27a0 (patch)
tree875928ddafe7e1afe693c994a370df2df1cb9679
parent3b5933e0a014eb44a1cff4663921db5f29f32b11 (diff)
downloadceph-db4ccc0827e4fc4b6755712eee6a3a3a95ac27a0.tar.gz
osd/librados: add op to list clones/snaps for an object
Return a snap_set_t with clone info and snapshots in ascending order. Add clones with snapshots to obj_list_snap_response_t. Add new rados_types.hpp with snap_set_t/clone_info_t. Move snap_t to rados_types.hpp. Add generate_test_instances() and TYPE() to encoding/types.h. Feature: #4207. Signed-off-by: David Zafman <david.zafman@inktank.com>
-rw-r--r--src/Makefile.am2
-rw-r--r--src/include/rados.h2
-rw-r--r--src/include/rados/librados.hpp4
-rw-r--r--src/include/rados/rados_types.hpp25
-rw-r--r--src/librados/librados.cc22
-rw-r--r--src/osd/ReplicatedPG.cc88
-rw-r--r--src/osd/osd_types.cc2
-rw-r--r--src/osd/osd_types.h109
-rw-r--r--src/osdc/Objecter.h50
-rw-r--r--src/test/encoding/types.h2
10 files changed, 305 insertions, 1 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 4a1d7633a38..af4a259efbc 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1178,6 +1178,7 @@ rados_includedir = $(includedir)/rados
rados_include_DATA = \
$(srcdir)/include/rados/librados.h \
$(srcdir)/include/rados/rados_types.h \
+ $(srcdir)/include/rados/rados_types.hpp \
$(srcdir)/include/rados/librados.hpp \
$(srcdir)/include/buffer.h \
$(srcdir)/include/page.h \
@@ -1642,6 +1643,7 @@ noinst_HEADERS = \
include/xlist.h\
include/rados/librados.h\
include/rados/rados_types.h\
+ include/rados/rados_types.hpp\
include/rados/librados.hpp\
include/rados/librgw.h\
include/rados/page.h\
diff --git a/src/include/rados.h b/src/include/rados.h
index 093a04baf86..f4f120a8f15 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -179,6 +179,8 @@ enum {
CEPH_OSD_OP_LIST_WATCHERS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 9,
+ CEPH_OSD_OP_LIST_SNAPS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 10,
+
/* write */
CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp
index 5bc1495e1c7..1463a34996d 100644
--- a/src/include/rados/librados.hpp
+++ b/src/include/rados/librados.hpp
@@ -12,6 +12,7 @@
#include "buffer.h"
#include "librados.h"
+#include "include/rados/rados_types.hpp"
namespace librados
{
@@ -26,7 +27,6 @@ namespace librados
class RadosClient;
typedef void *list_ctx_t;
- typedef uint64_t snap_t;
typedef uint64_t auid_t;
typedef void *config_t;
@@ -328,6 +328,7 @@ namespace librados
* @param prval [out] place error code in prval upon completion
*/
void list_watchers(std::list<obj_watch_t> *out_watchers, int *prval);
+ /**
+ * List the clones and snapshots of the object
+ *
+ * @param out_snaps [out] place the clone/snapshot listing here
+ * @param prval [out] place error code in prval upon completion
+ */
+ void list_snaps(snap_set_t *out_snaps, int *prval);
};
@@ -501,6 +502,7 @@ namespace librados
int unwatch(const std::string& o, uint64_t handle);
int notify(const std::string& o, uint64_t ver, bufferlist& bl);
int list_watchers(const std::string& o, std::list<obj_watch_t> *out_watchers);
+ // List the clones/snapshots of object o into out_snaps. Returns the negative
+ // error from the read operation if it fails, otherwise the op's result code.
+ int list_snaps(const std::string& o, snap_set_t *out_snaps);
void set_notify_timeout(uint32_t timeout);
// assert version for next sync operations
diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp
new file mode 100644
index 00000000000..eb28d4f8b6e
--- /dev/null
+++ b/src/include/rados/rados_types.hpp
@@ -0,0 +1,25 @@
+#ifndef CEPH_RADOS_TYPES_HPP
+#define CEPH_RADOS_TYPES_HPP
+
+#include <utility>
+#include <vector>
+#include "include/inttypes.h"
+
+namespace librados {
+
+typedef uint64_t snap_t;
+
+// One entry of a snap_set_t: a single clone of an object together with the
+// snapshots it covers. Filled in from the OSD's list-snaps reply.
+struct clone_info_t {
+ // Sentinel cloneid (all bits set) used for the entry describing the head.
+ static const snap_t HEAD = ((snap_t)-1);
+ // Id of this clone, or HEAD.
+ snap_t cloneid;
+ std::vector<snap_t> snaps; // ascending
+ // Overlap extents as (offset, length) pairs.
+ std::vector< std::pair<uint64_t,uint64_t> > overlap;
+ // Size recorded for this clone.
+ uint64_t size;
+};
+
+// Result of a list_snaps call: one clone_info_t per clone of the object.
+struct snap_set_t {
+ std::vector<clone_info_t> clones; // ascending
+};
+
+}
+#endif
diff --git a/src/librados/librados.cc b/src/librados/librados.cc
index c03a20f8d12..59ff0d1c3e8 100644
--- a/src/librados/librados.cc
+++ b/src/librados/librados.cc
@@ -219,6 +219,14 @@ void librados::ObjectReadOperation::list_watchers(
o->list_watchers(out_watchers, prval);
}
+/**
+ * Queue a list-snaps op on this read operation.
+ *
+ * Simply forwards to the wrapped ::ObjectOperation.
+ *
+ * @param out_snaps [out] place the clone/snapshot listing here
+ * @param prval [out] place error code in prval upon completion
+ */
+void librados::ObjectReadOperation::list_snaps(snap_set_t *out_snaps,
+                                               int *prval)
+{
+  ((::ObjectOperation *)impl)->list_snaps(out_snaps, prval);
+}
+
int librados::IoCtx::omap_get_vals(const std::string& oid,
const std::string& start_after,
const std::string& filter_prefix,
@@ -1040,6 +1048,20 @@ int librados::IoCtx::list_watchers(const std::string& oid,
return r;
}
+/**
+ * List the clones and snapshots of an object through a one-op read operation.
+ *
+ * @param oid name of the object to inspect
+ * @param out_snaps [out] place the clone/snapshot listing here
+ * @returns the negative error from operate() if it fails, otherwise the
+ *          result code recorded for the list-snaps op
+ */
+int librados::IoCtx::list_snaps(const std::string& oid,
+                                snap_set_t *out_snaps)
+{
+  ObjectReadOperation op;
+  // Start from success so that an indeterminate value can never be returned
+  // should the completion path leave r unwritten.
+  int r = 0;
+  op.list_snaps(out_snaps, &r);
+  bufferlist bl;
+  int ret = operate(oid, &op, &bl);
+  if (ret < 0)
+    return ret;
+
+  return r;
+}
+
void librados::IoCtx::set_notify_timeout(uint32_t timeout)
{
io_ctx_impl->set_notify_timeout(timeout);
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index b1f60d4f0dc..ea71f2b81e8 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -2252,6 +2252,94 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
break;
}
+    case CEPH_OSD_OP_LIST_SNAPS:
+      {
+        // Build an obj_list_snap_response_t with one clone_info per clone in
+        // the object's snapset (plus one for the head, if it exists) and
+        // encode it into this op's outdata.
+        obj_list_snap_response_t resp;
+
+        if (!ssc) {
+          ssc = ctx->obc->ssc = get_snapset_context(soid.oid,
+                                                    soid.get_key(), soid.hash, false);
+        }
+
+        assert(ssc);
+
+        // snapset.snaps is walked back to front; the iterator is shared by
+        // all clones so each snap is handed to exactly one clone.
+        // NOTE(review): this presumes snapset.snaps is stored newest-first so
+        // the reverse walk yields ascending ids -- confirm.
+        vector<snapid_t>::reverse_iterator snap_iter =
+          ssc->snapset.snaps.rbegin();
+
+        int clonecount = ssc->snapset.clones.size();
+        if (ssc->snapset.head_exists)
+          clonecount++;
+        resp.clones.reserve(clonecount);
+
+        // Set to false when a clone lacks its overlap or size record.
+        bool snapset_consistent = true;
+
+        for (vector<snapid_t>::const_iterator clone_iter = ssc->snapset.clones.begin();
+             clone_iter != ssc->snapset.clones.end(); ++clone_iter) {
+          clone_info ci;
+
+          dout(20) << "List clones id=" << *clone_iter << dendl;
+
+          ci.cloneid = *clone_iter;
+
+          // Consume every not-yet-assigned snap whose id is <= this clone id.
+          for (; snap_iter != ssc->snapset.snaps.rend()
+                 && (*snap_iter <= ci.cloneid); ++snap_iter) {
+
+            dout(20) << "List snaps id=" << *snap_iter << dendl;
+
+            assert(*snap_iter != CEPH_NOSNAP);
+            assert(*snap_iter != CEPH_SNAPDIR);
+
+            ci.snaps.push_back(*snap_iter);
+          }
+
+          map<snapid_t, interval_set<uint64_t> >::const_iterator coi;
+          coi = ssc->snapset.clone_overlap.find(ci.cloneid);
+          if (coi == ssc->snapset.clone_overlap.end()) {
+            osd->clog.error() << "osd." << osd->whoami << ": inconsistent clone_overlap found for oid "
+                              << soid << " clone " << *clone_iter;
+            snapset_consistent = false;
+            break;
+          }
+          const interval_set<uint64_t> &o = coi->second;
+          ci.overlap.reserve(o.num_intervals());
+          for (interval_set<uint64_t>::const_iterator r = o.begin();
+               r != o.end(); ++r) {
+            ci.overlap.push_back(pair<uint64_t,uint64_t>(r.get_start(), r.get_len()));
+          }
+
+          map<snapid_t, uint64_t>::const_iterator si;
+          si = ssc->snapset.clone_size.find(ci.cloneid);
+          if (si == ssc->snapset.clone_size.end()) {
+            osd->clog.error() << "osd." << osd->whoami << ": inconsistent clone_size found for oid "
+                              << soid << " clone " << *clone_iter;
+            snapset_consistent = false;
+            break;
+          }
+          ci.size = si->second;
+
+          resp.clones.push_back(ci);
+        }
+
+        if (!snapset_consistent) {
+          // The break inside the loop only leaves the loop. Bail out of the
+          // op here with a negative errno so a partial listing is never
+          // encoded and the failure is not overwritten with success below.
+          result = -EINVAL;
+          break;
+        }
+
+        if (ssc->snapset.head_exists) {
+          clone_info ci;
+
+          assert(obs.exists);
+
+          ci.cloneid = clone_info::HEAD;
+
+          //Put remaining snapshots into head clone
+          for (; snap_iter != ssc->snapset.snaps.rend(); ++snap_iter)
+            ci.snaps.push_back(*snap_iter);
+
+          //Size for HEAD is oi.size
+          ci.size = oi.size;
+
+          resp.clones.push_back(ci);
+        }
+
+        resp.encode(osd_op.outdata);
+        result = 0;
+
+        ctx->delta_stats.num_rd++;
+        break;
+      }
+
case CEPH_OSD_OP_ASSERT_SRC_VERSION:
{
uint64_t ver = op.watch.ver;
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 219c1bfdec8..9c890397e8d 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -20,6 +20,8 @@ extern "C" {
#include "PG.h"
#include "OSDMap.h"
+// Out-of-class definition of the sentinel clone id (all bits set) that marks
+// the clone_info entry describing the head object.
+const snapid_t clone_info::HEAD((uint64_t)-1);
+
// -- osd_reqid_t --
void osd_reqid_t::encode(bufferlist &bl) const
{
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index fbdc51d8164..3f464852398 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -2093,4 +2093,113 @@ struct obj_list_watch_response_t {
WRITE_CLASS_ENCODER(obj_list_watch_response_t)
+/**
+ * Description of a single clone inside an obj_list_snap_response_t.
+ *
+ * Carries the clone id (or HEAD for the head object), the snapshots assigned
+ * to the clone, its overlap extents as (offset, length) pairs and its size.
+ */
+struct clone_info {
+  static const snapid_t HEAD;
+
+  snapid_t cloneid;
+  vector<snapid_t> snaps;  // ascending
+  vector< pair<uint64_t,uint64_t> > overlap;
+  uint64_t size;
+
+  // Give size a defined value: default-constructed instances (e.g. the first
+  // test instances below) are encoded and dumped without ever assigning it.
+  clone_info() : size(0) {}
+
+  /// Encode all four fields, in declaration order, as struct version 1.
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(cloneid, bl);
+    ::encode(snaps, bl);
+    ::encode(overlap, bl);
+    ::encode(size, bl);
+    ENCODE_FINISH(bl);
+  }
+  /// Decode the fields in the same order encode() wrote them.
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(cloneid, bl);
+    ::decode(snaps, bl);
+    ::decode(overlap, bl);
+    ::decode(size, bl);
+    DECODE_FINISH(bl);
+  }
+  /// Dump to a Formatter; the HEAD clone id is printed as the string "HEAD".
+  void dump(Formatter *f) const {
+    if (cloneid == HEAD)
+      f->dump_string("cloneid", "HEAD");
+    else
+      f->dump_unsigned("cloneid", cloneid.val);
+    f->open_array_section("snapshots");
+    for (vector<snapid_t>::const_iterator p = snaps.begin(); p != snaps.end(); ++p) {
+      f->open_object_section("snap");
+      f->dump_unsigned("id", p->val);
+      f->close_section();
+    }
+    f->close_section();
+    f->open_array_section("overlaps");
+    for (vector< pair<uint64_t,uint64_t> >::const_iterator q = overlap.begin();
+         q != overlap.end(); ++q) {
+      f->open_object_section("overlap");
+      f->dump_unsigned("offset", q->first);
+      f->dump_unsigned("length", q->second);
+      f->close_section();
+    }
+    f->close_section();
+    f->dump_unsigned("size", size);
+  }
+  /// Produce three instances: default, a populated clone, and a HEAD entry.
+  static void generate_test_instances(list<clone_info*>& o) {
+    o.push_back(new clone_info);
+    o.push_back(new clone_info);
+    o.back()->cloneid = 1;
+    o.back()->snaps.push_back(1);
+    o.back()->overlap.push_back(pair<uint64_t,uint64_t>(0,4096));
+    o.back()->overlap.push_back(pair<uint64_t,uint64_t>(8192,4096));
+    o.back()->size = 16384;
+    o.push_back(new clone_info);
+    o.back()->cloneid = HEAD;
+    o.back()->size = 32768;
+  }
+};
+
+WRITE_CLASS_ENCODER(clone_info)
+
+/**
+ * Reply payload of the list-snaps op: one clone_info per clone of the object.
+ */
+struct obj_list_snap_response_t {
+  vector<clone_info> clones;   // ascending
+
+  /// Encode the clone list as struct version 1.
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(clones, bl);
+    ENCODE_FINISH(bl);
+  }
+  /// Decode the clone list written by encode().
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(clones, bl);
+    DECODE_FINISH(bl);
+  }
+  /// Dump every clone as an object inside a "clones" array section.
+  void dump(Formatter *f) const {
+    f->open_array_section("clones");
+    vector<clone_info>::const_iterator it = clones.begin();
+    while (it != clones.end()) {
+      f->open_object_section("clone");
+      it->dump(f);
+      f->close_section();
+      ++it;
+    }
+    f->close_section();
+  }
+  /// Produce two instances: an empty response and one holding a clone + HEAD.
+  static void generate_test_instances(list<obj_list_snap_response_t*>& o) {
+    // First instance: no clones at all.
+    o.push_back(new obj_list_snap_response_t);
+
+    // Second instance: one ordinary clone followed by the head entry.
+    obj_list_snap_response_t *filled = new obj_list_snap_response_t;
+    o.push_back(filled);
+
+    clone_info snap_clone;
+    snap_clone.cloneid = 1;
+    snap_clone.snaps.push_back(1);
+    snap_clone.overlap.push_back(pair<uint64_t,uint64_t>(0,4096));
+    snap_clone.overlap.push_back(pair<uint64_t,uint64_t>(8192,4096));
+    snap_clone.size = 16384;
+    filled->clones.push_back(snap_clone);
+
+    clone_info head_clone;
+    head_clone.cloneid = clone_info::HEAD;
+    head_clone.size = 32768;
+    filled->clones.push_back(head_clone);
+  }
+};
+
+WRITE_CLASS_ENCODER(obj_list_snap_response_t)
+
#endif
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 6cc3cb88426..f9583400c6e 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -25,6 +25,7 @@
#include "common/admin_socket.h"
#include "common/Timer.h"
#include "include/rados/rados_types.h"
+#include "include/rados/rados_types.hpp"
#include <list>
#include <map>
@@ -344,6 +345,43 @@ struct ObjectOperation {
}
}
};
+  /**
+   * Completion handler for a list-snaps op.
+   *
+   * On a non-negative result it decodes the obj_list_snap_response_t held in
+   * bl and, if psnaps is set, converts it into the librados snap_set_t.
+   * When prval is set it receives 0 on success or -EIO if decoding throws.
+   * Either pointer may be NULL.
+   */
+  struct C_ObjectOperation_decodesnaps : public Context {
+    bufferlist bl;
+    librados::snap_set_t *psnaps;
+    int *prval;
+    C_ObjectOperation_decodesnaps(librados::snap_set_t *ps, int *pr)
+      : psnaps(ps), prval(pr) {}
+    void finish(int r) {
+      if (r >= 0) {
+        bufferlist::iterator p = bl.begin();
+        try {
+          obj_list_snap_response_t resp;
+          ::decode(resp, p);
+          if (psnaps) {
+
+            psnaps->clones.clear();
+            vector<clone_info>::iterator ci;
+            for (ci = resp.clones.begin(); ci != resp.clones.end(); ++ci) {
+              librados::clone_info_t clone;
+
+              clone.cloneid = ci->cloneid;
+              clone.snaps.reserve(ci->snaps.size());
+              clone.snaps.insert(clone.snaps.end(), ci->snaps.begin(), ci->snaps.end());
+              clone.overlap = ci->overlap;
+              clone.size = ci->size;
+
+              psnaps->clones.push_back(clone);
+            }
+          }
+          // prval is optional: the handler is also created when only the
+          // output set was requested, so guard the store like the catch does.
+          if (prval)
+            *prval = 0;
+        }
+        catch (buffer::error& e) {
+          if (prval)
+            *prval = -EIO;
+        }
+      }
+    }
+  };
void getxattrs(std::map<std::string,bufferlist> *pattrs, int *prval) {
add_op(CEPH_OSD_OP_GETXATTRS);
if (pattrs || prval) {
@@ -539,6 +577,18 @@ struct ObjectOperation {
}
}
+  /**
+   * Append a CEPH_OSD_OP_LIST_SNAPS op to this operation.
+   *
+   * If the caller supplied either output pointer, a decode handler is
+   * registered for the op so its reply is unpacked into out / prval.
+   *
+   * @param out [out] receives the decoded clone listing (may be NULL)
+   * @param prval [out] receives the op's result code (may be NULL)
+   */
+  void list_snaps(librados::snap_set_t *out, int *prval) {
+    (void)add_op(CEPH_OSD_OP_LIST_SNAPS);
+    if (!prval && !out)
+      return;
+
+    // The op just added is the last one; hook the handler up at its index.
+    unsigned idx = ops.size() - 1;
+    C_ObjectOperation_decodesnaps *decoder =
+      new C_ObjectOperation_decodesnaps(out, prval);
+    out_handler[idx] = decoder;
+    out_bl[idx] = &decoder->bl;
+    out_rval[idx] = prval;
+  }
+
void assert_version(uint64_t ver) {
bufferlist bl;
add_watch(CEPH_OSD_OP_ASSERT_VER, 0, ver, 0, bl);
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index b69bb9fd07c..a554d7e114e 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -63,6 +63,8 @@ TYPE(ObjectRecoveryProgress)
TYPE(ScrubMap::object)
TYPE(ScrubMap)
TYPE(osd_peer_stat_t)
+TYPE(clone_info)
+TYPE(obj_list_snap_response_t)
#include "os/ObjectStore.h"
TYPE(ObjectStore::Transaction)