summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Benjamin <matt@linuxbox.com>2013-05-23 19:13:54 -0700
committerDavid Zafman <david.zafman@inktank.com>2013-10-03 15:23:01 -0700
commitb01ce1124176697abfb477be3695633b92e1f73f (patch)
tree395af86fc734d067263d02a261e679f54603a8a8
parentfb9f9263b60e915f77f987cd8e190c0512d0c848 (diff)
downloadceph-b01ce1124176697abfb477be3695633b92e1f73f.tar.gz
client: add barrier and types
This gives us a framework to implement commit operations that are guaranteed to terminate even if unstable writes come in after commit is called. Adapted to use Boost interval classes by Adam Emerson <aemerson@linuxbox.com>. Signed-off-by: Matt Benjamin <matt@linuxbox.com>
-rw-r--r--src/client/Client.h6
-rw-r--r--src/client/Makefile.am2
-rw-r--r--src/client/barrier.cc182
-rw-r--r--src/client/barrier.h135
4 files changed, 325 insertions, 0 deletions
diff --git a/src/client/Client.h b/src/client/Client.h
index 15b10a97008..6bdeffa4b83 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -34,6 +34,8 @@ using namespace __gnu_cxx;
#include "include/interval_set.h"
#include "include/lru.h"
+#include "barrier.h"
+
#include "mds/mdstypes.h"
#include "msg/Message.h"
@@ -303,6 +305,9 @@ protected:
int num_flushing_caps;
hash_map<inodeno_t,SnapRealm*> snap_realms;
+ /* async block write barrier support */
+ map<uint64_t, BarrierContext* > barriers;
+
SnapRealm *get_snap_realm(inodeno_t r);
SnapRealm *get_snap_realm_maybe(inodeno_t r);
void put_snap_realm(SnapRealm *realm);
@@ -357,6 +362,7 @@ protected:
friend class C_Client_PutInode; // calls put_inode()
friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb
+ friend class C_Block_Sync; // Calls block map and protected helpers
//int get_cache_size() { return lru.lru_get_size(); }
//void set_cache_size(int m) { lru.lru_set_max(m); }
diff --git a/src/client/Makefile.am b/src/client/Makefile.am
index 53107eba517..de0bfdba200 100644
--- a/src/client/Makefile.am
+++ b/src/client/Makefile.am
@@ -1,5 +1,6 @@
libclient_la_SOURCES = \
client/Client.cc \
+ client/barrier.cc \
client/Inode.cc \
client/Dentry.cc \
client/MetaRequest.cc \
@@ -20,6 +21,7 @@ noinst_HEADERS += \
client/ClientSnapRealm.h \
client/SyntheticClient.h \
client/Trace.h \
+ client/barrier.h \
client/ioctl.h \
client/ObjecterWriteback.h
diff --git a/src/client/barrier.cc b/src/client/barrier.cc
new file mode 100644
index 00000000000..3e89b5e795e
--- /dev/null
+++ b/src/client/barrier.cc
@@ -0,0 +1,182 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ *
+ * Copyright (C) 2012 CohortFS, LLC.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/param.h>
+#endif
+
+#include <iostream>
+using namespace std;
+
+#include "include/Context.h"
+#include "Client.h"
+#include "common/config.h"
+#include "barrier.h"
+#include "include/assert.h"
+
+#undef dout_prefix
+#define dout_prefix *_dout << "client." << whoami << " "
+
+#define dout_subsys ceph_subsys_client
+
+#define cldout(cl, v) dout_impl((cl)->cct, dout_subsys, v) \
+ *_dout << "client." << cl->whoami << " "
+
+/* C_Block_Sync */
+C_Block_Sync::C_Block_Sync(Client *c, uint64_t i, barrier_interval iv,
+ int *r=0) :
+ cl(c), ino(i), iv(iv), rval(r)
+{
+ state = CBlockSync_State_None;
+ barrier = NULL;
+
+ cldout(cl, 1) << "C_Block_Sync for " << ino << dendl;
+
+ if (!cl->barriers[ino]) {
+ cl->barriers[ino] = new BarrierContext(cl, ino);
+ }
+ /* XXX current semantics aren't commit-ordered */
+ cl->barriers[ino]->write_nobarrier(*this);
+}
+
+void C_Block_Sync::finish(int r) {
+ cldout(cl, 1) << "C_Block_Sync::finish() for " << ino << " "
+ << iv << " r==" << r << dendl;
+ if (rval)
+ *rval = r;
+ cl->barriers[ino]->complete(*this);
+}
+
+/* Barrier */
+Barrier::Barrier()
+{ }
+
+Barrier::~Barrier()
+{ }
+
+/* BarrierContext */
+BarrierContext::BarrierContext(Client *c, uint64_t ino) :
+ cl(c), ino(ino), lock("BarrierContext")
+{ };
+
+void BarrierContext::write_nobarrier(C_Block_Sync &cbs)
+{
+ Mutex::Locker locker(lock);
+ cbs.state = CBlockSync_State_Unclaimed;
+ outstanding_writes.push_back(cbs);
+}
+
+void BarrierContext::write_barrier(C_Block_Sync &cbs)
+{
+ Mutex::Locker locker(lock);
+ barrier_interval &iv = cbs.iv;
+ bool done = false;
+
+ { /* find blocking commit--intrusive no help here */
+ BarrierList::iterator iter;
+ for (iter = active_commits.begin();
+ !done && (iter != active_commits.end());
+ ++iter) {
+ Barrier &barrier = *iter;
+ while (boost::icl::intersects(barrier.span, iv)) {
+ /* wait on this */
+ barrier.cond.Wait(lock);
+ done = true;
+ }
+ }
+ }
+
+ cbs.state = CBlockSync_State_Unclaimed;
+ outstanding_writes.push_back(cbs);
+
+} /* write_barrier */
+
+void BarrierContext::commit_barrier(barrier_interval &civ)
+{
+ Mutex::Locker locker(lock);
+
+ /* we commit outstanding writes--if none exist, we don't care */
+ if (outstanding_writes.size() == 0)
+ return;
+
+ boost::icl::interval_set<uint64_t> cvs;
+ cvs.insert(civ);
+
+ Barrier *barrier = NULL;
+ BlockSyncList::iterator iter, iter2;
+
+ iter = outstanding_writes.begin();
+ while (iter != outstanding_writes.end()) {
+ barrier_interval &iv = iter->iv;
+ if (boost::icl::intersects(cvs, iv)) {
+ C_Block_Sync &a_write = *iter;
+ if (! barrier)
+ barrier = new Barrier();
+ /* mark the callback */
+ a_write.state = CBlockSync_State_Committing;
+ a_write.barrier = barrier;
+ iter2 = iter++;
+ outstanding_writes.erase(iter2);
+ barrier->write_list.push_back(a_write);
+ barrier->span.insert(iv);
+ /* avoid iter invalidate */
+ } else {
+ iter++;
+ }
+ }
+
+ if (barrier) {
+ active_commits.push_back(*barrier);
+ /* and wait on this */
+ barrier->cond.Wait(lock);
+ }
+
+} /* commit_barrier */
+
+void BarrierContext::complete(C_Block_Sync &cbs)
+{
+ Mutex::Locker locker(lock);
+ BlockSyncList::iterator iter =
+ BlockSyncList::s_iterator_to(cbs);
+
+ switch (cbs.state) {
+ case CBlockSync_State_Unclaimed:
+ /* cool, no waiting */
+ outstanding_writes.erase(iter);
+ break;
+ case CBlockSync_State_Committing:
+ {
+ Barrier *barrier = iter->barrier;
+ barrier->write_list.erase(iter);
+ /* signal waiters */
+ barrier->cond.Signal();
+ /* dispose cleared barrier */
+ if (barrier->write_list.size() == 0) {
+ BarrierList::iterator iter2 =
+ BarrierList::s_iterator_to(*barrier);
+ active_commits.erase(iter2);
+ delete barrier;
+ }
+ }
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ cbs.state = CBlockSync_State_Completed;
+
+} /* complete */
+
+BarrierContext::~BarrierContext()
+{ }
diff --git a/src/client/barrier.h b/src/client/barrier.h
new file mode 100644
index 00000000000..cc6faa6b8cf
--- /dev/null
+++ b/src/client/barrier.h
@@ -0,0 +1,135 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ *
+ * Copyright (C) 2012 CohortFS, LLC.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef BARRIER_H
+#define BARRIER_H
+
+#include "include/types.h"
+
+#include <string>
+#include <list>
+#include <set>
+#include <map>
+#include <boost/intrusive/list.hpp>
+#define BOOST_ICL_USE_STATIC_BOUNDED_INTERVALS
+#include <boost/icl/interval_set.hpp>
+#include <fstream>
+#include <exception>
+
+using std::list;
+using std::set;
+using std::map;
+using std::fstream;
+
+#include <ext/hash_map>
+
+#include "common/Mutex.h"
+#include "common/Cond.h"
+#include "common/config.h"
+
+class Client;
+
+typedef boost::icl::interval<uint64_t>::type barrier_interval;
+
+using namespace std;
+
+/*
+ * we keep count of uncommitted writes on the inode, so that
+ * ll_commit_blocks can do the right thing.
+ *
+ * This is just a hacked copy of Ceph's sync callback.
+ */
+
+enum CBlockSync_State
+{
+ CBlockSync_State_None, /* initial state */
+ CBlockSync_State_Unclaimed, /* outstanding write */
+ CBlockSync_State_Committing, /* commit in progress */
+ CBlockSync_State_Completed,
+};
+
+class Barrier;
+class BarrierContext;
+
+class C_Block_Sync : public Context {
+private:
+ Client *cl;
+ uint64_t ino;
+ barrier_interval iv;
+ enum CBlockSync_State state;
+ Barrier *barrier;
+ int *rval; /* see Cond.h */
+
+public:
+ boost::intrusive::list_member_hook<> intervals_hook;
+ C_Block_Sync(Client *c, uint64_t i, barrier_interval iv, int *r);
+ void finish(int rval);
+
+ friend class Barrier;
+ friend class BarrierContext;
+};
+
+typedef boost::intrusive::list< C_Block_Sync,
+ boost::intrusive::member_hook<
+ C_Block_Sync,
+ boost::intrusive::list_member_hook<>,
+ &C_Block_Sync::intervals_hook >
+ > BlockSyncList;
+
+class Barrier
+{
+private:
+ Cond cond;
+ boost::icl::interval_set<uint64_t> span;
+ BlockSyncList write_list;
+
+public:
+ boost::intrusive::list_member_hook<> active_commits_hook;
+
+ Barrier();
+ ~Barrier();
+
+ friend class BarrierContext;
+};
+
+typedef boost::intrusive::list< Barrier,
+ boost::intrusive::member_hook<
+ Barrier,
+ boost::intrusive::list_member_hook<>,
+ &Barrier::active_commits_hook >
+ > BarrierList;
+
+class BarrierContext
+{
+private:
+ Client *cl;
+ uint64_t ino;
+ Mutex lock;
+
+ // writes not claimed by a commit
+ BlockSyncList outstanding_writes;
+
+ // commits in progress, with their claimed writes
+ BarrierList active_commits;
+ boost::icl::interval_set<uint64_t> active_commit_interval;
+
+public:
+ BarrierContext(Client *c, uint64_t ino);
+ void write_nobarrier(C_Block_Sync &cbs);
+ void write_barrier(C_Block_Sync &cbs);
+ void commit_barrier(barrier_interval &civ);
+ void complete(C_Block_Sync &cbs);
+ ~BarrierContext();
+};
+
+#endif