diff options
author | Matt Benjamin <matt@linuxbox.com> | 2013-05-23 19:13:54 -0700 |
---|---|---|
committer | David Zafman <david.zafman@inktank.com> | 2013-10-03 15:23:01 -0700 |
commit | b01ce1124176697abfb477be3695633b92e1f73f (patch) | |
tree | 395af86fc734d067263d02a261e679f54603a8a8 | |
parent | fb9f9263b60e915f77f987cd8e190c0512d0c848 (diff) | |
download | ceph-b01ce1124176697abfb477be3695633b92e1f73f.tar.gz |
client: add barrier and types
This gives us a framework to implement commit operations that are
guaranteed to terminate even if unstable writes come in after commit
is called.
Adapted to use Boost interval classes by Adam Emerson
<aemerson@linuxbox.com>.
Signed-off-by: Matt Benjamin <matt@linuxbox.com>
-rw-r--r-- | src/client/Client.h | 6 | ||||
-rw-r--r-- | src/client/Makefile.am | 2 | ||||
-rw-r--r-- | src/client/barrier.cc | 182 | ||||
-rw-r--r-- | src/client/barrier.h | 135 |
4 files changed, 325 insertions, 0 deletions
diff --git a/src/client/Client.h b/src/client/Client.h index 15b10a97008..6bdeffa4b83 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -34,6 +34,8 @@ using namespace __gnu_cxx; #include "include/interval_set.h" #include "include/lru.h" +#include "barrier.h" + #include "mds/mdstypes.h" #include "msg/Message.h" @@ -303,6 +305,9 @@ protected: int num_flushing_caps; hash_map<inodeno_t,SnapRealm*> snap_realms; + /* async block write barrier support */ + map<uint64_t, BarrierContext* > barriers; + SnapRealm *get_snap_realm(inodeno_t r); SnapRealm *get_snap_realm_maybe(inodeno_t r); void put_snap_realm(SnapRealm *realm); @@ -357,6 +362,7 @@ protected: friend class C_Client_PutInode; // calls put_inode() friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb + friend class C_Block_Sync; // Calls block map and protected helpers //int get_cache_size() { return lru.lru_get_size(); } //void set_cache_size(int m) { lru.lru_set_max(m); } diff --git a/src/client/Makefile.am b/src/client/Makefile.am index 53107eba517..de0bfdba200 100644 --- a/src/client/Makefile.am +++ b/src/client/Makefile.am @@ -1,5 +1,6 @@ libclient_la_SOURCES = \ client/Client.cc \ + client/barrier.cc \ client/Inode.cc \ client/Dentry.cc \ client/MetaRequest.cc \ @@ -20,6 +21,7 @@ noinst_HEADERS += \ client/ClientSnapRealm.h \ client/SyntheticClient.h \ client/Trace.h \ + client/barrier.h \ client/ioctl.h \ client/ObjecterWriteback.h diff --git a/src/client/barrier.cc b/src/client/barrier.cc new file mode 100644 index 00000000000..3e89b5e795e --- /dev/null +++ b/src/client/barrier.cc @@ -0,0 +1,182 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * + * Copyright (C) 2012 CohortFS, LLC. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#if defined(__FreeBSD__) +#include <sys/param.h> +#endif + +#include <iostream> +using namespace std; + +#include "include/Context.h" +#include "Client.h" +#include "common/config.h" +#include "barrier.h" +#include "include/assert.h" + +#undef dout_prefix +#define dout_prefix *_dout << "client." << whoami << " " + +#define dout_subsys ceph_subsys_client + +#define cldout(cl, v) dout_impl((cl)->cct, dout_subsys, v) \ + *_dout << "client." << cl->whoami << " " + +/* C_Block_Sync */ +C_Block_Sync::C_Block_Sync(Client *c, uint64_t i, barrier_interval iv, + int *r=0) : + cl(c), ino(i), iv(iv), rval(r) +{ + state = CBlockSync_State_None; + barrier = NULL; + + cldout(cl, 1) << "C_Block_Sync for " << ino << dendl; + + if (!cl->barriers[ino]) { + cl->barriers[ino] = new BarrierContext(cl, ino); + } + /* XXX current semantics aren't commit-ordered */ + cl->barriers[ino]->write_nobarrier(*this); +} + +void C_Block_Sync::finish(int r) { + cldout(cl, 1) << "C_Block_Sync::finish() for " << ino << " " + << iv << " r==" << r << dendl; + if (rval) + *rval = r; + cl->barriers[ino]->complete(*this); +} + +/* Barrier */ +Barrier::Barrier() +{ } + +Barrier::~Barrier() +{ } + +/* BarrierContext */ +BarrierContext::BarrierContext(Client *c, uint64_t ino) : + cl(c), ino(ino), lock("BarrierContext") +{ }; + +void BarrierContext::write_nobarrier(C_Block_Sync &cbs) +{ + Mutex::Locker locker(lock); + cbs.state = CBlockSync_State_Unclaimed; + outstanding_writes.push_back(cbs); +} + +void BarrierContext::write_barrier(C_Block_Sync &cbs) +{ + Mutex::Locker locker(lock); + barrier_interval &iv = cbs.iv; + bool done = false; + + { /* find blocking commit--intrusive no help here */ + BarrierList::iterator iter; + for (iter = active_commits.begin(); + !done && (iter != active_commits.end()); + ++iter) { + Barrier &barrier = *iter; + while (boost::icl::intersects(barrier.span, iv)) { + /* wait on this */ + barrier.cond.Wait(lock); + done = true; + } + } + } + + cbs.state = CBlockSync_State_Unclaimed; + outstanding_writes.push_back(cbs); + +} /* write_barrier */ + +void BarrierContext::commit_barrier(barrier_interval &civ) +{ + Mutex::Locker locker(lock); + + /* we commit outstanding writes--if none exist, we don't care */ + if (outstanding_writes.size() == 0) + return; + + boost::icl::interval_set<uint64_t> cvs; + cvs.insert(civ); + + Barrier *barrier = NULL; + BlockSyncList::iterator iter, iter2; + + iter = outstanding_writes.begin(); + while (iter != outstanding_writes.end()) { + barrier_interval &iv = iter->iv; + if (boost::icl::intersects(cvs, iv)) { + C_Block_Sync &a_write = *iter; + if (! barrier) + barrier = new Barrier(); + /* mark the callback */ + a_write.state = CBlockSync_State_Committing; + a_write.barrier = barrier; + iter2 = iter++; + outstanding_writes.erase(iter2); + barrier->write_list.push_back(a_write); + barrier->span.insert(iv); + /* avoid iter invalidate */ + } else { + iter++; + } + } + + if (barrier) { + active_commits.push_back(*barrier); + /* and wait on this */ + barrier->cond.Wait(lock); + } + +} /* commit_barrier */ + +void BarrierContext::complete(C_Block_Sync &cbs) +{ + Mutex::Locker locker(lock); + BlockSyncList::iterator iter = + BlockSyncList::s_iterator_to(cbs); + + switch (cbs.state) { + case CBlockSync_State_Unclaimed: + /* cool, no waiting */ + outstanding_writes.erase(iter); + break; + case CBlockSync_State_Committing: + { + Barrier *barrier = iter->barrier; + barrier->write_list.erase(iter); + /* signal waiters */ + barrier->cond.Signal(); + /* dispose cleared barrier */ + if (barrier->write_list.size() == 0) { + BarrierList::iterator iter2 = + BarrierList::s_iterator_to(*barrier); + active_commits.erase(iter2); + delete barrier; + } + } + break; + default: + assert(false); + break; + } + + cbs.state = CBlockSync_State_Completed; + +} /* complete */ + +BarrierContext::~BarrierContext() +{ } diff --git a/src/client/barrier.h b/src/client/barrier.h new file mode 100644 index 00000000000..cc6faa6b8cf --- /dev/null +++ b/src/client/barrier.h @@ -0,0 +1,135 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * + * Copyright (C) 2012 CohortFS, LLC. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef BARRIER_H +#define BARRIER_H + +#include "include/types.h" + +#include <string> +#include <list> +#include <set> +#include <map> +#include <boost/intrusive/list.hpp> +#define BOOST_ICL_USE_STATIC_BOUNDED_INTERVALS +#include <boost/icl/interval_set.hpp> +#include <fstream> +#include <exception> + +using std::list; +using std::set; +using std::map; +using std::fstream; + +#include <ext/hash_map> + +#include "common/Mutex.h" +#include "common/Cond.h" +#include "common/config.h" + +class Client; + +typedef boost::icl::interval<uint64_t>::type barrier_interval; + +using namespace std; + +/* + * we keep count of uncommitted writes on the inode, so that + * ll_commit_blocks can do the right thing. + * + * This is just a hacked copy of Ceph's sync callback. + */ + +enum CBlockSync_State +{ + CBlockSync_State_None, /* initial state */ + CBlockSync_State_Unclaimed, /* outstanding write */ + CBlockSync_State_Committing, /* commit in progress */ + CBlockSync_State_Completed, +}; + +class Barrier; +class BarrierContext; + +class C_Block_Sync : public Context { +private: + Client *cl; + uint64_t ino; + barrier_interval iv; + enum CBlockSync_State state; + Barrier *barrier; + int *rval; /* see Cond.h */ + +public: + boost::intrusive::list_member_hook<> intervals_hook; + C_Block_Sync(Client *c, uint64_t i, barrier_interval iv, int *r); + void finish(int rval); + + friend class Barrier; + friend class BarrierContext; +}; + +typedef boost::intrusive::list< C_Block_Sync, + boost::intrusive::member_hook< + C_Block_Sync, + boost::intrusive::list_member_hook<>, + &C_Block_Sync::intervals_hook > + > BlockSyncList; + +class Barrier +{ +private: + Cond cond; + boost::icl::interval_set<uint64_t> span; + BlockSyncList write_list; + +public: + boost::intrusive::list_member_hook<> active_commits_hook; + + Barrier(); + ~Barrier(); + + friend class BarrierContext; +}; + +typedef boost::intrusive::list< Barrier, + boost::intrusive::member_hook< + Barrier, + boost::intrusive::list_member_hook<>, + &Barrier::active_commits_hook > + > BarrierList; + +class BarrierContext +{ +private: + Client *cl; + uint64_t ino; + Mutex lock; + + // writes not claimed by a commit + BlockSyncList outstanding_writes; + + // commits in progress, with their claimed writes + BarrierList active_commits; + boost::icl::interval_set<uint64_t> active_commit_interval; + +public: + BarrierContext(Client *c, uint64_t ino); + void write_nobarrier(C_Block_Sync &cbs); + void write_barrier(C_Block_Sync &cbs); + void commit_barrier(barrier_interval &civ); + void complete(C_Block_Sync &cbs); + ~BarrierContext(); +}; + +#endif |