diff options
-rw-r--r-- | src/mds/CInode.cc | 36 | ||||
-rw-r--r-- | src/mds/CInode.h | 5 | ||||
-rw-r--r-- | src/mds/Locker.cc | 7 | ||||
-rw-r--r-- | src/mds/Locker.h | 3 | ||||
-rw-r--r-- | src/mds/Mutation.cc | 31 | ||||
-rw-r--r-- | src/mds/Mutation.h | 6 | ||||
-rw-r--r-- | src/mds/Server.cc | 47 | ||||
-rw-r--r-- | src/mds/mdstypes.h | 7 | ||||
-rw-r--r-- | src/messages/MMDSSlaveRequest.h | 1 |
9 files changed, 124 insertions, 19 deletions
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index c12930837df..af70b681ffc 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -130,6 +130,7 @@ ostream& operator<<(ostream& out, CInode& in) if (in.state_test(CInode::STATE_DIRTYPARENT)) out << " dirtyparent"; if (in.is_freezing_inode()) out << " FREEZING=" << in.auth_pin_freeze_allowance; if (in.is_frozen_inode()) out << " FROZEN"; + if (in.is_frozen_auth_pin()) out << " FROZEN_AUTHPIN"; inode_t *pi = in.get_projected_inode(); if (pi->is_truncating()) @@ -1862,7 +1863,8 @@ void CInode::add_waiter(uint64_t tag, Context *c) // wait on the directory? // make sure its not the inode that is explicitly ambiguous|freezing|frozen if (((tag & WAIT_SINGLEAUTH) && !state_test(STATE_AMBIGUOUSAUTH)) || - ((tag & WAIT_UNFREEZE) && !is_frozen_inode() && !is_freezing_inode())) { + ((tag & WAIT_UNFREEZE) && + !is_frozen_inode() && !is_freezing_inode() && !is_frozen_auth_pin())) { dout(15) << "passing waiter up tree" << dendl; parent->dir->add_waiter(tag, c); return; @@ -1885,8 +1887,10 @@ bool CInode::freeze_inode(int auth_pin_allowance) dout(10) << "freeze_inode - frozen" << dendl; assert(auth_pins == auth_pin_allowance); - get(PIN_FROZEN); - state_set(STATE_FROZEN); + if (!state_test(STATE_FROZEN)) { + get(PIN_FROZEN); + state_set(STATE_FROZEN); + } return true; } @@ -1904,10 +1908,34 @@ void CInode::unfreeze_inode(list<Context*>& finished) take_waiting(WAIT_UNFREEZE, finished); } +void CInode::unfreeze_inode() +{ + list<Context*> finished; + unfreeze_inode(finished); + mdcache->mds->queue_waiters(finished); +} + +void CInode::freeze_auth_pin() +{ + assert(state_test(CInode::STATE_FROZEN)); + state_set(CInode::STATE_FROZENAUTHPIN); +} + +void CInode::unfreeze_auth_pin() +{ + assert(state_test(CInode::STATE_FROZENAUTHPIN)); + state_clear(CInode::STATE_FROZENAUTHPIN); + if (!state_test(STATE_FREEZING|STATE_FROZEN)) { + list<Context*> finished; + take_waiting(WAIT_UNFREEZE, finished); + 
mdcache->mds->queue_waiters(finished); + } +} // auth_pins bool CInode::can_auth_pin() { - if (is_freezing_inode() || is_frozen_inode()) return false; + if (is_freezing_inode() || is_frozen_inode() || is_frozen_auth_pin()) + return false; if (parent) return parent->can_auth_pin(); return true; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index b76b52414c9..e43ecf50fa3 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -181,6 +181,7 @@ public: static const int STATE_DIRTYPARENT = (1<<14); static const int STATE_DIRTYRSTAT = (1<<15); static const int STATE_STRAYPINNED = (1<<16); + static const int STATE_FROZENAUTHPIN = (1<<17); // -- waiters -- static const uint64_t WAIT_DIR = (1<<0); @@ -856,6 +857,7 @@ public: // -- freeze -- bool is_freezing_inode() { return state_test(STATE_FREEZING); } bool is_frozen_inode() { return state_test(STATE_FROZEN); } + bool is_frozen_auth_pin() { return state_test(STATE_FROZENAUTHPIN); } bool is_frozen(); bool is_frozen_dir(); bool is_freezing(); @@ -864,7 +866,10 @@ public: * auth_pins it is itself holding/responsible for. */ bool freeze_inode(int auth_pin_allowance=0); void unfreeze_inode(list<Context*>& finished); + void unfreeze_inode(); + void freeze_auth_pin(); + void unfreeze_auth_pin(); // -- reference counting -- void bad_put(int by) { diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 63f83116fe1..ee4799e18f8 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -174,7 +174,8 @@ bool Locker::acquire_locks(MDRequest *mdr, set<SimpleLock*> &rdlocks, set<SimpleLock*> &wrlocks, set<SimpleLock*> &xlocks, - map<SimpleLock*,int> *remote_wrlocks) + map<SimpleLock*,int> *remote_wrlocks, + CInode *auth_pin_freeze) { if (mdr->done_locking && !mdr->is_slave()) { // not on slaves! master requests locks piecemeal. 
@@ -336,7 +337,9 @@ bool Locker::acquire_locks(MDRequest *mdr, dout(10) << " req remote auth_pin of " << **q << dendl; MDSCacheObjectInfo info; (*q)->set_object_info(info); - req->get_authpins().push_back(info); + req->get_authpins().push_back(info); + if (*q == auth_pin_freeze) + (*q)->set_object_info(req->get_authpin_freeze()); mdr->pin(*q); } mds->send_message_mds(req, p->first); diff --git a/src/mds/Locker.h b/src/mds/Locker.h index a1cf59e3185..b3b9919e7fd 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -88,7 +88,8 @@ public: set<SimpleLock*> &rdlocks, set<SimpleLock*> &wrlocks, set<SimpleLock*> &xlocks, - map<SimpleLock*,int> *remote_wrlocks=NULL); + map<SimpleLock*,int> *remote_wrlocks=NULL, + CInode *auth_pin_freeze=NULL); void cancel_locking(Mutation *mut, set<CInode*> *pneed_issue); void drop_locks(Mutation *mut, set<CInode*> *pneed_issue=0); diff --git a/src/mds/Mutation.cc b/src/mds/Mutation.cc index 6321ffc160a..a9c35134bc8 100644 --- a/src/mds/Mutation.cc +++ b/src/mds/Mutation.cc @@ -82,8 +82,39 @@ void Mutation::auth_unpin(MDSCacheObject *object) auth_pins.erase(object); } +bool Mutation::freeze_auth_pin(CInode *inode) +{ + assert(!auth_pin_freeze || auth_pin_freeze == inode); + auth_pin_freeze = inode; + auth_pin(inode); + if (!inode->freeze_inode(1)) + return false; + + inode->freeze_auth_pin(); + inode->unfreeze_inode(); + return true; +} + +void Mutation::unfreeze_auth_pin(CInode *inode) +{ + assert(auth_pin_freeze == inode); + assert(is_auth_pinned(inode)); + if (inode->is_frozen_auth_pin()) + inode->unfreeze_auth_pin(); + else + inode->unfreeze_inode(); + auth_pin_freeze = NULL; +} + +bool Mutation::can_auth_pin(MDSCacheObject *object) +{ + return object->can_auth_pin() || (is_auth_pinned(object) && object == auth_pin_freeze); +} + void Mutation::drop_local_auth_pins() { + if (auth_pin_freeze) + unfreeze_auth_pin(auth_pin_freeze); for (set<MDSCacheObject*>::iterator it = auth_pins.begin(); it != auth_pins.end(); it++) { diff --git 
a/src/mds/Mutation.h b/src/mds/Mutation.h index cba6223864e..37cc764254d 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -50,6 +50,7 @@ struct Mutation { // auth pins set< MDSCacheObject* > remote_auth_pins; set< MDSCacheObject* > auth_pins; + CInode *auth_pin_freeze; // held locks set< SimpleLock* > rdlocks; // always local. @@ -81,12 +82,14 @@ struct Mutation { : attempt(0), ls(0), slave_to_mds(-1), + auth_pin_freeze(NULL), locking(NULL), done_locking(false), committing(false), aborted(false), killed(false) { } Mutation(metareqid_t ri, __u32 att=0, int slave_to=-1) : reqid(ri), attempt(att), ls(0), slave_to_mds(slave_to), + auth_pin_freeze(NULL), locking(NULL), done_locking(false), committing(false), aborted(false), killed(false) { } virtual ~Mutation() { @@ -120,6 +123,9 @@ struct Mutation { bool is_auth_pinned(MDSCacheObject *object); void auth_pin(MDSCacheObject *object); void auth_unpin(MDSCacheObject *object); + bool freeze_auth_pin(CInode *inode); + void unfreeze_auth_pin(CInode *inode); + bool can_auth_pin(MDSCacheObject *object); void drop_local_auth_pins(); void add_projected_inode(CInode *in); void pop_and_dirty_projected_inodes(); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 1545fef73c5..72fd7da2305 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1487,6 +1487,7 @@ void Server::handle_slave_auth_pin(MDRequest *mdr) // build list of objects list<MDSCacheObject*> objects; + CInode *auth_pin_freeze = NULL; bool fail = false; for (vector<MDSCacheObjectInfo>::iterator p = mdr->slave_request->get_authpins().begin(); @@ -1500,6 +1501,8 @@ void Server::handle_slave_auth_pin(MDRequest *mdr) } objects.push_back(object); + if (*p == mdr->slave_request->get_authpin_freeze()) + auth_pin_freeze = dynamic_cast<CInode*>(object); } // can we auth pin them? 
@@ -1512,8 +1515,7 @@ void Server::handle_slave_auth_pin(MDRequest *mdr) fail = true; break; } - if (!mdr->is_auth_pinned(*p) && - !(*p)->can_auth_pin()) { + if (!mdr->can_auth_pin(*p)) { // wait dout(10) << " waiting for authpinnable on " << **p << dendl; (*p)->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(mdcache, mdr)); @@ -1527,6 +1529,22 @@ void Server::handle_slave_auth_pin(MDRequest *mdr) if (fail) { mdr->drop_local_auth_pins(); // just in case } else { + /* handle_slave_rename_prep() calls freeze_inode() to wait for all other operations + * on the source inode to complete. This happens after all locks for the rename + * operation are acquired. But to acquire locks, we first need to auth pin the locks' + * parent objects. So there is an ABBA deadlock if someone auth pins the source inode + * after locks are acquired and before Server::handle_slave_rename_prep() is called. + * The solution is to freeze the inode and prevent other MDRequests from getting new + * auth pins. + */ + if (auth_pin_freeze) { + dout(10) << " freezing auth pin on " << *auth_pin_freeze << dendl; + if (!mdr->freeze_auth_pin(auth_pin_freeze)) { + auth_pin_freeze->add_waiter(CInode::WAIT_FROZEN, new C_MDS_RetryRequest(mdcache, mdr)); + mds->mdlog->flush(); + return; + } + } for (list<MDSCacheObject*>::iterator p = objects.begin(); p != objects.end(); ++p) { @@ -1923,7 +1941,8 @@ CInode* Server::rdlock_path_pin_ref(MDRequest *mdr, int n, // do NOT proceed if freezing, as cap release may defer in that case, and // we could deadlock when we try to lock @ref. // if we're already auth_pinned, continue; the release has already been processed. 
- if (ref->is_frozen() || (ref->is_freezing() && !mdr->is_auth_pinned(ref))) { + if (ref->is_frozen() || ref->is_frozen_auth_pin() || + (ref->is_freezing() && !mdr->is_auth_pinned(ref))) { dout(7) << "waiting for !frozen/authpinnable on " << *ref << dendl; ref->add_waiter(CInode::WAIT_UNFREEZE, new C_MDS_RetryRequest(mdcache, mdr)); /* If we have any auth pins, this will deadlock. @@ -5246,7 +5265,9 @@ void Server::handle_client_rename(MDRequest *mdr) // take any locks needed for anchor creation/verification mds->mdcache->anchor_create_prep_locks(mdr, srci, rdlocks, xlocks); - if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks, &remote_wrlocks)) + CInode *auth_pin_freeze = !srcdn->is_auth() && srcdnl->is_primary() ? srci : NULL; + if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks, + &remote_wrlocks, auth_pin_freeze)) return; if (oldin && @@ -5996,9 +6017,7 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) // am i srcdn auth? if (srcdn->is_auth()) { - if (srcdnl->is_primary() && - !srcdnl->get_inode()->is_freezing_inode() && - !srcdnl->get_inode()->is_frozen_inode()) { + if (srcdnl->is_primary()) { // set ambiguous auth for srci /* * NOTE: we don't worry about ambiguous cache expire as we do @@ -6015,7 +6034,13 @@ void Server::handle_slave_rename_prep(MDRequest *mdr) int allowance = 2; // 1 for the mdr auth_pin, 1 for the link lock allowance += srcdnl->get_inode()->is_dir(); // for the snap lock dout(10) << " freezing srci " << *srcdnl->get_inode() << " with allowance " << allowance << dendl; - if (!srcdnl->get_inode()->freeze_inode(allowance)) { + bool frozen_inode = srcdnl->get_inode()->freeze_inode(allowance); + + // unfreeze auth pin after freezing the inode to avoid queueing waiters + if (srcdnl->get_inode()->is_frozen_auth_pin()) + mdr->unfreeze_auth_pin(srcdnl->get_inode()); + + if (!frozen_inode) { srcdnl->get_inode()->add_waiter(CInode::WAIT_FROZEN, new C_MDS_RetryRequest(mdcache, mdr)); return; } @@ -6183,8 +6208,7 @@ void 
Server::_commit_slave_rename(MDRequest *mdr, int r, destdnl->get_inode()->take_waiting(CInode::WAIT_SINGLEAUTH, finished); // unfreeze - assert(destdnl->get_inode()->is_frozen_inode() || - destdnl->get_inode()->is_freezing_inode()); + assert(destdnl->get_inode()->is_frozen_inode()); destdnl->get_inode()->unfreeze_inode(finished); mds->queue_waiters(finished); @@ -6207,8 +6231,7 @@ void Server::_commit_slave_rename(MDRequest *mdr, int r, destdnl->get_inode()->take_waiting(CInode::WAIT_SINGLEAUTH, finished); // unfreeze - assert(destdnl->get_inode()->is_frozen_inode() || - destdnl->get_inode()->is_freezing_inode()); + assert(destdnl->get_inode()->is_frozen_inode()); destdnl->get_inode()->unfreeze_inode(finished); mds->queue_waiters(finished); diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index db4dbf1ac61..22e754eb2a1 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -1250,6 +1250,13 @@ public: } }; +inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) { + if (l.ino || r.ino) + return l.ino == r.ino && l.snapid == r.snapid; + else + return l.dirfrag == r.dirfrag && l.dname == r.dname; +} + WRITE_CLASS_ENCODER(MDSCacheObjectInfo) diff --git a/src/messages/MMDSSlaveRequest.h b/src/messages/MMDSSlaveRequest.h index 4f2bb5948bd..03ec582c49e 100644 --- a/src/messages/MMDSSlaveRequest.h +++ b/src/messages/MMDSSlaveRequest.h @@ -112,6 +112,7 @@ public: int get_lock_type() { return lock_type; } MDSCacheObjectInfo &get_object_info() { return object_info; } + MDSCacheObjectInfo &get_authpin_freeze() { return object_info; } vector<MDSCacheObjectInfo>& get_authpins() { return authpins; } |