diff options
-rw-r--r-- | src/mds/MDCache.cc | 99 | ||||
-rw-r--r-- | src/mds/MDCache.h | 17 | ||||
-rw-r--r-- | src/mds/Mutation.h | 5 | ||||
-rw-r--r-- | src/mds/Server.cc | 8 | ||||
-rw-r--r-- | src/mds/events/EMetaBlob.h | 3 | ||||
-rw-r--r-- | src/mds/journal.cc | 53 |
6 files changed, 124 insertions, 61 deletions
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index adcf8c1ef28..97adb273750 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2867,19 +2867,16 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack) if (mds->is_resolve()) { // replay - assert(uncommitted_slave_updates[from].count(*p)); + MDSlaveUpdate *su = get_uncommitted_slave_update(*p, from); + assert(su); + // log commit mds->mdlog->start_submit_entry(new ESlaveUpdate(mds->mdlog, "unknown", *p, from, - ESlaveUpdate::OP_COMMIT, - uncommitted_slave_updates[from][*p]->origop)); - - delete uncommitted_slave_updates[from][*p]; - uncommitted_slave_updates[from].erase(*p); - if (uncommitted_slave_updates[from].empty()) - uncommitted_slave_updates.erase(from); - + ESlaveUpdate::OP_COMMIT, su->origop)); mds->mdlog->wait_for_safe(new C_MDC_SlaveCommit(this, from, *p)); mds->mdlog->flush(); + + finish_uncommitted_slave_update(*p, from); } else { MDRequest *mdr = request_get(*p); assert(mdr->slave_request == 0); // shouldn't be doing anything! @@ -2893,28 +2890,24 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack) dout(10) << " abort on slave " << *p << dendl; if (mds->is_resolve()) { - assert(uncommitted_slave_updates[from].count(*p)); + MDSlaveUpdate *su = get_uncommitted_slave_update(*p, from); + assert(su); // perform rollback (and journal a rollback entry) // note: this will hold up the resolve a bit, until the rollback entries journal. - switch (uncommitted_slave_updates[from][*p]->origop) { + switch (su->origop) { case ESlaveUpdate::LINK: - mds->server->do_link_rollback(uncommitted_slave_updates[from][*p]->rollback, from, 0); + mds->server->do_link_rollback(su->rollback, from, 0); break; case ESlaveUpdate::RENAME: - mds->server->do_rename_rollback(uncommitted_slave_updates[from][*p]->rollback, from, 0); + mds->server->do_rename_rollback(su->rollback, from, 0); break; case ESlaveUpdate::RMDIR: - mds->server->do_rmdir_rollback(uncommitted_slave_updates[from][*p]->rollback, from, 0); + mds->server->do_rmdir_rollback(su->rollback, from, 0); break; default: assert(0); } - - delete uncommitted_slave_updates[from][*p]; - uncommitted_slave_updates[from].erase(*p); - if (uncommitted_slave_updates[from].empty()) - uncommitted_slave_updates.erase(from); } else { MDRequest *mdr = request_get(*p); if (mdr->more()->slave_commit) { @@ -2939,7 +2932,67 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack) ack->put(); } +void MDCache::add_uncommitted_slave_update(metareqid_t reqid, int master, MDSlaveUpdate *su) +{ + assert(uncommitted_slave_updates[master].count(reqid) == 0); + uncommitted_slave_updates[master][reqid] = su; + if (su->rename_olddir) + uncommitted_slave_rename_olddir[su->rename_olddir]++; + for(set<CInode*>::iterator p = su->unlinked.begin(); p != su->unlinked.end(); p++) + uncommitted_slave_unlink[*p]++; +} +void MDCache::finish_uncommitted_slave_update(metareqid_t reqid, int master) +{ + assert(uncommitted_slave_updates[master].count(reqid)); + MDSlaveUpdate* su = uncommitted_slave_updates[master][reqid]; + + uncommitted_slave_updates[master].erase(reqid); + if (uncommitted_slave_updates[master].empty()) + uncommitted_slave_updates.erase(master); + // discard the non-auth subtree we renamed out of + if (su->rename_olddir) { + uncommitted_slave_rename_olddir[su->rename_olddir]--; + if (uncommitted_slave_rename_olddir[su->rename_olddir] == 0) { + uncommitted_slave_rename_olddir.erase(su->rename_olddir); + CDir *root = get_subtree_root(su->rename_olddir); + if (root->get_dir_auth() == CDIR_AUTH_UNDEF) + try_trim_non_auth_subtree(root); + } + } + // removed the inodes that were unlinked by slave update + for(set<CInode*>::iterator p = su->unlinked.begin(); p != su->unlinked.end(); p++) { + CInode *in = *p; + uncommitted_slave_unlink[in]--; + if (uncommitted_slave_unlink[in] == 0) { + uncommitted_slave_unlink.erase(in); + if (!in->get_projected_parent_dn()) + mds->mdcache->remove_inode_recursive(in); + } + } + delete su; +} + +MDSlaveUpdate* MDCache::get_uncommitted_slave_update(metareqid_t reqid, int master) +{ + + MDSlaveUpdate* su = NULL; + if (uncommitted_slave_updates.count(master) && + uncommitted_slave_updates[master].count(reqid)) { + su = uncommitted_slave_updates[master][reqid]; + assert(su); + } + return su; +} + +void MDCache::finish_rollback(metareqid_t reqid) { + assert(need_resolve_rollback.count(reqid)); + if (mds->is_resolve()) + finish_uncommitted_slave_update(reqid, need_resolve_rollback[reqid]); + need_resolve_rollback.erase(reqid); + if (need_resolve_rollback.empty()) + maybe_resolve_finish(); +} void MDCache::disambiguate_imports() { @@ -5788,6 +5841,10 @@ bool MDCache::trim_non_auth_subtree(CDir *dir) { dout(10) << "trim_non_auth_subtree(" << dir << ") " << *dir << dendl; + // preserve the dir for rollback + if (uncommitted_slave_rename_olddir.count(dir)) + return true; + bool keep_dir = false; CDir::map_t::iterator j = dir->begin(); CDir::map_t::iterator i = j; @@ -5805,7 +5862,9 @@ bool MDCache::trim_non_auth_subtree(CDir *dir) for (list<CDir*>::iterator subdir = subdirs.begin(); subdir != subdirs.end(); ++subdir) { - if ((*subdir)->is_subtree_root() || my_ambiguous_imports.count((*subdir)->dirfrag())) { + if (uncommitted_slave_rename_olddir.count(*subdir) || // preserve the dir for rollback + my_ambiguous_imports.count((*subdir)->dirfrag()) || + (*subdir)->is_subtree_root()) { keep_inode = true; dout(10) << "trim_non_auth_subtree(" << dir << ") subdir " << *subdir << "is kept!" << dendl; } diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 31c7467bf41..dffc6ba1831 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -312,6 +312,8 @@ protected: map<int, map<dirfrag_t, vector<dirfrag_t> > > other_ambiguous_imports; map<int, map<metareqid_t, MDSlaveUpdate*> > uncommitted_slave_updates; // slave: for replay. + map<CDir*, int> uncommitted_slave_rename_olddir; // slave: preserve the non-auth dir until seeing commit. + map<CInode*, int> uncommitted_slave_unlink; // slave: preserve the unlinked inode until seeing commit. // track master requests whose slaves haven't acknowledged commit struct umaster { @@ -329,7 +331,7 @@ protected: set<int> wants_resolve; // nodes i need to send my resolve to set<int> got_resolve; // nodes i got resolves from set<int> need_resolve_ack; // nodes i need a resolve_ack from - set<metareqid_t> need_resolve_rollback; // rollbacks i'm writing to the journal + map<metareqid_t, int> need_resolve_rollback; // rollbacks i'm writing to the journal void handle_resolve(MMDSResolve *m); void handle_resolve_ack(MMDSResolveAck *m); @@ -337,17 +339,16 @@ protected: void disambiguate_imports(); void recalc_auth_bits(); void trim_unlinked_inodes(); + void add_uncommitted_slave_update(metareqid_t reqid, int master, MDSlaveUpdate*); + void finish_uncommitted_slave_update(metareqid_t reqid, int master); + MDSlaveUpdate* get_uncommitted_slave_update(metareqid_t reqid, int master); public: void remove_inode_recursive(CInode *in); - void add_rollback(metareqid_t reqid) { - need_resolve_rollback.insert(reqid); - } - void finish_rollback(metareqid_t reqid) { - need_resolve_rollback.erase(reqid); - if (need_resolve_rollback.empty()) - maybe_resolve_finish(); + void add_rollback(metareqid_t reqid, int master) { + need_resolve_rollback[reqid] = master; } + void finish_rollback(metareqid_t reqid); // ambiguous imports void add_ambiguous_import(dirfrag_t base, const vector<dirfrag_t>& bounds); diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index d0d3ecabf8c..36d62a74bf0 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -298,10 +298,13 @@ struct MDSlaveUpdate { bufferlist rollback; elist<MDSlaveUpdate*>::item item; Context *waiter; + CDir* rename_olddir; + set<CInode*> unlinked; MDSlaveUpdate(int oo, bufferlist &rbl, elist<MDSlaveUpdate*> &list) : origop(oo), item(this), - waiter(0) { + waiter(0), + rename_olddir(0) { rollback.claim(rbl); list.push_back(&item); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index ee1547ff067..cce4f8c7293 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4378,12 +4378,11 @@ void Server::do_link_rollback(bufferlist &rbl, int master, MDRequest *mdr) Mutation *mut = mdr; if (!mut) { assert(mds->is_resolve()); - mds->mdcache->add_rollback(rollback.reqid); // need to finish this update before resolve finishes + mds->mdcache->add_rollback(rollback.reqid, master); // need to finish this update before resolve finishes mut = new Mutation(rollback.reqid); mut->ls = mds->mdlog->get_current_segment(); } - CInode *in = mds->mdcache->get_inode(rollback.ino); assert(in); dout(10) << " target is " << *in << dendl; @@ -4976,7 +4975,7 @@ void Server::do_rmdir_rollback(bufferlist &rbl, int master, MDRequest *mdr) dout(10) << "do_rmdir_rollback on " << rollback.reqid << dendl; if (!mdr) { assert(mds->is_resolve()); - mds->mdcache->add_rollback(rollback.reqid); // need to finish this update before resolve finishes + mds->mdcache->add_rollback(rollback.reqid, master); // need to finish this update before resolve finishes } CDir *dir = mds->mdcache->get_dirfrag(rollback.src_dir); @@ -6480,8 +6479,9 @@ void Server::do_rename_rollback(bufferlist &rbl, int master, MDRequest *mdr) dout(10) << "do_rename_rollback on " << rollback.reqid << dendl; if (!mdr) { assert(mds->is_resolve()); - mds->mdcache->add_rollback(rollback.reqid); // need to finish this update before resolve finishes + mds->mdcache->add_rollback(rollback.reqid, master); // need to finish this update before resolve finishes } + Mutation *mut = new Mutation(rollback.reqid); mut->ls = mds->mdlog->get_current_segment(); diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 9bbd615e31d..77ceb9458ba 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -27,6 +27,7 @@ class MDS; class MDLog; class LogSegment; +class MDSlaveUpdate; /* * a bunch of metadata in the journal @@ -674,7 +675,7 @@ private: } void update_segment(LogSegment *ls); - void replay(MDS *mds, LogSegment *ls=0); + void replay(MDS *mds, LogSegment *ls, MDSlaveUpdate *su=NULL); }; WRITE_CLASS_ENCODER(EMetaBlob) WRITE_CLASS_ENCODER(EMetaBlob::fullbit) diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 72a5e5e4ad9..6e22c5f1914 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -415,7 +415,7 @@ void EMetaBlob::fullbit::update_inode(MDS *mds, CInode *in) in->old_inodes = old_inodes; } -void EMetaBlob::replay(MDS *mds, LogSegment *logseg) +void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) { dout(10) << "EMetaBlob.replay " << lump_map.size() << " dirlumps by " << client_name << dendl; @@ -676,8 +676,12 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg) // see if we can discard the subtree we renamed out of CDir *root = mds->mdcache->get_subtree_root(olddir); - if (root->get_dir_auth() == CDIR_AUTH_UNDEF) - mds->mdcache->try_trim_non_auth_subtree(root); + if (root->get_dir_auth() == CDIR_AUTH_UNDEF) { + if (slaveup) // preserve the old dir until slave commit + slaveup->rename_olddir = olddir; + else + mds->mdcache->try_trim_non_auth_subtree(root); + } } // if we are the srci importer, we'll also have some dirfrags we have to open up... @@ -710,8 +714,12 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg) for (set<CInode*>::iterator p = linked.begin(); p != linked.end(); p++) unlinked.erase(*p); dout(10) << " unlinked set contains " << unlinked << dendl; - for (map<CInode*, CDir*>::iterator p = unlinked.begin(); p != unlinked.end(); ++p) - mds->mdcache->remove_inode_recursive(p->first); + for (map<CInode*, CDir*>::iterator p = unlinked.begin(); p != unlinked.end(); ++p) { + if (slaveup) // preserve unlinked inodes until slave commit + slaveup->unlinked.insert(p->first); + else + mds->mdcache->remove_inode_recursive(p->first); + } } // table client transactions @@ -1107,23 +1115,21 @@ void ECommitted::replay(MDS *mds) void ESlaveUpdate::replay(MDS *mds) { + MDSlaveUpdate *su; switch (op) { case ESlaveUpdate::OP_PREPARE: dout(10) << "ESlaveUpdate.replay prepare " << reqid << " for mds." << master << ": applying commit, saving rollback info" << dendl; - assert(mds->mdcache->uncommitted_slave_updates[master].count(reqid) == 0); - commit.replay(mds, _segment); - mds->mdcache->uncommitted_slave_updates[master][reqid] = - new MDSlaveUpdate(origop, rollback, _segment->slave_updates); + su = new MDSlaveUpdate(origop, rollback, _segment->slave_updates); + commit.replay(mds, _segment, su); + mds->mdcache->add_uncommitted_slave_update(reqid, master, su); break; case ESlaveUpdate::OP_COMMIT: - if (mds->mdcache->uncommitted_slave_updates[master].count(reqid)) { + su = mds->mdcache->get_uncommitted_slave_update(reqid, master); + if (su) { dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds." << master << dendl; - delete mds->mdcache->uncommitted_slave_updates[master][reqid]; - mds->mdcache->uncommitted_slave_updates[master].erase(reqid); - if (mds->mdcache->uncommitted_slave_updates[master].empty()) - mds->mdcache->uncommitted_slave_updates.erase(master); + mds->mdcache->finish_uncommitted_slave_update(reqid, master); } else { dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds." << master << ": ignoring, no previously saved prepare" << dendl; @@ -1131,19 +1137,12 @@ void ESlaveUpdate::replay(MDS *mds) break; case ESlaveUpdate::OP_ROLLBACK: - if (mds->mdcache->uncommitted_slave_updates[master].count(reqid)) { - dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds." << master - << ": applying rollback commit blob" << dendl; - assert(mds->mdcache->uncommitted_slave_updates[master].count(reqid)); - commit.replay(mds, _segment); - delete mds->mdcache->uncommitted_slave_updates[master][reqid]; - mds->mdcache->uncommitted_slave_updates[master].erase(reqid); - if (mds->mdcache->uncommitted_slave_updates[master].empty()) - mds->mdcache->uncommitted_slave_updates.erase(master); - } else { - dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds." << master - << ": ignoring, no previously saved prepare" << dendl; - } + dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds." << master + << ": applying rollback commit blob" << dendl; + su = mds->mdcache->get_uncommitted_slave_update(reqid, master); + if (su) + mds->mdcache->finish_uncommitted_slave_update(reqid, master); + commit.replay(mds, _segment); break; default: |