diff options
-rw-r--r-- | src/mds/MDCache.cc | 120 | ||||
-rw-r--r-- | src/mds/MDCache.h | 31 | ||||
-rw-r--r-- | src/mds/journal.cc | 8 |
3 files changed, 123 insertions, 36 deletions
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index c15aeaa7fcd..9f97f31fcbf 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -11041,13 +11041,23 @@ void MDCache::fragment_unmark_unfreeze_dirs(list<CDir*>& dirs) } } -class C_MDC_FragmentLoggedAndStored : public Context { +class C_MDC_FragmentPrep : public Context { MDCache *mdcache; MDRequest *mdr; public: - C_MDC_FragmentLoggedAndStored(MDCache *m, MDRequest *r) : mdcache(m), mdr(r) {} + C_MDC_FragmentPrep(MDCache *m, MDRequest *r) : mdcache(m), mdr(r) {} virtual void finish(int r) { - mdcache->fragment_logged_and_stored(mdr); + mdcache->_fragment_logged(mdr); + } +}; + +class C_MDC_FragmentStore : public Context { + MDCache *mdcache; + MDRequest *mdr; +public: + C_MDC_FragmentStore(MDCache *m, MDRequest *r) : mdcache(m), mdr(r) {} + virtual void finish(int r) { + mdcache->_fragment_stored(mdr); } }; @@ -11065,6 +11075,20 @@ public: } }; +class C_MDC_FragmentFinish : public Context { + MDCache *mdcache; + dirfrag_t basedirfrag; + list<CDir*> resultfrags; +public: + C_MDC_FragmentFinish(MDCache *m, dirfrag_t f, list<CDir*>& l) : + mdcache(m), basedirfrag(f) { + resultfrags.swap(l); + } + virtual void finish(int r) { + mdcache->_fragment_finish(basedirfrag, resultfrags); + } +}; + void MDCache::fragment_frozen(list<CDir*>& dirs, frag_t basefrag, int bits) { dout(10) << "fragment_frozen " << dirs << " " << basefrag << " by " << bits @@ -11144,6 +11168,11 @@ void MDCache::dispatch_fragment_dir(MDRequest *mdr) assert(!diri->dirfragtree.is_leaf(*p)); le->metablob.add_dir_context(*info.resultfrags.begin()); + for (list<CDir*>::iterator p = info.resultfrags.begin(); + p != info.resultfrags.end(); + ++p) { + le->metablob.add_dir(*p, false); + } // dft lock mds->locker->mark_updated_scatterlock(&diri->dirfragtreelock); @@ -11162,15 +11191,28 @@ void MDCache::dispatch_fragment_dir(MDRequest *mdr) mut->add_updated_lock(&diri->nestlock); */ - // freeze, journal, and store resulting frags - C_GatherBuilder gather(g_ceph_context, new C_MDC_FragmentLoggedAndStored(this, mdr)); + add_uncommitted_fragment(dirfrag_t(diri->ino(), info.basefrag), info.bits, le->orig_frags); + mds->mdlog->submit_entry(le, new C_MDC_FragmentPrep(this, mdr)); + mds->mdlog->flush(); +} + +void MDCache::_fragment_logged(MDRequest *mdr) +{ + assert(fragment_requests.count(mdr->reqid)); + fragment_info_t &info = fragment_requests[mdr->reqid]; + CInode *diri = info.resultfrags.front()->get_inode(); + + dout(10) << "fragment_logged " << info.resultfrags << " " << info.basefrag + << " bits " << info.bits << " on " << *diri << dendl; + + // store resulting frags + C_GatherBuilder gather(g_ceph_context, new C_MDC_FragmentStore(this, mdr)); for (list<CDir*>::iterator p = info.resultfrags.begin(); p != info.resultfrags.end(); ++p) { CDir *dir = *p; - dout(10) << " result frag " << *dir << dendl; - le->metablob.add_dir(dir, false); + dout(10) << " storing result frag " << *dir << dendl; // freeze and store them too dir->auth_pin(this); @@ -11178,19 +11220,16 @@ void MDCache::dispatch_fragment_dir(MDRequest *mdr) dir->commit(0, gather.new_sub(), true); // ignore authpinnability } - add_uncommitted_fragment(dirfrag_t(diri->ino(), info.basefrag), info.bits, le->orig_frags); - mds->mdlog->submit_entry(le, gather.new_sub()); - mds->mdlog->flush(); gather.activate(); } -void MDCache::fragment_logged_and_stored(MDRequest *mdr) +void MDCache::_fragment_stored(MDRequest *mdr) { assert(fragment_requests.count(mdr->reqid)); fragment_info_t &info = fragment_requests[mdr->reqid]; CInode *diri = info.resultfrags.front()->get_inode(); - dout(10) << "fragment_logged_and_stored " << info.resultfrags << " " << info.basefrag + dout(10) << "fragment_stored " << info.resultfrags << " " << info.basefrag << " bits " << info.bits << " on " << *diri << dendl; // tell peers @@ -11280,6 +11319,7 @@ void MDCache::_fragment_finish(dirfrag_t basedirfrag, list<CDir*>& resultfrags) { dout(10) << "fragment_finish " << basedirfrag << dendl; assert(uncommitted_fragments.count(basedirfrag)); + ufragment &uf = uncommitted_fragments[basedirfrag]; // unmark & auth_unpin for (list<CDir*>::iterator p = resultfrags.begin(); p != resultfrags.end(); ++p) { @@ -11287,7 +11327,11 @@ void MDCache::_fragment_finish(dirfrag_t basedirfrag, list<CDir*>& resultfrags) (*p)->auth_unpin(this); } - finish_uncommitted_fragment(basedirfrag); + EFragment *le = new EFragment(mds->mdlog, EFragment::OP_FINISH, + basedirfrag.ino, basedirfrag.frag, uf.bits); + mds->mdlog->start_submit_entry(le); + + finish_uncommitted_fragment(basedirfrag, EFragment::OP_FINISH); } /* This function DOES put the passed message before returning */ @@ -11346,11 +11390,32 @@ void MDCache::add_uncommitted_fragment(dirfrag_t basedirfrag, int bits, list<fra uf.rollback.swap(*rollback); } -void MDCache::finish_uncommitted_fragment(dirfrag_t basedirfrag) +void MDCache::finish_uncommitted_fragment(dirfrag_t basedirfrag, int op) { - dout(10) << "finish_uncommitted_fragments: base dirfrag " << basedirfrag << dendl; + dout(10) << "finish_uncommitted_fragments: base dirfrag " << basedirfrag + << " op " << EFragment::op_name(op) << dendl; if (uncommitted_fragments.count(basedirfrag)) { - uncommitted_fragments.erase(basedirfrag); + ufragment& uf = uncommitted_fragments[basedirfrag]; + if (op != EFragment::OP_FINISH && !uf.old_frags.empty()) { + uf.committed = true; + } else { + uncommitted_fragments.erase(basedirfrag); + } + } +} + +void MDCache::rollback_uncommitted_fragment(dirfrag_t basedirfrag, list<frag_t>& old_frags) +{ + dout(10) << "rollback_uncommitted_fragment: base dirfrag " << basedirfrag + << " old_frags (" << old_frags << ")" << dendl; + if (uncommitted_fragments.count(basedirfrag)) { + ufragment& uf = uncommitted_fragments[basedirfrag]; + if (!uf.old_frags.empty()) { + uf.old_frags.swap(old_frags); + uf.committed = true; + } else { + uncommitted_fragments.erase(basedirfrag); + } } } @@ -11363,12 +11428,28 @@ void MDCache::rollback_uncommitted_fragments() ufragment &uf = p->second; CInode *diri = get_inode(p->first.ino); assert(diri); + + if (uf.committed) { + list<CDir*> frags; + diri->get_dirfrags_under(p->first.frag, frags); + for (list<CDir*>::iterator q = frags.begin(); q != frags.end(); ++q) { + CDir *dir = *q; + dir->auth_pin(this); + dir->state_set(CDir::STATE_FRAGMENTING); + } + _fragment_committed(p->first, frags); + continue; + } + dout(10) << " rolling back " << p->first << " refragment by " << uf.bits << " bits" << dendl; LogSegment *ls = mds->mdlog->get_current_segment(); EFragment *le = new EFragment(mds->mdlog, EFragment::OP_ROLLBACK, diri->ino(), p->first.frag, uf.bits); mds->mdlog->start_entry(le); + list<frag_t> old_frags; + diri->dirfragtree.get_leaves_under(p->first.frag, old_frags); + list<CDir*> resultfrags; if (uf.old_frags.empty()) { // created by old format EFragment @@ -11410,6 +11491,9 @@ void MDCache::rollback_uncommitted_fragments() if (g_conf->mds_debug_frag) diri->verify_dirfrags(); + for (list<frag_t>::iterator q = old_frags.begin(); q != old_frags.end(); ++q) + assert(!diri->dirfragtree.is_leaf(*q)); + for (list<CDir*>::iterator q = resultfrags.begin(); q != resultfrags.end(); ++q) { CDir *dir = *q; dir->auth_pin(this); @@ -11417,8 +11501,10 @@ void MDCache::rollback_uncommitted_fragments() } mds->mdlog->submit_entry(le); + + uf.old_frags.swap(old_frags); + _fragment_committed(p->first, resultfrags); } - uncommitted_fragments.clear(); } diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 170adf78a3e..b9e7cfa823f 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -945,9 +945,10 @@ protected: private: struct ufragment { int bits; + bool committed; list<frag_t> old_frags; bufferlist rollback; - ufragment() : bits(0) {} + ufragment() : bits(0), committed(false) {} }; map<dirfrag_t, ufragment> uncommitted_fragments; @@ -970,39 +971,35 @@ private: CDir *force_dir_fragment(CInode *diri, frag_t fg); void get_force_dirfrag_bound_set(vector<dirfrag_t>& dfs, set<CDir*>& bounds); - - friend class EFragment; - bool can_fragment(CInode *diri, list<CDir*>& dirs); - -public: - void split_dir(CDir *dir, int byn); - void merge_dir(CInode *diri, frag_t fg); - -private: void fragment_freeze_dirs(list<CDir*>& dirs, C_GatherBuilder &gather); void fragment_mark_and_complete(list<CDir*>& dirs); void fragment_frozen(list<CDir*>& dirs, frag_t basefrag, int bits); void fragment_unmark_unfreeze_dirs(list<CDir*>& dirs); void dispatch_fragment_dir(MDRequest *mdr); - void fragment_logged_and_stored(MDRequest *mdr); + void _fragment_logged(MDRequest *mdr); + void _fragment_stored(MDRequest *mdr); void _fragment_committed(dirfrag_t f, list<CDir*>& resultfrags); void _fragment_finish(dirfrag_t f, list<CDir*>& resultfrags); -public: - void rollback_uncommitted_fragments(); -private: - + friend class EFragment; friend class C_MDC_FragmentFrozen; friend class C_MDC_FragmentMarking; - friend class C_MDC_FragmentLoggedAndStored; + friend class C_MDC_FragmentPrep; + friend class C_MDC_FragmentStore; friend class C_MDC_FragmentCommit; + friend class C_MDC_FragmentFinish; void handle_fragment_notify(MMDSFragmentNotify *m); void add_uncommitted_fragment(dirfrag_t basedirfrag, int bits, list<frag_t>& old_frag, bufferlist *rollback=NULL); - void finish_uncommitted_fragment(dirfrag_t basedirfrag); + void finish_uncommitted_fragment(dirfrag_t basedirfrag, int op); + void rollback_uncommitted_fragment(dirfrag_t basedirfrag, list<frag_t>& old_frags); +public: + void split_dir(CDir *dir, int byn); + void merge_dir(CInode *diri, frag_t fg); + void rollback_uncommitted_fragments(); // -- updates -- //int send_inode_updates(CInode *in); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 237e1349396..ece1156fc58 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -2399,6 +2399,7 @@ void EFragment::replay(MDS *mds) case OP_ROLLBACK: if (in) { + in->dirfragtree.get_leaves_under(basefrag, old_frags); if (orig_frags.empty()) { // old format EFragment mds->mdcache->adjust_dir_fragments(in, basefrag, -bits, resultfrags, waiters, true); @@ -2407,9 +2408,12 @@ void EFragment::replay(MDS *mds) mds->mdcache->force_dir_fragment(in, *p); } } - // fall-thru + mds->mdcache->rollback_uncommitted_fragment(dirfrag_t(ino, basefrag), old_frags); + break; + case OP_COMMIT: - mds->mdcache->finish_uncommitted_fragment(dirfrag_t(ino, basefrag)); + case OP_FINISH: + mds->mdcache->finish_uncommitted_fragment(dirfrag_t(ino, basefrag), op); break; default: |