diff options
Diffstat (limited to 'src/mds/journal.cc')
-rw-r--r-- | src/mds/journal.cc | 193 |
1 files changed, 46 insertions, 147 deletions
diff --git a/src/mds/journal.cc b/src/mds/journal.cc index b8139e3a05b..9eb0e73feba 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -185,9 +185,16 @@ void LogSegment::try_to_expire(MDS *mds, C_GatherBuilder &gather_bld) assert(g_conf->mds_kill_journal_expire_at != 3); // backtraces to be stored/updated - for (elist<BacktraceInfo*>::iterator p = update_backtraces.begin(); !p.end(); ++p) { - BacktraceInfo *btinfo = *p; - store_backtrace_update(mds, btinfo, gather_bld.new_sub()); + for (elist<CInode*>::iterator p = dirty_parent_inodes.begin(); !p.end(); ++p) { + CInode *in = *p; + assert(in->is_auth()); + if (in->can_auth_pin()) { + dout(15) << "try_to_expire waiting for storing backtrace on " << *in << dendl; + in->store_backtrace(gather_bld.new_sub()); + } else { + dout(15) << "try_to_expire waiting for unfreeze on " << *in << dendl; + in->add_waiter(CInode::WAIT_UNFREEZE, gather_bld.new_sub()); + } } assert(g_conf->mds_kill_journal_expire_at != 4); @@ -267,101 +274,6 @@ void LogSegment::try_to_expire(MDS *mds, C_GatherBuilder &gather_bld) } } -// ---------------------------- -// backtrace handling - -// BacktraceInfo is used for keeping the -// current state of the backtrace to be stored later on -// logsegment expire. Constructing a BacktraceInfo -// automatically puts it on the LogSegment list that is passed in, -// after building the backtrace based on the current state of the inode. We -// construct the backtrace here to avoid keeping a ref to the inode. -BacktraceInfo::BacktraceInfo( - int64_t l, CInode *i, LogSegment *ls, int64_t p) : - location(l), pool(p) { - - // on setlayout cases, forward pointers mean - // pool != location, but for all others it does - if (pool == -1) pool = location; - - bt.pool = pool; - i->build_backtrace(l, &bt); - ls->update_backtraces.push_back(&item_logseg); -} - -// When the info_t is destroyed, it just needs to remove itself -// from the LogSegment list -BacktraceInfo::~BacktraceInfo() { - item_logseg.remove_myself(); -} - -// Queue a backtrace for later -void LogSegment::queue_backtrace_update(CInode *inode, int64_t location, int64_t pool) { - // allocating a pointer here and not setting it to anything - // might look strange, but the constructor adds itself to the backtraces - // list of this LogSegment, which is how we keep track of it - new BacktraceInfo(location, inode, this, pool); -} - -void LogSegment::remove_pending_backtraces(inodeno_t ino, int64_t pool) { - elist<BacktraceInfo*>::iterator i = update_backtraces.begin(); - while(!i.end()) { - ++i; - if((*i)->bt.ino == ino && (*i)->location == pool) { - delete (*i); - } - } -} - -unsigned LogSegment::encode_parent_mutation(ObjectOperation& m, BacktraceInfo *info) -{ - bufferlist parent; - ::encode(info->bt, parent); - m.setxattr("parent", parent); - return parent.length(); -} - -struct C_LogSegment_StoredBacktrace : public Context { - LogSegment *ls; - BacktraceInfo *info; - Context *fin; - C_LogSegment_StoredBacktrace(LogSegment *l, BacktraceInfo *c, - Context *f) : ls(l), info(c), fin(f) {} - void finish(int r) { - ls->_stored_backtrace(info, fin); - } -}; - -void LogSegment::store_backtrace_update(MDS *mds, BacktraceInfo *info, Context *fin) -{ - ObjectOperation m; - // prev_pool will be the target pool on create,mkdir,etc. - encode_parent_mutation(m, info); - - // write it. - SnapContext snapc; - - object_t oid = CInode::get_object_name(info->bt.ino, frag_t(), ""); - - dout(10) << "store_parent for oid " << oid << " location " << info->location << " pool " << info->pool << dendl; - - // store the backtrace in the specified pool - object_locator_t oloc(info->location); - - mds->objecter->mutate(oid, oloc, m, snapc, ceph_clock_now(g_ceph_context), 0, - NULL, new C_LogSegment_StoredBacktrace(this, info, fin) ); - -} - -void LogSegment::_stored_backtrace(BacktraceInfo *info, Context *fin) -{ - delete info; - if (fin) { - fin->finish(0); - delete fin; - } -} - #undef DOUT_COND #define DOUT_COND(cct, l) (l<=cct->_conf->debug_mds || l <= cct->_conf->debug_mds_log) @@ -372,8 +284,6 @@ void LogSegment::_stored_backtrace(BacktraceInfo *info, Context *fin) EMetaBlob::EMetaBlob(MDLog *mdlog) : opened_ino(0), renamed_dirino(0), inotablev(0), sessionmapv(0), allocated_ino(0), - old_pool(-1), - update_bt(false), last_subtree_map(mdlog ? mdlog->get_last_segment_offset() : 0), my_offset(mdlog ? mdlog->get_write_pos() : 0) //, _segment(0) { } @@ -406,7 +316,7 @@ void EMetaBlob::add_dir_context(CDir *dir, int mode) if (mode == TO_AUTH_SUBTREE_ROOT) { // subtree root? - if (dir->is_subtree_root()) { + if (dir->is_subtree_root() && !dir->state_test(CDir::STATE_EXPORTBOUND)) { if (dir->is_auth() && !dir->is_ambiguous_auth()) { // it's an auth subtree, we don't need maybe (if any), and we're done. dout(20) << "EMetaBlob::add_dir_context(" << dir << ") reached unambig auth subtree, don't need " << maybe @@ -485,10 +395,10 @@ void EMetaBlob::update_segment(LogSegment *ls) // EMetaBlob::fullbit void EMetaBlob::fullbit::encode(bufferlist& bl) const { - ENCODE_START(5, 5, bl); + ENCODE_START(6, 5, bl); if (!_enc.length()) { fullbit copy(dn, dnfirst, dnlast, dnv, inode, dirfragtree, xattrs, symlink, - snapbl, dirty, &old_inodes); + snapbl, state, &old_inodes); bl.append(copy._enc); } else { bl.append(_enc); @@ -497,7 +407,7 @@ void EMetaBlob::fullbit::encode(bufferlist& bl) const { } void EMetaBlob::fullbit::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(5, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl); ::decode(dn, bl); ::decode(dnfirst, bl); ::decode(dnlast, bl); @@ -519,7 +429,14 @@ void EMetaBlob::fullbit::decode(bufferlist::iterator &bl) { } } } - ::decode(dirty, bl); + if (struct_v >= 6) { + ::decode(state, bl); + } else { + bool dirty; + ::decode(dirty, bl); + state = dirty ? EMetaBlob::fullbit::STATE_DIRTY : 0; + } + if (struct_v >= 3) { bool old_inodes_present; ::decode(old_inodes_present, bl); @@ -571,7 +488,7 @@ void EMetaBlob::fullbit::dump(Formatter *f) const f->close_section(); // file layout policy } } - f->dump_string("dirty", dirty ? "true" : "false"); + f->dump_string("state", state_string()); if (!old_inodes.empty()) { f->open_array_section("old inodes"); for (old_inodes_t::const_iterator iter = old_inodes.begin(); @@ -824,7 +741,7 @@ void EMetaBlob::dirlump::generate_test_instances(list<dirlump*>& ls) */ void EMetaBlob::encode(bufferlist& bl) const { - ENCODE_START(6, 5, bl); + ENCODE_START(7, 5, bl); ::encode(lump_order, bl); ::encode(lump_map, bl); ::encode(roots, bl); @@ -842,13 +759,18 @@ void EMetaBlob::encode(bufferlist& bl) const ::encode(client_reqs, bl); ::encode(renamed_dirino, bl); ::encode(renamed_dir_frags, bl); - ::encode(old_pool, bl); - ::encode(update_bt, bl); + { + // make MDS use v6 format happy + int64_t i = -1; + bool b = false; + ::encode(i, bl); + ::encode(b, bl); + } ENCODE_FINISH(bl); } void EMetaBlob::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); ::decode(lump_order, bl); ::decode(lump_map, bl); if (struct_v >= 4) { @@ -887,8 +809,11 @@ void EMetaBlob::decode(bufferlist::iterator &bl) ::decode(renamed_dir_frags, bl); } if (struct_v >= 6) { - ::decode(old_pool, bl); - ::decode(update_bt, bl); + // ignore + int64_t i; + bool b; + ::decode(i, bl); + ::decode(b, bl); } DECODE_FINISH(bl); } @@ -1004,7 +929,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) if (isnew) mds->mdcache->add_inode(in); - if ((*p)->dirty) in->_mark_dirty(logseg); + if ((*p)->is_dirty()) in->_mark_dirty(logseg); dout(10) << "EMetaBlob.replay " << (isnew ? " added root ":" updated root ") << *in << dendl; } @@ -1106,11 +1031,11 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) if (!dn) { dn = dir->add_null_dentry(p->dn, p->dnfirst, p->dnlast); dn->set_version(p->dnv); - if (p->dirty) dn->_mark_dirty(logseg); + if (p->is_dirty()) dn->_mark_dirty(logseg); dout(10) << "EMetaBlob.replay added " << *dn << dendl; } else { dn->set_version(p->dnv); - if (p->dirty) dn->_mark_dirty(logseg); + if (p->is_dirty()) dn->_mark_dirty(logseg); dout(10) << "EMetaBlob.replay for [" << p->dnfirst << "," << p->dnlast << "] had " << *dn << dendl; dn->first = p->dnfirst; assert(dn->last == p->dnlast); @@ -1135,7 +1060,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) if (unlinked.count(in)) linked.insert(in); dir->link_primary_inode(dn, in); - if (p->dirty) in->_mark_dirty(logseg); + if (p->is_dirty()) in->_mark_dirty(logseg); dout(10) << "EMetaBlob.replay added " << *in << dendl; } else { if (dn->get_linkage()->get_inode() != in && in->get_parent_dn()) { @@ -1146,7 +1071,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) if (in->get_parent_dn() && in->inode.anchored != p->inode.anchored) in->get_parent_dn()->adjust_nested_anchors( (int)p->inode.anchored - (int)in->inode.anchored ); p->update_inode(mds, in); - if (p->dirty) in->_mark_dirty(logseg); + if (p->is_dirty()) in->_mark_dirty(logseg); if (dn->get_linkage()->get_inode() != in) { if (!dn->get_linkage()->is_null()) { // note: might be remote. as with stray reintegration. if (dn->get_linkage()->is_primary()) { @@ -1171,35 +1096,8 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) } assert(g_conf->mds_kill_journal_replay_at != 2); - - // store backtrace for allocated inos (create, mkdir, symlink, mknod) - if (allocated_ino || used_preallocated_ino) { - if (in->inode.is_dir()) { - logseg->queue_backtrace_update(in, mds->mdsmap->get_metadata_pool()); - } else { - logseg->queue_backtrace_update(in, in->inode.layout.fl_pg_pool); - } - } - // handle change of pool with backtrace update - if (old_pool != -1 && old_pool != in->inode.layout.fl_pg_pool) { - // update backtrace on new data pool - logseg->queue_backtrace_update(in, in->inode.layout.fl_pg_pool); - - // set forwarding pointer on old backtrace - logseg->queue_backtrace_update(in, old_pool, in->inode.layout.fl_pg_pool); - } - // handle backtrace update if specified (used by rename) - if (update_bt) { - if (in->is_dir()) { - // replace previous backtrace on this inode with myself - logseg->remove_pending_backtraces(in->ino(), mds->mdsmap->get_metadata_pool()); - logseg->queue_backtrace_update(in, mds->mdsmap->get_metadata_pool()); - } else { - // remove all pending backtraces going to the same pool - logseg->remove_pending_backtraces(in->ino(), in->inode.layout.fl_pg_pool); - logseg->queue_backtrace_update(in, in->inode.layout.fl_pg_pool); - } - } + if (p->is_dirty_parent()) + in->_mark_dirty_parent(logseg, p->is_dirty_pool()); } // remote dentries @@ -1280,7 +1178,8 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) list<frag_t> leaves; renamed_diri->dirfragtree.get_leaves(leaves); for (list<frag_t>::iterator p = leaves.begin(); p != leaves.end(); ++p) { - CDir *dir = renamed_diri->get_or_open_dirfrag(mds->mdcache, *p); + CDir *dir = renamed_diri->get_dirfrag(*p); + assert(dir); // preserve subtree bound until slave commit if (dir->get_dir_auth() == CDIR_AUTH_UNDEF) slaveup->olddirs.insert(dir); |