summary | refs | log | tree | commit | diff
path: root/src/mds/journal.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/mds/journal.cc')
-rw-r--r-- src/mds/journal.cc 193
1 files changed, 46 insertions, 147 deletions
diff --git a/src/mds/journal.cc b/src/mds/journal.cc
index b8139e3a05b..9eb0e73feba 100644
--- a/src/mds/journal.cc
+++ b/src/mds/journal.cc
@@ -185,9 +185,16 @@ void LogSegment::try_to_expire(MDS *mds, C_GatherBuilder &gather_bld)
assert(g_conf->mds_kill_journal_expire_at != 3);
// backtraces to be stored/updated
- for (elist<BacktraceInfo*>::iterator p = update_backtraces.begin(); !p.end(); ++p) {
- BacktraceInfo *btinfo = *p;
- store_backtrace_update(mds, btinfo, gather_bld.new_sub());
+ for (elist<CInode*>::iterator p = dirty_parent_inodes.begin(); !p.end(); ++p) {
+ CInode *in = *p;
+ assert(in->is_auth());
+ if (in->can_auth_pin()) {
+ dout(15) << "try_to_expire waiting for storing backtrace on " << *in << dendl;
+ in->store_backtrace(gather_bld.new_sub());
+ } else {
+ dout(15) << "try_to_expire waiting for unfreeze on " << *in << dendl;
+ in->add_waiter(CInode::WAIT_UNFREEZE, gather_bld.new_sub());
+ }
}
assert(g_conf->mds_kill_journal_expire_at != 4);
@@ -267,101 +274,6 @@ void LogSegment::try_to_expire(MDS *mds, C_GatherBuilder &gather_bld)
}
}
-// ----------------------------
-// backtrace handling
-
-// BacktraceInfo is used for keeping the
-// current state of the backtrace to be stored later on
-// logsegment expire. Constructing a BacktraceInfo
-// automatically puts it on the LogSegment list that is passed in,
-// after building the backtrace based on the current state of the inode. We
-// construct the backtrace here to avoid keeping a ref to the inode.
-BacktraceInfo::BacktraceInfo(
- int64_t l, CInode *i, LogSegment *ls, int64_t p) :
- location(l), pool(p) {
-
- // on setlayout cases, forward pointers mean
- // pool != location, but for all others it does
- if (pool == -1) pool = location;
-
- bt.pool = pool;
- i->build_backtrace(l, &bt);
- ls->update_backtraces.push_back(&item_logseg);
-}
-
-// When the info_t is destroyed, it just needs to remove itself
-// from the LogSegment list
-BacktraceInfo::~BacktraceInfo() {
- item_logseg.remove_myself();
-}
-
-// Queue a backtrace for later
-void LogSegment::queue_backtrace_update(CInode *inode, int64_t location, int64_t pool) {
- // allocating a pointer here and not setting it to anything
- // might look strange, but the constructor adds itself to the backtraces
- // list of this LogSegment, which is how we keep track of it
- new BacktraceInfo(location, inode, this, pool);
-}
-
-void LogSegment::remove_pending_backtraces(inodeno_t ino, int64_t pool) {
- elist<BacktraceInfo*>::iterator i = update_backtraces.begin();
- while(!i.end()) {
- ++i;
- if((*i)->bt.ino == ino && (*i)->location == pool) {
- delete (*i);
- }
- }
-}
-
-unsigned LogSegment::encode_parent_mutation(ObjectOperation& m, BacktraceInfo *info)
-{
- bufferlist parent;
- ::encode(info->bt, parent);
- m.setxattr("parent", parent);
- return parent.length();
-}
-
-struct C_LogSegment_StoredBacktrace : public Context {
- LogSegment *ls;
- BacktraceInfo *info;
- Context *fin;
- C_LogSegment_StoredBacktrace(LogSegment *l, BacktraceInfo *c,
- Context *f) : ls(l), info(c), fin(f) {}
- void finish(int r) {
- ls->_stored_backtrace(info, fin);
- }
-};
-
-void LogSegment::store_backtrace_update(MDS *mds, BacktraceInfo *info, Context *fin)
-{
- ObjectOperation m;
- // prev_pool will be the target pool on create,mkdir,etc.
- encode_parent_mutation(m, info);
-
- // write it.
- SnapContext snapc;
-
- object_t oid = CInode::get_object_name(info->bt.ino, frag_t(), "");
-
- dout(10) << "store_parent for oid " << oid << " location " << info->location << " pool " << info->pool << dendl;
-
- // store the backtrace in the specified pool
- object_locator_t oloc(info->location);
-
- mds->objecter->mutate(oid, oloc, m, snapc, ceph_clock_now(g_ceph_context), 0,
- NULL, new C_LogSegment_StoredBacktrace(this, info, fin) );
-
-}
-
-void LogSegment::_stored_backtrace(BacktraceInfo *info, Context *fin)
-{
- delete info;
- if (fin) {
- fin->finish(0);
- delete fin;
- }
-}
-
#undef DOUT_COND
#define DOUT_COND(cct, l) (l<=cct->_conf->debug_mds || l <= cct->_conf->debug_mds_log)
@@ -372,8 +284,6 @@ void LogSegment::_stored_backtrace(BacktraceInfo *info, Context *fin)
EMetaBlob::EMetaBlob(MDLog *mdlog) : opened_ino(0), renamed_dirino(0),
inotablev(0), sessionmapv(0),
allocated_ino(0),
- old_pool(-1),
- update_bt(false),
last_subtree_map(mdlog ? mdlog->get_last_segment_offset() : 0),
my_offset(mdlog ? mdlog->get_write_pos() : 0) //, _segment(0)
{ }
@@ -406,7 +316,7 @@ void EMetaBlob::add_dir_context(CDir *dir, int mode)
if (mode == TO_AUTH_SUBTREE_ROOT) {
// subtree root?
- if (dir->is_subtree_root()) {
+ if (dir->is_subtree_root() && !dir->state_test(CDir::STATE_EXPORTBOUND)) {
if (dir->is_auth() && !dir->is_ambiguous_auth()) {
// it's an auth subtree, we don't need maybe (if any), and we're done.
dout(20) << "EMetaBlob::add_dir_context(" << dir << ") reached unambig auth subtree, don't need " << maybe
@@ -485,10 +395,10 @@ void EMetaBlob::update_segment(LogSegment *ls)
// EMetaBlob::fullbit
void EMetaBlob::fullbit::encode(bufferlist& bl) const {
- ENCODE_START(5, 5, bl);
+ ENCODE_START(6, 5, bl);
if (!_enc.length()) {
fullbit copy(dn, dnfirst, dnlast, dnv, inode, dirfragtree, xattrs, symlink,
- snapbl, dirty, &old_inodes);
+ snapbl, state, &old_inodes);
bl.append(copy._enc);
} else {
bl.append(_enc);
@@ -497,7 +407,7 @@ void EMetaBlob::fullbit::encode(bufferlist& bl) const {
}
void EMetaBlob::fullbit::decode(bufferlist::iterator &bl) {
- DECODE_START_LEGACY_COMPAT_LEN(5, 5, 5, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl);
::decode(dn, bl);
::decode(dnfirst, bl);
::decode(dnlast, bl);
@@ -519,7 +429,14 @@ void EMetaBlob::fullbit::decode(bufferlist::iterator &bl) {
}
}
}
- ::decode(dirty, bl);
+ if (struct_v >= 6) {
+ ::decode(state, bl);
+ } else {
+ bool dirty;
+ ::decode(dirty, bl);
+ state = dirty ? EMetaBlob::fullbit::STATE_DIRTY : 0;
+ }
+
if (struct_v >= 3) {
bool old_inodes_present;
::decode(old_inodes_present, bl);
@@ -571,7 +488,7 @@ void EMetaBlob::fullbit::dump(Formatter *f) const
f->close_section(); // file layout policy
}
}
- f->dump_string("dirty", dirty ? "true" : "false");
+ f->dump_string("state", state_string());
if (!old_inodes.empty()) {
f->open_array_section("old inodes");
for (old_inodes_t::const_iterator iter = old_inodes.begin();
@@ -824,7 +741,7 @@ void EMetaBlob::dirlump::generate_test_instances(list<dirlump*>& ls)
*/
void EMetaBlob::encode(bufferlist& bl) const
{
- ENCODE_START(6, 5, bl);
+ ENCODE_START(7, 5, bl);
::encode(lump_order, bl);
::encode(lump_map, bl);
::encode(roots, bl);
@@ -842,13 +759,18 @@ void EMetaBlob::encode(bufferlist& bl) const
::encode(client_reqs, bl);
::encode(renamed_dirino, bl);
::encode(renamed_dir_frags, bl);
- ::encode(old_pool, bl);
- ::encode(update_bt, bl);
+ {
+ // keep MDSes that decode the v6 format happy: still emit placeholders for the removed old_pool (-1) and update_bt (false) fields
+ int64_t i = -1;
+ bool b = false;
+ ::encode(i, bl);
+ ::encode(b, bl);
+ }
ENCODE_FINISH(bl);
}
void EMetaBlob::decode(bufferlist::iterator &bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
::decode(lump_order, bl);
::decode(lump_map, bl);
if (struct_v >= 4) {
@@ -887,8 +809,11 @@ void EMetaBlob::decode(bufferlist::iterator &bl)
::decode(renamed_dir_frags, bl);
}
if (struct_v >= 6) {
- ::decode(old_pool, bl);
- ::decode(update_bt, bl);
+ // old_pool and update_bt are no longer used: decode them to advance the iterator, then discard
+ int64_t i;
+ bool b;
+ ::decode(i, bl);
+ ::decode(b, bl);
}
DECODE_FINISH(bl);
}
@@ -1004,7 +929,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
if (isnew)
mds->mdcache->add_inode(in);
- if ((*p)->dirty) in->_mark_dirty(logseg);
+ if ((*p)->is_dirty()) in->_mark_dirty(logseg);
dout(10) << "EMetaBlob.replay " << (isnew ? " added root ":" updated root ") << *in << dendl;
}
@@ -1106,11 +1031,11 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
if (!dn) {
dn = dir->add_null_dentry(p->dn, p->dnfirst, p->dnlast);
dn->set_version(p->dnv);
- if (p->dirty) dn->_mark_dirty(logseg);
+ if (p->is_dirty()) dn->_mark_dirty(logseg);
dout(10) << "EMetaBlob.replay added " << *dn << dendl;
} else {
dn->set_version(p->dnv);
- if (p->dirty) dn->_mark_dirty(logseg);
+ if (p->is_dirty()) dn->_mark_dirty(logseg);
dout(10) << "EMetaBlob.replay for [" << p->dnfirst << "," << p->dnlast << "] had " << *dn << dendl;
dn->first = p->dnfirst;
assert(dn->last == p->dnlast);
@@ -1135,7 +1060,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
if (unlinked.count(in))
linked.insert(in);
dir->link_primary_inode(dn, in);
- if (p->dirty) in->_mark_dirty(logseg);
+ if (p->is_dirty()) in->_mark_dirty(logseg);
dout(10) << "EMetaBlob.replay added " << *in << dendl;
} else {
if (dn->get_linkage()->get_inode() != in && in->get_parent_dn()) {
@@ -1146,7 +1071,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
if (in->get_parent_dn() && in->inode.anchored != p->inode.anchored)
in->get_parent_dn()->adjust_nested_anchors( (int)p->inode.anchored - (int)in->inode.anchored );
p->update_inode(mds, in);
- if (p->dirty) in->_mark_dirty(logseg);
+ if (p->is_dirty()) in->_mark_dirty(logseg);
if (dn->get_linkage()->get_inode() != in) {
if (!dn->get_linkage()->is_null()) { // note: might be remote. as with stray reintegration.
if (dn->get_linkage()->is_primary()) {
@@ -1171,35 +1096,8 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
}
assert(g_conf->mds_kill_journal_replay_at != 2);
-
- // store backtrace for allocated inos (create, mkdir, symlink, mknod)
- if (allocated_ino || used_preallocated_ino) {
- if (in->inode.is_dir()) {
- logseg->queue_backtrace_update(in, mds->mdsmap->get_metadata_pool());
- } else {
- logseg->queue_backtrace_update(in, in->inode.layout.fl_pg_pool);
- }
- }
- // handle change of pool with backtrace update
- if (old_pool != -1 && old_pool != in->inode.layout.fl_pg_pool) {
- // update backtrace on new data pool
- logseg->queue_backtrace_update(in, in->inode.layout.fl_pg_pool);
-
- // set forwarding pointer on old backtrace
- logseg->queue_backtrace_update(in, old_pool, in->inode.layout.fl_pg_pool);
- }
- // handle backtrace update if specified (used by rename)
- if (update_bt) {
- if (in->is_dir()) {
- // replace previous backtrace on this inode with myself
- logseg->remove_pending_backtraces(in->ino(), mds->mdsmap->get_metadata_pool());
- logseg->queue_backtrace_update(in, mds->mdsmap->get_metadata_pool());
- } else {
- // remove all pending backtraces going to the same pool
- logseg->remove_pending_backtraces(in->ino(), in->inode.layout.fl_pg_pool);
- logseg->queue_backtrace_update(in, in->inode.layout.fl_pg_pool);
- }
- }
+ if (p->is_dirty_parent())
+ in->_mark_dirty_parent(logseg, p->is_dirty_pool());
}
// remote dentries
@@ -1280,7 +1178,8 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
list<frag_t> leaves;
renamed_diri->dirfragtree.get_leaves(leaves);
for (list<frag_t>::iterator p = leaves.begin(); p != leaves.end(); ++p) {
- CDir *dir = renamed_diri->get_or_open_dirfrag(mds->mdcache, *p);
+ CDir *dir = renamed_diri->get_dirfrag(*p);
+ assert(dir);
// preserve subtree bound until slave commit
if (dir->get_dir_auth() == CDIR_AUTH_UNDEF)
slaveup->olddirs.insert(dir);