Diffstat (limited to 'src/mds/MDCache.cc')
-rw-r--r-- | src/mds/MDCache.cc | 933
1 file changed, 755 insertions, 178 deletions
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index f0a2cc2a7f0..0c279b66a91 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -79,6 +79,9 @@ #include "messages/MMDSFindIno.h" #include "messages/MMDSFindInoReply.h" +#include "messages/MMDSOpenIno.h" +#include "messages/MMDSOpenInoReply.h" + #include "messages/MClientRequest.h" #include "messages/MClientCaps.h" #include "messages/MClientSnap.h" @@ -235,6 +238,8 @@ void MDCache::remove_inode(CInode *o) if (o->is_dirty()) o->mark_clean(); + if (o->is_dirty_parent()) + o->clear_dirty_parent(); o->filelock.remove_dirty(); o->nestlock.remove_dirty(); @@ -461,7 +466,7 @@ void MDCache::_create_system_file(CDir *dir, const char *name, CInode *in, Conte if (!in->is_mdsdir()) { predirty_journal_parents(mut, &le->metablob, in, dir, PREDIRTY_PRIMARY|PREDIRTY_DIR, 1); - le->metablob.add_primary_dentry(dn, true, in); + le->metablob.add_primary_dentry(dn, in, true); } else { predirty_journal_parents(mut, &le->metablob, in, dir, PREDIRTY_DIR, 1); journal_dirty_inode(mut, &le->metablob, in); @@ -1552,7 +1557,7 @@ void MDCache::journal_cow_dentry(Mutation *mut, EMetaBlob *metablob, CDentry *dn CDentry *olddn = dn->dir->add_primary_dentry(dn->name, oldin, oldfirst, follows); oldin->inode.version = olddn->pre_dirty(); dout(10) << " olddn " << *olddn << dendl; - metablob->add_primary_dentry(olddn, true, 0); + metablob->add_primary_dentry(olddn, 0, true); mut->add_cow_dentry(olddn); } else { assert(dnl->is_remote()); @@ -1585,7 +1590,13 @@ void MDCache::journal_dirty_inode(Mutation *mut, EMetaBlob *metablob, CInode *in CDentry *dn = in->get_projected_parent_dn(); if (!dn->get_projected_linkage()->is_null()) // no need to cow a null dentry journal_cow_dentry(mut, metablob, dn, follows); - metablob->add_primary_dentry(dn, true, in); + if (in->get_projected_inode()->is_backtrace_updated()) { + bool dirty_pool = in->get_projected_inode()->layout.fl_pg_pool != + in->get_previous_projected_inode()->layout.fl_pg_pool; + metablob->add_primary_dentry(dn, in, true, true, dirty_pool); + } else { + metablob->add_primary_dentry(dn, in, true); + } } } @@ -2144,32 +2155,27 @@ void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob, struct C_MDC_CommittedMaster : public Context { MDCache *cache; metareqid_t reqid; - LogSegment *ls; - list<Context*> waiters; - C_MDC_CommittedMaster(MDCache *s, metareqid_t r, LogSegment *l, list<Context*> &w) : - cache(s), reqid(r), ls(l) { - waiters.swap(w); - } + C_MDC_CommittedMaster(MDCache *s, metareqid_t r) : cache(s), reqid(r) {} void finish(int r) { - cache->_logged_master_commit(reqid, ls, waiters); + cache->_logged_master_commit(reqid); } }; void MDCache::log_master_commit(metareqid_t reqid) { dout(10) << "log_master_commit " << reqid << dendl; + uncommitted_masters[reqid].committing = true; mds->mdlog->start_submit_entry(new ECommitted(reqid), - new C_MDC_CommittedMaster(this, reqid, - uncommitted_masters[reqid].ls, - uncommitted_masters[reqid].waiters)); - mds->mdcache->uncommitted_masters.erase(reqid); + new C_MDC_CommittedMaster(this, reqid)); } -void MDCache::_logged_master_commit(metareqid_t reqid, LogSegment *ls, list<Context*> &waiters) +void MDCache::_logged_master_commit(metareqid_t reqid) { dout(10) << "_logged_master_commit " << reqid << dendl; - ls->uncommitted_masters.erase(reqid); - mds->queue_waiters(waiters); + assert(uncommitted_masters.count(reqid)); + uncommitted_masters[reqid].ls->uncommitted_masters.erase(reqid); + mds->queue_waiters(uncommitted_masters[reqid].waiters); + 
uncommitted_masters.erase(reqid); } // while active... @@ -2179,7 +2185,7 @@ void MDCache::committed_master_slave(metareqid_t r, int from) dout(10) << "committed_master_slave mds." << from << " on " << r << dendl; assert(uncommitted_masters.count(r)); uncommitted_masters[r].slaves.erase(from); - if (uncommitted_masters[r].slaves.empty()) + if (!uncommitted_masters[r].recovering && uncommitted_masters[r].slaves.empty()) log_master_commit(r); } @@ -2196,20 +2202,20 @@ void MDCache::logged_master_update(metareqid_t reqid) } /* - * The mds could crash after receiving all slaves' commit acknowledgement, - * but before journalling the ECommitted. + * Master may crash after receiving all slaves' commit acks, but before journalling + * the final commit. Slaves may crash after journalling the slave commit, but before + * sending commit ack to the master. Commit masters with no uncommitted slave when + * resolve finishes. */ void MDCache::finish_committed_masters() { - map<metareqid_t, umaster>::iterator p = uncommitted_masters.begin(); - while (p != uncommitted_masters.end()) { - if (p->second.slaves.empty()) { - metareqid_t reqid = p->first; - dout(10) << "finish_committed_masters " << reqid << dendl; - ++p; - log_master_commit(reqid); - } else { - ++p; + for (map<metareqid_t, umaster>::iterator p = uncommitted_masters.begin(); + p != uncommitted_masters.end(); + ++p) { + p->second.recovering = false; + if (!p->second.committing && p->second.slaves.empty()) { + dout(10) << "finish_committed_masters " << p->first << dendl; + log_master_commit(p->first); } } } @@ -2450,8 +2456,6 @@ void MDCache::resolve_start() adjust_subtree_auth(rootdir, CDIR_AUTH_UNKNOWN); } resolve_gather = recovery_set; - resolve_gather.erase(mds->get_nodeid()); - rejoin_gather = resolve_gather; } void MDCache::send_resolves() @@ -2705,6 +2709,16 @@ void MDCache::handle_mds_failure(int who) } } + for (map<metareqid_t, umaster>::iterator p = uncommitted_masters.begin(); + p != uncommitted_masters.end(); + ++p) { + // The failed MDS may have already committed the slave update + if (p->second.slaves.count(who)) { + p->second.recovering = true; + p->second.slaves.erase(who); + } + } + while (!finish.empty()) { dout(10) << "cleaning up slave request " << *finish.front() << dendl; request_finish(finish.front()); @@ -2712,6 +2726,7 @@ void MDCache::handle_mds_failure(int who) } kick_find_ino_peers(who); + kick_open_ino_peers(who); show_subtrees(); } @@ -2771,7 +2786,7 @@ void MDCache::handle_mds_recovery(int who) } kick_discovers(who); - + kick_open_ino_peers(who); kick_find_ino_peers(who); // queue them up. @@ -2964,17 +2979,17 @@ void MDCache::maybe_resolve_finish() dout(10) << "maybe_resolve_finish still waiting for resolves (" << resolve_gather << ")" << dendl; return; + } + + dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." << dendl; + disambiguate_imports(); + finish_committed_masters(); + if (mds->is_resolve()) { + trim_unlinked_inodes(); + recalc_auth_bits(); + mds->resolve_done(); } else { - dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." 
<< dendl; - disambiguate_imports(); - if (mds->is_resolve()) { - trim_unlinked_inodes(); - recalc_auth_bits(); - trim_non_auth(); - mds->resolve_done(); - } else { - maybe_send_pending_rejoins(); - } + maybe_send_pending_rejoins(); } } @@ -3397,6 +3412,8 @@ void MDCache::recalc_auth_bits() dnl->get_inode()->state_clear(CInode::STATE_AUTH); if (dnl->get_inode()->is_dirty()) dnl->get_inode()->mark_clean(); + if (dnl->get_inode()->is_dirty_parent()) + dnl->get_inode()->clear_dirty_parent(); // avoid touching scatterlocks for our subtree roots! if (subtree_inodes.count(dnl->get_inode()) == 0) dnl->get_inode()->clear_scatter_dirty(); @@ -3451,6 +3468,15 @@ void MDCache::recalc_auth_bits() * after recovery. */ +void MDCache::rejoin_start() +{ + dout(10) << "rejoin_start" << dendl; + + rejoin_gather = recovery_set; + // need finish opening cap inodes before sending cache rejoins + rejoin_gather.insert(mds->get_nodeid()); + process_imported_caps(); +} /* * rejoin phase! @@ -3467,6 +3493,11 @@ void MDCache::rejoin_send_rejoins() { dout(10) << "rejoin_send_rejoins with recovery_set " << recovery_set << dendl; + if (rejoin_gather.count(mds->get_nodeid())) { + dout(7) << "rejoin_send_rejoins still processing imported caps, delaying" << dendl; + rejoins_pending = true; + return; + } if (!resolve_gather.empty()) { dout(7) << "rejoin_send_rejoins still waiting for resolves (" << resolve_gather << ")" << dendl; @@ -3476,12 +3507,6 @@ void MDCache::rejoin_send_rejoins() map<int, MMDSCacheRejoin*> rejoins; - // encode cap list once. - bufferlist cap_export_bl; - if (mds->is_rejoin()) { - ::encode(cap_exports, cap_export_bl); - ::encode(cap_export_paths, cap_export_bl); - } // if i am rejoining, send a rejoin to everyone. // otherwise, just send to others who are rejoining. @@ -3490,12 +3515,20 @@ void MDCache::rejoin_send_rejoins() ++p) { if (*p == mds->get_nodeid()) continue; // nothing to myself! if (rejoin_sent.count(*p)) continue; // already sent a rejoin to this node! 
- if (mds->is_rejoin()) { + if (mds->is_rejoin()) rejoins[*p] = new MMDSCacheRejoin(MMDSCacheRejoin::OP_WEAK); - rejoins[*p]->copy_cap_exports(cap_export_bl); - } else if (mds->mdsmap->is_rejoin(*p)) + else if (mds->mdsmap->is_rejoin(*p)) rejoins[*p] = new MMDSCacheRejoin(MMDSCacheRejoin::OP_STRONG); - } + } + + if (mds->is_rejoin()) { + for (map<inodeno_t,map<client_t,ceph_mds_cap_reconnect> >::iterator p = cap_exports.begin(); + p != cap_exports.end(); + p++) { + assert(cap_export_targets.count(p->first)); + rejoins[cap_export_targets[p->first]]->cap_exports[p->first] = p->second; + } + } assert(!migrator->is_importing()); assert(!migrator->is_exporting()); @@ -3821,7 +3854,7 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak) p != weak->cap_exports.end(); ++p) { CInode *in = get_inode(p->first); - if (!in || !in->is_auth()) continue; + assert(!in || in->is_auth()); for (map<client_t,ceph_mds_cap_reconnect>::iterator q = p->second.begin(); q != p->second.end(); ++q) { @@ -3838,16 +3871,7 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak) p != weak->cap_exports.end(); ++p) { CInode *in = get_inode(p->first); - if (in && !in->is_auth()) - continue; - filepath& path = weak->cap_export_paths[p->first]; - if (!in) { - if (!path_is_mine(path)) - continue; - cap_import_paths[p->first] = path; - dout(10) << " noting cap import " << p->first << " path " << path << dendl; - } - + assert(in && in->is_auth()); // note for (map<client_t,ceph_mds_cap_reconnect>::iterator q = p->second.begin(); q != p->second.end(); @@ -4016,6 +4040,7 @@ public: } }; +#if 0 /** * parallel_fetch -- make a pass at fetching a bunch of paths in parallel * @@ -4134,9 +4159,7 @@ bool MDCache::parallel_fetch_traverse_dir(inodeno_t ino, filepath& path, missing.insert(ino); return true; } - - - +#endif /* * rejoin_scour_survivor_replica - remove source from replica list on unmentioned objects @@ -4505,7 +4528,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack) int from = ack->get_source().num(); // for sending cache expire message - list<CInode*> isolated_inodes; + set<CInode*> isolated_inodes; // dirs for (map<dirfrag_t, MMDSCacheRejoin::dirfrag_strong>::iterator p = ack->strong_dirfrags.begin(); @@ -4521,19 +4544,20 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack) diri = new CInode(this, false); diri->inode.ino = p->first.ino; diri->inode.mode = S_IFDIR; - if (MDS_INO_MDSDIR(p->first.ino)) { + add_inode(diri); + if (MDS_INO_MDSDIR(from) == p->first.ino) { diri->inode_auth = pair<int,int>(from, CDIR_AUTH_UNKNOWN); - add_inode(diri); dout(10) << " add inode " << *diri << dendl; } else { - diri->inode_auth = CDIR_AUTH_UNDEF; - isolated_inodes.push_back(diri); + diri->inode_auth = CDIR_AUTH_DEFAULT; + isolated_inodes.insert(diri); dout(10) << " unconnected dirfrag " << p->first << dendl; } } // barebones dirfrag; the full dirfrag loop below will clean up. 
dir = diri->add_dirfrag(new CDir(diri, p->first.frag, this, false)); - if (dir->authority().first != from) + if (dir->authority() != CDIR_AUTH_UNDEF && + dir->authority().first != from) adjust_subtree_auth(dir, from); dout(10) << " add dirfrag " << *dir << dendl; } @@ -4598,6 +4622,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack) in->get_parent_dir()->unlink_inode(in->get_parent_dn()); } dn->dir->link_primary_inode(dn, in); + isolated_inodes.erase(in); } } @@ -4659,20 +4684,9 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack) dout(10) << " got inode locks " << *in << dendl; } - // trim unconnected subtree - if (!isolated_inodes.empty()) { - map<int, MCacheExpire*> expiremap; - for (list<CInode*>::iterator p = isolated_inodes.begin(); - p != isolated_inodes.end(); - ++p) { - list<CDir*> ls; - (*p)->get_dirfrags(ls); - trim_dirfrag(*ls.begin(), 0, expiremap); - assert((*p)->get_num_ref() == 0); - delete *p; - } - send_expire_messages(expiremap); - } + // FIXME: This can happen if entire subtree, together with the inode subtree root + // belongs to, were trimmed between sending cache rejoin and receiving rejoin ack. + assert(isolated_inodes.empty()); // done? assert(rejoin_ack_gather.count(from)); @@ -4840,16 +4854,9 @@ void MDCache::rejoin_gather_finish() if (open_undef_inodes_dirfrags()) return; - // fetch paths? - // do this before ack, since some inodes we may have already gotten - // from surviving MDSs. - if (!cap_import_paths.empty()) { - if (parallel_fetch(cap_import_paths, cap_imports_missing)) { - return; - } - } - - process_imported_caps(); + if (process_imported_caps()) + return; + choose_lock_states_and_reconnect_caps(); identify_files_to_recover(rejoin_recover_q, rejoin_check_q); @@ -4867,34 +4874,123 @@ void MDCache::rejoin_gather_finish() } } -void MDCache::process_imported_caps() +class C_MDC_RejoinOpenInoFinish: public Context { + MDCache *cache; + inodeno_t ino; +public: + C_MDC_RejoinOpenInoFinish(MDCache *c, inodeno_t i) : cache(c), ino(i) {} + void finish(int r) { + cache->rejoin_open_ino_finish(ino, r); + } +}; + +void MDCache::rejoin_open_ino_finish(inodeno_t ino, int ret) +{ + dout(10) << "open_caps_inode_finish ino " << ino << " ret " << ret << dendl; + + if (ret < 0) { + cap_imports_missing.insert(ino); + } else if (ret == mds->get_nodeid()) { + assert(get_inode(ino)); + } else { + map<inodeno_t,map<client_t,map<int,ceph_mds_cap_reconnect> > >::iterator p; + p = cap_imports.find(ino); + assert(p != cap_imports.end()); + for (map<client_t,map<int,ceph_mds_cap_reconnect> >::iterator q = p->second.begin(); + q != p->second.end(); + ++q) { + assert(q->second.count(-1)); + assert(q->second.size() == 1); + rejoin_export_caps(p->first, q->first, q->second[-1], ret); + } + cap_imports.erase(p); + } + + assert(cap_imports_num_opening > 0); + cap_imports_num_opening--; + + if (cap_imports_num_opening == 0) { + if (rejoin_gather.count(mds->get_nodeid())) + process_imported_caps(); + else + rejoin_gather_finish(); + } +} + +bool MDCache::process_imported_caps() { dout(10) << "process_imported_caps" << dendl; - // process cap imports - // ino -> client -> frommds -> capex - map<inodeno_t,map<client_t, map<int,ceph_mds_cap_reconnect> > >::iterator p = cap_imports.begin(); - while (p != cap_imports.end()) { + map<inodeno_t,map<client_t, map<int,ceph_mds_cap_reconnect> > >::iterator p; + for (p = cap_imports.begin(); p != cap_imports.end(); ++p) { CInode *in = get_inode(p->first); - if (!in) { - dout(10) << "process_imported_caps still missing " << 
p->first - << ", will try again after replayed client requests" - << dendl; - ++p; + if (in) { + assert(in->is_auth()); + cap_imports_missing.erase(p->first); continue; } - for (map<client_t, map<int,ceph_mds_cap_reconnect> >::iterator q = p->second.begin(); - q != p->second.end(); - ++q) - for (map<int,ceph_mds_cap_reconnect>::iterator r = q->second.begin(); + if (cap_imports_missing.count(p->first) > 0) + continue; + + cap_imports_num_opening++; + dout(10) << " opening missing ino " << p->first << dendl; + open_ino(p->first, (int64_t)-1, new C_MDC_RejoinOpenInoFinish(this, p->first), false); + } + + if (cap_imports_num_opening > 0) + return true; + + // called by rejoin_gather_finish() ? + if (rejoin_gather.count(mds->get_nodeid()) == 0) { + // process cap imports + // ino -> client -> frommds -> capex + p = cap_imports.begin(); + while (p != cap_imports.end()) { + CInode *in = get_inode(p->first); + if (!in) { + dout(10) << " still missing ino " << p->first + << ", will try again after replayed client requests" << dendl; + ++p; + continue; + } + assert(in->is_auth()); + for (map<client_t,map<int,ceph_mds_cap_reconnect> >::iterator q = p->second.begin(); + q != p->second.end(); + ++q) + for (map<int,ceph_mds_cap_reconnect>::iterator r = q->second.begin(); + r != q->second.end(); + ++r) { + dout(20) << " add_reconnected_cap " << in->ino() << " client." << q->first << dendl; + add_reconnected_cap(in, q->first, inodeno_t(r->second.snaprealm)); + rejoin_import_cap(in, q->first, r->second, r->first); + } + cap_imports.erase(p++); // remove and move on + } + } else { + for (map<inodeno_t,map<client_t,ceph_mds_cap_reconnect> >::iterator q = cap_exports.begin(); + q != cap_exports.end(); + q++) { + for (map<client_t,ceph_mds_cap_reconnect>::iterator r = q->second.begin(); r != q->second.end(); ++r) { - dout(20) << " add_reconnected_cap " << in->ino() << " client." << q->first << dendl; - add_reconnected_cap(in, q->first, inodeno_t(r->second.snaprealm)); - rejoin_import_cap(in, q->first, r->second, r->first); + dout(10) << " exporting caps for client." << r->first << " ino " << q->first << dendl; + Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(r->first.v)); + assert(session); + // mark client caps stale. 
+ MClientCaps *m = new MClientCaps(CEPH_CAP_OP_EXPORT, q->first, 0, 0, 0); + mds->send_message_client_counted(m, session); } - cap_imports.erase(p++); // remove and move on + } + + trim_non_auth(); + + rejoin_gather.erase(mds->get_nodeid()); + maybe_send_pending_rejoins(); + + if (rejoin_gather.empty() && rejoin_ack_gather.count(mds->get_nodeid())) + rejoin_gather_finish(); } + return false; } void MDCache::check_realm_past_parents(SnapRealm *realm) @@ -5056,9 +5152,12 @@ void MDCache::export_remaining_imported_caps() { dout(10) << "export_remaining_imported_caps" << dendl; + stringstream warn_str; + for (map<inodeno_t,map<client_t,map<int,ceph_mds_cap_reconnect> > >::iterator p = cap_imports.begin(); p != cap_imports.end(); ++p) { + warn_str << " ino " << p->first << "\n"; for (map<client_t,map<int,ceph_mds_cap_reconnect> >::iterator q = p->second.begin(); q != p->second.end(); ++q) { @@ -5072,6 +5171,11 @@ void MDCache::export_remaining_imported_caps() } cap_imports.clear(); + + if (warn_str.peek() != EOF) { + mds->clog.warn() << "failed to reconnect caps for missing inodes:" << "\n"; + mds->clog.warn(warn_str); + } } void MDCache::try_reconnect_cap(CInode *in, Session *session) @@ -5216,9 +5320,22 @@ void MDCache::open_snap_parents() gather.set_finisher(new C_MDC_OpenSnapParents(this)); gather.activate(); } else { + if (!reconnected_snaprealms.empty()) { + stringstream warn_str; + for (map<inodeno_t,map<client_t,snapid_t> >::iterator p = reconnected_snaprealms.begin(); + p != reconnected_snaprealms.end(); + ++p) { + warn_str << " unconnected snaprealm " << p->first << "\n"; + for (map<client_t,snapid_t>::iterator q = p->second.begin(); + q != p->second.end(); + ++q) + warn_str << " client." << q->first << " snapid " << q->second << "\n"; + } + mds->clog.warn() << "open_snap_parents has:" << "\n"; + mds->clog.warn(warn_str); + } assert(rejoin_waiters.empty()); assert(missing_snap_parents.empty()); - //assert(reconnected_snaprealms.empty()); // FIXME: need to properly address #5031 dout(10) << "open_snap_parents - all open" << dendl; do_delayed_cap_imports(); @@ -5504,7 +5621,7 @@ void MDCache::queue_file_recover(CInode *in) } in->parent->first = in->first; - le->metablob.add_primary_dentry(in->parent, true, in); + le->metablob.add_primary_dentry(in->parent, in, true); mds->mdlog->submit_entry(le, new C_MDC_QueuedCow(this, in, mut)); mds->mdlog->flush(); } @@ -5784,7 +5901,7 @@ void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls) EUpdate *le = new EUpdate(mds->mdlog, "truncate finish"); mds->mdlog->start_entry(le); le->metablob.add_dir_context(in->get_parent_dir()); - le->metablob.add_primary_dentry(in->get_projected_parent_dn(), true, in); + le->metablob.add_primary_dentry(in->get_projected_parent_dn(), in, true); le->metablob.add_truncate_finish(in->ino(), ls->offset); journal_dirty_inode(mut, &le->metablob, in); @@ -6133,7 +6250,6 @@ void MDCache::trim_inode(CDentry *dn, CInode *in, CDir *con, map<int, MCacheExpi void MDCache::trim_non_auth() { dout(7) << "trim_non_auth" << dendl; - stringstream warn_str_dirs; // temporarily pin all subtree roots for (map<CDir*, set<CDir*> >::iterator p = subtrees.begin(); @@ -6167,22 +6283,18 @@ void MDCache::trim_non_auth() assert(dir); // unlink the dentry - dout(15) << "trim_non_auth removing " << *dn << dendl; + dout(10) << " removing " << *dn << dendl; if (dnl->is_remote()) { dir->unlink_inode(dn); } else if (dnl->is_primary()) { CInode *in = dnl->get_inode(); + dout(10) << " removing " << *in << dendl; list<CDir*> ls; - warn_str_dirs 
<< in->get_parent_dn()->get_name() << "\n"; in->get_dirfrags(ls); for (list<CDir*>::iterator p = ls.begin(); p != ls.end(); ++p) { CDir *subdir = *p; - filepath fp; - subdir->get_inode()->make_path(fp); - warn_str_dirs << fp << "\n"; - if (subdir->is_subtree_root()) - remove_subtree(subdir); + assert(!subdir->is_subtree_root()); in->close_dirfrag(subdir->dirfrag().frag); } dir->unlink_inode(dn); @@ -6221,18 +6333,13 @@ void MDCache::trim_non_auth() for (list<CDir*>::iterator p = ls.begin(); p != ls.end(); ++p) { - dout(0) << " ... " << **p << dendl; - CInode *diri = (*p)->get_inode(); - filepath fp; - diri->make_path(fp); - warn_str_dirs << fp << "\n"; + dout(10) << " removing " << **p << dendl; assert((*p)->get_num_ref() == 1); // SUBTREE remove_subtree((*p)); in->close_dirfrag((*p)->dirfrag().frag); } - dout(0) << " ... " << *in << dendl; - if (in->get_parent_dn()) - warn_str_dirs << in->get_parent_dn()->get_name() << "\n"; + dout(10) << " removing " << *in << dendl; + assert(!in->get_parent_dn()); assert(in->get_num_ref() == 0); remove_inode(in); } @@ -6241,10 +6348,6 @@ void MDCache::trim_non_auth() } show_subtrees(); - if (warn_str_dirs.peek() != EOF) { - mds->clog.info() << "trim_non_auth has deleted paths: " << "\n"; - mds->clog.info(warn_str_dirs); - } } /** @@ -7024,6 +7127,13 @@ void MDCache::dispatch(Message *m) case MSG_MDS_FINDINOREPLY: handle_find_ino_reply(static_cast<MMDSFindInoReply *>(m)); break; + + case MSG_MDS_OPENINO: + handle_open_ino(static_cast<MMDSOpenIno *>(m)); + break; + case MSG_MDS_OPENINOREPLY: + handle_open_ino_reply(static_cast<MMDSOpenInoReply *>(m)); + break; default: dout(7) << "cache unknown message " << m->get_type() << dendl; @@ -7232,8 +7342,8 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, Context *fin, // wh } else { dout(7) << "remote link to " << dnl->get_remote_ino() << ", which i don't have" << dendl; assert(mdr); // we shouldn't hit non-primary dentries doing a non-mdr traversal! - open_remote_ino(dnl->get_remote_ino(), _get_waiter(mdr, req, fin), - (null_okay && depth == path.depth() - 1)); + open_remote_dentry(dn, true, _get_waiter(mdr, req, fin), + (null_okay && depth == path.depth() - 1)); if (mds->logger) mds->logger->inc(l_mds_trino); return 1; } @@ -7390,6 +7500,7 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, Context *fin, // wh return 0; } +#if 0 /** * Find out if the MDS is auth for a given path. * @@ -7422,6 +7533,7 @@ bool MDCache::path_is_mine(filepath& path) return cur->is_auth(); } +#endif CInode *MDCache::cache_traverse(const filepath& fp) { @@ -7678,36 +7790,51 @@ void MDCache::open_remote_ino_2(inodeno_t ino, vector<Anchor>& anchortrace, bool struct C_MDC_OpenRemoteDentry : public Context { MDCache *mdc; CDentry *dn; - bool projected; + inodeno_t ino; Context *onfinish; - C_MDC_OpenRemoteDentry(MDCache *m, CDentry *d, bool p, Context *f) : - mdc(m), dn(d), projected(p), onfinish(f) {} + bool want_xlocked; + int mode; + C_MDC_OpenRemoteDentry(MDCache *m, CDentry *d, inodeno_t i, Context *f, + bool wx, int md) : + mdc(m), dn(d), ino(i), onfinish(f), want_xlocked(wx), mode(md) {} void finish(int r) { - mdc->_open_remote_dentry_finish(r, dn, projected, onfinish); + mdc->_open_remote_dentry_finish(dn, ino, onfinish, want_xlocked, mode, r); } }; -void MDCache::open_remote_dentry(CDentry *dn, bool projected, Context *fin) +void MDCache::open_remote_dentry(CDentry *dn, bool projected, Context *fin, bool want_xlocked) { dout(10) << "open_remote_dentry " << *dn << dendl; CDentry::linkage_t *dnl = projected ? 
dn->get_projected_linkage() : dn->get_linkage(); - open_remote_ino(dnl->get_remote_ino(), - new C_MDC_OpenRemoteDentry(this, dn, projected, fin)); + inodeno_t ino = dnl->get_remote_ino(); + int mode = g_conf->mds_open_remote_link_mode; + Context *fin2 = new C_MDC_OpenRemoteDentry(this, dn, ino, fin, want_xlocked, mode); + if (mode == 0) + open_remote_ino(ino, fin2, want_xlocked); // anchor + else + open_ino(ino, -1, fin2, true, want_xlocked); // backtrace } -void MDCache::_open_remote_dentry_finish(int r, CDentry *dn, bool projected, Context *fin) +void MDCache::_open_remote_dentry_finish(CDentry *dn, inodeno_t ino, Context *fin, + bool want_xlocked, int mode, int r) { - if (r == -ENOENT) { - dout(0) << "open_remote_dentry_finish bad remote dentry " << *dn << dendl; - dn->state_set(CDentry::STATE_BADREMOTEINO); - } else if (r != 0) - assert(0); - fin->finish(r); - delete fin; + if (r < 0) { + if (mode == 0) { + dout(0) << "open_remote_dentry_finish bad remote dentry " << *dn << dendl; + dn->state_set(CDentry::STATE_BADREMOTEINO); + } else { + dout(7) << "open_remote_dentry_finish failed to open ino " << ino + << " for " << *dn << ", retry using anchortable" << dendl; + assert(mode == 1); + Context *fin2 = new C_MDC_OpenRemoteDentry(this, dn, ino, fin, want_xlocked, 0); + open_remote_ino(ino, fin2, want_xlocked); + return; + } + } + fin->complete(r < 0 ? r : 0); } - void MDCache::make_trace(vector<CDentry*>& trace, CInode *in) { // empty trace if we're a base inode @@ -7724,6 +7851,443 @@ void MDCache::make_trace(vector<CDentry*>& trace, CInode *in) } +// ------------------------------------------------------------------------------- +// Open inode by inode number + +class C_MDC_OpenInoBacktraceFetched : public Context { + MDCache *cache; + inodeno_t ino; + public: + bufferlist bl; + C_MDC_OpenInoBacktraceFetched(MDCache *c, inodeno_t i) : + cache(c), ino(i) {} + void finish(int r) { + cache->_open_ino_backtrace_fetched(ino, bl, r); + } +}; + +struct C_MDC_OpenInoTraverseDir : public Context { + MDCache *cache; + inodeno_t ino; + public: + C_MDC_OpenInoTraverseDir(MDCache *c, inodeno_t i) : cache(c), ino(i) {} + void finish(int r) { + assert(cache->opening_inodes.count(ino)); + cache->_open_ino_traverse_dir(ino, cache->opening_inodes[ino], r); + } +}; + +struct C_MDC_OpenInoParentOpened : public Context { + MDCache *cache; + inodeno_t ino; + public: + C_MDC_OpenInoParentOpened(MDCache *c, inodeno_t i) : cache(c), ino(i) {} + void finish(int r) { + cache->_open_ino_parent_opened(ino, r); + } +}; + +void MDCache::_open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err) +{ + dout(10) << "_open_ino_backtrace_fetched ino " << ino << " errno " << err << dendl; + + assert(opening_inodes.count(ino)); + open_ino_info_t& info = opening_inodes[ino]; + + CInode *in = get_inode(ino); + if (in) { + dout(10) << " found cached " << *in << dendl; + open_ino_finish(ino, info, in->authority().first); + return; + } + + inode_backtrace_t backtrace; + if (err == 0) { + ::decode(backtrace, bl); + if (backtrace.pool != info.pool) { + dout(10) << " old object in pool " << info.pool + << ", retrying pool " << backtrace.pool << dendl; + info.pool = backtrace.pool; + C_MDC_OpenInoBacktraceFetched *fin = new C_MDC_OpenInoBacktraceFetched(this, ino); + fetch_backtrace(ino, info.pool, fin->bl, fin); + return; + } + } else if (err == -ENOENT) { + int64_t meta_pool = mds->mdsmap->get_metadata_pool(); + if (info.pool != meta_pool) { + dout(10) << " no object in pool " << info.pool + << ", retrying pool " << 
meta_pool << dendl; + info.pool = meta_pool; + C_MDC_OpenInoBacktraceFetched *fin = new C_MDC_OpenInoBacktraceFetched(this, ino); + fetch_backtrace(ino, info.pool, fin->bl, fin); + return; + } + } + + if (err == 0) { + if (backtrace.ancestors.empty()) { + dout(10) << " got empty backtrace " << dendl; + err = -EIO; + } else if (!info.ancestors.empty()) { + if (info.ancestors[0] == backtrace.ancestors[0]) { + dout(10) << " got same parents " << info.ancestors[0] << " 2 times" << dendl; + err = -EINVAL; + } + } + } + if (err) { + dout(10) << " failed to open ino " << ino << dendl; + open_ino_finish(ino, info, err); + return; + } + + dout(10) << " got backtrace " << backtrace << dendl; + info.ancestors = backtrace.ancestors; + + _open_ino_traverse_dir(ino, info, 0); +} + +void MDCache::_open_ino_parent_opened(inodeno_t ino, int ret) +{ + dout(10) << "_open_ino_parent_opened ino " << ino << " ret " << ret << dendl; + + assert(opening_inodes.count(ino)); + open_ino_info_t& info = opening_inodes[ino]; + + CInode *in = get_inode(ino); + if (in) { + dout(10) << " found cached " << *in << dendl; + open_ino_finish(ino, info, in->authority().first); + return; + } + + if (ret == mds->get_nodeid()) { + _open_ino_traverse_dir(ino, info, 0); + } else { + if (ret >= 0) { + info.check_peers = true; + info.auth_hint = ret; + info.checked.erase(ret); + } + do_open_ino(ino, info, ret); + } +} + +Context* MDCache::_open_ino_get_waiter(inodeno_t ino, MMDSOpenIno *m) +{ + if (m) + return new C_MDS_RetryMessage(mds, m); + else + return new C_MDC_OpenInoTraverseDir(this, ino); +} + +void MDCache::_open_ino_traverse_dir(inodeno_t ino, open_ino_info_t& info, int ret) +{ + dout(10) << "_open_ino_trvserse_dir ino " << ino << " ret " << ret << dendl; + + CInode *in = get_inode(ino); + if (in) { + dout(10) << " found cached " << *in << dendl; + open_ino_finish(ino, info, in->authority().first); + return; + } + + if (ret) { + do_open_ino(ino, info, ret); + return; + } + + int hint = info.auth_hint; + ret = open_ino_traverse_dir(ino, NULL, info.ancestors, + info.discover, info.want_xlocked, &hint); + if (ret > 0) + return; + if (hint != mds->get_nodeid()) + info.auth_hint = hint; + do_open_ino(ino, info, ret); +} + +int MDCache::open_ino_traverse_dir(inodeno_t ino, MMDSOpenIno *m, + vector<inode_backpointer_t>& ancestors, + bool discover, bool want_xlocked, int *hint) +{ + dout(10) << "open_ino_traverse_dir ino " << ino << " " << ancestors << dendl; + int err = 0; + for (unsigned i = 0; i < ancestors.size(); i++) { + CInode *diri = get_inode(ancestors[i].dirino); + + if (!diri) { + if (discover && MDS_INO_IS_MDSDIR(ancestors[i].dirino)) { + open_foreign_mdsdir(ancestors[i].dirino, _open_ino_get_waiter(ino, m)); + return 1; + } + continue; + } + + if (diri->state_test(CInode::STATE_REJOINUNDEF)) + continue; + + if (!diri->is_dir()) { + dout(10) << " " << *diri << " is not dir" << dendl; + if (i == 0) + err = -ENOTDIR; + break; + } + + string &name = ancestors[i].dname; + frag_t fg = diri->pick_dirfrag(name); + CDir *dir = diri->get_dirfrag(fg); + if (!dir) { + if (diri->is_auth()) { + if (diri->is_frozen()) { + dout(10) << " " << *diri << " is frozen, waiting " << dendl; + diri->add_waiter(CDir::WAIT_UNFREEZE, _open_ino_get_waiter(ino, m)); + return 1; + } + dir = diri->get_or_open_dirfrag(this, fg); + } else if (discover) { + open_remote_dirfrag(diri, fg, _open_ino_get_waiter(ino, m)); + return 1; + } + } + if (dir) { + inodeno_t next_ino = i > 0 ? 
ancestors[i - 1].dirino : ino; + if (dir->is_auth()) { + CDentry *dn = dir->lookup(name); + CDentry::linkage_t *dnl = dn ? dn->get_linkage() : NULL; + + if (dnl && dnl->is_primary() && + dnl->get_inode()->state_test(CInode::STATE_REJOINUNDEF)) { + dout(10) << " fetching undef " << *dnl->get_inode() << dendl; + dir->fetch(_open_ino_get_waiter(ino, m)); + return 1; + } + + if (!dnl && !dir->is_complete() && + (!dir->has_bloom() || dir->is_in_bloom(name))) { + dout(10) << " fetching incomplete " << *dir << dendl; + dir->fetch(_open_ino_get_waiter(ino, m)); + return 1; + } + + dout(10) << " no ino " << next_ino << " in " << *dir << dendl; + if (i == 0) + err = -ENOENT; + } else if (discover) { + discover_ino(dir, next_ino, _open_ino_get_waiter(ino, m), + (i == 0 && want_xlocked)); + return 1; + } + } + if (hint && i == 0) + *hint = dir ? dir->authority().first : diri->authority().first; + break; + } + return err; +} + +void MDCache::open_ino_finish(inodeno_t ino, open_ino_info_t& info, int ret) +{ + dout(10) << "open_ino_finish ino " << ino << " ret " << ret << dendl; + + finish_contexts(g_ceph_context, info.waiters, ret); + opening_inodes.erase(ino); +} + +void MDCache::do_open_ino(inodeno_t ino, open_ino_info_t& info, int err) +{ + if (err < 0) { + info.checked.clear(); + info.checked.insert(mds->get_nodeid()); + info.checking = -1; + info.check_peers = true; + info.fetch_backtrace = true; + if (info.discover) { + info.discover = false; + info.ancestors.clear(); + } + } + + if (info.check_peers) { + info.check_peers = false; + info.checking = -1; + do_open_ino_peer(ino, info); + } else if (info.fetch_backtrace) { + info.check_peers = true; + info.fetch_backtrace = false; + info.checking = mds->get_nodeid(); + info.checked.clear(); + info.checked.insert(mds->get_nodeid()); + C_MDC_OpenInoBacktraceFetched *fin = new C_MDC_OpenInoBacktraceFetched(this, ino); + fetch_backtrace(ino, info.pool, fin->bl, fin); + } else { + assert(!info.ancestors.empty()); + info.checking = mds->get_nodeid(); + open_ino(info.ancestors[0].dirino, mds->mdsmap->get_metadata_pool(), + new C_MDC_OpenInoParentOpened(this, ino), info.want_replica); + } +} + +void MDCache::do_open_ino_peer(inodeno_t ino, open_ino_info_t& info) +{ + set<int> all, active; + mds->mdsmap->get_mds_set(all); + mds->mdsmap->get_clientreplay_or_active_or_stopping_mds_set(active); + if (mds->get_state() == MDSMap::STATE_REJOIN) + mds->mdsmap->get_mds_set(active, MDSMap::STATE_REJOIN); + + dout(10) << "do_open_ino_peer " << ino << " active " << active + << " all " << all << " checked " << info.checked << dendl; + + int peer = -1; + if (info.auth_hint >= 0) { + if (active.count(info.auth_hint)) { + peer = info.auth_hint; + info.auth_hint = -1; + } + } else { + for (set<int>::iterator p = active.begin(); p != active.end(); ++p) + if (*p != mds->get_nodeid() && info.checked.count(*p) == 0) { + peer = *p; + break; + } + } + if (peer < 0) { + if (all.size() > active.size() && all != info.checked) { + dout(10) << " waiting for more peers to be active" << dendl; + } else { + dout(10) << " all MDS peers have been checked " << dendl; + do_open_ino(ino, info, 0); + } + } else { + info.checking = peer; + mds->send_message_mds(new MMDSOpenIno(info.tid, ino, info.ancestors), peer); + } +} + +void MDCache::handle_open_ino(MMDSOpenIno *m) +{ + dout(10) << "handle_open_ino " << *m << dendl; + + inodeno_t ino = m->ino; + MMDSOpenInoReply *reply; + CInode *in = get_inode(ino); + if (in) { + dout(10) << " have " << *in << dendl; + reply = new 
MMDSOpenInoReply(m->get_tid(), ino, 0); + if (in->is_auth()) { + touch_inode(in); + while (1) { + CDentry *pdn = in->get_parent_dn(); + if (!pdn) + break; + CInode *diri = pdn->get_dir()->get_inode(); + reply->ancestors.push_back(inode_backpointer_t(diri->ino(), pdn->name, + in->inode.version)); + in = diri; + } + } else { + reply->hint = in->authority().first; + } + } else { + int hint = -1; + int ret = open_ino_traverse_dir(ino, m, m->ancestors, false, false, &hint); + if (ret > 0) + return; + reply = new MMDSOpenInoReply(m->get_tid(), ino, hint, ret); + } + mds->messenger->send_message(reply, m->get_connection()); + m->put(); +} + +void MDCache::handle_open_ino_reply(MMDSOpenInoReply *m) +{ + dout(10) << "handle_open_ino_reply " << *m << dendl; + + inodeno_t ino = m->ino; + int from = m->get_source().num(); + if (opening_inodes.count(ino)) { + open_ino_info_t& info = opening_inodes[ino]; + + if (info.checking == from) + info.checking = -1; + info.checked.insert(from); + + CInode *in = get_inode(ino); + if (in) { + dout(10) << " found cached " << *in << dendl; + open_ino_finish(ino, info, in->authority().first); + } else if (!m->ancestors.empty()) { + dout(10) << " found ino " << ino << " on mds." << from << dendl; + if (!info.want_replica) { + open_ino_finish(ino, info, from); + return; + } + + info.ancestors = m->ancestors; + info.auth_hint = from; + info.checking = mds->get_nodeid(); + info.discover = true; + _open_ino_traverse_dir(ino, info, 0); + } else if (m->error) { + dout(10) << " error " << m->error << " from mds." << from << dendl; + do_open_ino(ino, info, m->error); + } else { + if (m->hint >= 0 && m->hint != mds->get_nodeid()) { + info.auth_hint = m->hint; + info.checked.erase(m->hint); + } + do_open_ino_peer(ino, info); + } + } + m->put(); +} + +void MDCache::kick_open_ino_peers(int who) +{ + dout(10) << "kick_open_ino_peers mds." << who << dendl; + + for (map<inodeno_t, open_ino_info_t>::iterator p = opening_inodes.begin(); + p != opening_inodes.end(); + ++p) { + open_ino_info_t& info = p->second; + if (info.checking == who) { + dout(10) << " kicking ino " << p->first << " who was checking mds." << who << dendl; + info.checking = -1; + do_open_ino_peer(p->first, info); + } else if (info.checking == -1) { + dout(10) << " kicking ino " << p->first << " who was waiting" << dendl; + do_open_ino_peer(p->first, info); + } + } +} + +void MDCache::open_ino(inodeno_t ino, int64_t pool, Context* fin, + bool want_replica, bool want_xlocked) +{ + dout(10) << "open_ino " << ino << " pool " << pool << " want_replica " + << want_replica << dendl; + + if (opening_inodes.count(ino)) { + open_ino_info_t& info = opening_inodes[ino]; + if (want_replica) { + info.want_replica = true; + if (want_xlocked) + info.want_xlocked = true; + } + info.waiters.push_back(fin); + } else { + open_ino_info_t& info = opening_inodes[ino]; + info.checked.insert(mds->get_nodeid()); + info.want_replica = want_replica; + info.want_xlocked = want_xlocked; + info.tid = ++open_ino_last_tid; + info.pool = pool >= 0 ? 
pool : mds->mdsmap->get_first_data_pool(); + info.waiters.push_back(fin); + do_open_ino(ino, info, 0); + } +} + /* ---------------------------- */ /* @@ -8388,7 +8952,7 @@ void MDCache::snaprealm_create(MDRequest *mdr, CInode *in) predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY); journal_cow_inode(mut, &le->metablob, in); - le->metablob.add_primary_dentry(in->get_projected_parent_dn(), true, in); + le->metablob.add_primary_dentry(in->get_projected_parent_dn(), in, true); mds->mdlog->submit_entry(le, new C_MDC_snaprealm_create_finish(this, mdr, mut, in)); mds->mdlog->flush(); @@ -8631,6 +9195,20 @@ void MDCache::eval_remote(CDentry *dn) } } +void MDCache::fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin) +{ + object_t oid = CInode::get_object_name(ino, frag_t(), ""); + mds->objecter->getxattr(oid, object_locator_t(pool), "parent", CEPH_NOSNAP, &bl, 0, fin); +} + +void MDCache::remove_backtrace(inodeno_t ino, int64_t pool, Context *fin) +{ + SnapContext snapc; + object_t oid = CInode::get_object_name(ino, frag_t(), ""); + mds->objecter->removexattr(oid, object_locator_t(pool), "parent", snapc, + ceph_clock_now(g_ceph_context), 0, NULL, fin); +} + class C_MDC_PurgeStrayPurged : public Context { MDCache *cache; CDentry *dn; @@ -8645,13 +9223,12 @@ public: class C_MDC_PurgeForwardingPointers : public Context { MDCache *cache; CDentry *dn; - Context *fin; public: - inode_backtrace_t backtrace; - C_MDC_PurgeForwardingPointers(MDCache *c, CDentry *d, Context *f) : - cache(c), dn(d), fin(f) {} + bufferlist bl; + C_MDC_PurgeForwardingPointers(MDCache *c, CDentry *d) : + cache(c), dn(d) {} void finish(int r) { - cache->_purge_forwarding_pointers(&backtrace, dn, r, fin); + cache->_purge_forwarding_pointers(bl, dn, r); } }; @@ -8666,18 +9243,22 @@ public: } }; -void MDCache::_purge_forwarding_pointers(inode_backtrace_t *backtrace, CDentry *d, int r, Context *fin) +void MDCache::_purge_forwarding_pointers(bufferlist& bl, CDentry *dn, int r) { assert(r == 0 || r == -ENOENT || r == -ENODATA); + inode_backtrace_t backtrace; + if (r == 0) + ::decode(backtrace, bl); + // setup gathering context C_GatherBuilder gather_bld(g_ceph_context); // remove all the objects with forwarding pointer backtraces (aka sentinels) - for (set<int64_t>::const_iterator i = backtrace->old_pools.begin(); - i != backtrace->old_pools.end(); + for (set<int64_t>::const_iterator i = backtrace.old_pools.begin(); + i != backtrace.old_pools.end(); ++i) { SnapContext snapc; - object_t oid = CInode::get_object_name(backtrace->ino, frag_t(), ""); + object_t oid = CInode::get_object_name(backtrace.ino, frag_t(), ""); object_locator_t oloc(*i); mds->objecter->remove(oid, oloc, snapc, ceph_clock_now(g_ceph_context), 0, @@ -8685,10 +9266,10 @@ void MDCache::_purge_forwarding_pointers(inode_backtrace_t *backtrace, CDentry * } if (gather_bld.has_subs()) { - gather_bld.set_finisher(fin); + gather_bld.set_finisher(new C_MDC_PurgeStray(this, dn)); gather_bld.activate(); } else { - fin->finish(r); + _purge_stray(dn, r); } } @@ -8752,17 +9333,12 @@ void MDCache::purge_stray(CDentry *dn) if (in->is_dir()) { dout(10) << "purge_stray dir ... implement me!" 
<< dendl; // FIXME XXX // remove the backtrace - SnapContext snapc; - object_t oid = CInode::get_object_name(in->ino(), frag_t(), ""); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); - - mds->objecter->removexattr(oid, oloc, "parent", snapc, ceph_clock_now(g_ceph_context), 0, - NULL, new C_MDC_PurgeStrayPurged(this, dn)); + remove_backtrace(in->ino(), mds->mdsmap->get_metadata_pool(), + new C_MDC_PurgeStrayPurged(this, dn)); } else if (in->is_file()) { // get the backtrace before blowing away the object - C_MDC_PurgeStray *strayfin = new C_MDC_PurgeStray(this, dn); - C_MDC_PurgeForwardingPointers *fpfin = new C_MDC_PurgeForwardingPointers(this, dn, strayfin); - in->fetch_backtrace(&fpfin->backtrace, fpfin); + C_MDC_PurgeForwardingPointers *fin = new C_MDC_PurgeForwardingPointers(this, dn); + fetch_backtrace(in->ino(), in->get_inode().layout.fl_pg_pool, fin->bl, fin); } else { // not a dir or file; purged! _purge_stray_purged(dn); @@ -8837,7 +9413,7 @@ void MDCache::_purge_stray_purged(CDentry *dn, int r) pi->version = in->pre_dirty(); le->metablob.add_dir_context(dn->dir); - le->metablob.add_primary_dentry(dn, true, in); + le->metablob.add_primary_dentry(dn, in, true); mds->mdlog->submit_entry(le, new C_MDC_PurgeStrayLoggedTruncate(this, dn, mds->mdlog->get_current_segment())); } @@ -9178,7 +9754,8 @@ void MDCache::handle_discover(MDiscover *dis) snapid_t snapid = dis->get_snapid(); // get started. - if (MDS_INO_IS_BASE(dis->get_base_ino())) { + if (MDS_INO_IS_BASE(dis->get_base_ino()) && + !dis->wants_base_dir() && dis->get_want().depth() == 0) { // wants root dout(7) << "handle_discover from mds." << from << " wants base + " << dis->get_want().get_path() |