summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.z.yan@intel.com> 2013-05-15 16:35:39 +0800
committer: Yan, Zheng <zheng.z.yan@intel.com> 2013-05-28 13:57:21 +0800
commit: 7a6ec35367fa9a8f5728201efc0a42119fac884c (patch)
tree: 293a71495db6fd2197252f01f03899c8e19dff4a
parent: 0c1ca8edda43b39878c29d65a577691db7e49a08 (diff)
download: ceph-7a6ec35367fa9a8f5728201efc0a42119fac884c.tar.gz
mds: fix slave commit tracking
An MDS may crash after journalling a slave commit, but before sending the commit ack to the master. When that MDS later restarts, it will not send the commit ack to the master, so the master waits for the commit ack forever. The fix is to remove the failed MDS from each request's uncommitted slave list. When the failed MDS recovers, its resolve message will tell the master which slave requests are not committed. The master will re-add the recovering MDS to the requests' uncommitted slave lists if necessary. Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
-rw-r--r--  src/mds/MDCache.cc  55
-rw-r--r--  src/mds/MDCache.h    3
-rw-r--r--  src/mds/MDS.cc       1
3 files changed, 36 insertions, 23 deletions
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 24ef1cd8db8..9db51b1ce55 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -2174,7 +2174,7 @@ void MDCache::committed_master_slave(metareqid_t r, int from)
dout(10) << "committed_master_slave mds." << from << " on " << r << dendl;
assert(uncommitted_masters.count(r));
uncommitted_masters[r].slaves.erase(from);
- if (uncommitted_masters[r].slaves.empty())
+ if (!uncommitted_masters[r].recovering && uncommitted_masters[r].slaves.empty())
log_master_commit(r);
}
@@ -2191,20 +2191,20 @@ void MDCache::logged_master_update(metareqid_t reqid)
}
/*
- * The mds could crash after receiving all slaves' commit acknowledgement,
- * but before journalling the ECommitted.
+ * Master may crash after receiving all slaves' commit acks, but before journalling
+ * the final commit. Slaves may crash after journalling the slave commit, but before
+ * sending commit ack to the master. Commit masters with no uncommitted slave when
+ * resolve finishes.
*/
void MDCache::finish_committed_masters()
{
- map<metareqid_t, umaster>::iterator p = uncommitted_masters.begin();
- while (p != uncommitted_masters.end()) {
- if (p->second.slaves.empty()) {
- metareqid_t reqid = p->first;
- dout(10) << "finish_committed_masters " << reqid << dendl;
- ++p;
- log_master_commit(reqid);
- } else {
- ++p;
+ for (map<metareqid_t, umaster>::iterator p = uncommitted_masters.begin();
+ p != uncommitted_masters.end();
+ ++p) {
+ p->second.recovering = false;
+ if (!p->second.committing && p->second.slaves.empty()) {
+ dout(10) << "finish_committed_masters " << p->first << dendl;
+ log_master_commit(p->first);
}
}
}
@@ -2700,6 +2700,16 @@ void MDCache::handle_mds_failure(int who)
}
}
+ for (map<metareqid_t, umaster>::iterator p = uncommitted_masters.begin();
+ p != uncommitted_masters.end();
+ ++p) {
+ // The failed MDS may have already committed the slave update
+ if (p->second.slaves.count(who)) {
+ p->second.recovering = true;
+ p->second.slaves.erase(who);
+ }
+ }
+
while (!finish.empty()) {
dout(10) << "cleaning up slave request " << *finish.front() << dendl;
request_finish(finish.front());
@@ -2959,17 +2969,18 @@ void MDCache::maybe_resolve_finish()
dout(10) << "maybe_resolve_finish still waiting for resolves ("
<< resolve_gather << ")" << dendl;
return;
+ }
+
+ dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." << dendl;
+ disambiguate_imports();
+ finish_committed_masters();
+ if (mds->is_resolve()) {
+ trim_unlinked_inodes();
+ recalc_auth_bits();
+ trim_non_auth();
+ mds->resolve_done();
} else {
- dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." << dendl;
- disambiguate_imports();
- if (mds->is_resolve()) {
- trim_unlinked_inodes();
- recalc_auth_bits();
- trim_non_auth();
- mds->resolve_done();
- } else {
- maybe_send_pending_rejoins();
- }
+ maybe_send_pending_rejoins();
}
}
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index c41692b5169..3c73bef7417 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -323,6 +323,9 @@ protected:
LogSegment *ls;
list<Context*> waiters;
bool safe;
+ bool committing;
+ bool recovering;
+ umaster() : committing(false), recovering(false) {}
};
map<metareqid_t, umaster> uncommitted_masters; // master: req -> slave set
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index 935fb0c417e..16b857e1a8a 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -1460,7 +1460,6 @@ void MDS::reconnect_done()
void MDS::rejoin_joint_start()
{
dout(1) << "rejoin_joint_start" << dendl;
- mdcache->finish_committed_masters();
mdcache->rejoin_send_rejoins();
}
void MDS::rejoin_done()