summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sage Weil <sage@newdream.net> 2009-11-20 13:03:41 -0800
committer Sage Weil <sage@newdream.net> 2009-11-20 13:40:38 -0800
commit 29efb0b563e3659206c94d026aa54c138b17ece8 (patch)
tree 3043c1cfacef153c593e519a2bd0f85af6266152
parent 780ee5a299487da46d8e62e4afc27063a521a7e3 (diff)
download ceph-29efb0b563e3659206c94d026aa54c138b17ece8.tar.gz
mds: fix reconnect race
Don't ignore client replay or session msgs if we are moving to the reconnect state. The client may get the mdsmap before us and send things our way, so take them if we are in reconnect, or if we want to be in reconnect.
-rw-r--r-- src/mds/MDS.cc 1
-rw-r--r-- src/mds/MDS.h 6
-rw-r--r-- src/mds/Server.cc 7
3 files changed, 12 insertions, 2 deletions
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index 234e5e0a034..2e63a66b844 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -994,6 +994,7 @@ void MDS::reconnect_start()
{
dout(1) << "reconnect_start" << dendl;
server->reconnect_clients();
+ finish_contexts(waiting_for_reconnect);
}
void MDS::reconnect_done()
{
diff --git a/src/mds/MDS.h b/src/mds/MDS.h
index 9a919879bb1..5c296417162 100644
--- a/src/mds/MDS.h
+++ b/src/mds/MDS.h
@@ -184,7 +184,7 @@ class MDS : public Dispatcher {
int state; // my confirmed state
int want_state; // the state i want
- list<Context*> waiting_for_active, waiting_for_replay;
+ list<Context*> waiting_for_active, waiting_for_replay, waiting_for_reconnect;
list<Context*> replay_queue;
map<int, list<Context*> > waiting_for_active_peer;
list<Context*> waiting_for_nolaggy;
@@ -203,11 +203,15 @@ class MDS : public Dispatcher {
void wait_for_replay(Context *c) {
waiting_for_replay.push_back(c);
}
+ void wait_for_reconnect(Context *c) {
+ waiting_for_reconnect.push_back(c);
+ }
void enqueue_replay(Context *c) {
replay_queue.push_back(c);
}
int get_state() { return state; }
+ int get_want_state() { return want_state; }
bool is_creating() { return state == MDSMap::STATE_CREATING; }
bool is_starting() { return state == MDSMap::STATE_STARTING; }
bool is_standby() { return state == MDSMap::STATE_STANDBY; }
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 5d07b7e4567..a8faa79271e 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -101,7 +101,7 @@ void Server::dispatch(Message *m)
// active?
if (!mds->is_active() &&
!(mds->is_stopping() && m->get_orig_source().is_mds())) {
- if (mds->is_reconnect() &&
+ if ((mds->is_reconnect() || mds->get_want_state() == CEPH_MDS_STATE_RECONNECT) &&
m->get_type() == CEPH_MSG_CLIENT_REQUEST &&
((MClientRequest*)m)->is_replay()) {
dout(3) << "queuing replayed op" << dendl;
@@ -457,6 +457,11 @@ void Server::handle_client_reconnect(MClientReconnect *m)
int from = m->get_source().num();
Session *session = get_session(m);
+ if (!mds->is_reconnect() && mds->get_want_state() == CEPH_MDS_STATE_RECONNECT) {
+ dout(10) << " we're almost in reconnect state (mdsmap delivery race?); waiting" << dendl;
+ mds->wait_for_reconnect(new C_MDS_RetryMessage(mds, m));
+ return;
+ }
if (!mds->is_reconnect() || !session) {
stringstream ss;
utime_t delay = g_clock.now();