diff options
author | Sage Weil <sage@newdream.net> | 2009-11-20 13:03:41 -0800 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2009-11-20 13:40:38 -0800 |
commit | 29efb0b563e3659206c94d026aa54c138b17ece8 (patch) | |
tree | 3043c1cfacef153c593e519a2bd0f85af6266152 | |
parent | 780ee5a299487da46d8e62e4afc27063a521a7e3 (diff) | |
download | ceph-29efb0b563e3659206c94d026aa54c138b17ece8.tar.gz |
mds: fix reconnect race
Don't ignore client replay or session msgs if we are
moving to the reconnect state. The client may get the mdsmap
before we do and send things our way, so accept them if we
are in reconnect, or if we want to be in reconnect.
-rw-r--r-- | src/mds/MDS.cc | 1 | ||||
-rw-r--r-- | src/mds/MDS.h | 6 | ||||
-rw-r--r-- | src/mds/Server.cc | 7 |
3 files changed, 12 insertions, 2 deletions
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index 234e5e0a034..2e63a66b844 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -994,6 +994,7 @@ void MDS::reconnect_start() { dout(1) << "reconnect_start" << dendl; server->reconnect_clients(); + finish_contexts(waiting_for_reconnect); } void MDS::reconnect_done() { diff --git a/src/mds/MDS.h b/src/mds/MDS.h index 9a919879bb1..5c296417162 100644 --- a/src/mds/MDS.h +++ b/src/mds/MDS.h @@ -184,7 +184,7 @@ class MDS : public Dispatcher { int state; // my confirmed state int want_state; // the state i want - list<Context*> waiting_for_active, waiting_for_replay; + list<Context*> waiting_for_active, waiting_for_replay, waiting_for_reconnect; list<Context*> replay_queue; map<int, list<Context*> > waiting_for_active_peer; list<Context*> waiting_for_nolaggy; @@ -203,11 +203,15 @@ class MDS : public Dispatcher { void wait_for_replay(Context *c) { waiting_for_replay.push_back(c); } + void wait_for_reconnect(Context *c) { + waiting_for_reconnect.push_back(c); + } void enqueue_replay(Context *c) { replay_queue.push_back(c); } int get_state() { return state; } + int get_want_state() { return want_state; } bool is_creating() { return state == MDSMap::STATE_CREATING; } bool is_starting() { return state == MDSMap::STATE_STARTING; } bool is_standby() { return state == MDSMap::STATE_STANDBY; } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 5d07b7e4567..a8faa79271e 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -101,7 +101,7 @@ void Server::dispatch(Message *m) // active? 
if (!mds->is_active() && !(mds->is_stopping() && m->get_orig_source().is_mds())) { - if (mds->is_reconnect() && + if ((mds->is_reconnect() || mds->get_want_state() == CEPH_MDS_STATE_RECONNECT) && m->get_type() == CEPH_MSG_CLIENT_REQUEST && ((MClientRequest*)m)->is_replay()) { dout(3) << "queuing replayed op" << dendl; @@ -457,6 +457,11 @@ void Server::handle_client_reconnect(MClientReconnect *m) int from = m->get_source().num(); Session *session = get_session(m); + if (!mds->is_reconnect() && mds->get_want_state() == CEPH_MDS_STATE_RECONNECT) { + dout(10) << " we're almost in reconnect state (mdsmap delivery race?); waiting" << dendl; + mds->wait_for_reconnect(new C_MDS_RetryMessage(mds, m)); + return; + } if (!mds->is_reconnect() || !session) { stringstream ss; utime_t delay = g_clock.now(); |