diff options
author | Sage Weil <sage@inktank.com> | 2013-07-15 13:46:43 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-07-15 13:46:43 -0700 |
commit | 6a524c358c9697b5b577fa6d9db01d025c084946 (patch) | |
tree | 890d9752333b2fd76bdde26984ec82817e4706f4 | |
parent | 2bf95e5a1c8dffed23bfebc7f55d7d5eea40080b (diff) | |
parent | 34f76bd915bd35e638a306c0a8d3e062abcbec6a (diff) | |
download | ceph-6a524c358c9697b5b577fa6d9db01d025c084946.tar.gz |
Merge pull request #436 from ceph/wip-mon-fixes
Wip mon fixes
Reviewed-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- | src/mon/Elector.cc | 5 | ||||
-rw-r--r-- | src/mon/Monitor.cc | 34 | ||||
-rw-r--r-- | src/mon/Monitor.h | 5 | ||||
-rw-r--r-- | src/mon/Paxos.cc | 98 | ||||
-rw-r--r-- | src/mon/Paxos.h | 14 | ||||
-rw-r--r-- | src/mon/PaxosService.cc | 21 |
6 files changed, 103 insertions, 74 deletions
diff --git a/src/mon/Elector.cc b/src/mon/Elector.cc index 4b1221d2c31..1650a997b2d 100644 --- a/src/mon/Elector.cc +++ b/src/mon/Elector.cc @@ -55,7 +55,8 @@ void Elector::bump_epoch(epoch_t e) MonitorDBStore::Transaction t; t.put(Monitor::MONITOR_NAME, "election_epoch", epoch); mon->store->apply_transaction(t); - mon->reset(); + + mon->join_election(); // clear up some state electing_me = false; @@ -198,7 +199,6 @@ void Elector::handle_propose(MMonElection *m) dout(5) << " got propose from old epoch, quorum is " << mon->quorum << ", " << m->get_source() << " must have just started" << dendl; // we may be active; make sure we reset things in the monitor appropriately. - mon->reset(); mon->start_election(); } else { dout(5) << " ignoring old propose" << dendl; @@ -215,7 +215,6 @@ void Elector::handle_propose(MMonElection *m) } else { // wait, i should win! if (!electing_me) { - mon->reset(); mon->start_election(); } } diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 914714d733c..9b665bdad3d 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -644,7 +644,7 @@ void Monitor::bootstrap() // reset state = STATE_PROBING; - reset(); + _reset(); // sync store if (g_conf->mon_compact_on_bootstrap) { @@ -708,9 +708,12 @@ void Monitor::_add_bootstrap_peer_hint(string cmd, string args, ostream& ss) } // called by bootstrap(), or on leader|peon -> electing -void Monitor::reset() +void Monitor::_reset() { - dout(10) << "reset" << dendl; + dout(10) << __func__ << dendl; + + assert(state == STATE_ELECTING || + state == STATE_PROBING); cancel_probe_timeout(); timecheck_finish(); @@ -1407,14 +1410,21 @@ void Monitor::handle_probe_reply(MMonProbe *m) m->put(); } +void Monitor::join_election() +{ + dout(10) << __func__ << dendl; + state = STATE_ELECTING; + _reset(); +} + void Monitor::start_election() { dout(10) << "start_election" << dendl; + state = STATE_ELECTING; + _reset(); cancel_probe_timeout(); - // call a new election - state = STATE_ELECTING; clog.info() << "mon." << name << " calling new monitor election\n"; elector.call_election(); } @@ -1447,18 +1457,15 @@ epoch_t Monitor::get_epoch() void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features) { - if (!is_electing()) - reset(); - + dout(10) << __func__ << " epoch " << epoch << " quorum " << active + << " features " << features << dendl; + assert(is_electing()); state = STATE_LEADER; leader_since = ceph_clock_now(g_ceph_context); leader = rank; quorum = active; quorum_features = features; outside_quorum.clear(); - dout(10) << "win_election, epoch " << epoch << " quorum is " << quorum - << " features are " << quorum_features - << dendl; clog.info() << "mon." << name << "@" << rank << " won leader election with quorum " << quorum << "\n"; @@ -2205,14 +2212,12 @@ void Monitor::handle_command(MMonCommand *m) string quorumcmd; cmd_getval(g_ceph_context, cmdmap, "quorumcmd", quorumcmd); if (quorumcmd == "exit") { - reset(); start_election(); elector.stop_participating(); rs = "stopped responding to quorum, initiated new election"; r = 0; } else if (quorumcmd == "enter") { elector.start_participating(); - reset(); start_election(); rs = "started responding to quorum, initiated new election"; r = 0; @@ -2315,6 +2320,9 @@ void Monitor::handle_forward(MForward *m) m->msg = NULL; // so ~MForward doesn't delete it req->set_connection(c); + // not super accurate, but better than nothing. + req->set_recv_stamp(m->get_recv_stamp()); + /* * note which election epoch this is; we will drop the message if * there is a future election since our peers will resend routed diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 35bff4207ad..5bf0c0ef962 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -509,8 +509,11 @@ public: return quorum_features; } +private: + void _reset(); ///< called from bootstrap, start_, or join_election +public: void bootstrap(); - void reset(); + void join_election(); void start_election(); void win_standalone_election(); void win_election(epoch_t epoch, set<int>& q, diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index d988c641547..26886451a49 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -427,11 +427,13 @@ void Paxos::handle_last(MMonPaxos *last) dout(10) << "that's everyone. active!" << dendl; extend_lease(); - finish_proposal(); + if (do_refresh()) { + finish_round(); - finish_contexts(g_ceph_context, waiting_for_active); - finish_contexts(g_ceph_context, waiting_for_readable); - finish_contexts(g_ceph_context, waiting_for_writeable); + finish_contexts(g_ceph_context, waiting_for_active); + finish_contexts(g_ceph_context, waiting_for_readable); + finish_contexts(g_ceph_context, waiting_for_writeable); + } } } } else { @@ -507,11 +509,15 @@ void Paxos::begin(bufferlist& v) if (mon->get_quorum().size() == 1) { // we're alone, take it easy commit(); - finish_proposal(); - finish_contexts(g_ceph_context, waiting_for_active); - finish_contexts(g_ceph_context, waiting_for_commit); - finish_contexts(g_ceph_context, waiting_for_readable); - finish_contexts(g_ceph_context, waiting_for_writeable); + if (do_refresh()) { + assert(is_updating()); // we can't be updating-previous with quorum of 1 + commit_proposal(); + finish_round(); + finish_contexts(g_ceph_context, waiting_for_active); + finish_contexts(g_ceph_context, waiting_for_commit); + finish_contexts(g_ceph_context, waiting_for_readable); + finish_contexts(g_ceph_context, waiting_for_writeable); + } return; } @@ -589,14 +595,12 @@ void Paxos::handle_accept(MMonPaxos *accept) if (accept->pn != accepted_pn) { // we accepted a higher pn, from some other leader dout(10) << " we accepted a higher pn " << accepted_pn << ", ignoring" << dendl; - accept->put(); - return; + goto out; } if (last_committed > 0 && accept->last_committed < last_committed-1) { dout(10) << " this is from an old round, ignoring" << dendl; - accept->put(); - return; + goto out; } assert(accept->last_committed == last_committed || // not committed accept->last_committed == last_committed-1); // committed @@ -612,6 +616,11 @@ void Paxos::handle_accept(MMonPaxos *accept) // note: this may happen before the lease is reextended (below) dout(10) << " got majority, committing" << dendl; commit(); + if (!do_refresh()) + goto out; + if (is_updating()) + commit_proposal(); + finish_contexts(g_ceph_context, waiting_for_commit); } // done? @@ -624,14 +633,15 @@ void Paxos::handle_accept(MMonPaxos *accept) // yay! extend_lease(); - finish_proposal(); + finish_round(); // wake people up finish_contexts(g_ceph_context, waiting_for_active); - finish_contexts(g_ceph_context, waiting_for_commit); finish_contexts(g_ceph_context, waiting_for_readable); finish_contexts(g_ceph_context, waiting_for_writeable); } + + out: accept->put(); } @@ -787,44 +797,48 @@ void Paxos::warn_on_future_time(utime_t t, entity_name_t from) } -void Paxos::finish_proposal() +bool Paxos::do_refresh() { - assert(mon->is_leader()); + bool need_bootstrap = false; // make sure we have the latest state loaded up - bool need_bootstrap = false; mon->refresh_from_paxos(&need_bootstrap); - // ok, now go active! - state = STATE_ACTIVE; + if (need_bootstrap) { + dout(10) << " doing requested bootstrap" << dendl; + mon->bootstrap(); + return false; + } + + return true; +} - // finish off the last proposal - if (!proposals.empty()) { - assert(mon->is_leader()); +void Paxos::commit_proposal() +{ + dout(10) << __func__ << dendl; + assert(mon->is_leader()); + assert(!proposals.empty()); + assert(is_updating()); - C_Proposal *proposal = static_cast<C_Proposal*>(proposals.front()); - if (!proposal->proposed) { - dout(10) << __func__ << " proposal " << proposal << ": we must have received a stay message and we're " - << "trying to finish before time. " - << "Instead, propose it (if we are active)!" << dendl; - } else { - dout(10) << __func__ << " proposal " << proposal << " took " - << (ceph_clock_now(NULL) - proposal->proposal_time) - << " to finish" << dendl; - proposals.pop_front(); - proposal->complete(0); - } - } + C_Proposal *proposal = static_cast<C_Proposal*>(proposals.front()); + assert(proposal->proposed); + dout(10) << __func__ << " proposal " << proposal << " took " + << (ceph_clock_now(NULL) - proposal->proposal_time) + << " to finish" << dendl; + proposals.pop_front(); + proposal->complete(0); +} + +void Paxos::finish_round() +{ + assert(mon->is_leader()); + + // ok, now go active! + state = STATE_ACTIVE; dout(10) << __func__ << " state " << state << " proposals left " << proposals.size() << dendl; - if (need_bootstrap) { - dout(10) << " doing requested bootstrap" << dendl; - mon->bootstrap(); - return; - } - if (should_trim()) { trim(); } diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index 1cdad50e5bb..c85bcaf495e 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -983,7 +983,19 @@ private: * Begin proposing the Proposal at the front of the proposals queue. */ void propose_queued(); - void finish_proposal(); + + /** + * refresh state from store + * + * Called when we have new state for the mon to consume. If we return false, + * abort (we triggered a bootstrap). + * + * @returns true on success, false if we are now bootstrapping + */ + bool do_refresh(); + + void commit_proposal(); + void finish_round(); public: /** diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc index efe60aafd69..f5416a8fa87 100644 --- a/src/mon/PaxosService.cc +++ b/src/mon/PaxosService.cc @@ -163,10 +163,9 @@ void PaxosService::propose_pending() { dout(10) << "propose_pending" << dendl; assert(have_pending); + assert(!proposing); assert(mon->is_leader()); assert(is_active()); - if (!is_active()) - return; if (proposal_timer) { dout(10) << " canceling proposal_timer " << proposal_timer << dendl; @@ -232,6 +231,12 @@ void PaxosService::restart() finish_contexts(g_ceph_context, waiting_for_finished_proposal, -EAGAIN); + if (have_pending) { + discard_pending(); + have_pending = false; + } + proposing = false; + on_restart(); } @@ -239,18 +244,6 @@ void PaxosService::election_finished() { dout(10) << "election_finished" << dendl; - if (proposal_timer) { - dout(10) << " canceling proposal_timer " << proposal_timer << dendl; - mon->timer.cancel_event(proposal_timer); - proposal_timer = 0; - } - - if (have_pending) { - discard_pending(); - have_pending = false; - } - proposing = false; - finish_contexts(g_ceph_context, waiting_for_finished_proposal, -EAGAIN); // make sure we update our state |