summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-07-15 13:46:43 -0700
committerSage Weil <sage@inktank.com>2013-07-15 13:46:43 -0700
commit6a524c358c9697b5b577fa6d9db01d025c084946 (patch)
tree890d9752333b2fd76bdde26984ec82817e4706f4
parent2bf95e5a1c8dffed23bfebc7f55d7d5eea40080b (diff)
parent34f76bd915bd35e638a306c0a8d3e062abcbec6a (diff)
downloadceph-6a524c358c9697b5b577fa6d9db01d025c084946.tar.gz
Merge pull request #436 from ceph/wip-mon-fixes
Wip mon fixes Reviewed-by: Greg Farnum <greg@inktank.com>
-rw-r--r--src/mon/Elector.cc5
-rw-r--r--src/mon/Monitor.cc34
-rw-r--r--src/mon/Monitor.h5
-rw-r--r--src/mon/Paxos.cc98
-rw-r--r--src/mon/Paxos.h14
-rw-r--r--src/mon/PaxosService.cc21
6 files changed, 103 insertions, 74 deletions
diff --git a/src/mon/Elector.cc b/src/mon/Elector.cc
index 4b1221d2c31..1650a997b2d 100644
--- a/src/mon/Elector.cc
+++ b/src/mon/Elector.cc
@@ -55,7 +55,8 @@ void Elector::bump_epoch(epoch_t e)
MonitorDBStore::Transaction t;
t.put(Monitor::MONITOR_NAME, "election_epoch", epoch);
mon->store->apply_transaction(t);
- mon->reset();
+
+ mon->join_election();
// clear up some state
electing_me = false;
@@ -198,7 +199,6 @@ void Elector::handle_propose(MMonElection *m)
dout(5) << " got propose from old epoch, quorum is " << mon->quorum
<< ", " << m->get_source() << " must have just started" << dendl;
// we may be active; make sure we reset things in the monitor appropriately.
- mon->reset();
mon->start_election();
} else {
dout(5) << " ignoring old propose" << dendl;
@@ -215,7 +215,6 @@ void Elector::handle_propose(MMonElection *m)
} else {
// wait, i should win!
if (!electing_me) {
- mon->reset();
mon->start_election();
}
}
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 914714d733c..9b665bdad3d 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -644,7 +644,7 @@ void Monitor::bootstrap()
// reset
state = STATE_PROBING;
- reset();
+ _reset();
// sync store
if (g_conf->mon_compact_on_bootstrap) {
@@ -708,9 +708,12 @@ void Monitor::_add_bootstrap_peer_hint(string cmd, string args, ostream& ss)
}
// called by bootstrap(), or on leader|peon -> electing
-void Monitor::reset()
+void Monitor::_reset()
{
- dout(10) << "reset" << dendl;
+ dout(10) << __func__ << dendl;
+
+ assert(state == STATE_ELECTING ||
+ state == STATE_PROBING);
cancel_probe_timeout();
timecheck_finish();
@@ -1407,14 +1410,21 @@ void Monitor::handle_probe_reply(MMonProbe *m)
m->put();
}
+void Monitor::join_election()
+{
+ dout(10) << __func__ << dendl;
+ state = STATE_ELECTING;
+ _reset();
+}
+
void Monitor::start_election()
{
dout(10) << "start_election" << dendl;
+ state = STATE_ELECTING;
+ _reset();
cancel_probe_timeout();
- // call a new election
- state = STATE_ELECTING;
clog.info() << "mon." << name << " calling new monitor election\n";
elector.call_election();
}
@@ -1447,18 +1457,15 @@ epoch_t Monitor::get_epoch()
void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features)
{
- if (!is_electing())
- reset();
-
+ dout(10) << __func__ << " epoch " << epoch << " quorum " << active
+ << " features " << features << dendl;
+ assert(is_electing());
state = STATE_LEADER;
leader_since = ceph_clock_now(g_ceph_context);
leader = rank;
quorum = active;
quorum_features = features;
outside_quorum.clear();
- dout(10) << "win_election, epoch " << epoch << " quorum is " << quorum
- << " features are " << quorum_features
- << dendl;
clog.info() << "mon." << name << "@" << rank
<< " won leader election with quorum " << quorum << "\n";
@@ -2205,14 +2212,12 @@ void Monitor::handle_command(MMonCommand *m)
string quorumcmd;
cmd_getval(g_ceph_context, cmdmap, "quorumcmd", quorumcmd);
if (quorumcmd == "exit") {
- reset();
start_election();
elector.stop_participating();
rs = "stopped responding to quorum, initiated new election";
r = 0;
} else if (quorumcmd == "enter") {
elector.start_participating();
- reset();
start_election();
rs = "started responding to quorum, initiated new election";
r = 0;
@@ -2315,6 +2320,9 @@ void Monitor::handle_forward(MForward *m)
m->msg = NULL; // so ~MForward doesn't delete it
req->set_connection(c);
+ // not super accurate, but better than nothing.
+ req->set_recv_stamp(m->get_recv_stamp());
+
/*
* note which election epoch this is; we will drop the message if
* there is a future election since our peers will resend routed
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index 35bff4207ad..5bf0c0ef962 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -509,8 +509,11 @@ public:
return quorum_features;
}
+private:
+ void _reset(); ///< called from bootstrap, start_, or join_election
+public:
void bootstrap();
- void reset();
+ void join_election();
void start_election();
void win_standalone_election();
void win_election(epoch_t epoch, set<int>& q,
diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc
index d988c641547..26886451a49 100644
--- a/src/mon/Paxos.cc
+++ b/src/mon/Paxos.cc
@@ -427,11 +427,13 @@ void Paxos::handle_last(MMonPaxos *last)
dout(10) << "that's everyone. active!" << dendl;
extend_lease();
- finish_proposal();
+ if (do_refresh()) {
+ finish_round();
- finish_contexts(g_ceph_context, waiting_for_active);
- finish_contexts(g_ceph_context, waiting_for_readable);
- finish_contexts(g_ceph_context, waiting_for_writeable);
+ finish_contexts(g_ceph_context, waiting_for_active);
+ finish_contexts(g_ceph_context, waiting_for_readable);
+ finish_contexts(g_ceph_context, waiting_for_writeable);
+ }
}
}
} else {
@@ -507,11 +509,15 @@ void Paxos::begin(bufferlist& v)
if (mon->get_quorum().size() == 1) {
// we're alone, take it easy
commit();
- finish_proposal();
- finish_contexts(g_ceph_context, waiting_for_active);
- finish_contexts(g_ceph_context, waiting_for_commit);
- finish_contexts(g_ceph_context, waiting_for_readable);
- finish_contexts(g_ceph_context, waiting_for_writeable);
+ if (do_refresh()) {
+ assert(is_updating()); // we can't be updating-previous with quorum of 1
+ commit_proposal();
+ finish_round();
+ finish_contexts(g_ceph_context, waiting_for_active);
+ finish_contexts(g_ceph_context, waiting_for_commit);
+ finish_contexts(g_ceph_context, waiting_for_readable);
+ finish_contexts(g_ceph_context, waiting_for_writeable);
+ }
return;
}
@@ -589,14 +595,12 @@ void Paxos::handle_accept(MMonPaxos *accept)
if (accept->pn != accepted_pn) {
// we accepted a higher pn, from some other leader
dout(10) << " we accepted a higher pn " << accepted_pn << ", ignoring" << dendl;
- accept->put();
- return;
+ goto out;
}
if (last_committed > 0 &&
accept->last_committed < last_committed-1) {
dout(10) << " this is from an old round, ignoring" << dendl;
- accept->put();
- return;
+ goto out;
}
assert(accept->last_committed == last_committed || // not committed
accept->last_committed == last_committed-1); // committed
@@ -612,6 +616,11 @@ void Paxos::handle_accept(MMonPaxos *accept)
// note: this may happen before the lease is reextended (below)
dout(10) << " got majority, committing" << dendl;
commit();
+ if (!do_refresh())
+ goto out;
+ if (is_updating())
+ commit_proposal();
+ finish_contexts(g_ceph_context, waiting_for_commit);
}
// done?
@@ -624,14 +633,15 @@ void Paxos::handle_accept(MMonPaxos *accept)
// yay!
extend_lease();
- finish_proposal();
+ finish_round();
// wake people up
finish_contexts(g_ceph_context, waiting_for_active);
- finish_contexts(g_ceph_context, waiting_for_commit);
finish_contexts(g_ceph_context, waiting_for_readable);
finish_contexts(g_ceph_context, waiting_for_writeable);
}
+
+ out:
accept->put();
}
@@ -787,44 +797,48 @@ void Paxos::warn_on_future_time(utime_t t, entity_name_t from)
}
-void Paxos::finish_proposal()
+bool Paxos::do_refresh()
{
- assert(mon->is_leader());
+ bool need_bootstrap = false;
// make sure we have the latest state loaded up
- bool need_bootstrap = false;
mon->refresh_from_paxos(&need_bootstrap);
- // ok, now go active!
- state = STATE_ACTIVE;
+ if (need_bootstrap) {
+ dout(10) << " doing requested bootstrap" << dendl;
+ mon->bootstrap();
+ return false;
+ }
+
+ return true;
+}
- // finish off the last proposal
- if (!proposals.empty()) {
- assert(mon->is_leader());
+void Paxos::commit_proposal()
+{
+ dout(10) << __func__ << dendl;
+ assert(mon->is_leader());
+ assert(!proposals.empty());
+ assert(is_updating());
- C_Proposal *proposal = static_cast<C_Proposal*>(proposals.front());
- if (!proposal->proposed) {
- dout(10) << __func__ << " proposal " << proposal << ": we must have received a stay message and we're "
- << "trying to finish before time. "
- << "Instead, propose it (if we are active)!" << dendl;
- } else {
- dout(10) << __func__ << " proposal " << proposal << " took "
- << (ceph_clock_now(NULL) - proposal->proposal_time)
- << " to finish" << dendl;
- proposals.pop_front();
- proposal->complete(0);
- }
- }
+ C_Proposal *proposal = static_cast<C_Proposal*>(proposals.front());
+ assert(proposal->proposed);
+ dout(10) << __func__ << " proposal " << proposal << " took "
+ << (ceph_clock_now(NULL) - proposal->proposal_time)
+ << " to finish" << dendl;
+ proposals.pop_front();
+ proposal->complete(0);
+}
+
+void Paxos::finish_round()
+{
+ assert(mon->is_leader());
+
+ // ok, now go active!
+ state = STATE_ACTIVE;
dout(10) << __func__ << " state " << state
<< " proposals left " << proposals.size() << dendl;
- if (need_bootstrap) {
- dout(10) << " doing requested bootstrap" << dendl;
- mon->bootstrap();
- return;
- }
-
if (should_trim()) {
trim();
}
diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h
index 1cdad50e5bb..c85bcaf495e 100644
--- a/src/mon/Paxos.h
+++ b/src/mon/Paxos.h
@@ -983,7 +983,19 @@ private:
* Begin proposing the Proposal at the front of the proposals queue.
*/
void propose_queued();
- void finish_proposal();
+
+ /**
+ * refresh state from store
+ *
+ * Called when we have new state for the mon to consume. If we return false,
+ * abort (we triggered a bootstrap).
+ *
+ * @returns true on success, false if we are now bootstrapping
+ */
+ bool do_refresh();
+
+ void commit_proposal();
+ void finish_round();
public:
/**
diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc
index efe60aafd69..f5416a8fa87 100644
--- a/src/mon/PaxosService.cc
+++ b/src/mon/PaxosService.cc
@@ -163,10 +163,9 @@ void PaxosService::propose_pending()
{
dout(10) << "propose_pending" << dendl;
assert(have_pending);
+ assert(!proposing);
assert(mon->is_leader());
assert(is_active());
- if (!is_active())
- return;
if (proposal_timer) {
dout(10) << " canceling proposal_timer " << proposal_timer << dendl;
@@ -232,6 +231,12 @@ void PaxosService::restart()
finish_contexts(g_ceph_context, waiting_for_finished_proposal, -EAGAIN);
+ if (have_pending) {
+ discard_pending();
+ have_pending = false;
+ }
+ proposing = false;
+
on_restart();
}
@@ -239,18 +244,6 @@ void PaxosService::election_finished()
{
dout(10) << "election_finished" << dendl;
- if (proposal_timer) {
- dout(10) << " canceling proposal_timer " << proposal_timer << dendl;
- mon->timer.cancel_event(proposal_timer);
- proposal_timer = 0;
- }
-
- if (have_pending) {
- discard_pending();
- have_pending = false;
- }
- proposing = false;
-
finish_contexts(g_ceph_context, waiting_for_finished_proposal, -EAGAIN);
// make sure we update our state