summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2013-05-31 17:09:19 -0700
committer Sage Weil <sage@inktank.com> 2013-06-02 16:34:18 -0700
commit ffb87918fa7b829a5199eec08804dc540a819bf2 (patch)
tree 3874e0087e28d1225f27eb5ea143508067b18350
parent 38f8d850d35500e3d8751cd14c5cdaaff682c7d7 (diff)
download ceph-ffb87918fa7b829a5199eec08804dc540a819bf2.tar.gz
mon: start lease timer from peon_init()
In the scenario:

 - leader wins, peons lose
 - leader sees it is too far behind on paxos and bootstraps
 - leader tries to sync with someone, waits for a quorum of the others
 - peons sit around forever waiting

The problem is that they never time out because paxos never issues a lease,
which is the normal timeout that lets them detect a leader failure.

Avoid this by starting the lease timeout as soon as we lose the election.
The timeout callback just does a bootstrap and does not rely on any other
state.

I see one possible danger here: there may be some "normal" cases where the
leader takes a long time to issue its first lease that we currently tolerate,
but won't with this new check in place. I hope that raising the lease
interval/timeout or reducing the allowed paxos drift will make that a
non-issue. If it is problematic, we will need a separate explicit "i am
alive" from the leader while it is getting ready to issue the lease to
prevent a live-lock.

Backport: cuttlefish, bobtail
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
(cherry picked from commit f1ccb2d808453ad7ef619c2faa41a8f6e0077bd9)
-rw-r--r-- src/mon/Paxos.cc 17
-rw-r--r-- src/mon/Paxos.h 3
2 files changed, 16 insertions, 4 deletions
diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc
index a1a04ae6c1e..f5cbe89092b 100644
--- a/src/mon/Paxos.cc
+++ b/src/mon/Paxos.cc
@@ -878,10 +878,7 @@ void Paxos::handle_lease(MMonPaxos *lease)
mon->messenger->send_message(ack, lease->get_source_inst());
// (re)set timeout event.
- if (lease_timeout_event)
- mon->timer.cancel_event(lease_timeout_event);
- lease_timeout_event = new C_LeaseTimeout(this);
- mon->timer.add_event_after(g_conf->mon_lease_ack_timeout, lease_timeout_event);
+ reset_lease_timeout();
// kick waiters
finish_contexts(g_ceph_context, waiting_for_active);
@@ -936,6 +933,15 @@ void Paxos::lease_ack_timeout()
mon->bootstrap();
}
+void Paxos::reset_lease_timeout()
+{
+ dout(20) << "reset_lease_timeout - setting timeout event" << dendl;
+ if (lease_timeout_event)
+ mon->timer.cancel_event(lease_timeout_event);
+ lease_timeout_event = new C_LeaseTimeout(this);
+ mon->timer.add_event_after(g_conf->mon_lease_ack_timeout, lease_timeout_event);
+}
+
void Paxos::lease_timeout()
{
dout(5) << "lease_timeout -- calling new election" << dendl;
@@ -1104,6 +1110,9 @@ void Paxos::peon_init()
lease_expire = utime_t();
dout(10) << "peon_init -- i am a peon" << dendl;
+ // start a timer, in case the leader never manages to issue a lease
+ reset_lease_timeout();
+
// no chance to write now!
finish_contexts(g_ceph_context, waiting_for_writeable, -EAGAIN);
finish_contexts(g_ceph_context, waiting_for_commit, -EAGAIN);
diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h
index be63889575e..04553776b93 100644
--- a/src/mon/Paxos.h
+++ b/src/mon/Paxos.h
@@ -956,6 +956,9 @@ private:
*/
void lease_timeout(); // on peon, if lease isn't extended
+ /// restart the lease timeout timer
+ void reset_lease_timeout();
+
/**
* Cancel all of Paxos' timeout/renew events.
*/