From a0ba5c66162af720627fcf7ba63fdc76ac97f568 Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Wed, 2 Oct 2013 18:14:23 -0700 Subject: Journaler: do not advance safe_pos inappropriately We were previously advancing safe_pos to the end of the first flush we had started in the queue, without checking if the finishing flush was actually the first one we'd triggered. When crossing object boundaries, the flushes can come back out of order (especially on a degraded cluster), which could lead to problems if the MDS got restarted without fully committing the intermediate events. Now, we only advance the safe_pos if we just received the first flush in the queue. This continues to function as previously intended -- any follow-on flushes that completed earlier have been removed from the pending_safe queue, so we advance to either the (already very advanced) flush_pos, or to the first position in pending_safe that still hasn't finished. And because all of our safety callbacks are triggered by advancing safe_pos (and not merely getting a flush), our data is safe without any further changes to the code base. 
Signed-off-by: Greg Farnum --- src/osdc/Journaler.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc index ba4ca8dc4b9..badfb3c54e8 100644 --- a/src/osdc/Journaler.cc +++ b/src/osdc/Journaler.cc @@ -405,12 +405,17 @@ void Journaler::_finish_flush(int r, uint64_t start, utime_t stamp) } // adjust safe_pos - assert(pending_safe.count(start)); - pending_safe.erase(start); - if (pending_safe.empty()) - safe_pos = flush_pos; - else - safe_pos = *pending_safe.begin(); + std::set::iterator start_iter = pending_safe.find(start); + assert(start_iter != pending_safe.end()); + if (start_iter == pending_safe.begin()) { + pending_safe.erase(start_iter); + if (pending_safe.empty()) + safe_pos = flush_pos; + else + safe_pos = *pending_safe.begin(); + } else { + pending_safe.erase(start_iter); + } ldout(cct, 10) << "_finish_flush safe from " << start << ", pending_safe " << pending_safe -- cgit v1.2.1