summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-03-19 14:26:16 -0700
committerSage Weil <sage@inktank.com>2013-03-22 09:15:20 -0700
commite5940da9a534821d0d8f872c13f9ac26fb05a0f5 (patch)
tree26a3a6288b80058ecfa26b272b156d1fe8465fcf
parent7118df89cd4377137f6c37d17efc8b3bd8fef1bc (diff)
downloadceph-e5940da9a534821d0d8f872c13f9ac26fb05a0f5.tar.gz
os/FileJournal: fix aio self-throttling deadlock
This block of code tries to limit the number of aios in flight by waiting for the amount of data to be written to grow relative to a function of the number of aios. Strictly speaking, the condition we are waiting for is a function of both aio_num and the write queue, but we are only woken by changes in aio_num, and were (in rare cases) waiting when aio_num == 0 and there was no possibility of being woken. Fix this by verifying that aio_num > 0, and restructuring the loop to recheck that condition on each wakeup. Fixes: #4079 Signed-off-by: Sage Weil <sage@inktank.com> Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r--src/os/FileJournal.cc25
1 files changed, 17 insertions, 8 deletions
diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc
index 52765359b9f..576c965fb3b 100644
--- a/src/os/FileJournal.cc
+++ b/src/os/FileJournal.cc
@@ -1127,19 +1127,28 @@ void FileJournal::write_thread_entry()
// should we back off to limit aios in flight? try to do this
// adaptively so that we submit larger aios once we have lots of
// them in flight.
- int exp = MIN(aio_num * 2, 24);
- long unsigned min_new = 1ull << exp;
- long unsigned cur = throttle_bytes.get_current();
- dout(20) << "write_thread_entry aio throttle: aio num " << aio_num << " bytes " << aio_bytes
- << " ... exp " << exp << " min_new " << min_new
- << " ... pending " << cur << dendl;
- if (cur < min_new) {
+ //
+ // NOTE: our condition here is based on aio_num (protected by
+ // aio_lock) and throttle_bytes (part of the write queue). when
+ // we sleep, we *only* wait for aio_num to change, and do not
+ // wake when more data is queued. this is not strictly correct,
+ // but should be fine given that we will have plenty of aios in
+ // flight if we hit this limit to ensure we keep the device
+ // saturated.
+ while (aio_num > 0) {
+ int exp = MIN(aio_num * 2, 24);
+ long unsigned min_new = 1ull << exp;
+ long unsigned cur = throttle_bytes.get_current();
+ dout(20) << "write_thread_entry aio throttle: aio num " << aio_num << " bytes " << aio_bytes
+ << " ... exp " << exp << " min_new " << min_new
+ << " ... pending " << cur << dendl;
+ if (cur >= min_new)
+ break;
dout(20) << "write_thread_entry deferring until more aios complete: "
<< aio_num << " aios with " << aio_bytes << " bytes needs " << min_new
<< " bytes to start a new aio (currently " << cur << " pending)" << dendl;
aio_cond.Wait(aio_lock);
dout(20) << "write_thread_entry woke up" << dendl;
- continue;
}
}
#endif