summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-03-19 14:26:16 -0700
committerSage Weil <sage@inktank.com>2013-03-22 17:46:04 -0700
commit3569489b541ac0643520d20b08c788c26dfaff7f (patch)
tree312546a51a63b059f2835abad237c27005e9e298
parent664ffa7d2178e486b00fa0706067f19b1bb9ab82 (diff)
downloadceph-3569489b541ac0643520d20b08c788c26dfaff7f.tar.gz
os/FileJournal: fix aio self-throttling deadlock
This block of code tries to limit the number of aios in flight by waiting for the amount of data to be written to grow relative to a function of the number of aios. Strictly speaking, the condition we are waiting for is a function of both aio_num and the write queue, but we are only woken by changes in aio_num, and were (in rare cases) waiting when aio_num == 0 and there was no possibility of being woken. Fix this by verifying that aio_num > 0, and restructuring the loop to recheck that condition on each wakeup. Fixes: #4079 Signed-off-by: Sage Weil <sage@inktank.com> Reviewed-by: Samuel Just <sam.just@inktank.com> (cherry picked from commit e5940da9a534821d0d8f872c13f9ac26fb05a0f5)
-rw-r--r--src/os/FileJournal.cc25
1 files changed, 17 insertions, 8 deletions
diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc
index 218347efa49..f27e96e4cf6 100644
--- a/src/os/FileJournal.cc
+++ b/src/os/FileJournal.cc
@@ -1117,19 +1117,28 @@ void FileJournal::write_thread_entry()
// should we back off to limit aios in flight? try to do this
// adaptively so that we submit larger aios once we have lots of
// them in flight.
- int exp = MIN(aio_num * 2, 24);
- long unsigned min_new = 1ull << exp;
- long unsigned cur = throttle_bytes.get_current();
- dout(20) << "write_thread_entry aio throttle: aio num " << aio_num << " bytes " << aio_bytes
- << " ... exp " << exp << " min_new " << min_new
- << " ... pending " << cur << dendl;
- if (cur < min_new) {
+ //
+ // NOTE: our condition here is based on aio_num (protected by
+ // aio_lock) and throttle_bytes (part of the write queue). when
+ // we sleep, we *only* wait for aio_num to change, and do not
+ // wake when more data is queued. this is not strictly correct,
+ // but should be fine given that we will have plenty of aios in
+ // flight if we hit this limit to ensure we keep the device
+ // saturated.
+ while (aio_num > 0) {
+ int exp = MIN(aio_num * 2, 24);
+ long unsigned min_new = 1ull << exp;
+ long unsigned cur = throttle_bytes.get_current();
+ dout(20) << "write_thread_entry aio throttle: aio num " << aio_num << " bytes " << aio_bytes
+ << " ... exp " << exp << " min_new " << min_new
+ << " ... pending " << cur << dendl;
+ if (cur >= min_new)
+ break;
dout(20) << "write_thread_entry deferring until more aios complete: "
<< aio_num << " aios with " << aio_bytes << " bytes needs " << min_new
<< " bytes to start a new aio (currently " << cur << " pending)" << dendl;
aio_cond.Wait(aio_lock);
dout(20) << "write_thread_entry woke up" << dendl;
- continue;
}
}
#endif