diff options
author | Sage Weil <sage@inktank.com> | 2013-03-19 14:26:16 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-03-22 17:46:04 -0700 |
commit | 3569489b541ac0643520d20b08c788c26dfaff7f (patch) | |
tree | 312546a51a63b059f2835abad237c27005e9e298 | |
parent | 664ffa7d2178e486b00fa0706067f19b1bb9ab82 (diff) | |
download | ceph-3569489b541ac0643520d20b08c788c26dfaff7f.tar.gz |
os/FileJournal: fix aio self-throttling deadlock
This block of code tries to limit the number of aios in flight by waiting
for the amount of data to be written to grow relative to a function of the
number of aios. Strictly speaking, the condition we are waiting for is a
function of both aio_num and the write queue, but we are only woken by
changes in aio_num, and were (in rare cases) waiting when aio_num == 0 and
there was no possibility of being woken.
Fix this by verifying that aio_num > 0, and restructuring the loop to
recheck that condition on each wakeup.
Fixes: #4079
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit e5940da9a534821d0d8f872c13f9ac26fb05a0f5)
-rw-r--r-- | src/os/FileJournal.cc | 25 |
1 files changed, 17 insertions, 8 deletions
diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc index 218347efa49..f27e96e4cf6 100644 --- a/src/os/FileJournal.cc +++ b/src/os/FileJournal.cc @@ -1117,19 +1117,28 @@ void FileJournal::write_thread_entry() // should we back off to limit aios in flight? try to do this // adaptively so that we submit larger aios once we have lots of // them in flight. - int exp = MIN(aio_num * 2, 24); - long unsigned min_new = 1ull << exp; - long unsigned cur = throttle_bytes.get_current(); - dout(20) << "write_thread_entry aio throttle: aio num " << aio_num << " bytes " << aio_bytes - << " ... exp " << exp << " min_new " << min_new - << " ... pending " << cur << dendl; - if (cur < min_new) { + // + // NOTE: our condition here is based on aio_num (protected by + // aio_lock) and throttle_bytes (part of the write queue). when + // we sleep, we *only* wait for aio_num to change, and do not + // wake when more data is queued. this is not strictly correct, + // but should be fine given that we will have plenty of aios in + // flight if we hit this limit to ensure we keep the device + // saturated. + while (aio_num > 0) { + int exp = MIN(aio_num * 2, 24); + long unsigned min_new = 1ull << exp; + long unsigned cur = throttle_bytes.get_current(); + dout(20) << "write_thread_entry aio throttle: aio num " << aio_num << " bytes " << aio_bytes + << " ... exp " << exp << " min_new " << min_new + << " ... pending " << cur << dendl; + if (cur >= min_new) + break; dout(20) << "write_thread_entry deferring until more aios complete: " << aio_num << " aios with " << aio_bytes << " bytes needs " << min_new << " bytes to start a new aio (currently " << cur << " pending)" << dendl; aio_cond.Wait(aio_lock); dout(20) << "write_thread_entry woke up" << dendl; - continue; } } #endif |