summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2012-05-04 13:12:58 -0700
committer: Sage Weil <sage@newdream.net> 2012-05-04 13:12:58 -0700
commit: f3043fee3e22600cb4349072287842db129588eb (patch)
tree: 8a3849b5da1b4059ccb74ad444ddfa914b05eded
parent: f3760da4feb02a5b25794615ad67f07b3e1370b9 (diff)
download: ceph-f3043fee3e22600cb4349072287842db129588eb.tar.gz
objectcacher: don't wait for write waiters; wait after dirtying
We do three things here:
- Wait for the dirty limit to drop _after_ writing into the cache. This means that an active thread can always provide its dirty data to the cache for potential writing without waiting (a small win). It's also helpful later... (see below, and next commit)
- Don't wait for other waiters. If another thread is dirtying 1MB and is waiting for it, don't wait for that thread too. This prevents two threads writing 1MB at a time with a limit of 1MB from serializing: both can dirty their 1MB and initiate a flush, and once 1/2 of that has flushed, one of them will be allowed to proceed.
- Update the flusher to add the dirty_waiting bytes to the amount to write so that the OPs will indeed be parallel.
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- src/client/Client.cc 6
-rw-r--r-- src/librbd.cc 2
-rw-r--r-- src/osdc/ObjectCacher.cc 35
-rw-r--r-- src/osdc/ObjectCacher.h 3
4 files changed, 26 insertions, 20 deletions
diff --git a/src/client/Client.cc b/src/client/Client.cc
index c069f772996..7f220093912 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -5311,13 +5311,13 @@ int Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf)
get_cap_ref(in, CEPH_CAP_FILE_BUFFER);
- // wait? (this may block!)
- objectcacher->wait_for_write(size, client_lock);
-
// async, caching, non-blocking.
objectcacher->file_write(&in->oset, &in->layout, in->snaprealm->get_snap_context(),
offset, size, bl, ceph_clock_now(cct), 0);
+ // wait? (this may block!)
+ objectcacher->wait_for_write(size, client_lock);
+
put_cap_ref(in, CEPH_CAP_FILE_BUFFER);
} else {
// simple, non-atomic sync write
diff --git a/src/librbd.cc b/src/librbd.cc
index bb7043682aa..57a4eb8cfc9 100644
--- a/src/librbd.cc
+++ b/src/librbd.cc
@@ -288,8 +288,8 @@ namespace librbd {
wr->extents.push_back(extent);
{
Mutex::Locker l(cache_lock);
- object_cacher->wait_for_write(len, cache_lock);
object_cacher->writex(wr, object_set);
+ object_cacher->wait_for_write(len, cache_lock);
}
}
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 208002ad16b..621655f79dc 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -457,8 +457,7 @@ ObjectCacher::ObjectCacher(CephContext *cct_, string name, WritebackHandler& wb,
cct(cct_), writeback_handler(wb), name(name), lock(l),
flush_set_callback(flush_callback), flush_set_callback_arg(flush_callback_arg),
flusher_stop(false), flusher_thread(this),
- stat_waiter(0),
- stat_clean(0), stat_dirty(0), stat_rx(0), stat_tx(0), stat_missing(0)
+ stat_clean(0), stat_dirty(0), stat_rx(0), stat_tx(0), stat_missing(0), stat_dirty_waiting(0)
{
perf_start();
}
@@ -1132,15 +1131,19 @@ bool ObjectCacher::wait_for_write(uint64_t len, Mutex& lock)
utime_t start = ceph_clock_now(cct);
// wait for writeback?
- while (get_stat_dirty() + get_stat_tx() >= conf->client_oc_max_dirty) {
+ // - wait for dirty and tx bytes (relative to the max_dirty threshold)
+ // - do not wait for bytes other waiters are waiting on. this means that
+ // threads do not wait for each other. this effectively allows the cache size
+ // to balloon proportional to the data that is in flight.
+ while (get_stat_dirty() + get_stat_tx() >= conf->client_oc_max_dirty + get_stat_dirty_waiting()) {
ldout(cct, 10) << "wait_for_write waiting on " << len << ", dirty|tx "
- << (get_stat_dirty() + get_stat_tx())
- << " >= " << conf->client_oc_max_dirty
- << dendl;
+ << (get_stat_dirty() + get_stat_tx())
+ << " >= max " << conf->client_oc_max_dirty << " + dirty_waiting " << get_stat_dirty_waiting()
+ << dendl;
flusher_cond.Signal();
- stat_waiter++;
+ stat_dirty_waiting += len;
stat_cond.Wait(lock);
- stat_waiter--;
+ stat_dirty_waiting -= len;
blocked++;
ldout(cct, 10) << "wait_for_write woke up" << dendl;
}
@@ -1148,7 +1151,7 @@ bool ObjectCacher::wait_for_write(uint64_t len, Mutex& lock)
// start writeback anyway?
if (get_stat_dirty() > conf->client_oc_target_dirty) {
ldout(cct, 10) << "wait_for_write " << get_stat_dirty() << " > target "
- << conf->client_oc_target_dirty << ", nudging flusher" << dendl;
+ << conf->client_oc_target_dirty << ", nudging flusher" << dendl;
flusher_cond.Signal();
}
if (blocked && perfcounter) {
@@ -1177,13 +1180,14 @@ void ObjectCacher::flusher_entry()
<< conf->client_oc_target_dirty << " target, "
<< conf->client_oc_max_dirty << " max)"
<< dendl;
- if (get_stat_dirty() > conf->client_oc_target_dirty) {
+ if (get_stat_dirty() + get_stat_dirty_waiting() > conf->client_oc_target_dirty) {
// flush some dirty pages
ldout(cct, 10) << "flusher "
- << get_stat_dirty() << " dirty > target "
- << conf->client_oc_target_dirty
- << ", flushing some dirty bhs" << dendl;
- flush(get_stat_dirty() - conf->client_oc_target_dirty);
+ << get_stat_dirty() << " dirty + " << get_stat_dirty_waiting()
+ << " dirty_waiting > target "
+ << conf->client_oc_target_dirty
+ << ", flushing some dirty bhs" << dendl;
+ flush(get_stat_dirty() + get_stat_dirty_waiting() - conf->client_oc_target_dirty);
}
else {
// check tail of lru for old dirty items
@@ -1757,7 +1761,8 @@ void ObjectCacher::bh_stat_add(BufferHead *bh)
stat_rx += bh->length();
break;
}
- if (stat_waiter) stat_cond.Signal();
+ if (get_stat_dirty_waiting() > 0)
+ stat_cond.Signal();
}
void ObjectCacher::bh_stat_sub(BufferHead *bh)
diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h
index 8f308af6fbe..85a8cead4b9 100644
--- a/src/osdc/ObjectCacher.h
+++ b/src/osdc/ObjectCacher.h
@@ -315,13 +315,13 @@ class ObjectCacher {
// bh stats
Cond stat_cond;
- int stat_waiter;
loff_t stat_clean;
loff_t stat_dirty;
loff_t stat_rx;
loff_t stat_tx;
loff_t stat_missing;
+ loff_t stat_dirty_waiting; // bytes that writers are waiting on to write
void verify_stats() const;
@@ -330,6 +330,7 @@ class ObjectCacher {
loff_t get_stat_tx() { return stat_tx; }
loff_t get_stat_rx() { return stat_rx; }
loff_t get_stat_dirty() { return stat_dirty; }
+ loff_t get_stat_dirty_waiting() { return stat_dirty_waiting; }
loff_t get_stat_clean() { return stat_clean; }
void touch_bh(BufferHead *bh) {