summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sage Weil <sage@inktank.com> 2013-10-22 16:14:00 -0700
committer: Sage Weil <sage@inktank.com> 2013-10-22 17:23:43 -0700
commit: d99f579c01f70a6eb005bde50aaabd38e169838d (patch)
tree: 8fa951af76644a81c0dd71fc7b83eea28dc07695
parent: 5507d9b7b6d4300237543b9cbc2932bf86e18281 (diff)
download: ceph-d99f579c01f70a6eb005bde50aaabd38e169838d.tar.gz
osd/ReplicatedPG: create whiteout on promote ENOENT
If we try to fetch an object from the base tier and it is not present, we can create a whiteout object.

Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r-- src/osd/ReplicatedPG.cc 32
-rw-r--r-- src/test/librados/tier.cc 11
2 files changed, 34 insertions, 9 deletions
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 6a32a9f49cc..581073e5357 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -4597,7 +4597,17 @@ void ReplicatedPG::finish_promote(int r, OpRequestRef op,
CopyResults *results, ObjectContextRef obc,
hobject_t& temp_obj)
{
- if (r < 0) {
+ dout(10) << __func__ << " " << obc->obs.oi.soid << " r=" << r << dendl;
+
+ bool whiteout = false;
+ if (r == -ENOENT &&
+ (pool.info.cache_mode == pg_pool_t::CACHEMODE_WRITEBACK ||
+ pool.info.cache_mode == pg_pool_t::CACHEMODE_READONLY)) {
+ dout(10) << __func__ << " whiteout " << obc->obs.oi.soid << dendl;
+ whiteout = true;
+ }
+
+ if (r < 0 && !whiteout) {
// we need to get rid of the op in the blocked queue
map<hobject_t,list<OpRequestRef> >::iterator blocked_iter =
waiting_for_blocked_object.find(obc->obs.oi.soid);
@@ -4615,20 +4625,26 @@ void ReplicatedPG::finish_promote(int r, OpRequestRef op,
RepGather *repop = simple_repop_create(obc);
OpContext *tctx = repop->ctx;
- tctx->op_t.swap(results->final_tx);
- if (results->started_temp_obj) {
- tctx->discard_temp_oid = temp_obj;
- }
object_stat_sum_t delta;
++delta.num_objects;
obc->obs.exists = true;
- delta.num_bytes += results->object_size;
- obc->obs.oi.category = results->category;
+ if (whiteout) {
+ // create a whiteout
+ tctx->op_t.touch(coll, obc->obs.oi.soid);
+ obc->obs.oi.set_flag(object_info_t::FLAG_WHITEOUT);
+ } else {
+ tctx->op_t.swap(results->final_tx);
+ if (results->started_temp_obj) {
+ tctx->discard_temp_oid = temp_obj;
+ }
+ delta.num_bytes += results->object_size;
+ obc->obs.oi.category = results->category;
+ tctx->user_at_version = results->user_version;
+ }
info.stats.stats.add(delta, obc->obs.oi.category);
tctx->at_version.epoch = get_osdmap()->get_epoch();
tctx->at_version.version = pg_log.get_head().version + 1;
- tctx->user_at_version = results->user_version;
tctx->log.push_back(pg_log_entry_t(
pg_log_entry_t::MODIFY,
diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc
index 81f2061c66e..140f87f1eab 100644
--- a/src/test/librados/tier.cc
+++ b/src/test/librados/tier.cc
@@ -126,11 +126,20 @@ TEST(LibRadosTier, Promote) {
ASSERT_EQ(1, base_ioctx.read("foo", bl, 1, 0));
}
+ // read, trigger a whiteout
+ {
+ bufferlist bl;
+ ASSERT_EQ(-ENOENT, base_ioctx.read("bar", bl, 1, 0));
+ ASSERT_EQ(-ENOENT, base_ioctx.read("bar", bl, 1, 0));
+ }
+
// verify the object is present in the cache tier
{
ObjectIterator it = cache_ioctx.objects_begin();
ASSERT_TRUE(it != cache_ioctx.objects_end());
- ASSERT_EQ(it->first, string("foo"));
+ ASSERT_TRUE(it->first == string("foo") || it->first == string("bar"));
+ ++it;
+ ASSERT_TRUE(it->first == string("foo") || it->first == string("bar"));
++it;
ASSERT_TRUE(it == cache_ioctx.objects_end());
}