summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGreg Farnum <greg@inktank.com>2013-10-09 17:48:57 -0700
committerGreg Farnum <greg@inktank.com>2013-10-17 13:11:45 -0700
commitaff31357190b0371772822e21c7ee7f17b357f9f (patch)
tree05a4502b34ca7c62706bd3858b52af061bc13e47
parent3f6a23e267867cd3874f36832f91898e8f2900c9 (diff)
downloadceph-aff31357190b0371772822e21c7ee7f17b357f9f.tar.gz
ReplicatedPG: promote: first draft pass at doing object promotion
This is not yet at all complete -- among other things, it will retry forever on any object which doesn't exist in the underlying pool. But it demonstrates the approach reasonably clearly. Signed-off-by: Greg Farnum <greg@inktank.com>
-rw-r--r--src/osd/ReplicatedPG.cc42
-rw-r--r--src/osd/ReplicatedPG.h61
2 files changed, 97 insertions, 6 deletions
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 4a7aaff4dc0..c1afa0cab18 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1149,10 +1149,10 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op, ObjectContextRef obc,
return false;
break;
case pg_pool_t::CACHEMODE_WRITEBACK:
- if (obc.get()) {
+ if (obc.get() && obc->obs.exists) { // we have the object already
return false;
- } else {
- do_cache_redirect(op, obc);
+ } else { // try and promote!
+ promote_object(op, obc);
return true;
}
break;
@@ -1160,12 +1160,17 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op, ObjectContextRef obc,
do_cache_redirect(op, obc);
return true;
break;
- case pg_pool_t::CACHEMODE_READONLY:
- if (obc.get() && !r) {
+ case pg_pool_t::CACHEMODE_READONLY: // TODO: clean this case up
+ if (!obc.get() && r == -ENOENT) { // we don't have the object and op's a read
+ promote_object(op, obc);
+ return true;
+ } else if (obc.get() && obc->obs.exists) { // we have the object locally
return false;
- } else {
+ } else if (!r) { // it must be a write
do_cache_redirect(op, obc);
return true;
+ } else { // crap, there was a failure of some kind
+ return false;
}
break;
default:
@@ -1188,6 +1193,31 @@ void ReplicatedPG::do_cache_redirect(OpRequestRef op, ObjectContextRef obc)
return;
}
+void ReplicatedPG::promote_object(OpRequestRef op, ObjectContextRef obc) // Begin promoting the object targeted by `op`: start a copy from the pool named by pool.info.tier_of and queue `op` behind the blocked object.
+{
+ MOSDOp *m = static_cast<MOSDOp*>(op->get_req()); // the request carried by op, viewed as an MOSDOp
+ if (!obc.get()) { // caller had no ObjectContext for the target, so obtain one here
+ int r = find_object_context(
+ hobject_t(m->get_oid(), // the target hobject_t is rebuilt from fields of the request message
+ m->get_object_locator().key,
+ m->get_snapid(),
+ m->get_pg().ps(),
+ m->get_object_locator().get_pool(),
+ m->get_object_locator().nspace),
+ &obc, true, NULL); // presumably `true` is the allow-create flag (per the assert comment below) -- confirm against find_object_context
+ assert(r == 0); // a lookup that allows creates can't fail now
+ }
+
+ hobject_t temp_target = generate_temp_object(); // temp object handed to both the callback and start_copy
+ PromoteCallback *cb = new PromoteCallback(obc, temp_target, this); // NOTE(review): raw new; ownership presumably passes to start_copy -- confirm who deletes cb
+ object_locator_t oloc(m->get_object_locator()); // start from the request's locator...
+ oloc.pool = pool.info.tier_of; // ...but point it at the pool this pool is a tier of, i.e. the copy source
+ start_copy(cb, obc, obc->obs.oi.soid, oloc, 0, temp_target); // cb->finish() is expected to run once the copy completes
+
+ assert(obc->is_blocked()); // relies on start_copy having marked obc blocked -- presumably done inside start_copy; verify
+ wait_for_blocked_object(obc->obs.oi.soid, op); // park op on the blocked object; PromoteCallback installs a C_KickBlockedObject on its repop
+}
+
void ReplicatedPG::execute_ctx(OpContext *ctx)
{
dout(10) << __func__ << " " << ctx << dendl;
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 2ffc9e61066..bdaed9333d4 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -206,6 +206,56 @@ public:
};
friend class CopyFromCallback;
+ class PromoteCallback: public CopyCallback { // CopyCallback used by promote_object: on copy completion it builds and issues a repop recording the promoted object
+ ObjectContextRef obc; // context of the object being promoted
+ hobject_t temp_obj; // temp object passed to start_copy; discarded after the repop if the copy actually used it
+ ReplicatedPG *pg; // PG on whose behalf the promotion runs (raw, non-owning pointer)
+ public:
+ PromoteCallback(ObjectContextRef obc_, const hobject_t& temp_obj_,
+ ReplicatedPG *pg_) :
+ obc(obc_), temp_obj(temp_obj_), pg(pg_) {} // plain member-wise capture of the three arguments
+
+ virtual void finish(CopyCallbackResults results) { // NOTE(review): results.get<0>() is never examined here, so a failed copy is handled exactly like a successful one
+ CopyResults* results_data = results.get<1>(); // element 1 of the results tuple; deleted at the end of this function
+ vector<OSDOp> ops; // intentionally empty: this context carries no client ops
+ tid_t rep_tid = pg->osd->get_tid();
+ osd_reqid_t reqid(pg->osd->get_cluster_msgr_name(), 0, rep_tid); // request id built from the OSD's own messenger name and the fresh tid
+ OpContext *tctx = new OpContext(OpRequestRef(), reqid, ops, &obc->obs, obc->ssc, pg); // null OpRequestRef: no client op is attached. NOTE(review): `ops` is a local -- if OpContext keeps a reference rather than a copy it dangles after finish() returns; confirm
+ tctx->mtime = ceph_clock_now(g_ceph_context);
+ tctx->op_t.swap(results_data->final_tx); // take over the transaction prepared by the copy
+ if (results_data->started_temp_obj) { // only schedule the temp object for discard if the copy created it
+ tctx->discard_temp_oid = temp_obj;
+ }
+
+ RepGather *repop = pg->new_repop(tctx, obc, rep_tid);
+ C_KickBlockedObject *blockedcb = new C_KickBlockedObject(obc, pg); // presumably releases ops queued by wait_for_blocked_object -- confirm in C_KickBlockedObject
+ repop->ondone = blockedcb; // runs when the repop is done
+ object_stat_sum_t delta; // stat change for the promoted object: one object plus its byte count
+ ++delta.num_objects;
+ obc->obs.exists = true; // from here on the object is treated as present locally
+ delta.num_bytes += results_data->object_size;
+ obc->obs.oi.category = results_data->category;
+ pg->info.stats.stats.add(delta, obc->obs.oi.category);
+ tctx->at_version.epoch = pg->get_osdmap()->get_epoch(); // new version: current osdmap epoch...
+ tctx->at_version.version = pg->pg_log.get_head().version + 1; // ...and one past the current log head
+ tctx->user_at_version = results_data->user_version; // user version comes from the copy results
+
+ tctx->log.push_back(pg_log_entry_t( // record the promotion in the PG log as a MODIFY of the object
+ pg_log_entry_t::MODIFY,
+ obc->obs.oi.soid,
+ tctx->at_version,
+ tctx->obs->oi.version, // the object's version before this change
+ tctx->user_at_version,
+ osd_reqid_t(), // default-constructed reqid rather than the `reqid` built above
+ repop->ctx->mtime)); // presumably repop->ctx is tctx, making this the mtime set above -- confirm in new_repop
+ pg->append_log(tctx->log, eversion_t(), tctx->local_t);
+ pg->issue_repop(repop, repop->ctx->mtime);
+ pg->eval_repop(repop);
+ repop->put(); // drop this function's reference on the repop
+ delete results_data; // NOTE(review): assumes this callback owns the CopyResults -- confirm with the code that allocates it
+ }
+ };
+
boost::scoped_ptr<PGBackend> pgbackend;
PGBackend *get_pgbackend() {
return pgbackend.get();
@@ -691,8 +741,19 @@ protected:
uint64_t offset, uint64_t length, bool count_bytes);
void add_interval_usage(interval_set<uint64_t>& s, object_stat_sum_t& st);
+ /**
+ * This helper function is called from do_op if the ObjectContext lookup fails.
+ * @returns true if the caching code is handling the Op, false otherwise.
+ */
inline bool maybe_handle_cache(OpRequestRef op, ObjectContextRef obc, int r);
+ /**
+ * This helper function tells the client to redirect their request elsewhere.
+ */
void do_cache_redirect(OpRequestRef op, ObjectContextRef obc);
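+ /**
+ * This function starts up a copy of the object from the pool this pool is
+ * a tier of (pool.info.tier_of) in order to promote it, and makes the op
+ * wait on the blocked object in the meantime.
+ */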
+ void promote_object(OpRequestRef op, ObjectContextRef obc);
int prepare_transaction(OpContext *ctx);