diff options
author | Greg Farnum <greg@inktank.com> | 2013-10-09 17:48:57 -0700 |
---|---|---|
committer | Greg Farnum <greg@inktank.com> | 2013-10-17 13:11:45 -0700 |
commit | aff31357190b0371772822e21c7ee7f17b357f9f (patch) | |
tree | 05a4502b34ca7c62706bd3858b52af061bc13e47 | |
parent | 3f6a23e267867cd3874f36832f91898e8f2900c9 (diff) | |
download | ceph-aff31357190b0371772822e21c7ee7f17b357f9f.tar.gz |
ReplicatedPG: promote: first draft pass at doing object promotion
This is far from complete -- among other things, it will
retry forever on any object that doesn't exist in the underlying
pool. But it demonstrates the approach reasonably clearly.
Signed-off-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- | src/osd/ReplicatedPG.cc | 42 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.h | 61 |
2 files changed, 97 insertions, 6 deletions
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 4a7aaff4dc0..c1afa0cab18 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1149,10 +1149,10 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op, ObjectContextRef obc, return false; break; case pg_pool_t::CACHEMODE_WRITEBACK: - if (obc.get()) { + if (obc.get() && obc->obs.exists) { // we have the object already return false; - } else { - do_cache_redirect(op, obc); + } else { // try and promote! + promote_object(op, obc); return true; } break; @@ -1160,12 +1160,17 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op, ObjectContextRef obc, do_cache_redirect(op, obc); return true; break; - case pg_pool_t::CACHEMODE_READONLY: - if (obc.get() && !r) { + case pg_pool_t::CACHEMODE_READONLY: // TODO: clean this case up + if (!obc.get() && r == -ENOENT) { // we don't have the object and op's a read + promote_object(op, obc); + return true; + } else if (obc.get() && obc->obs.exists) { // we have the object locally return false; - } else { + } else if (!r) { // it must be a write do_cache_redirect(op, obc); return true; + } else { // crap, there was a failure of some kind + return false; } break; default: @@ -1188,6 +1193,31 @@ void ReplicatedPG::do_cache_redirect(OpRequestRef op, ObjectContextRef obc) return; } +void ReplicatedPG::promote_object(OpRequestRef op, ObjectContextRef obc) +{ + MOSDOp *m = static_cast<MOSDOp*>(op->get_req()); + if (!obc.get()) { // we need to create an ObjectContext + int r = find_object_context( + hobject_t(m->get_oid(), + m->get_object_locator().key, + m->get_snapid(), + m->get_pg().ps(), + m->get_object_locator().get_pool(), + m->get_object_locator().nspace), + &obc, true, NULL); + assert(r == 0); // a lookup that allows creates can't fail now + } + + hobject_t temp_target = generate_temp_object(); + PromoteCallback *cb = new PromoteCallback(obc, temp_target, this); + object_locator_t oloc(m->get_object_locator()); + oloc.pool = pool.info.tier_of; 
+ start_copy(cb, obc, obc->obs.oi.soid, oloc, 0, temp_target); + + assert(obc->is_blocked()); + wait_for_blocked_object(obc->obs.oi.soid, op); +} + void ReplicatedPG::execute_ctx(OpContext *ctx) { dout(10) << __func__ << " " << ctx << dendl; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 2ffc9e61066..bdaed9333d4 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -206,6 +206,56 @@ public: }; friend class CopyFromCallback; + class PromoteCallback: public CopyCallback { + ObjectContextRef obc; + hobject_t temp_obj; + ReplicatedPG *pg; + public: + PromoteCallback(ObjectContextRef obc_, const hobject_t& temp_obj_, + ReplicatedPG *pg_) : + obc(obc_), temp_obj(temp_obj_), pg(pg_) {} + + virtual void finish(CopyCallbackResults results) { + CopyResults* results_data = results.get<1>(); + vector<OSDOp> ops; + tid_t rep_tid = pg->osd->get_tid(); + osd_reqid_t reqid(pg->osd->get_cluster_msgr_name(), 0, rep_tid); + OpContext *tctx = new OpContext(OpRequestRef(), reqid, ops, &obc->obs, obc->ssc, pg); + tctx->mtime = ceph_clock_now(g_ceph_context); + tctx->op_t.swap(results_data->final_tx); + if (results_data->started_temp_obj) { + tctx->discard_temp_oid = temp_obj; + } + + RepGather *repop = pg->new_repop(tctx, obc, rep_tid); + C_KickBlockedObject *blockedcb = new C_KickBlockedObject(obc, pg); + repop->ondone = blockedcb; + object_stat_sum_t delta; + ++delta.num_objects; + obc->obs.exists = true; + delta.num_bytes += results_data->object_size; + obc->obs.oi.category = results_data->category; + pg->info.stats.stats.add(delta, obc->obs.oi.category); + tctx->at_version.epoch = pg->get_osdmap()->get_epoch(); + tctx->at_version.version = pg->pg_log.get_head().version + 1; + tctx->user_at_version = results_data->user_version; + + tctx->log.push_back(pg_log_entry_t( + pg_log_entry_t::MODIFY, + obc->obs.oi.soid, + tctx->at_version, + tctx->obs->oi.version, + tctx->user_at_version, + osd_reqid_t(), + repop->ctx->mtime)); + pg->append_log(tctx->log, 
eversion_t(), tctx->local_t); + pg->issue_repop(repop, repop->ctx->mtime); + pg->eval_repop(repop); + repop->put(); + delete results_data; + } + }; + boost::scoped_ptr<PGBackend> pgbackend; PGBackend *get_pgbackend() { return pgbackend.get(); @@ -691,8 +741,19 @@ protected: uint64_t offset, uint64_t length, bool count_bytes); void add_interval_usage(interval_set<uint64_t>& s, object_stat_sum_t& st); + /** + * This helper function is called from do_op if the ObjectContext lookup fails. + * @returns true if the caching code is handling the Op, false otherwise. + */ inline bool maybe_handle_cache(OpRequestRef op, ObjectContextRef obc, int r); + /** + * This helper function tells the client to redirect their request elsewhere. + */ void do_cache_redirect(OpRequestRef op, ObjectContextRef obc); + /** + * This function starts up a copy from the pool this one is a tier of (pool.info.tier_of) into the given object, and has the op wait on the blocked object. + */ + void promote_object(OpRequestRef op, ObjectContextRef obc); int prepare_transaction(OpContext *ctx); |