From db85c52d4cff8f541e2cdd30e7a28cadf4bf516f Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 15 Apr 2013 16:33:48 -0700 Subject: PG: don't write out pg map epoch every handle_activate_map We don't actually need to write out the pg map epoch on every activate_map as long as: a) the osd does not trim past the oldest pg map persisted b) the pg does update the persisted map epoch from time to time. To that end, first, we now keep a reference to the last map persisted. The OSD already does not trim past the oldest live OSDMapRef. Second, handle_activate_map will mark the pg info dirty (so that the current map epoch gets persisted) if the difference between the current map epoch and the epoch of the last persisted map exceeds osd_pg_epoch_persisted_max_stale. Fixes: #4731 Signed-off-by: Samuel Just Reviewed-by: Greg Farnum (cherry picked from commit 2c5a9f0e178843e7ed514708bab137def840ab89) Conflicts: src/common/config_opts.h src/osd/PG.cc - last_persisted_osdmap_ref gets set in the non-static PG::write_info Conflicts: src/osd/PG.cc --- src/common/config_opts.h | 4 ++++ src/osd/PG.cc | 15 +++++++++++++-- src/osd/PG.h | 1 + 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 5e87b2f1782..e23c8affb0f 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -435,6 +435,10 @@ OPTION(osd_use_stale_snap, OPT_BOOL, false) OPTION(osd_rollback_to_cluster_snap, OPT_STR, "") OPTION(osd_default_notify_timeout, OPT_U32, 30) // default notify timeout in seconds OPTION(osd_kill_backfill_at, OPT_INT, 0) + +// Bounds how infrequently a new map epoch will be persisted for a pg +OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 200) + OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 94c10d0ab6e..cb13bdc8500 100644 --- 
a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -140,7 +140,7 @@ PG::PG(OSDService *o, OSDMapRef curmap, p.get_split_bits(curmap->get_pg_num(_pool.id)), _pool.id), map_lock("PG::map_lock"), - osdmap_ref(curmap), pool(_pool), + osdmap_ref(curmap), last_persisted_osdmap_ref(curmap), pool(_pool), _lock("PG::_lock"), ref(0), #ifdef PG_DEBUG_REFS @@ -2201,6 +2201,7 @@ void PG::write_info(ObjectStore::Transaction& t) past_intervals, snap_collections, osd->infos_oid, info_struct_v, dirty_big_info); assert(ret == 0); + last_persisted_osdmap_ref = osdmap_ref; dirty_info = false; dirty_big_info = false; @@ -5074,7 +5075,17 @@ void PG::handle_activate_map(RecoveryCtx *rctx) dout(10) << "handle_activate_map " << dendl; ActMap evt; recovery_state.handle_event(evt, rctx); - dirty_info = true; + if (osdmap_ref->get_epoch() - last_persisted_osdmap_ref->get_epoch() > + g_conf->osd_pg_epoch_persisted_max_stale) { + dout(20) << __func__ << ": Dirtying info: last_persisted is " + << last_persisted_osdmap_ref->get_epoch() + << " while current is " << osdmap_ref->get_epoch() << dendl; + dirty_info = true; + } else { + dout(20) << __func__ << ": Not dirtying info: last_persisted is " + << last_persisted_osdmap_ref->get_epoch() + << " while current is " << osdmap_ref->get_epoch() << dendl; + } } void PG::handle_loaded(RecoveryCtx *rctx) diff --git a/src/osd/PG.h b/src/osd/PG.h index 134f5ec470f..b9d3f9ebac1 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -172,6 +172,7 @@ protected: Mutex map_lock; list waiting_for_map; OSDMapRef osdmap_ref; + OSDMapRef last_persisted_osdmap_ref; PGPool pool; void queue_op(OpRequestRef op); -- cgit v1.2.1