diff options
author | Sage Weil <sage@inktank.com> | 2013-05-16 17:58:48 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-05-16 17:58:48 -0700 |
commit | 65072f2e432c212f88dc5d5b9e8034decb019103 (patch) | |
tree | 9f120c319dae626cec34697b1c9eaf6ba6a2ed09 | |
parent | 9b9d322c20e4d3cfa5d40023cd28c020068178f1 (diff) | |
download | ceph-65072f2e432c212f88dc5d5b9e8034decb019103.tar.gz |
mon: clear pg delta after some period
If we have no pg_map updates, the delta doesn't update, and can get stuck
with the velocity right before activity stopped. This is confusing, and
can cause incorrect health warnings about in-progress recovery.
To fix this, zero the delta if there is no activity for
'mon delta reset interval' seconds.
Fixes: #5077
Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | src/common/config_opts.h | 1 | ||||
-rw-r--r-- | src/mon/PGMap.cc | 7 | ||||
-rw-r--r-- | src/mon/PGMap.h | 2 | ||||
-rw-r--r-- | src/mon/PGMonitor.cc | 8 |
4 files changed, 18 insertions, 0 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h index aa54a213fbf..776ac1c067e 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -129,6 +129,7 @@ OPTION(mon_compact_on_bootstrap, OPT_BOOL, false) // trigger leveldb compaction OPTION(mon_compact_on_trim, OPT_BOOL, true) // compact (a prefix) when we trim old states OPTION(mon_tick_interval, OPT_INT, 5) OPTION(mon_subscribe_interval, OPT_DOUBLE, 300) +OPTION(mon_delta_reset_interval, OPT_DOUBLE, 10) // seconds of inactivity before we reset the pg delta to 0 OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60) // (seconds) how quickly our laggy estimations decay OPTION(mon_osd_laggy_weight, OPT_DOUBLE, .3) // weight for new 'samples's in laggy estimations OPTION(mon_osd_adjust_heartbeat_grace, OPT_BOOL, true) // true if we should scale based on laggy estimations diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 36a35424a20..32ff8963a0a 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -675,6 +675,13 @@ void PGMap::recovery_summary(ostream& out) const } } +void PGMap::clear_delta() +{ + pg_sum_delta = pool_stat_t(); + pg_sum_deltas.clear(); + stamp_delta = ceph_clock_now(g_ceph_context); +} + void PGMap::print_summary(ostream& out) const { std::stringstream ss; diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index 4794a16f030..1d0f40e8ba2 100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -78,6 +78,8 @@ public: pool_stat_t pg_sum_delta; utime_t stamp_delta; + void clear_delta(); + set<pg_t> creating_pgs; // lru: front = new additions, back = recently pinged map<int,set<pg_t> > creating_pgs_by_osd; diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index ed4833bce7a..687ef92158b 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -128,6 +128,14 @@ void PGMonitor::tick() } } + if (!pg_map.pg_sum_deltas.empty()) { + utime_t age = ceph_clock_now(g_ceph_context) - pg_map.stamp; + if (age > 2 * g_conf->mon_delta_reset_interval) { + dout(10) << " clearing pg_map 
delta (" << age << " > " << g_conf->mon_delta_reset_interval << " seconds old)" << dendl; + pg_map.clear_delta(); + } + } + dout(10) << pg_map << dendl; } |