diff options
author | Sage Weil <sage@inktank.com> | 2013-07-19 10:37:16 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-07-19 10:39:19 -0700 |
commit | c549e628a8deabf177d1f3f39da4741467555743 (patch) | |
tree | 4df46ec6e90835af10c158072fd9074fd547dd5a | |
parent | 9292c18dfcabee4e6e45d2bc5b796a9b77b2a283 (diff) | |
download | ceph-c549e628a8deabf177d1f3f39da4741467555743.tar.gz |
mon/PGMap: avoid negative pg stats when calculating rates
We periodically see strange values come out of the estimated cluster
throughput and recovery rates. Pretty sure this is cause by feeding
negative values into the rate arithmetic and then giving the si_t
helpers mangled (sign-extended + bit shifted) values.
Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | src/mon/PGMap.cc | 39 |
1 files changed, 25 insertions, 14 deletions
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index f2b3a0b6a36..21e40f3fb21 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -728,12 +728,18 @@ void PGMap::recovery_summary(Formatter *f, ostream *out) const } first = false; } - if (pg_sum_delta.stats.sum.num_objects_recovered || - pg_sum_delta.stats.sum.num_bytes_recovered || - pg_sum_delta.stats.sum.num_keys_recovered) { - int64_t objps = pg_sum_delta.stats.sum.num_objects_recovered / (double)stamp_delta; - int64_t bps = pg_sum_delta.stats.sum.num_bytes_recovered / (double)stamp_delta; - int64_t kps = pg_sum_delta.stats.sum.num_keys_recovered / (double)stamp_delta; + + // make non-negative; we can get negative values if osds send + // uncommitted stats and then "go backward" or if they are just + // buggy/wrong. + pool_stat_t pos_delta = pg_sum_delta; + pos_delta.floor(0); + if (pos_delta.stats.sum.num_objects_recovered || + pos_delta.stats.sum.num_bytes_recovered || + pos_delta.stats.sum.num_keys_recovered) { + int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)stamp_delta; + int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)stamp_delta; + int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)stamp_delta; if (f) { f->dump_int("recovering_objects_per_sec", objps); f->dump_int("recovering_bytes_per_sec", bps); @@ -744,7 +750,7 @@ void PGMap::recovery_summary(Formatter *f, ostream *out) const *out << " recovering " << si_t(objps) << " o/s, " << si_t(bps) << "B/s"; - if (pg_sum_delta.stats.sum.num_keys_recovered) + if (pos_delta.stats.sum.num_keys_recovered) *out << ", " << si_t(kps) << " key/s"; } } @@ -815,27 +821,32 @@ void PGMap::print_summary(Formatter *f, ostream *out) const << kb_t(osd_sum.kb) << " avail"; } - if (pg_sum_delta.stats.sum.num_rd || - pg_sum_delta.stats.sum.num_wr) { + // make non-negative; we can get negative values if osds send + // uncommitted stats and then "go backward" or if they are just + // buggy/wrong. + pool_stat_t pos_delta = pg_sum_delta; + pos_delta.floor(0); + if (pos_delta.stats.sum.num_rd || + pos_delta.stats.sum.num_wr) { if (!f) *out << "; "; - if (pg_sum_delta.stats.sum.num_rd) { - int64_t rd = (pg_sum_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta; + if (pos_delta.stats.sum.num_rd) { + int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta; if (f) { f->dump_int("read_bytes_sec", rd); } else { *out << si_t(rd) << "B/s rd, "; } } - if (pg_sum_delta.stats.sum.num_wr) { - int64_t wr = (pg_sum_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta; + if (pos_delta.stats.sum.num_wr) { + int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta; if (f) { f->dump_int("write_bytes_sec", wr); } else { *out << si_t(wr) << "B/s wr, "; } } - int64_t iops = (pg_sum_delta.stats.sum.num_rd + pg_sum_delta.stats.sum.num_wr) / (double)stamp_delta; + int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta; if (f) { f->dump_int("op_per_sec", iops); } else { |