summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-07-19 10:37:16 -0700
committerSage Weil <sage@inktank.com>2013-07-19 10:39:19 -0700
commitc549e628a8deabf177d1f3f39da4741467555743 (patch)
tree4df46ec6e90835af10c158072fd9074fd547dd5a
parent9292c18dfcabee4e6e45d2bc5b796a9b77b2a283 (diff)
downloadceph-c549e628a8deabf177d1f3f39da4741467555743.tar.gz
mon/PGMap: avoid negative pg stats when calculating rates
We periodically see strange values come out of the estimated cluster throughput and recovery rates. Pretty sure this is cause by feeding negative values into the rate arithmetic and then giving the si_t helpers mangled (sign-extended + bit shifted) values. Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/mon/PGMap.cc39
1 files changed, 25 insertions, 14 deletions
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index f2b3a0b6a36..21e40f3fb21 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -728,12 +728,18 @@ void PGMap::recovery_summary(Formatter *f, ostream *out) const
}
first = false;
}
- if (pg_sum_delta.stats.sum.num_objects_recovered ||
- pg_sum_delta.stats.sum.num_bytes_recovered ||
- pg_sum_delta.stats.sum.num_keys_recovered) {
- int64_t objps = pg_sum_delta.stats.sum.num_objects_recovered / (double)stamp_delta;
- int64_t bps = pg_sum_delta.stats.sum.num_bytes_recovered / (double)stamp_delta;
- int64_t kps = pg_sum_delta.stats.sum.num_keys_recovered / (double)stamp_delta;
+
+ // make non-negative; we can get negative values if osds send
+ // uncommitted stats and then "go backward" or if they are just
+ // buggy/wrong.
+ pool_stat_t pos_delta = pg_sum_delta;
+ pos_delta.floor(0);
+ if (pos_delta.stats.sum.num_objects_recovered ||
+ pos_delta.stats.sum.num_bytes_recovered ||
+ pos_delta.stats.sum.num_keys_recovered) {
+ int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)stamp_delta;
+ int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)stamp_delta;
+ int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)stamp_delta;
if (f) {
f->dump_int("recovering_objects_per_sec", objps);
f->dump_int("recovering_bytes_per_sec", bps);
@@ -744,7 +750,7 @@ void PGMap::recovery_summary(Formatter *f, ostream *out) const
*out << " recovering "
<< si_t(objps) << " o/s, "
<< si_t(bps) << "B/s";
- if (pg_sum_delta.stats.sum.num_keys_recovered)
+ if (pos_delta.stats.sum.num_keys_recovered)
*out << ", " << si_t(kps) << " key/s";
}
}
@@ -815,27 +821,32 @@ void PGMap::print_summary(Formatter *f, ostream *out) const
<< kb_t(osd_sum.kb) << " avail";
}
- if (pg_sum_delta.stats.sum.num_rd ||
- pg_sum_delta.stats.sum.num_wr) {
+ // make non-negative; we can get negative values if osds send
+ // uncommitted stats and then "go backward" or if they are just
+ // buggy/wrong.
+ pool_stat_t pos_delta = pg_sum_delta;
+ pos_delta.floor(0);
+ if (pos_delta.stats.sum.num_rd ||
+ pos_delta.stats.sum.num_wr) {
if (!f)
*out << "; ";
- if (pg_sum_delta.stats.sum.num_rd) {
- int64_t rd = (pg_sum_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta;
+ if (pos_delta.stats.sum.num_rd) {
+ int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta;
if (f) {
f->dump_int("read_bytes_sec", rd);
} else {
*out << si_t(rd) << "B/s rd, ";
}
}
- if (pg_sum_delta.stats.sum.num_wr) {
- int64_t wr = (pg_sum_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta;
+ if (pos_delta.stats.sum.num_wr) {
+ int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta;
if (f) {
f->dump_int("write_bytes_sec", wr);
} else {
*out << si_t(wr) << "B/s wr, ";
}
}
- int64_t iops = (pg_sum_delta.stats.sum.num_rd + pg_sum_delta.stats.sum.num_wr) / (double)stamp_delta;
+ int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta;
if (f) {
f->dump_int("op_per_sec", iops);
} else {