diff options
-rw-r--r-- | src/common/TrackedOp.cc | 19 | ||||
-rw-r--r-- | src/common/TrackedOp.h | 21 | ||||
-rw-r--r-- | src/common/config_opts.h | 10 | ||||
-rw-r--r-- | src/osd/OSD.cc | 18 |
4 files changed, 50 insertions, 18 deletions
diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc index 82594a6491e..d1dbc1e7135 100644 --- a/src/common/TrackedOp.cc +++ b/src/common/TrackedOp.cc @@ -48,14 +48,14 @@ void OpHistory::cleanup(utime_t now) { while (arrived.size() && (now - arrived.begin()->first > - (double)(tracker->cct->_conf->op_tracker_history_duration))) { + (double)(history_duration))) { duration.erase(make_pair( arrived.begin()->second->get_duration(), arrived.begin()->second)); arrived.erase(arrived.begin()); } - while (duration.size() > tracker->cct->_conf->op_tracker_history_size) { + while (duration.size() > history_size) { arrived.erase(make_pair( duration.begin()->second->get_arrived(), duration.begin()->second)); @@ -67,8 +67,8 @@ void OpHistory::dump_ops(utime_t now, Formatter *f) { cleanup(now); f->open_object_section("OpHistory"); - f->dump_int("num to keep", tracker->cct->_conf->op_tracker_history_size); - f->dump_int("duration to keep", tracker->cct->_conf->op_tracker_history_duration); + f->dump_int("num to keep", history_size); + f->dump_int("duration to keep", history_duration); { f->open_array_section("Ops"); for (set<pair<utime_t, TrackedOpRef> >::const_iterator i = @@ -132,7 +132,7 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector) utime_t now = ceph_clock_now(cct); utime_t too_old = now; - too_old -= cct->_conf->op_tracker_complaint_time; + too_old -= complaint_time; utime_t oldest_secs = now - ops_in_flight.front()->get_arrived(); @@ -140,11 +140,11 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector) << "; oldest is " << oldest_secs << " seconds old" << dendl; - if (oldest_secs < cct->_conf->op_tracker_complaint_time) + if (oldest_secs < complaint_time) return false; xlist<TrackedOp*>::iterator i = ops_in_flight.begin(); - warning_vector.reserve(cct->_conf->op_tracker_log_threshold + 1); + warning_vector.reserve(log_threshold + 1); int slow = 0; // total slow int warned = 0; // total logged @@ -153,13 +153,12 @@ bool OpTracker::check_ops_in_flight(std::vector<string> &warning_vector) // exponential backoff of warning intervals if (((*i)->get_arrived() + - (cct->_conf->op_tracker_complaint_time * - (*i)->warn_interval_multiplier)) < now) { + (complaint_time * (*i)->warn_interval_multiplier)) < now) { // will warn if (warning_vector.empty()) warning_vector.push_back(""); warned++; - if (warned > cct->_conf->op_tracker_log_threshold) + if (warned > log_threshold) break; utime_t age = now - (*i)->get_arrived(); diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h index 2fe9eeb230c..9e00c14b178 100644 --- a/src/common/TrackedOp.h +++ b/src/common/TrackedOp.h @@ -32,9 +32,12 @@ class OpHistory { void cleanup(utime_t now); bool shutdown; OpTracker *tracker; + uint32_t history_size; + uint32_t history_duration; public: - OpHistory(OpTracker *tracker_) : shutdown(false), tracker(tracker_) {} + OpHistory(OpTracker *tracker_) : shutdown(false), tracker(tracker_), + history_size(0), history_duration(0) {} ~OpHistory() { assert(arrived.empty()); assert(duration.empty()); @@ -42,6 +45,10 @@ public: void insert(utime_t now, TrackedOpRef op); void dump_ops(utime_t now, Formatter *f); void on_shutdown(); + void set_size_and_duration(uint32_t new_size, uint32_t new_duration) { + history_size = new_size; + history_duration = new_duration; + } }; class OpTracker { @@ -57,10 +64,20 @@ class OpTracker { Mutex ops_in_flight_lock; xlist<TrackedOp *> ops_in_flight; OpHistory history; + float complaint_time; + int log_threshold; public: CephContext *cct; - OpTracker(CephContext *cct_) : seq(0), ops_in_flight_lock("OpTracker mutex"), history(this), cct(cct_) {} + OpTracker(CephContext *cct_) : seq(0), ops_in_flight_lock("OpTracker mutex"), + history(this), complaint_time(0), log_threshold(0), cct(cct_) {} + void set_complaint_and_threshold(float time, int threshold) { + complaint_time = time; + log_threshold = threshold; + } + void set_history_size_and_duration(uint32_t new_size, uint32_t new_duration) { + history.set_size_and_duration(new_size, new_duration); + } void dump_ops_in_flight(Formatter *f); void dump_historic_ops(Formatter *f); void register_inflight_op(xlist<TrackedOp*>::item *i); diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 521176c4672..f6283239660 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -471,9 +471,9 @@ OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 200) OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim -OPTION(op_tracker_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy +OPTION(osd_op_complaint_time, OPT_FLOAT, 30) // how many seconds old makes an op complaint-worthy OPTION(osd_command_max_records, OPT_INT, 256) -OPTION(op_tracker_log_threshold, OPT_INT, 5) // how many op log messages to show in one go +OPTION(osd_op_log_threshold, OPT_INT, 5) // how many op log messages to show in one go OPTION(osd_verify_sparse_read_holes, OPT_BOOL, false) // read fiemap-reported holes and verify they are zeros OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0) OPTION(osd_debug_drop_ping_duration, OPT_INT, 0) @@ -484,8 +484,8 @@ OPTION(osd_debug_op_order, OPT_BOOL, false) OPTION(osd_debug_verify_snaps_on_info, OPT_BOOL, false) OPTION(osd_debug_verify_stray_on_activate, OPT_BOOL, false) OPTION(osd_debug_skip_full_check_in_backfill_reservation, OPT_BOOL, false) -OPTION(op_tracker_history_size, OPT_U32, 20) // Max number of completed ops to track -OPTION(op_tracker_history_duration, OPT_U32, 600) // Oldest completed op to track +OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track +OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe) OPTION(osd_failsafe_nearfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD near full (failsafe) @@ -510,7 +510,7 @@ OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false) * 1..63. * * osd_recovery_op_warn_multiple scales the normal warning threshhold, - * op_tracker_complaint_time, so that slow recovery ops won't cause noise + * osd_op_complaint_time, so that slow recovery ops won't cause noise */ OPTION(osd_client_op_priority, OPT_U32, 63) OPTION(osd_recovery_op_priority, OPT_U32, 10) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 8150d9b3cd9..1ba35ec2ef5 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -979,6 +979,10 @@ OSD::OSD(CephContext *cct_, int id, Messenger *internal_messenger, Messenger *ex service(this) { monc->set_messenger(client_messenger); + op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time, + cct->_conf->osd_op_log_threshold); + op_tracker.set_history_size_and_duration(cct->_conf->osd_op_history_size, + cct->_conf->osd_op_history_duration); } OSD::~OSD() @@ -2560,7 +2564,7 @@ void OSDService::check_nearfull_warning(const osd_stat_t &osd_stat) if (cur_state != new_state) { cur_state = new_state; - } else if (now - last_msg < cct->_conf->op_tracker_complaint_time) { + } else if (now - last_msg < cct->_conf->osd_op_complaint_time) { return; } last_msg = now; @@ -7318,6 +7322,8 @@ const char** OSD::get_tracked_conf_keys() const { static const char* KEYS[] = { "osd_max_backfills", + "osd_op_complaint_time", "osd_op_log_threshold", + "osd_op_history_size", "osd_op_history_duration", NULL }; return KEYS; @@ -7330,6 +7336,16 @@ void OSD::handle_conf_change(const struct md_config_t *conf, service.local_reserver.set_max(cct->_conf->osd_max_backfills); service.remote_reserver.set_max(cct->_conf->osd_max_backfills); } + if (changed.count("osd_op_complaint_time") || + changed.count("osd_op_log_threshold")) { + op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time, + cct->_conf->osd_op_log_threshold); + } + if (changed.count("osd_op_history_size") || + changed.count("osd_op_history_duration")) { + op_tracker.set_history_size_and_duration(cct->_conf->osd_op_history_size, + cct->_conf->osd_op_history_duration); + } } // -------------------------------- |