summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Zafman <david.zafman@inktank.com>2013-08-15 18:32:30 -0700
committerDavid Zafman <david.zafman@inktank.com>2013-08-15 18:32:34 -0700
commit067d5756a8b17af16ce73a487698471c079a2fb6 (patch)
tree41bfad9c3f98ba45e366aaf279f6d311436c6f0c
parent913a205c97858afc90617a3ef6097a9f0f2eaec7 (diff)
parent93d8be222e452c0539e84cc8a6d4e7e9f8eb8d19 (diff)
downloadceph-067d5756a8b17af16ce73a487698471c079a2fb6.tar.gz
Merge branch 'wip-5848-coll'
Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/osd/OSD.cc47
-rw-r--r--src/osd/OSD.h40
-rw-r--r--src/osd/PG.cc79
3 files changed, 163 insertions, 3 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 14bdc87487c..55f11707189 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -161,6 +161,7 @@ OSDService::OSDService(OSD *osd) :
cluster_messenger(osd->cluster_messenger),
client_messenger(osd->client_messenger),
logger(osd->logger),
+ recoverystate_perf(osd->recoverystate_perf),
monc(osd->monc),
op_wq(osd->op_wq),
peering_wq(osd->peering_wq),
@@ -898,6 +899,7 @@ OSD::OSD(int id, Messenger *internal_messenger, Messenger *external_messenger,
client_messenger(external_messenger),
monc(mc),
logger(NULL),
+ recoverystate_perf(NULL),
store(NULL),
clog(external_messenger->cct, client_messenger, &mc->monmap, LogClient::NO_FLAGS),
whoami(id),
@@ -953,7 +955,9 @@ OSD::~OSD()
delete authorize_handler_cluster_registry;
delete authorize_handler_service_registry;
delete class_handler;
+ g_ceph_context->get_perfcounters_collection()->remove(recoverystate_perf);
g_ceph_context->get_perfcounters_collection()->remove(logger);
+ delete recoverystate_perf;
delete logger;
delete store;
}
@@ -1183,6 +1187,8 @@ int OSD::init()
osdmap = get_map(superblock.current_epoch);
check_osdmap_features();
+ create_recoverystate_perf();
+
bind_epoch = osdmap->get_epoch();
// load up pgs (as they previously existed)
@@ -1416,12 +1422,51 @@ void OSD::create_logger()
osd_plb.add_u64_counter(l_osd_mape_dup, "map_message_epoch_dups"); // dup osdmap epochs
osd_plb.add_u64_counter(l_osd_waiting_for_map,
"messages_delayed_for_map"); // dup osdmap epochs
- osd_plb.add_time_avg(l_osd_peering_latency, "peering_latency");
logger = osd_plb.create_perf_counters();
g_ceph_context->get_perfcounters_collection()->add(logger);
}
+void OSD::create_recoverystate_perf()
+{
+ dout(10) << "create_recoverystate_perf" << dendl;
+
+ PerfCountersBuilder rs_perf(g_ceph_context, "recoverystate_perf", rs_first, rs_last);
+
+ rs_perf.add_time_avg(rs_initial_latency, "initial_latency");
+ rs_perf.add_time_avg(rs_started_latency, "started_latency");
+ rs_perf.add_time_avg(rs_reset_latency, "reset_latency");
+ rs_perf.add_time_avg(rs_start_latency, "start_latency");
+ rs_perf.add_time_avg(rs_primary_latency, "primary_latency");
+ rs_perf.add_time_avg(rs_peering_latency, "peering_latency");
+ rs_perf.add_time_avg(rs_backfilling_latency, "backfilling_latency");
+ rs_perf.add_time_avg(rs_waitremotebackfillreserved_latency, "waitremotebackfillreserved_latency");
+ rs_perf.add_time_avg(rs_waitlocalbackfillreserved_latency, "waitlocalbackfillreserved_latency");
+ rs_perf.add_time_avg(rs_notbackfilling_latency, "notbackfilling_latency");
+ rs_perf.add_time_avg(rs_repnotrecovering_latency, "repnotrecovering_latency");
+ rs_perf.add_time_avg(rs_repwaitrecoveryreserved_latency, "repwaitrecoveryreserved_latency");
+ rs_perf.add_time_avg(rs_repwaitbackfillreserved_latency, "repwaitbackfillreserved_latency");
+ rs_perf.add_time_avg(rs_RepRecovering_latency, "RepRecovering_latency");
+ rs_perf.add_time_avg(rs_activating_latency, "activating_latency");
+ rs_perf.add_time_avg(rs_waitlocalrecoveryreserved_latency, "waitlocalrecoveryreserved_latency");
+ rs_perf.add_time_avg(rs_waitremoterecoveryreserved_latency, "waitremoterecoveryreserved_latency");
+ rs_perf.add_time_avg(rs_recovering_latency, "recovering_latency");
+ rs_perf.add_time_avg(rs_recovered_latency, "recovered_latency");
+ rs_perf.add_time_avg(rs_clean_latency, "clean_latency");
+ rs_perf.add_time_avg(rs_active_latency, "active_latency");
+ rs_perf.add_time_avg(rs_replicaactive_latency, "replicaactive_latency");
+ rs_perf.add_time_avg(rs_stray_latency, "stray_latency");
+ rs_perf.add_time_avg(rs_getinfo_latency, "getinfo_latency");
+ rs_perf.add_time_avg(rs_getlog_latency, "getlog_latency");
+ rs_perf.add_time_avg(rs_waitactingchange_latency, "waitactingchange_latency");
+ rs_perf.add_time_avg(rs_incomplete_latency, "incomplete_latency");
+ rs_perf.add_time_avg(rs_getmissing_latency, "getmissing_latency");
+ rs_perf.add_time_avg(rs_waitupthru_latency, "waitupthru_latency");
+
+ recoverystate_perf = rs_perf.create_perf_counters();
+ g_ceph_context->get_perfcounters_collection()->add(recoverystate_perf);
+}
+
void OSD::suicide(int exitcode)
{
if (g_conf->filestore_blackhole) {
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 82a251d9a80..ec1ee30f853 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -113,10 +113,45 @@ enum {
l_osd_mape_dup,
l_osd_waiting_for_map,
- l_osd_peering_latency,
+
l_osd_last,
};
+// RecoveryState perf counters
+enum {
+ rs_first = 20000,
+ rs_initial_latency,
+ rs_started_latency,
+ rs_reset_latency,
+ rs_start_latency,
+ rs_primary_latency,
+ rs_peering_latency,
+ rs_backfilling_latency,
+ rs_waitremotebackfillreserved_latency,
+ rs_waitlocalbackfillreserved_latency,
+ rs_notbackfilling_latency,
+ rs_repnotrecovering_latency,
+ rs_repwaitrecoveryreserved_latency,
+ rs_repwaitbackfillreserved_latency,
+ rs_RepRecovering_latency,
+ rs_activating_latency,
+ rs_waitlocalrecoveryreserved_latency,
+ rs_waitremoterecoveryreserved_latency,
+ rs_recovering_latency,
+ rs_recovered_latency,
+ rs_clean_latency,
+ rs_active_latency,
+ rs_replicaactive_latency,
+ rs_stray_latency,
+ rs_getinfo_latency,
+ rs_getlog_latency,
+ rs_waitactingchange_latency,
+ rs_incomplete_latency,
+ rs_getmissing_latency,
+ rs_waitupthru_latency,
+ rs_last,
+};
+
class Messenger;
class Message;
class MonClient;
@@ -258,6 +293,7 @@ private:
Messenger *&client_messenger;
public:
PerfCounters *&logger;
+ PerfCounters *&recoverystate_perf;
MonClient *&monc;
ThreadPool::WorkQueueVal<pair<PGRef, OpRequestRef>, PGRef> &op_wq;
ThreadPool::BatchWorkQueue<PG> &peering_wq;
@@ -593,6 +629,7 @@ protected:
Messenger *client_messenger;
MonClient *monc;
PerfCounters *logger;
+ PerfCounters *recoverystate_perf;
ObjectStore *store;
LogClient clog;
@@ -613,6 +650,7 @@ protected:
int dispatch_running;
void create_logger();
+ void create_recoverystate_perf();
void tick();
void _dispatch(Message *m);
void dispatch_op(OpRequestRef op);
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 8e78eaa7a16..63be6a34b03 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -5164,6 +5164,9 @@ boost::statechart::result PG::RecoveryState::Initial::react(const MLogRec& i)
void PG::RecoveryState::Initial::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_initial_latency, dur);
}
/*------Started-------*/
@@ -5210,6 +5213,9 @@ boost::statechart::result PG::RecoveryState::Started::react(const QueryState& q)
void PG::RecoveryState::Started::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_started_latency, dur);
}
/*--------Reset---------*/
@@ -5282,6 +5288,9 @@ boost::statechart::result PG::RecoveryState::Reset::react(const QueryState& q)
void PG::RecoveryState::Reset::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_reset_latency, dur);
}
/*-------Start---------*/
@@ -5304,6 +5313,9 @@ PG::RecoveryState::Start::Start(my_context ctx)
void PG::RecoveryState::Start::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_start_latency, dur);
}
/*---------Primary--------*/
@@ -5351,6 +5363,8 @@ void PG::RecoveryState::Primary::exit()
context< RecoveryMachine >().log_exit(state_name, enter_time);
PG *pg = context< RecoveryMachine >().pg;
pg->want_acting.clear();
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_primary_latency, dur);
}
/*---------Peering--------*/
@@ -5438,7 +5452,7 @@ void PG::RecoveryState::Peering::exit()
pg->clear_probe_targets();
utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
- pg->osd->logger->tinc(l_osd_peering_latency, dur);
+ pg->osd->recoverystate_perf->tinc(rs_peering_latency, dur);
}
@@ -5475,6 +5489,8 @@ void PG::RecoveryState::Backfilling::exit()
pg->backfill_reserved = false;
pg->backfill_reserving = false;
pg->state_clear(PG_STATE_BACKFILL);
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_backfilling_latency, dur);
}
/*--WaitRemoteBackfillReserved--*/
@@ -5507,6 +5523,9 @@ PG::RecoveryState::WaitRemoteBackfillReserved::WaitRemoteBackfillReserved(my_con
void PG::RecoveryState::WaitRemoteBackfillReserved::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_waitremotebackfillreserved_latency, dur);
}
boost::statechart::result
@@ -5549,6 +5568,9 @@ PG::RecoveryState::WaitLocalBackfillReserved::WaitLocalBackfillReserved(my_conte
void PG::RecoveryState::WaitLocalBackfillReserved::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_waitlocalbackfillreserved_latency, dur);
}
/*----NotBackfilling------*/
@@ -5562,6 +5584,9 @@ PG::RecoveryState::NotBackfilling::NotBackfilling(my_context ctx)
void PG::RecoveryState::NotBackfilling::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_notbackfilling_latency, dur);
}
/*---RepNotRecovering----*/
@@ -5575,6 +5600,9 @@ PG::RecoveryState::RepNotRecovering::RepNotRecovering(my_context ctx)
void PG::RecoveryState::RepNotRecovering::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_repnotrecovering_latency, dur);
}
/*---RepWaitRecoveryReserved--*/
@@ -5609,6 +5637,9 @@ PG::RecoveryState::RepWaitRecoveryReserved::react(const RemoteRecoveryReserved &
void PG::RecoveryState::RepWaitRecoveryReserved::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_repwaitrecoveryreserved_latency, dur);
}
/*-RepWaitBackfillReserved*/
@@ -5644,6 +5675,9 @@ PG::RecoveryState::RepNotRecovering::react(const RequestBackfillPrio &evt)
void PG::RecoveryState::RepWaitBackfillReserved::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_repwaitbackfillreserved_latency, dur);
}
boost::statechart::result
@@ -5689,6 +5723,8 @@ void PG::RecoveryState::RepRecovering::exit()
context< RecoveryMachine >().log_exit(state_name, enter_time);
PG *pg = context< RecoveryMachine >().pg;
pg->osd->remote_reserver.cancel_reservation(pg->info.pgid);
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_RepRecovering_latency, dur);
}
/*------Activating--------*/
@@ -5702,6 +5738,9 @@ PG::RecoveryState::Activating::Activating(my_context ctx)
void PG::RecoveryState::Activating::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_activating_latency, dur);
}
PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_context ctx)
@@ -5721,6 +5760,9 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte
void PG::RecoveryState::WaitLocalRecoveryReserved::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_waitlocalrecoveryreserved_latency, dur);
}
PG::RecoveryState::WaitRemoteRecoveryReserved::WaitRemoteRecoveryReserved(my_context ctx)
@@ -5765,6 +5807,9 @@ PG::RecoveryState::WaitRemoteRecoveryReserved::react(const RemoteRecoveryReserve
void PG::RecoveryState::WaitRemoteRecoveryReserved::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_waitremoterecoveryreserved_latency, dur);
}
PG::RecoveryState::Recovering::Recovering(my_context ctx)
@@ -5825,6 +5870,9 @@ PG::RecoveryState::Recovering::react(const RequestBackfill &evt)
void PG::RecoveryState::Recovering::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_recovering_latency, dur);
}
PG::RecoveryState::Recovered::Recovered(my_context ctx)
@@ -5856,6 +5904,9 @@ PG::RecoveryState::Recovered::Recovered(my_context ctx)
void PG::RecoveryState::Recovered::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_recovered_latency, dur);
}
PG::RecoveryState::Clean::Clean(my_context ctx)
@@ -5882,6 +5933,8 @@ void PG::RecoveryState::Clean::exit()
context< RecoveryMachine >().log_exit(state_name, enter_time);
PG *pg = context< RecoveryMachine >().pg;
pg->state_clear(PG_STATE_CLEAN);
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_clean_latency, dur);
}
/*---------Active---------*/
@@ -6131,6 +6184,8 @@ void PG::RecoveryState::Active::exit()
pg->state_clear(PG_STATE_BACKFILL_WAIT);
pg->state_clear(PG_STATE_RECOVERY_WAIT);
pg->state_clear(PG_STATE_REPLAY);
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_active_latency, dur);
}
/*------ReplicaActive-----*/
@@ -6222,6 +6277,8 @@ void PG::RecoveryState::ReplicaActive::exit()
context< RecoveryMachine >().log_exit(state_name, enter_time);
PG *pg = context< RecoveryMachine >().pg;
pg->osd->remote_reserver.cancel_reservation(pg->info.pgid);
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_replicaactive_latency, dur);
}
/*-------Stray---*/
@@ -6321,6 +6378,9 @@ boost::statechart::result PG::RecoveryState::Stray::react(const ActMap&)
void PG::RecoveryState::Stray::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_stray_latency, dur);
}
/*--------GetInfo---------*/
@@ -6493,6 +6553,9 @@ boost::statechart::result PG::RecoveryState::GetInfo::react(const QueryState& q)
void PG::RecoveryState::GetInfo::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_getinfo_latency, dur);
}
/*------GetLog------------*/
@@ -6605,6 +6668,9 @@ boost::statechart::result PG::RecoveryState::GetLog::react(const QueryState& q)
void PG::RecoveryState::GetLog::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_getlog_latency, dur);
}
/*------WaitActingChange--------*/
@@ -6662,6 +6728,9 @@ boost::statechart::result PG::RecoveryState::WaitActingChange::react(const Query
void PG::RecoveryState::WaitActingChange::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_waitactingchange_latency, dur);
}
/*------Incomplete--------*/
@@ -6697,6 +6766,8 @@ void PG::RecoveryState::Incomplete::exit()
PG *pg = context< RecoveryMachine >().pg;
pg->state_clear(PG_STATE_INCOMPLETE);
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_incomplete_latency, dur);
}
/*------GetMissing--------*/
@@ -6819,6 +6890,9 @@ boost::statechart::result PG::RecoveryState::GetMissing::react(const QueryState&
void PG::RecoveryState::GetMissing::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_getmissing_latency, dur);
}
/*---WaitFlushedPeering---*/
@@ -6897,6 +6971,9 @@ boost::statechart::result PG::RecoveryState::WaitUpThru::react(const QueryState&
void PG::RecoveryState::WaitUpThru::exit()
{
context< RecoveryMachine >().log_exit(state_name, enter_time);
+ PG *pg = context< RecoveryMachine >().pg;
+ utime_t dur = ceph_clock_now(g_ceph_context) - enter_time;
+ pg->osd->recoverystate_perf->tinc(rs_waitupthru_latency, dur);
}
/*----RecoveryState::RecoveryMachine Methods-----*/