diff options
author | Sage Weil <sage.weil@dreamhost.com> | 2012-01-27 16:40:53 -0800 |
---|---|---|
committer | Sage Weil <sage.weil@dreamhost.com> | 2012-01-27 16:40:53 -0800 |
commit | d0a447d84e7f5b56d439c3b8f6714b8b1d6fd5da (patch) | |
tree | ca52d0a8042d944c197220fc2fdc891ea5f3e7ef | |
parent | 374fec47253bad511eee52d372f182402fb17b1a (diff) | |
parent | 56d164c8fb8ec37e63754b0efcc0dca483f4e096 (diff) | |
download | ceph-d0a447d84e7f5b56d439c3b8f6714b8b1d6fd5da.tar.gz |
Merge branch 'wip-pg-stale'
-rw-r--r-- | src/mon/PGMonitor.cc | 52 | ||||
-rw-r--r-- | src/mon/PGMonitor.h | 18 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 2 | ||||
-rw-r--r-- | src/osd/osd_types.h | 1 |
4 files changed, 72 insertions, 1 deletions
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index dc20b73e361..624f935363f 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -72,7 +72,8 @@ PGMonitor::PGMonitor(Monitor *mn, Paxos *p) : PaxosService(mn, p), ratio_lock("PGMonitor::ratio_lock"), need_full_ratio_update(false), - need_nearfull_ratio_update(false) + need_nearfull_ratio_update(false), + need_check_down_pgs(false) { ratio_monitor = new RatioMonitor(this); g_conf->add_observer(ratio_monitor); @@ -99,6 +100,7 @@ void PGMonitor::on_active() { if (mon->is_leader()) { check_osd_map(mon->osdmon()->osdmap.epoch); + need_check_down_pgs = true; } update_logger(); @@ -178,6 +180,10 @@ void PGMonitor::tick() } } ratio_lock.Unlock(); + + if (need_check_down_pgs && check_down_pgs()) + propose = true; + if (propose) { propose_pending(); } @@ -629,6 +635,13 @@ void PGMonitor::check_osd_map(epoch_t epoch) pending_inc.osd_stat_rm.erase(p->first); pending_inc.osd_stat_updates[p->first]; } + + // this is conservative: we want to know if any osds (maybe) got marked down. + for (map<int32_t,uint8_t>::iterator p = inc.new_state.begin(); + p != inc.new_state.end(); + ++p) + if (p->second & CEPH_OSD_UP) // true if marked up OR down, but we're too lazy to check which + need_check_down_pgs = true; } bool propose = false; @@ -640,6 +653,9 @@ void PGMonitor::check_osd_map(epoch_t epoch) // scan pg space? if (register_new_pgs()) propose = true; + + if (need_check_down_pgs && check_down_pgs()) + propose = true; if (propose) propose_pending(); @@ -826,6 +842,38 @@ void PGMonitor::send_pg_creates() } } +bool PGMonitor::check_down_pgs() +{ + dout(10) << "check_down_pgs" << dendl; + + OSDMap *osdmap = &mon->osdmon()->osdmap; + bool ret = false; + + for (hash_map<pg_t,pg_stat_t>::iterator p = pg_map.pg_stat.begin(); + p != pg_map.pg_stat.end(); + ++p) { + if ((p->second.state & PG_STATE_STALE) == 0 && + p->second.acting.size() && + osdmap->is_down(p->second.acting[0])) { + dout(10) << " marking pg " << p->first << " stale with acting " << p->second.acting << dendl; + + map<pg_t,pg_stat_t>::iterator q = pending_inc.pg_stat_updates.find(p->first); + pg_stat_t *stat; + if (q == pending_inc.pg_stat_updates.end()) { + stat = &pending_inc.pg_stat_updates[p->first]; + *stat = p->second; + } else { + stat = &q->second; + } + stat->state |= PG_STATE_STALE; + ret = true; + } + } + need_check_down_pgs = false; + return ret; +} + + bool PGMonitor::preprocess_command(MMonCommand *m) { int r = -1; @@ -1069,6 +1117,8 @@ enum health_status_t PGMonitor::get_health(std::ostream &ss) const hash_map<int,int>::const_iterator p = pg_map.num_pg_by_state.begin(); hash_map<int,int>::const_iterator p_end = pg_map.num_pg_by_state.end(); for (; p != p_end; ++p) { + if (p->first & PG_STATE_STALE) + note["stale"] += p->second; if (p->first & PG_STATE_DOWN) note["down"] += p->second; if (p->first & PG_STATE_DEGRADED) diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h index 56722914561..65204b36d50 100644 --- a/src/mon/PGMonitor.h +++ b/src/mon/PGMonitor.h @@ -47,6 +47,8 @@ public: bool need_full_ratio_update, need_nearfull_ratio_update; float new_full_ratio, new_nearfull_ratio; + bool need_check_down_pgs; + private: PGMap::Incremental pending_inc; @@ -91,9 +93,25 @@ private: map<int,utime_t> last_osd_report; void register_pg(pg_pool_t& pool, pg_t pgid, epoch_t epoch, bool new_pool); + + /** + * check latest osdmap for new pgs to register + * + * @return true if we updated pending_inc (and should propose) + */ bool register_new_pgs(); + void send_pg_creates(); + /** + * check pgs for down primary osds + * + * clears need_check_down_pgs + * + * @return true if we updated pending_inc (and should propose) + */ + bool check_down_pgs(); + public: PGMonitor(Monitor *mn, Paxos *p); virtual ~PGMonitor(); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 9674354bc83..367c98a8963 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -164,6 +164,8 @@ void coll_t::decode(bufferlist::iterator& bl) std::string pg_state_string(int state) { ostringstream oss; + if (state & PG_STATE_STALE) + oss << "stale+"; if (state & PG_STATE_CREATING) oss << "creating+"; if (state & PG_STATE_ACTIVE) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 4090aa7a0ff..c7babe87b6b 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -505,6 +505,7 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) { //PG_STATE_SCANNING (1<<14) .. deprecated. #define PG_STATE_BACKFILL (1<<15) // [active] backfilling pg content #define PG_STATE_INCOMPLETE (1<<16) // incomplete content, peering failed. +#define PG_STATE_STALE (1<<17) // our state for this pg is stale, unknown. std::string pg_state_string(int state); |