diff options
author | Sage Weil <sage.weil@dreamhost.com> | 2012-01-27 13:21:39 -0800 |
---|---|---|
committer | Sage Weil <sage.weil@dreamhost.com> | 2012-01-27 13:21:39 -0800 |
commit | 61c54a799ec7f539fd4634cb01f8141c763847fd (patch) | |
tree | 6e7501a363563e286109877ac823294e26d0cefe | |
parent | 6e44af9fe70070767b3c1c539c789caba4683deb (diff) | |
download | ceph-61c54a799ec7f539fd4634cb01f8141c763847fd.tar.gz |
mon: mark pgs stale in pg_map if primary osd is down
This alerts the administrator when all OSDs for a PG have failed and the
monitor doesn't receive any further updates for it. Otherwise we may continue
to think the PG is active+clean when it is in fact offline.
Fixes: #1993
Signed-off-by: Sage Weil <sage.weil@dreamhost.com>
-rw-r--r-- | src/mon/PGMonitor.cc | 50 | ||||
-rw-r--r-- | src/mon/PGMonitor.h | 18 |
2 files changed, 67 insertions, 1 deletions
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index dc20b73e361..178467d0936 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -72,7 +72,8 @@ PGMonitor::PGMonitor(Monitor *mn, Paxos *p) : PaxosService(mn, p), ratio_lock("PGMonitor::ratio_lock"), need_full_ratio_update(false), - need_nearfull_ratio_update(false) + need_nearfull_ratio_update(false), + need_check_down_pgs(false) { ratio_monitor = new RatioMonitor(this); g_conf->add_observer(ratio_monitor); @@ -99,6 +100,7 @@ void PGMonitor::on_active() { if (mon->is_leader()) { check_osd_map(mon->osdmon()->osdmap.epoch); + need_check_down_pgs = true; } update_logger(); @@ -178,6 +180,10 @@ void PGMonitor::tick() } } ratio_lock.Unlock(); + + if (need_check_down_pgs && check_down_pgs()) + propose = true; + if (propose) { propose_pending(); } @@ -629,6 +635,13 @@ void PGMonitor::check_osd_map(epoch_t epoch) pending_inc.osd_stat_rm.erase(p->first); pending_inc.osd_stat_updates[p->first]; } + + // this is conservative: we want to know if any osds (maybe) got marked down. + for (map<int32_t,uint8_t>::iterator p = inc.new_state.begin(); + p != inc.new_state.end(); + ++p) + if (p->second & CEPH_OSD_UP) // true if marked up OR down, but we're too lazy to check which + need_check_down_pgs = true; } bool propose = false; @@ -640,6 +653,9 @@ void PGMonitor::check_osd_map(epoch_t epoch) // scan pg space? 
if (register_new_pgs()) propose = true; + + if (need_check_down_pgs && check_down_pgs()) + propose = true; if (propose) propose_pending(); @@ -826,6 +842,38 @@ void PGMonitor::send_pg_creates() } } +bool PGMonitor::check_down_pgs() +{ + dout(10) << "check_down_pgs" << dendl; + + OSDMap *osdmap = &mon->osdmon()->osdmap; + bool ret = false; + + for (hash_map<pg_t,pg_stat_t>::iterator p = pg_map.pg_stat.begin(); + p != pg_map.pg_stat.end(); + ++p) { + if ((p->second.state & PG_STATE_STALE) == 0 && + p->second.acting.size() && + osdmap->is_down(p->second.acting[0])) { + dout(10) << " marking pg " << p->first << " stale with acting " << p->second.acting << dendl; + + map<pg_t,pg_stat_t>::iterator q = pending_inc.pg_stat_updates.find(p->first); + pg_stat_t *stat; + if (q == pending_inc.pg_stat_updates.end()) { + stat = &pending_inc.pg_stat_updates[p->first]; + *stat = p->second; + } else { + stat = &q->second; + } + stat->state |= PG_STATE_STALE; + ret = true; + } + } + need_check_down_pgs = false; + return ret; +} + + bool PGMonitor::preprocess_command(MMonCommand *m) { int r = -1; diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h index 56722914561..65204b36d50 100644 --- a/src/mon/PGMonitor.h +++ b/src/mon/PGMonitor.h @@ -47,6 +47,8 @@ public: bool need_full_ratio_update, need_nearfull_ratio_update; float new_full_ratio, new_nearfull_ratio; + bool need_check_down_pgs; + private: PGMap::Incremental pending_inc; @@ -91,9 +93,25 @@ private: map<int,utime_t> last_osd_report; void register_pg(pg_pool_t& pool, pg_t pgid, epoch_t epoch, bool new_pool); + + /** + * check latest osdmap for new pgs to register + * + * @return true if we updated pending_inc (and should propose) + */ bool register_new_pgs(); + void send_pg_creates(); + /** + * check pgs for down primary osds + * + * clears need_check_down_pgs + * + * @return true if we updated pending_inc (and should propose) + */ + bool check_down_pgs(); + public: PGMonitor(Monitor *mn, Paxos *p); virtual ~PGMonitor(); |