summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sage Weil <sage.weil@dreamhost.com> 2012-01-27 16:40:53 -0800
committer Sage Weil <sage.weil@dreamhost.com> 2012-01-27 16:40:53 -0800
commit d0a447d84e7f5b56d439c3b8f6714b8b1d6fd5da (patch)
tree ca52d0a8042d944c197220fc2fdc891ea5f3e7ef
parent 374fec47253bad511eee52d372f182402fb17b1a (diff)
parent 56d164c8fb8ec37e63754b0efcc0dca483f4e096 (diff)
download ceph-d0a447d84e7f5b56d439c3b8f6714b8b1d6fd5da.tar.gz
Merge branch 'wip-pg-stale'
-rw-r--r-- src/mon/PGMonitor.cc 52
-rw-r--r-- src/mon/PGMonitor.h 18
-rw-r--r-- src/osd/osd_types.cc 2
-rw-r--r-- src/osd/osd_types.h 1
4 files changed, 72 insertions, 1 deletion
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index dc20b73e361..624f935363f 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -72,7 +72,8 @@ PGMonitor::PGMonitor(Monitor *mn, Paxos *p)
: PaxosService(mn, p),
ratio_lock("PGMonitor::ratio_lock"),
need_full_ratio_update(false),
- need_nearfull_ratio_update(false)
+ need_nearfull_ratio_update(false),
+ need_check_down_pgs(false)
{
ratio_monitor = new RatioMonitor(this);
g_conf->add_observer(ratio_monitor);
@@ -99,6 +100,7 @@ void PGMonitor::on_active()
{
if (mon->is_leader()) {
check_osd_map(mon->osdmon()->osdmap.epoch);
+ need_check_down_pgs = true;
}
update_logger();
@@ -178,6 +180,10 @@ void PGMonitor::tick()
}
}
ratio_lock.Unlock();
+
+ if (need_check_down_pgs && check_down_pgs())
+ propose = true;
+
if (propose) {
propose_pending();
}
@@ -629,6 +635,13 @@ void PGMonitor::check_osd_map(epoch_t epoch)
pending_inc.osd_stat_rm.erase(p->first);
pending_inc.osd_stat_updates[p->first];
}
+
+ // this is conservative: we want to know if any osds (maybe) got marked down.
+ for (map<int32_t,uint8_t>::iterator p = inc.new_state.begin();
+ p != inc.new_state.end();
+ ++p)
+ if (p->second & CEPH_OSD_UP) // true if marked up OR down, but we're too lazy to check which
+ need_check_down_pgs = true;
}
bool propose = false;
@@ -640,6 +653,9 @@ void PGMonitor::check_osd_map(epoch_t epoch)
// scan pg space?
if (register_new_pgs())
propose = true;
+
+ if (need_check_down_pgs && check_down_pgs())
+ propose = true;
if (propose)
propose_pending();
@@ -826,6 +842,38 @@ void PGMonitor::send_pg_creates()
}
}
+bool PGMonitor::check_down_pgs() // mark pgs whose acting[0] osd is down as stale; returns true if pending_inc was changed
+{
+ dout(10) << "check_down_pgs" << dendl;
+
+ OSDMap *osdmap = &mon->osdmon()->osdmap; // the osd monitor's current osdmap
+ bool ret = false; // set once at least one pg has been marked stale
+
+ for (hash_map<pg_t,pg_stat_t>::iterator p = pg_map.pg_stat.begin();
+ p != pg_map.pg_stat.end();
+ ++p) {
+ if ((p->second.state & PG_STATE_STALE) == 0 && // not already flagged stale
+ p->second.acting.size() && // acting set is non-empty, so acting[0] exists
+ osdmap->is_down(p->second.acting[0])) { // first acting osd is down in the osdmap
+ dout(10) << " marking pg " << p->first << " stale with acting " << p->second.acting << dendl;
+
+ map<pg_t,pg_stat_t>::iterator q = pending_inc.pg_stat_updates.find(p->first);
+ pg_stat_t *stat;
+ if (q == pending_inc.pg_stat_updates.end()) { // no update queued for this pg yet
+ stat = &pending_inc.pg_stat_updates[p->first];
+ *stat = p->second; // seed the new update from the pg's current stat
+ } else {
+ stat = &q->second; // build on the update already queued in pending_inc
+ }
+ stat->state |= PG_STATE_STALE; // add the stale bit, keeping the other state bits
+ ret = true;
+ }
+ }
+ need_check_down_pgs = false; // scan finished; cleared whether or not anything was marked
+ return ret;
+}
+
+
bool PGMonitor::preprocess_command(MMonCommand *m)
{
int r = -1;
@@ -1069,6 +1117,8 @@ enum health_status_t PGMonitor::get_health(std::ostream &ss) const
hash_map<int,int>::const_iterator p = pg_map.num_pg_by_state.begin();
hash_map<int,int>::const_iterator p_end = pg_map.num_pg_by_state.end();
for (; p != p_end; ++p) {
+ if (p->first & PG_STATE_STALE)
+ note["stale"] += p->second;
if (p->first & PG_STATE_DOWN)
note["down"] += p->second;
if (p->first & PG_STATE_DEGRADED)
diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h
index 56722914561..65204b36d50 100644
--- a/src/mon/PGMonitor.h
+++ b/src/mon/PGMonitor.h
@@ -47,6 +47,8 @@ public:
bool need_full_ratio_update, need_nearfull_ratio_update;
float new_full_ratio, new_nearfull_ratio;
+ bool need_check_down_pgs;
+
private:
PGMap::Incremental pending_inc;
@@ -91,9 +93,25 @@ private:
map<int,utime_t> last_osd_report;
void register_pg(pg_pool_t& pool, pg_t pgid, epoch_t epoch, bool new_pool);
+
+ /**
+ * check latest osdmap for new pgs to register
+ *
+ * @return true if we updated pending_inc (and should propose)
+ */
bool register_new_pgs();
+
void send_pg_creates();
+ /**
+ * check pgs for down primary osds
+ *
+ * clears need_check_down_pgs
+ *
+ * @return true if we updated pending_inc (and should propose)
+ */
+ bool check_down_pgs();
+
public:
PGMonitor(Monitor *mn, Paxos *p);
virtual ~PGMonitor();
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 9674354bc83..367c98a8963 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -164,6 +164,8 @@ void coll_t::decode(bufferlist::iterator& bl)
std::string pg_state_string(int state)
{
ostringstream oss;
+ if (state & PG_STATE_STALE)
+ oss << "stale+";
if (state & PG_STATE_CREATING)
oss << "creating+";
if (state & PG_STATE_ACTIVE)
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 4090aa7a0ff..c7babe87b6b 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -505,6 +505,7 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) {
//PG_STATE_SCANNING (1<<14) .. deprecated.
#define PG_STATE_BACKFILL (1<<15) // [active] backfilling pg content
#define PG_STATE_INCOMPLETE (1<<16) // incomplete content, peering failed.
+#define PG_STATE_STALE (1<<17) // our state for this pg is stale, unknown.
std::string pg_state_string(int state);