summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Just <sam.just@inktank.com>2012-12-19 16:50:11 -0800
committerSamuel Just <sam.just@inktank.com>2012-12-19 16:51:25 -0800
commit9a9778fb9cf8f71f5dfb7822dbdaa43dac2018fe (patch)
tree3fba6713d3517c209e02aff91949f8374166f121
parent212f6b56d1269c04621e36b7900032b8a27ef386 (diff)
parent6122a9f62f9eeae1410d1703fecb8939a35fb03f (diff)
downloadceph-9a9778fb9cf8f71f5dfb7822dbdaa43dac2018fe.tar.gz
Merge remote-tracking branch 'upstream/wip_pg_temp' into next
Reviewed-by: Sage Weil <sage@inktank.com> Reviewed-by: Joao Luis <joao.luis@inktank.com>
-rw-r--r--src/mon/OSDMonitor.cc24
-rw-r--r--src/mon/OSDMonitor.h1
-rw-r--r--src/osd/OSDMap.cc41
-rw-r--r--src/osd/OSDMap.h2
4 files changed, 48 insertions, 20 deletions
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 5c8b08e43a2..7522bc133ad 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -302,6 +302,27 @@ void OSDMonitor::remove_redundant_pg_temp()
}
}
+void OSDMonitor::remove_down_pg_temp()  // Queue removal of every pg_temp entry none of whose listed OSDs is up.
+{
+ dout(10) << "remove_down_pg_temp" << dendl;
+ OSDMap tmpmap(osdmap);  // scratch copy; the incremental below is applied to this copy, not to osdmap
+ tmpmap.apply_incremental(pending_inc);  // view the map as it would look with the pending changes applied
+
+ for (map<pg_t,vector<int> >::iterator p = tmpmap.pg_temp->begin();
+ p != tmpmap.pg_temp->end();
+ p++) {
+ unsigned num_up = 0;  // number of OSDs in this entry for which is_down() is false
+ for (vector<int>::iterator i = p->second.begin();
+ i != p->second.end();
+ ++i) {
+ if (!tmpmap.is_down(*i))
+ ++num_up;
+ }
+ if (num_up == 0)
+ pending_inc.new_pg_temp[p->first].clear();  // empty vector: apply_incremental erases the pg_temp entry for this pg
+ }
+}
+
/* Assign a lower weight to overloaded OSDs.
*
* The osds that will get a lower weight are those with with a utilization
@@ -391,6 +412,9 @@ void OSDMonitor::create_pending()
// drop any redundant pg_temp entries
remove_redundant_pg_temp();
+
+ // drop any pg_temp entries with no up entries
+ remove_down_pg_temp();
}
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 4faa90e2902..9529f731c84 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -165,6 +165,7 @@ private:
void send_incremental(epoch_t first, entity_inst_t& dest, bool onetime);
void remove_redundant_pg_temp();
+ void remove_down_pg_temp();
int reweight_by_utilization(int oload, std::string& out_str);
bool preprocess_failure(class MOSDFailure *m);
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 96aa3169e19..03ecad78dcc 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -704,7 +704,7 @@ void OSDMap::dedup(const OSDMap *o, OSDMap *n)
n->osd_uuid = o->osd_uuid;
}
-int OSDMap::apply_incremental(Incremental &inc)
+int OSDMap::apply_incremental(const Incremental &inc)
{
if (inc.epoch == 1)
fsid = inc.fsid;
@@ -717,7 +717,8 @@ int OSDMap::apply_incremental(Incremental &inc)
// full map?
if (inc.fullmap.length()) {
- decode(inc.fullmap);
+ bufferlist bl(inc.fullmap);
+ decode(bl);
return 0;
}
@@ -731,20 +732,20 @@ int OSDMap::apply_incremental(Incremental &inc)
if (inc.new_pool_max != -1)
pool_max = inc.new_pool_max;
- for (set<int64_t>::iterator p = inc.old_pools.begin();
+ for (set<int64_t>::const_iterator p = inc.old_pools.begin();
p != inc.old_pools.end();
p++) {
pools.erase(*p);
name_pool.erase(pool_name[*p]);
pool_name.erase(*p);
}
- for (map<int64_t,pg_pool_t>::iterator p = inc.new_pools.begin();
+ for (map<int64_t,pg_pool_t>::const_iterator p = inc.new_pools.begin();
p != inc.new_pools.end();
p++) {
pools[p->first] = p->second;
pools[p->first].last_change = epoch;
}
- for (map<int64_t,string>::iterator p = inc.new_pool_names.begin();
+ for (map<int64_t,string>::const_iterator p = inc.new_pool_names.begin();
p != inc.new_pool_names.end();
p++) {
if (pool_name.count(p->first))
@@ -753,7 +754,7 @@ int OSDMap::apply_incremental(Incremental &inc)
name_pool[p->second] = p->first;
}
- for (map<int32_t,uint32_t>::iterator i = inc.new_weight.begin();
+ for (map<int32_t,uint32_t>::const_iterator i = inc.new_weight.begin();
i != inc.new_weight.end();
i++) {
set_weight(i->first, i->second);
@@ -764,7 +765,7 @@ int OSDMap::apply_incremental(Incremental &inc)
}
// up/down
- for (map<int32_t,uint8_t>::iterator i = inc.new_state.begin();
+ for (map<int32_t,uint8_t>::const_iterator i = inc.new_state.begin();
i != inc.new_state.end();
i++) {
int s = i->second ? i->second : CEPH_OSD_UP;
@@ -778,7 +779,7 @@ int OSDMap::apply_incremental(Incremental &inc)
(*osd_uuid)[i->first] = uuid_d();
osd_state[i->first] ^= s;
}
- for (map<int32_t,entity_addr_t>::iterator i = inc.new_up_client.begin();
+ for (map<int32_t,entity_addr_t>::const_iterator i = inc.new_up_client.begin();
i != inc.new_up_client.end();
i++) {
osd_state[i->first] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
@@ -786,38 +787,39 @@ int OSDMap::apply_incremental(Incremental &inc)
if (inc.new_hb_up.empty())
osd_addrs->hb_addr[i->first].reset(new entity_addr_t(i->second)); //this is a backward-compatibility hack
else
- osd_addrs->hb_addr[i->first].reset(new entity_addr_t(inc.new_hb_up[i->first]));
+ osd_addrs->hb_addr[i->first].reset(
+ new entity_addr_t(inc.new_hb_up.find(i->first)->second));
osd_info[i->first].up_from = epoch;
}
- for (map<int32_t,entity_addr_t>::iterator i = inc.new_up_internal.begin();
+ for (map<int32_t,entity_addr_t>::const_iterator i = inc.new_up_internal.begin();
i != inc.new_up_internal.end();
i++)
osd_addrs->cluster_addr[i->first].reset(new entity_addr_t(i->second));
// info
- for (map<int32_t,epoch_t>::iterator i = inc.new_up_thru.begin();
+ for (map<int32_t,epoch_t>::const_iterator i = inc.new_up_thru.begin();
i != inc.new_up_thru.end();
i++)
osd_info[i->first].up_thru = i->second;
- for (map<int32_t,pair<epoch_t,epoch_t> >::iterator i = inc.new_last_clean_interval.begin();
+ for (map<int32_t,pair<epoch_t,epoch_t> >::const_iterator i = inc.new_last_clean_interval.begin();
i != inc.new_last_clean_interval.end();
i++) {
osd_info[i->first].last_clean_begin = i->second.first;
osd_info[i->first].last_clean_end = i->second.second;
}
- for (map<int32_t,epoch_t>::iterator p = inc.new_lost.begin(); p != inc.new_lost.end(); p++)
+ for (map<int32_t,epoch_t>::const_iterator p = inc.new_lost.begin(); p != inc.new_lost.end(); p++)
osd_info[p->first].lost_at = p->second;
// xinfo
- for (map<int32_t,osd_xinfo_t>::iterator p = inc.new_xinfo.begin(); p != inc.new_xinfo.end(); ++p)
+ for (map<int32_t,osd_xinfo_t>::const_iterator p = inc.new_xinfo.begin(); p != inc.new_xinfo.end(); ++p)
osd_xinfo[p->first] = p->second;
// uuid
- for (map<int32_t,uuid_d>::iterator p = inc.new_uuid.begin(); p != inc.new_uuid.end(); ++p)
+ for (map<int32_t,uuid_d>::const_iterator p = inc.new_uuid.begin(); p != inc.new_uuid.end(); ++p)
(*osd_uuid)[p->first] = p->second;
// pg rebuild
- for (map<pg_t, vector<int> >::iterator p = inc.new_pg_temp.begin(); p != inc.new_pg_temp.end(); p++) {
+ for (map<pg_t, vector<int> >::const_iterator p = inc.new_pg_temp.begin(); p != inc.new_pg_temp.end(); p++) {
if (p->second.empty())
pg_temp->erase(p->first);
else
@@ -825,11 +827,11 @@ int OSDMap::apply_incremental(Incremental &inc)
}
// blacklist
- for (map<entity_addr_t,utime_t>::iterator p = inc.new_blacklist.begin();
+ for (map<entity_addr_t,utime_t>::const_iterator p = inc.new_blacklist.begin();
p != inc.new_blacklist.end();
p++)
blacklist[p->first] = p->second;
- for (vector<entity_addr_t>::iterator p = inc.old_blacklist.begin();
+ for (vector<entity_addr_t>::const_iterator p = inc.old_blacklist.begin();
p != inc.old_blacklist.end();
p++)
blacklist.erase(*p);
@@ -845,7 +847,8 @@ int OSDMap::apply_incremental(Incremental &inc)
// do new crush map last (after up/down stuff)
if (inc.crush.length()) {
- bufferlist::iterator blp = inc.crush.begin();
+ bufferlist bl(inc.crush);
+ bufferlist::iterator blp = bl.begin();
crush.reset(new CrushWrapper);
crush->decode(blp);
}
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 632b2c2527b..4d4bbd0b031 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -388,7 +388,7 @@ private:
return -1;
}
- int apply_incremental(Incremental &inc);
+ int apply_incremental(const Incremental &inc);
/// try to re-use/reference addrs in oldmap from newmap
static void dedup(const OSDMap *oldmap, OSDMap *newmap);