summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2013-02-19 16:16:11 -0800
committer Sage Weil <sage@inktank.com> 2013-02-19 16:16:11 -0800
commit de892bbaf60f2aad28b29dcc005e8ec3e67dd193 (patch)
tree f4862b57c16a124e434f7dc0c805b8bcbdb77db7
parent 96e153aeef4c8ef403f23c849ef44552bedca064 (diff)
parent 128cb17d87ff9ac42434bd508ccc96bc42d53484 (diff)
download ceph-de892bbaf60f2aad28b29dcc005e8ec3e67dd193.tar.gz
Merge branch 'wip-pool'
Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- src/common/config_opts.h 1
-rw-r--r-- src/include/ceph_features.h 6
-rw-r--r-- src/mon/OSDMonitor.cc 12
-rw-r--r-- src/osd/OSD.cc 8
-rw-r--r-- src/osd/OSDMap.cc 26
-rw-r--r-- src/osd/OSDMap.h 8
-rw-r--r-- src/osd/osd_types.cc 18
-rw-r--r-- src/osd/osd_types.h 3
8 files changed, 65 insertions, 17 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 3963b31aff9..2fb52a87622 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -321,6 +321,7 @@ OPTION(osd_pool_default_size, OPT_INT, 2)
OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2
OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
+OPTION(osd_pool_default_flags, OPT_INT, 0) // default flags for new pools
OPTION(osd_map_dedup, OPT_BOOL, true)
OPTION(osd_map_cache_size, OPT_INT, 500)
OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message
diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h
index c9ff72c15f9..0aa8dc158a2 100644
--- a/src/include/ceph_features.h
+++ b/src/include/ceph_features.h
@@ -34,6 +34,7 @@
#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
#define CEPH_FEATURE_OSD_HBMSGS (1<<28)
#define CEPH_FEATURE_MDSENC (1<<29)
+#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30)
/*
* Features supported. Should be everything above.
@@ -67,8 +68,9 @@
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_CREATEPOOLID | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
- CEPH_FEATURE_OSD_HBMSGS | \
- CEPH_FEATURE_MDSENC)
+ CEPH_FEATURE_OSD_HBMSGS | \
+ CEPH_FEATURE_MDSENC | \
+ CEPH_FEATURE_OSDHASHPSPOOL)
#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 0198c29e45b..02abdb5325f 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -159,19 +159,14 @@ void OSDMonitor::update_from_paxos()
void OSDMonitor::update_msgr_features()
{
+ uint64_t mask;
+ uint64_t features = osdmap.get_features(&mask);
+
set<int> types;
types.insert((int)entity_name_t::TYPE_OSD);
types.insert((int)entity_name_t::TYPE_CLIENT);
types.insert((int)entity_name_t::TYPE_MDS);
types.insert((int)entity_name_t::TYPE_MON);
-
- uint64_t mask = CEPH_FEATURES_CRUSH;
- uint64_t features = 0;
- if (osdmap.crush->has_nondefault_tunables())
- features |= CEPH_FEATURE_CRUSH_TUNABLES;
- if (osdmap.crush->has_nondefault_tunables2())
- features |= CEPH_FEATURE_CRUSH_TUNABLES2;
-
for (set<int>::iterator q = types.begin(); q != types.end(); ++q) {
if ((mon->messenger->get_policy(*q).features_required & mask) != features) {
dout(0) << "crush map has features " << features << ", adjusting msgr requires" << dendl;
@@ -2076,6 +2071,7 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid, int crush_rule,
pending_inc.new_pool_max = osdmap.pool_max;
int64_t pool = ++pending_inc.new_pool_max;
pending_inc.new_pools[pool].type = pg_pool_t::TYPE_REP;
+  // Seed the new pool's flags from the configured default.  This must go
+  // into .flags; writing it to .type would overwrite TYPE_REP set above.
+  pending_inc.new_pools[pool].flags = g_conf->osd_pool_default_flags;
pending_inc.new_pools[pool].size = g_conf->osd_pool_default_size;
pending_inc.new_pools[pool].min_size = g_conf->get_osd_pool_default_min_size();
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index d5f2b2299a4..b20e6d690f2 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -4231,12 +4231,8 @@ void OSD::check_osdmap_features()
// current memory location, and setting or clearing bits in integer
// fields, and we are the only writer, this is not a problem.
- uint64_t mask = CEPH_FEATURES_CRUSH;
- uint64_t features = 0;
- if (osdmap->crush->has_nondefault_tunables())
- features |= CEPH_FEATURE_CRUSH_TUNABLES;
- if (osdmap->crush->has_nondefault_tunables2())
- features |= CEPH_FEATURE_CRUSH_TUNABLES2;
+ uint64_t mask;
+ uint64_t features = osdmap->get_features(&mask);
{
Messenger::Policy p = client_messenger->get_default_policy();
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 6b692d407a8..8f0b01db706 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -714,6 +714,30 @@ bool OSDMap::find_osd_on_ip(const entity_addr_t& ip) const
return -1;
}
+
+uint64_t OSDMap::get_features(uint64_t *pmask) const
+{
+  // Every map-related feature bit this function is able to report.
+  uint64_t all_map_features = 0;
+  all_map_features |= CEPH_FEATURES_CRUSH;
+  all_map_features |= CEPH_FEATURE_OSDHASHPSPOOL;
+
+  // Bits actually needed by the current map contents.
+  uint64_t required = 0;
+
+  // crush: non-default tunables each need their own feature bit
+  if (crush->has_nondefault_tunables())
+    required |= CEPH_FEATURE_CRUSH_TUNABLES;
+  if (crush->has_nondefault_tunables2())
+    required |= CEPH_FEATURE_CRUSH_TUNABLES2;
+
+  // pools: any pool carrying FLAG_HASHPSPOOL needs the matching feature bit
+  map<int64_t,pg_pool_t>::const_iterator it = pools.begin();
+  while (it != pools.end()) {
+    if (it->second.flags & pg_pool_t::FLAG_HASHPSPOOL)
+      required |= CEPH_FEATURE_OSDHASHPSPOOL;
+    ++it;
+  }
+
+  // the mask is optional; only hand it back when the caller asked for it
+  if (pmask)
+    *pmask = all_map_features;
+  return required;
+}
+
void OSDMap::dedup(const OSDMap *o, OSDMap *n)
{
if (o->epoch == n->epoch)
@@ -1690,6 +1714,7 @@ void OSDMap::build_simple(CephContext *cct, epoch_t e, uuid_d &fsid,
for (map<int,const char*>::iterator p = rulesets.begin(); p != rulesets.end(); p++) {
int64_t pool = ++pool_max;
pools[pool].type = pg_pool_t::TYPE_REP;
+ pools[pool].flags = cct->_conf->osd_pool_default_flags;
pools[pool].size = cct->_conf->osd_pool_default_size;
pools[pool].min_size = cct->_conf->get_osd_pool_default_min_size();
pools[pool].crush_ruleset = p->first;
@@ -1814,6 +1839,7 @@ int OSDMap::build_simple_from_conf(CephContext *cct, epoch_t e, uuid_d &fsid,
for (map<int,const char*>::iterator p = rulesets.begin(); p != rulesets.end(); p++) {
int64_t pool = ++pool_max;
pools[pool].type = pg_pool_t::TYPE_REP;
+ pools[pool].flags = cct->_conf->osd_pool_default_flags;
pools[pool].size = cct->_conf->osd_pool_default_size;
pools[pool].min_size = cct->_conf->get_osd_pool_default_min_size();
pools[pool].crush_ruleset = p->first;
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 70ec263e4d8..6588382971f 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -394,6 +394,14 @@ private:
return -1;
}
+ /**
+ * get feature bits required by the current structure
+ *
+ * @param mask [out] set of all possible map-related features we could set;
+ *             may be NULL, in which case the mask is not reported
+ * @return feature bits used by this map
+ */
+ uint64_t get_features(uint64_t *mask) const;
+
int apply_incremental(const Incremental &inc);
/// try to re-use/reference addrs in oldmap from newmap
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index c3827a4680b..b1046c9aec5 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -14,6 +14,9 @@
#include "osd_types.h"
#include "include/ceph_features.h"
+extern "C" {
+#include "crush/hash.h"
+}
#include "PG.h"
#include "OSDMap.h"
@@ -678,7 +681,20 @@ pg_t pg_pool_t::raw_pg_to_pg(pg_t pg) const
*/
ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const
{
- return ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) + pg.pool();
+  // Only pools that carry FLAG_HASHPSPOOL use the hashed placement seed.
+  // The OSDHASHPSPOOL feature is only required of peers when some pool has
+  // this flag, so unflagged pools must keep the legacy computation.
+  if (flags & FLAG_HASHPSPOOL) {
+    // Hash the pool id so that pool PGs do not overlap.
+    return
+      crush_hash32_2(CRUSH_HASH_RJENKINS1,
+                     ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask),
+                     pg.pool());
+  } else {
+    // Legacy behavior; add ps and pool together. This is not a great
+    // idea because the PGs from each pool will essentially overlap on
+    // top of each other: 0.5 == 1.4 == 2.3 == ...
+    return
+      ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) +
+      pg.pool();
+  }
}
void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 558c10ff27b..ff8c2c5219e 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -620,6 +620,9 @@ struct pg_pool_t {
TYPE_REP = 1, // replication
TYPE_RAID4 = 2, // raid4 (never implemented)
};
+ enum {
+ FLAG_HASHPSPOOL = 1, // hash pg seed and pool together (instead of adding)
+ };
static const char *get_type_name(int t) {
switch (t) {