diff options
author | Sage Weil <sage@inktank.com> | 2013-02-19 16:16:11 -0800 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-02-19 16:16:11 -0800 |
commit | de892bbaf60f2aad28b29dcc005e8ec3e67dd193 (patch) | |
tree | f4862b57c16a124e434f7dc0c805b8bcbdb77db7 | |
parent | 96e153aeef4c8ef403f23c849ef44552bedca064 (diff) | |
parent | 128cb17d87ff9ac42434bd508ccc96bc42d53484 (diff) | |
download | ceph-de892bbaf60f2aad28b29dcc005e8ec3e67dd193.tar.gz |
Merge branch 'wip-pool'
Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- | src/common/config_opts.h | 1 | ||||
-rw-r--r-- | src/include/ceph_features.h | 6 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 12 | ||||
-rw-r--r-- | src/osd/OSD.cc | 8 | ||||
-rw-r--r-- | src/osd/OSDMap.cc | 26 | ||||
-rw-r--r-- | src/osd/OSDMap.h | 8 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 18 | ||||
-rw-r--r-- | src/osd/osd_types.h | 3 |
8 files changed, 65 insertions, 17 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 3963b31aff9..2fb52a87622 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -321,6 +321,7 @@ OPTION(osd_pool_default_size, OPT_INT, 2) OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2 OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num +OPTION(osd_pool_default_flags, OPT_INT, 0) // default flags for new pools OPTION(osd_map_dedup, OPT_BOOL, true) OPTION(osd_map_cache_size, OPT_INT, 500) OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index c9ff72c15f9..0aa8dc158a2 100644 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -34,6 +34,7 @@ #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) #define CEPH_FEATURE_OSD_HBMSGS (1<<28) #define CEPH_FEATURE_MDSENC (1<<29) +#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) /* * Features supported. Should be everything above. @@ -67,8 +68,9 @@ CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_CREATEPOOLID | \ CEPH_FEATURE_REPLY_CREATE_INODE | \ - CEPH_FEATURE_OSD_HBMSGS | \ - CEPH_FEATURE_MDSENC) + CEPH_FEATURE_OSD_HBMSGS | \ + CEPH_FEATURE_MDSENC | \ + CEPH_FEATURE_OSDHASHPSPOOL) #define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 0198c29e45b..02abdb5325f 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -159,19 +159,14 @@ void OSDMonitor::update_from_paxos() void OSDMonitor::update_msgr_features() { + uint64_t mask; + uint64_t features = osdmap.get_features(&mask); + set<int> types; types.insert((int)entity_name_t::TYPE_OSD); types.insert((int)entity_name_t::TYPE_CLIENT); types.insert((int)entity_name_t::TYPE_MDS); types.insert((int)entity_name_t::TYPE_MON); - - uint64_t mask = CEPH_FEATURES_CRUSH; - uint64_t features = 0; - if (osdmap.crush->has_nondefault_tunables()) - features |= CEPH_FEATURE_CRUSH_TUNABLES; - if (osdmap.crush->has_nondefault_tunables2()) - features |= CEPH_FEATURE_CRUSH_TUNABLES2; - for (set<int>::iterator q = types.begin(); q != types.end(); ++q) { if ((mon->messenger->get_policy(*q).features_required & mask) != features) { dout(0) << "crush map has features " << features << ", adjusting msgr requires" << dendl; @@ -2076,6 +2071,7 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid, int crush_rule, pending_inc.new_pool_max = osdmap.pool_max; int64_t pool = ++pending_inc.new_pool_max; pending_inc.new_pools[pool].type = pg_pool_t::TYPE_REP; + pending_inc.new_pools[pool].type = g_conf->osd_pool_default_flags; pending_inc.new_pools[pool].size = g_conf->osd_pool_default_size; pending_inc.new_pools[pool].min_size = g_conf->get_osd_pool_default_min_size(); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index d5f2b2299a4..b20e6d690f2 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4231,12 +4231,8 @@ void OSD::check_osdmap_features() // current memory location, and setting or clearing bits in integer // fields, and we are the only writer, this is not a problem. - uint64_t mask = CEPH_FEATURES_CRUSH; - uint64_t features = 0; - if (osdmap->crush->has_nondefault_tunables()) - features |= CEPH_FEATURE_CRUSH_TUNABLES; - if (osdmap->crush->has_nondefault_tunables2()) - features |= CEPH_FEATURE_CRUSH_TUNABLES2; + uint64_t mask; + uint64_t features = osdmap->get_features(&mask); { Messenger::Policy p = client_messenger->get_default_policy(); diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 6b692d407a8..8f0b01db706 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -714,6 +714,30 @@ bool OSDMap::find_osd_on_ip(const entity_addr_t& ip) const return -1; } + +uint64_t OSDMap::get_features(uint64_t *pmask) const +{ + uint64_t features = 0; + uint64_t mask = 0; + + if (crush->has_nondefault_tunables()) + features |= CEPH_FEATURE_CRUSH_TUNABLES; + if (crush->has_nondefault_tunables2()) + features |= CEPH_FEATURE_CRUSH_TUNABLES2; + mask |= CEPH_FEATURES_CRUSH; + + for (map<int64_t,pg_pool_t>::const_iterator p = pools.begin(); p != pools.end(); ++p) { + if (p->second.flags & pg_pool_t::FLAG_HASHPSPOOL) { + features |= CEPH_FEATURE_OSDHASHPSPOOL; + } + } + mask |= CEPH_FEATURE_OSDHASHPSPOOL; + + if (pmask) + *pmask = mask; + return features; +} + void OSDMap::dedup(const OSDMap *o, OSDMap *n) { if (o->epoch == n->epoch) @@ -1690,6 +1714,7 @@ void OSDMap::build_simple(CephContext *cct, epoch_t e, uuid_d &fsid, for (map<int,const char*>::iterator p = rulesets.begin(); p != rulesets.end(); p++) { int64_t pool = ++pool_max; pools[pool].type = pg_pool_t::TYPE_REP; + pools[pool].flags = cct->_conf->osd_pool_default_flags; pools[pool].size = cct->_conf->osd_pool_default_size; pools[pool].min_size = cct->_conf->get_osd_pool_default_min_size(); pools[pool].crush_ruleset = p->first; @@ -1814,6 +1839,7 @@ int OSDMap::build_simple_from_conf(CephContext *cct, epoch_t e, uuid_d &fsid, for (map<int,const char*>::iterator p = rulesets.begin(); p != rulesets.end(); p++) { int64_t pool = ++pool_max; pools[pool].type = pg_pool_t::TYPE_REP; + pools[pool].flags = cct->_conf->osd_pool_default_flags; pools[pool].size = cct->_conf->osd_pool_default_size; pools[pool].min_size = cct->_conf->get_osd_pool_default_min_size(); pools[pool].crush_ruleset = p->first; diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 70ec263e4d8..6588382971f 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -394,6 +394,14 @@ private: return -1; } + /** + * get feature bits required by the current structure + * + * @param mask [out] set of all possible map-related features we could set + * @return feature bits used by this map + */ + uint64_t get_features(uint64_t *mask) const; + int apply_incremental(const Incremental &inc); /// try to re-use/reference addrs in oldmap from newmap diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index c3827a4680b..b1046c9aec5 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -14,6 +14,9 @@ #include "osd_types.h" #include "include/ceph_features.h" +extern "C" { +#include "crush/hash.h" +} #include "PG.h" #include "OSDMap.h" @@ -678,7 +681,20 @@ pg_t pg_pool_t::raw_pg_to_pg(pg_t pg) const */ ps_t pg_pool_t::raw_pg_to_pps(pg_t pg) const { - return ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) + pg.pool(); + if (true) {//flags & FLAG_HASHPSPOOL) { + // Hash the pool id so that pool PGs do not overlap. + return + crush_hash32_2(CRUSH_HASH_RJENKINS1, + ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask), + pg.pool()); + } else { + // Legacy behavior; add ps and pool together. This is not a great + // idea because the PGs from each pool will essentially overlap on + // top of each other: 0.5 == 1.4 == 2.3 == ... + return + ceph_stable_mod(pg.ps(), pgp_num, pgp_num_mask) + + pg.pool(); + } } void pg_pool_t::encode(bufferlist& bl, uint64_t features) const diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 558c10ff27b..ff8c2c5219e 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -620,6 +620,9 @@ struct pg_pool_t { TYPE_REP = 1, // replication TYPE_RAID4 = 2, // raid4 (never implemented) }; + enum { + FLAG_HASHPSPOOL = 1, // hash pg seed and pool together (instead of adding) + }; static const char *get_type_name(int t) { switch (t) { |