diff options
author | Samuel Just <samuel.just@dreamhost.com> | 2012-04-26 17:58:59 -0700 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2012-04-26 19:41:25 -0700 |
commit | 7f3790a9edc6691c28a2f658b436fd496fe72a4d (patch) | |
tree | a95c612bc28b7f882032f436e02bb1dc6ec17ec7 | |
parent | ec1ea6a8fd4a7b46fc0164c466f405e6724c9014 (diff) | |
download | ceph-7f3790a9edc6691c28a2f658b436fd496fe72a4d.tar.gz |
OSD.cc: track osdmap refs using an LRU
Signed-off-by: Samuel Just <samuel.just@dreamhost.com>
-rw-r--r-- | src/common/config_opts.h | 2 | ||||
-rw-r--r-- | src/osd/OSD.cc | 158 | ||||
-rw-r--r-- | src/osd/OSD.h | 40 |
3 files changed, 78 insertions, 122 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h index f39a5f3307f..ae177a1e1a1 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -269,7 +269,7 @@ OPTION(osd_pool_default_size, OPT_INT, 2) OPTION(osd_pool_default_pg_num, OPT_INT, 8) OPTION(osd_pool_default_pgp_num, OPT_INT, 8) OPTION(osd_map_dedup, OPT_BOOL, true) -OPTION(osd_map_cache_max, OPT_INT, 250) +OPTION(osd_map_cache_size, OPT_INT, 500) OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message OPTION(osd_op_threads, OPT_INT, 2) // 0 == no threading OPTION(osd_disk_threads, OPT_INT, 1) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 81143d580cd..51ee8806818 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -556,6 +556,9 @@ OSD::OSD(int id, Messenger *internal_messenger, Messenger *external_messenger, map_lock("OSD::map_lock"), peer_map_epoch_lock("OSD::peer_map_epoch_lock"), map_cache_lock("OSD::map_cache_lock"), + map_cache(g_conf->osd_map_cache_size), + map_bl_cache(g_conf->osd_map_cache_size), + map_bl_inc_cache(g_conf->osd_map_cache_size), outstanding_pg_stats(false), up_thru_wanted(0), up_thru_pending(0), pg_stat_queue_lock("OSD::pg_stat_queue_lock"), @@ -956,8 +959,6 @@ int OSD::shutdown() delete watch; - clear_map_cache(); - return r; } @@ -3046,6 +3047,7 @@ void OSD::note_up_osd(int peer) void OSD::handle_osd_map(MOSDMap *m) { assert(osd_lock.is_locked()); + list<OSDMapRef> pinned_maps; if (m->fsid != monc->get_fsid()) { dout(0) << "handle_osd_map fsid " << m->fsid << " != " << monc->get_fsid() << dendl; m->put(); @@ -3159,7 +3161,7 @@ void OSD::handle_osd_map(MOSDMap *m) bufferlist& bl = p->second; o->decode(bl); - add_map(o); + pinned_maps.push_back(add_map(o)); hobject_t fulloid = get_osdmap_pobject_name(e); t.write(coll_t::META_COLL, fulloid, 0, bl.length(), bl); @@ -3191,7 +3193,7 @@ void OSD::handle_osd_map(MOSDMap *m) assert(0 == "bad fsid"); } - add_map(o); + pinned_maps.push_back(add_map(o)); bufferlist fbl; o->encode(fbl); @@ -3387,11 +3389,6 @@ void OSD::handle_osd_map(MOSDMap *m) ulock.Unlock(); osd_lock.Lock(); - // everything through current epoch now on disk; keep anything after - // that in cache - trim_map_bl_cache(osdmap->get_epoch()+1); - trim_map_cache(0); - op_tp.unpause(); recovery_tp.unpause(); disk_tp.unpause(); @@ -3621,7 +3618,6 @@ void OSD::activate_map(ObjectStore::Transaction& t, list<Context*>& tfin) logger->set(l_osd_pg_stray, num_pg_stray); wake_all_pg_waiters(); // the pg mapping may have shifted - trim_map_cache(oldest_last_clean); maybe_update_heartbeat_peers(); send_pg_temp(); @@ -3694,135 +3690,79 @@ void OSD::send_incremental_map(epoch_t since, const entity_inst_t& inst, bool la } } -bool OSD::get_map_bl(epoch_t e, bufferlist& bl) +bool OSD::_get_map_bl(epoch_t e, bufferlist& bl) { - { - Mutex::Locker l(map_cache_lock); - map<epoch_t,bufferlist>::iterator p = map_bl.find(e); - if (p != map_bl.end()) { - bl = p->second; - return true; - } - } - return store->read(coll_t::META_COLL, get_osdmap_pobject_name(e), 0, 0, bl) >= 0; + bool found = map_bl_cache.lookup(e, &bl); + if (found) + return true; + found = store->read( + coll_t::META_COLL, get_osdmap_pobject_name(e), 0, 0, bl) >= 0; + if (found) + _add_map_bl(e, bl); + return found; } bool OSD::get_inc_map_bl(epoch_t e, bufferlist& bl) { - { - Mutex::Locker l(map_cache_lock); - map<epoch_t,bufferlist>::iterator p = map_inc_bl.find(e); - if (p != map_inc_bl.end()) { - bl = p->second; - return true; - } - } - return store->read(coll_t::META_COLL, get_inc_osdmap_pobject_name(e), 0, 0, bl) >= 0; -} - -OSDMapRef OSD::add_map(OSDMap *o) -{ Mutex::Locker l(map_cache_lock); - epoch_t e = o->get_epoch(); - if (map_cache.count(e) == 0) { - dout(10) << "add_map " << e << " " << o << dendl; - - if (g_conf->osd_map_dedup) { - // dedup against an existing map at nearby epoch - map<epoch_t,OSDMapRef>::iterator p = map_cache.lower_bound(e); - if (p == map_cache.end() && !map_cache.empty()) - p--; - if (p != map_cache.end()) - OSDMap::dedup(p->second.get(), o); - } - - map_cache.insert(make_pair(e, OSDMapRef(o))); - } else { - dout(10) << "add_map " << e << " already have it" << dendl; - } - return map_cache[e]; + bool found = map_bl_inc_cache.lookup(e, &bl); + if (found) + return true; + found = store->read( + coll_t::META_COLL, get_inc_osdmap_pobject_name(e), 0, 0, bl) >= 0; + if (found) + _add_map_inc_bl(e, bl); + return found; } -void OSD::add_map_bl(epoch_t e, bufferlist& bl) +void OSD::_add_map_bl(epoch_t e, bufferlist& bl) { - Mutex::Locker l(map_cache_lock); dout(10) << "add_map_bl " << e << " " << bl.length() << " bytes" << dendl; - map_bl[e] = bl; + map_bl_cache.add(e, bl); } -void OSD::add_map_inc_bl(epoch_t e, bufferlist& bl) +void OSD::_add_map_inc_bl(epoch_t e, bufferlist& bl) { - Mutex::Locker l(map_cache_lock); dout(10) << "add_map_inc_bl " << e << " " << bl.length() << " bytes" << dendl; - map_inc_bl[e] = bl; + map_bl_inc_cache.add(e, bl); } -OSDMapRef OSD::get_map(epoch_t epoch) +OSDMapRef OSD::_add_map(OSDMap *o) { - { - Mutex::Locker l(map_cache_lock); - map<epoch_t,OSDMapRef>::iterator p = map_cache.find(epoch); - if (p != map_cache.end()) { - dout(30) << "get_map " << epoch << " - cached " << p->second << dendl; - return p->second; + epoch_t e = o->get_epoch(); + + if (g_conf->osd_map_dedup) { + // Dedup against an existing map at a nearby epoch + OSDMapRef for_dedup = map_cache.lower_bound(e); + if (for_dedup) { + OSDMap::dedup(for_dedup.get(), o); } } + OSDMapRef l = map_cache.add(e, o); + return l; +} + +OSDMapRef OSD::get_map(epoch_t epoch) +{ + Mutex::Locker l(map_cache_lock); + OSDMapRef retval = map_cache.lookup(epoch); + if (retval) { + dout(30) << "get_map " << epoch << " -cached" << dendl; + return retval; + } OSDMap *map = new OSDMap; if (epoch > 0) { dout(20) << "get_map " << epoch << " - loading and decoding " << map << dendl; bufferlist bl; - get_map_bl(epoch, bl); + assert(_get_map_bl(epoch, bl)); map->decode(bl); } else { dout(20) << "get_map " << epoch << " - return initial " << map << dendl; } - return add_map(map); -} - -void OSD::trim_map_bl_cache(epoch_t oldest) -{ - Mutex::Locker l(map_cache_lock); - dout(10) << "trim_map_bl_cache up to " << oldest << dendl; - while (!map_inc_bl.empty() && map_inc_bl.begin()->first < oldest) - map_inc_bl.erase(map_inc_bl.begin()); - while (!map_bl.empty() && map_bl.begin()->first < oldest) - map_bl.erase(map_bl.begin()); -} - -void OSD::trim_map_cache(epoch_t oldest) -{ - Mutex::Locker l(map_cache_lock); - dout(10) << "trim_map_cache prior to " << oldest << dendl; - while (!map_cache.empty() && - (map_cache.begin()->first < oldest || - (int)map_cache.size() > g_conf->osd_map_cache_max)) { - epoch_t e = map_cache.begin()->first; - OSDMapRef o = map_cache.begin()->second; - dout(10) << "trim_map_cache " << e << " " << o << dendl; - map_cache.erase(map_cache.begin()); - } + return _add_map(map); } -void OSD::clear_map_cache() -{ - while (!map_cache.empty()) { - map_cache.erase(map_cache.begin()); - } -} - -bool OSD::get_inc_map(epoch_t e, OSDMap::Incremental &inc) -{ - bufferlist bl; - if (!get_inc_map_bl(e, bl)) - return false; - bufferlist::iterator p = bl.begin(); - inc.decode(p); - return true; -} - - - bool OSD::require_mon_peer(Message *m) { if (!m->get_connection()->peer_is_mon()) { diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 562aad5f77e..155f4048a25 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -47,6 +47,8 @@ using namespace std; using namespace __gnu_cxx; #include "OpRequest.h" +#include "common/shared_cache.hpp" +#include "common/simple_cache.hpp" #define CEPH_OSD_PROTOCOL 10 /* cluster internal */ @@ -409,22 +411,36 @@ private: void activate_map(ObjectStore::Transaction& t, list<Context*>& tfin); // osd map cache (past osd maps) - map<epoch_t,OSDMapRef > map_cache; - map<epoch_t,bufferlist> map_inc_bl; - map<epoch_t,bufferlist> map_bl; Mutex map_cache_lock; + SharedLRU<epoch_t, OSDMap> map_cache; + SimpleLRU<epoch_t, bufferlist> map_bl_cache; + SimpleLRU<epoch_t, bufferlist> map_bl_inc_cache; + OSDMapRef get_map(epoch_t e); - OSDMapRef add_map(OSDMap *o); - void add_map_bl(epoch_t e, bufferlist& bl); - void add_map_inc_bl(epoch_t e, bufferlist& bl); - void trim_map_cache(epoch_t oldest); - void trim_map_bl_cache(epoch_t oldest); - void clear_map_cache(); - - bool get_map_bl(epoch_t e, bufferlist& bl); + OSDMapRef add_map(OSDMap *o) { + Mutex::Locker l(map_cache_lock); + return _add_map(o); + } + OSDMapRef _add_map(OSDMap *o); + + void add_map_bl(epoch_t e, bufferlist& bl) { + Mutex::Locker l(map_cache_lock); + return _add_map_bl(e, bl); + } + void _add_map_bl(epoch_t e, bufferlist& bl); + bool get_map_bl(epoch_t e, bufferlist& bl) { + Mutex::Locker l(map_cache_lock); + return _get_map_bl(e, bl); + } + bool _get_map_bl(epoch_t e, bufferlist& bl); + + void add_map_inc_bl(epoch_t e, bufferlist& bl) { + Mutex::Locker l(map_cache_lock); + return _add_map_inc_bl(e, bl); + } + void _add_map_inc_bl(epoch_t e, bufferlist& bl); bool get_inc_map_bl(epoch_t e, bufferlist& bl); - bool get_inc_map(epoch_t e, OSDMap::Incremental &inc); MOSDMap *build_incremental_map_msg(epoch_t from, epoch_t to); void send_incremental_map(epoch_t since, const entity_inst_t& inst, bool lazy=false); |