summary refs log tree commit diff
diff options
context:
space:
mode:
author Samuel Just <samuel.just@dreamhost.com> 2012-04-26 17:58:59 -0700
committer Sage Weil <sage@newdream.net> 2012-04-26 19:41:25 -0700
commit 7f3790a9edc6691c28a2f658b436fd496fe72a4d (patch)
tree a95c612bc28b7f882032f436e02bb1dc6ec17ec7
parent ec1ea6a8fd4a7b46fc0164c466f405e6724c9014 (diff)
download ceph-7f3790a9edc6691c28a2f658b436fd496fe72a4d.tar.gz
OSD.cc: track osdmap refs using an LRU
Signed-off-by: Samuel Just <samuel.just@dreamhost.com>
-rw-r--r-- src/common/config_opts.h 2
-rw-r--r-- src/osd/OSD.cc 158
-rw-r--r-- src/osd/OSD.h 40
3 files changed, 78 insertions, 122 deletions
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index f39a5f3307f..ae177a1e1a1 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -269,7 +269,7 @@ OPTION(osd_pool_default_size, OPT_INT, 2)
OPTION(osd_pool_default_pg_num, OPT_INT, 8)
OPTION(osd_pool_default_pgp_num, OPT_INT, 8)
OPTION(osd_map_dedup, OPT_BOOL, true)
-OPTION(osd_map_cache_max, OPT_INT, 250)
+OPTION(osd_map_cache_size, OPT_INT, 500)
OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message
OPTION(osd_op_threads, OPT_INT, 2) // 0 == no threading
OPTION(osd_disk_threads, OPT_INT, 1)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 81143d580cd..51ee8806818 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -556,6 +556,9 @@ OSD::OSD(int id, Messenger *internal_messenger, Messenger *external_messenger,
map_lock("OSD::map_lock"),
peer_map_epoch_lock("OSD::peer_map_epoch_lock"),
map_cache_lock("OSD::map_cache_lock"),
+ map_cache(g_conf->osd_map_cache_size),
+ map_bl_cache(g_conf->osd_map_cache_size),
+ map_bl_inc_cache(g_conf->osd_map_cache_size),
outstanding_pg_stats(false),
up_thru_wanted(0), up_thru_pending(0),
pg_stat_queue_lock("OSD::pg_stat_queue_lock"),
@@ -956,8 +959,6 @@ int OSD::shutdown()
delete watch;
- clear_map_cache();
-
return r;
}
@@ -3046,6 +3047,7 @@ void OSD::note_up_osd(int peer)
void OSD::handle_osd_map(MOSDMap *m)
{
assert(osd_lock.is_locked());
+ list<OSDMapRef> pinned_maps;
if (m->fsid != monc->get_fsid()) {
dout(0) << "handle_osd_map fsid " << m->fsid << " != " << monc->get_fsid() << dendl;
m->put();
@@ -3159,7 +3161,7 @@ void OSD::handle_osd_map(MOSDMap *m)
bufferlist& bl = p->second;
o->decode(bl);
- add_map(o);
+ pinned_maps.push_back(add_map(o));
hobject_t fulloid = get_osdmap_pobject_name(e);
t.write(coll_t::META_COLL, fulloid, 0, bl.length(), bl);
@@ -3191,7 +3193,7 @@ void OSD::handle_osd_map(MOSDMap *m)
assert(0 == "bad fsid");
}
- add_map(o);
+ pinned_maps.push_back(add_map(o));
bufferlist fbl;
o->encode(fbl);
@@ -3387,11 +3389,6 @@ void OSD::handle_osd_map(MOSDMap *m)
ulock.Unlock();
osd_lock.Lock();
- // everything through current epoch now on disk; keep anything after
- // that in cache
- trim_map_bl_cache(osdmap->get_epoch()+1);
- trim_map_cache(0);
-
op_tp.unpause();
recovery_tp.unpause();
disk_tp.unpause();
@@ -3621,7 +3618,6 @@ void OSD::activate_map(ObjectStore::Transaction& t, list<Context*>& tfin)
logger->set(l_osd_pg_stray, num_pg_stray);
wake_all_pg_waiters(); // the pg mapping may have shifted
- trim_map_cache(oldest_last_clean);
maybe_update_heartbeat_peers();
send_pg_temp();
@@ -3694,135 +3690,79 @@ void OSD::send_incremental_map(epoch_t since, const entity_inst_t& inst, bool la
}
}
-bool OSD::get_map_bl(epoch_t e, bufferlist& bl)
+bool OSD::_get_map_bl(epoch_t e, bufferlist& bl)
{
- {
- Mutex::Locker l(map_cache_lock);
- map<epoch_t,bufferlist>::iterator p = map_bl.find(e);
- if (p != map_bl.end()) {
- bl = p->second;
- return true;
- }
- }
- return store->read(coll_t::META_COLL, get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
+ bool found = map_bl_cache.lookup(e, &bl);
+ if (found)
+ return true;
+ found = store->read(
+ coll_t::META_COLL, get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
+ if (found)
+ _add_map_bl(e, bl);
+ return found;
}
bool OSD::get_inc_map_bl(epoch_t e, bufferlist& bl)
{
- {
- Mutex::Locker l(map_cache_lock);
- map<epoch_t,bufferlist>::iterator p = map_inc_bl.find(e);
- if (p != map_inc_bl.end()) {
- bl = p->second;
- return true;
- }
- }
- return store->read(coll_t::META_COLL, get_inc_osdmap_pobject_name(e), 0, 0, bl) >= 0;
-}
-
-OSDMapRef OSD::add_map(OSDMap *o)
-{
Mutex::Locker l(map_cache_lock);
- epoch_t e = o->get_epoch();
- if (map_cache.count(e) == 0) {
- dout(10) << "add_map " << e << " " << o << dendl;
-
- if (g_conf->osd_map_dedup) {
- // dedup against an existing map at nearby epoch
- map<epoch_t,OSDMapRef>::iterator p = map_cache.lower_bound(e);
- if (p == map_cache.end() && !map_cache.empty())
- p--;
- if (p != map_cache.end())
- OSDMap::dedup(p->second.get(), o);
- }
-
- map_cache.insert(make_pair(e, OSDMapRef(o)));
- } else {
- dout(10) << "add_map " << e << " already have it" << dendl;
- }
- return map_cache[e];
+ bool found = map_bl_inc_cache.lookup(e, &bl);
+ if (found)
+ return true;
+ found = store->read(
+ coll_t::META_COLL, get_inc_osdmap_pobject_name(e), 0, 0, bl) >= 0;
+ if (found)
+ _add_map_inc_bl(e, bl);
+ return found;
}
-void OSD::add_map_bl(epoch_t e, bufferlist& bl)
+void OSD::_add_map_bl(epoch_t e, bufferlist& bl)
{
- Mutex::Locker l(map_cache_lock);
dout(10) << "add_map_bl " << e << " " << bl.length() << " bytes" << dendl;
- map_bl[e] = bl;
+ map_bl_cache.add(e, bl);
}
-void OSD::add_map_inc_bl(epoch_t e, bufferlist& bl)
+void OSD::_add_map_inc_bl(epoch_t e, bufferlist& bl)
{
- Mutex::Locker l(map_cache_lock);
dout(10) << "add_map_inc_bl " << e << " " << bl.length() << " bytes" << dendl;
- map_inc_bl[e] = bl;
+ map_bl_inc_cache.add(e, bl);
}
-OSDMapRef OSD::get_map(epoch_t epoch)
+OSDMapRef OSD::_add_map(OSDMap *o)
{
- {
- Mutex::Locker l(map_cache_lock);
- map<epoch_t,OSDMapRef>::iterator p = map_cache.find(epoch);
- if (p != map_cache.end()) {
- dout(30) << "get_map " << epoch << " - cached " << p->second << dendl;
- return p->second;
+ epoch_t e = o->get_epoch();
+
+ if (g_conf->osd_map_dedup) {
+ // Dedup against an existing map at a nearby epoch
+ OSDMapRef for_dedup = map_cache.lower_bound(e);
+ if (for_dedup) {
+ OSDMap::dedup(for_dedup.get(), o);
}
}
+ OSDMapRef l = map_cache.add(e, o);
+ return l;
+}
+
+OSDMapRef OSD::get_map(epoch_t epoch)
+{
+ Mutex::Locker l(map_cache_lock);
+ OSDMapRef retval = map_cache.lookup(epoch);
+ if (retval) {
+ dout(30) << "get_map " << epoch << " -cached" << dendl;
+ return retval;
+ }
OSDMap *map = new OSDMap;
if (epoch > 0) {
dout(20) << "get_map " << epoch << " - loading and decoding " << map << dendl;
bufferlist bl;
- get_map_bl(epoch, bl);
+ assert(_get_map_bl(epoch, bl));
map->decode(bl);
} else {
dout(20) << "get_map " << epoch << " - return initial " << map << dendl;
}
- return add_map(map);
-}
-
-void OSD::trim_map_bl_cache(epoch_t oldest)
-{
- Mutex::Locker l(map_cache_lock);
- dout(10) << "trim_map_bl_cache up to " << oldest << dendl;
- while (!map_inc_bl.empty() && map_inc_bl.begin()->first < oldest)
- map_inc_bl.erase(map_inc_bl.begin());
- while (!map_bl.empty() && map_bl.begin()->first < oldest)
- map_bl.erase(map_bl.begin());
-}
-
-void OSD::trim_map_cache(epoch_t oldest)
-{
- Mutex::Locker l(map_cache_lock);
- dout(10) << "trim_map_cache prior to " << oldest << dendl;
- while (!map_cache.empty() &&
- (map_cache.begin()->first < oldest ||
- (int)map_cache.size() > g_conf->osd_map_cache_max)) {
- epoch_t e = map_cache.begin()->first;
- OSDMapRef o = map_cache.begin()->second;
- dout(10) << "trim_map_cache " << e << " " << o << dendl;
- map_cache.erase(map_cache.begin());
- }
+ return _add_map(map);
}
-void OSD::clear_map_cache()
-{
- while (!map_cache.empty()) {
- map_cache.erase(map_cache.begin());
- }
-}
-
-bool OSD::get_inc_map(epoch_t e, OSDMap::Incremental &inc)
-{
- bufferlist bl;
- if (!get_inc_map_bl(e, bl))
- return false;
- bufferlist::iterator p = bl.begin();
- inc.decode(p);
- return true;
-}
-
-
-
bool OSD::require_mon_peer(Message *m)
{
if (!m->get_connection()->peer_is_mon()) {
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 562aad5f77e..155f4048a25 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -47,6 +47,8 @@ using namespace std;
using namespace __gnu_cxx;
#include "OpRequest.h"
+#include "common/shared_cache.hpp"
+#include "common/simple_cache.hpp"
#define CEPH_OSD_PROTOCOL 10 /* cluster internal */
@@ -409,22 +411,36 @@ private:
void activate_map(ObjectStore::Transaction& t, list<Context*>& tfin);
// osd map cache (past osd maps)
- map<epoch_t,OSDMapRef > map_cache;
- map<epoch_t,bufferlist> map_inc_bl;
- map<epoch_t,bufferlist> map_bl;
Mutex map_cache_lock;
+ SharedLRU<epoch_t, OSDMap> map_cache;
+ SimpleLRU<epoch_t, bufferlist> map_bl_cache;
+ SimpleLRU<epoch_t, bufferlist> map_bl_inc_cache;
+
OSDMapRef get_map(epoch_t e);
- OSDMapRef add_map(OSDMap *o);
- void add_map_bl(epoch_t e, bufferlist& bl);
- void add_map_inc_bl(epoch_t e, bufferlist& bl);
- void trim_map_cache(epoch_t oldest);
- void trim_map_bl_cache(epoch_t oldest);
- void clear_map_cache();
-
- bool get_map_bl(epoch_t e, bufferlist& bl);
+ OSDMapRef add_map(OSDMap *o) {
+ Mutex::Locker l(map_cache_lock);
+ return _add_map(o);
+ }
+ OSDMapRef _add_map(OSDMap *o);
+
+ void add_map_bl(epoch_t e, bufferlist& bl) {
+ Mutex::Locker l(map_cache_lock);
+ return _add_map_bl(e, bl);
+ }
+ void _add_map_bl(epoch_t e, bufferlist& bl);
+ bool get_map_bl(epoch_t e, bufferlist& bl) {
+ Mutex::Locker l(map_cache_lock);
+ return _get_map_bl(e, bl);
+ }
+ bool _get_map_bl(epoch_t e, bufferlist& bl);
+
+ void add_map_inc_bl(epoch_t e, bufferlist& bl) {
+ Mutex::Locker l(map_cache_lock);
+ return _add_map_inc_bl(e, bl);
+ }
+ void _add_map_inc_bl(epoch_t e, bufferlist& bl);
bool get_inc_map_bl(epoch_t e, bufferlist& bl);
- bool get_inc_map(epoch_t e, OSDMap::Incremental &inc);
MOSDMap *build_incremental_map_msg(epoch_t from, epoch_t to);
void send_incremental_map(epoch_t since, const entity_inst_t& inst, bool lazy=false);