diff options
author | Sage Weil <sage@inktank.com> | 2013-04-30 11:49:31 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-04-30 11:49:31 -0700 |
commit | 17612a407ab7bd1d08945d663e2088c5c81cee33 (patch) | |
tree | fe89353ff145e31d3c9caf1364dae63832fc672d | |
parent | 6ae9bbb5d03cb5695a4ebb7a3c20f729de1bd67a (diff) | |
parent | bd68b82bd6e9e119257ecf351b04c085ee4897db (diff) | |
download | ceph-17612a407ab7bd1d08945d663e2088c5c81cee33.tar.gz |
Merge branch 'wip-mon-compact' into next
Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- | src/ceph_mon.cc | 9 |
-rw-r--r-- | src/common/config_opts.h | 7 |
-rw-r--r-- | src/mon/Monitor.cc | 7 |
-rw-r--r-- | src/mon/MonitorDBStore.h | 32 |
-rw-r--r-- | src/mon/Paxos.cc | 4 |
-rw-r--r-- | src/mon/PaxosService.cc | 4 |
-rw-r--r-- | src/os/LevelDBStore.h | 16 |
7 files changed, 76 insertions, 3 deletions
diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc index 01072728db2..69bcf6d3282 100644 --- a/src/ceph_mon.cc +++ b/src/ceph_mon.cc @@ -114,6 +114,7 @@ int main(int argc, const char **argv) int err; bool mkfs = false; + bool compact = false; std::string osdmapfn, inject_monmap; vector<const char*> args; @@ -132,6 +133,8 @@ int main(int argc, const char **argv) exit(0); } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) { mkfs = true; + } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) { + compact = true; } else if (ceph_argparse_witharg(args, i, &val, "--osdmap", (char*)NULL)) { osdmapfn = val; } else if (ceph_argparse_witharg(args, i, &val, "--inject_monmap", (char*)NULL)) { @@ -474,6 +477,12 @@ int main(int argc, const char **argv) if (err < 0) return 1; + if (compact || g_conf->mon_compact_on_start) { + derr << "compacting monitor store ..." << dendl; + mon->store->compact(); + derr << "done compacting" << dendl; + } + global_init_daemonize(g_ceph_context, 0); common_init_finish(g_ceph_context); global_init_chdir(g_ceph_context); diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 30702d68026..9f7dafeb218 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -124,6 +124,9 @@ OPTION(ms_inject_internal_delays, OPT_DOUBLE, 0) // seconds OPTION(mon_data, OPT_STR, "/var/lib/ceph/mon/$cluster-$id") OPTION(mon_initial_members, OPT_STR, "") // list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster OPTION(mon_sync_fs_threshold, OPT_INT, 5) // sync() when writing this many objects; 0 to disable. 
+OPTION(mon_compact_on_start, OPT_BOOL, false) // compact leveldb on ceph-mon start +OPTION(mon_compact_on_bootstrap, OPT_BOOL, false) // trigger leveldb compaction on bootstrap +OPTION(mon_compact_on_trim, OPT_BOOL, true) // compact (a prefix) when we trim old states OPTION(mon_tick_interval, OPT_INT, 5) OPTION(mon_subscribe_interval, OPT_DOUBLE, 300) OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60) // (seconds) how quickly our laggy estimations decay @@ -192,8 +195,8 @@ OPTION(paxos_propose_interval, OPT_DOUBLE, 1.0) // gather updates for this long OPTION(paxos_min_wait, OPT_DOUBLE, 0.05) // min time to gather updates for after period of inactivity OPTION(paxos_trim_tolerance, OPT_INT, 30) // number of extra proposals tolerated before trimming OPTION(paxos_trim_disabled_max_versions, OPT_INT, 100) // maximum amount of versions we shall allow passing by without trimming -OPTION(paxos_service_trim_max, OPT_INT, 50) // maximum amount of versions to trim during a single proposal (0 disables it) -OPTION(paxos_service_trim_min, OPT_INT, 30) // minimum amount of versions to trigger a trim (0 disables it) +OPTION(paxos_service_trim_max, OPT_INT, 500) // maximum amount of versions to trim during a single proposal (0 disables it) +OPTION(paxos_service_trim_min, OPT_INT, 250) // minimum amount of versions to trigger a trim (0 disables it) OPTION(clock_offset, OPT_DOUBLE, 0) // how much to offset the system clock in Clock.cc OPTION(auth_cluster_required, OPT_STR, "cephx") // required of mon, mds, osd daemons OPTION(auth_service_required, OPT_STR, "cephx") // required by daemons of clients diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 11cad14979e..b360bd0f9d9 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -635,6 +635,13 @@ void Monitor::bootstrap() reset(); + // sync store + if (g_conf->mon_compact_on_bootstrap) { + dout(10) << "bootstrap -- triggering compaction" << dendl; + store->compact(); + dout(10) << "bootstrap -- finished compaction" << dendl; 
+ } + // singleton monitor? if (monmap->size() == 1 && rank == 0) { win_standalone_election(); diff --git a/src/mon/MonitorDBStore.h b/src/mon/MonitorDBStore.h index ac2703ec5e6..c4c681043b1 100644 --- a/src/mon/MonitorDBStore.h +++ b/src/mon/MonitorDBStore.h @@ -70,6 +70,7 @@ class MonitorDBStore enum { OP_PUT = 1, OP_ERASE = 2, + OP_COMPACT_PREFIX = 3, }; void put(string prefix, string key, bufferlist& bl) { @@ -98,6 +99,10 @@ class MonitorDBStore erase(prefix, os.str()); } + void compact_prefix(string prefix) { + ops.push_back(Op(OP_COMPACT_PREFIX, prefix, string())); + } + void encode(bufferlist& bl) const { ENCODE_START(1, 1, bl); ::encode(ops, bl); @@ -157,6 +162,12 @@ class MonitorDBStore f->dump_string("key", op.key); } break; + case OP_COMPACT_PREFIX: + { + f->dump_string("type", "COMPACT_PREFIX"); + f->dump_string("prefix", op.prefix); + } + break; default: { f->dump_string("type", "unknown"); @@ -174,6 +185,7 @@ class MonitorDBStore int apply_transaction(MonitorDBStore::Transaction& t) { KeyValueDB::Transaction dbt = db->get_transaction(); + list<string> compact_prefixes; for (list<Op>::iterator it = t.ops.begin(); it != t.ops.end(); ++it) { Op& op = *it; switch (op.type) { @@ -183,13 +195,23 @@ class MonitorDBStore case Transaction::OP_ERASE: dbt->rmkey(op.prefix, op.key); break; + case Transaction::OP_COMPACT_PREFIX: + compact_prefixes.push_back(op.prefix); + break; default: derr << __func__ << " unknown op type " << op.type << dendl; ceph_assert(0); break; } } - return db->submit_transaction_sync(dbt); + int r = db->submit_transaction_sync(dbt); + if (r >= 0) { + while (!compact_prefixes.empty()) { + db->compact_prefix(compact_prefixes.front()); + compact_prefixes.pop_front(); + } + } + return r; } class StoreIteratorImpl { @@ -456,6 +478,14 @@ class MonitorDBStore return db->create_and_open(out); } + void compact() { + db->compact(); + } + + void compact_prefix(const string& prefix) { + db->compact_prefix(prefix); + } + MonitorDBStore(const string& 
path) : db(0) { string::const_reverse_iterator rit; int pos = 0; diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 46eaf88273d..f306a8bf296 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -974,6 +974,10 @@ void Paxos::trim_to(MonitorDBStore::Transaction *t, t->erase(get_name(), from); from++; } + if (g_conf->mon_compact_on_trim) { + dout(10) << " compacting prefix" << dendl; + t->compact_prefix(get_name()); + } } void Paxos::trim_to(MonitorDBStore::Transaction *t, version_t first) diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc index d02cb1d7ab5..647980a9342 100644 --- a/src/mon/PaxosService.cc +++ b/src/mon/PaxosService.cc @@ -318,6 +318,10 @@ void PaxosService::trim(MonitorDBStore::Transaction *t, t->erase(get_service_name(), full_key); } } + if (g_conf->mon_compact_on_trim) { + dout(20) << " compacting prefix " << get_service_name() << dendl; + t->compact_prefix(get_service_name()); + } } void PaxosService::encode_trim(MonitorDBStore::Transaction *t) diff --git a/src/os/LevelDBStore.h b/src/os/LevelDBStore.h index 6b1afceb753..83f2ed3b4c4 100644 --- a/src/os/LevelDBStore.h +++ b/src/os/LevelDBStore.h @@ -33,6 +33,22 @@ class LevelDBStore : public KeyValueDB { int init(ostream &out, bool create_if_missing); public: + /// compact the underlying leveldb store + void compact() { + db->CompactRange(NULL, NULL); + } + + /// compact leveldb for all keys with a given prefix + void compact_prefix(const string& prefix) { + // if we combine the prefix with key by adding a '\0' separator, + // a char(1) will capture all such keys. + string end = prefix; + end += (char)1; + leveldb::Slice cstart(prefix); + leveldb::Slice cend(end); + db->CompactRange(&cstart, &cend); + } + /** * options_t: Holds options which are minimally interpreted * on initialization and then passed through to LevelDB. |