summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AUTHORS4
-rw-r--r--COPYING3
-rw-r--r--README4
-rw-r--r--doc/radosgw/config.rst13
-rwxr-xr-xqa/workunits/mon/crush_ops.sh23
-rw-r--r--src/Makefile.am6
-rw-r--r--src/client/Client.cc53
-rw-r--r--src/client/Client.h14
-rw-r--r--src/common/Throttle.cc2
-rw-r--r--src/common/config_opts.h2
-rw-r--r--src/crush/CrushWrapper.cc71
-rw-r--r--src/crush/CrushWrapper.h14
-rw-r--r--src/crush/crush.c7
-rw-r--r--src/crush/crush.h1
-rw-r--r--src/mon/OSDMonitor.cc154
-rw-r--r--src/osd/OSDMap.cc9
-rw-r--r--src/osd/OSDMap.h1
-rw-r--r--src/osd/PG.cc3
-rw-r--r--src/test/common/Throttle.cc256
19 files changed, 610 insertions, 30 deletions
diff --git a/AUTHORS b/AUTHORS
index 08f3b1ca729..289f54bf67b 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -13,3 +13,7 @@ Patience Warnick <patience@newdream.net>
Yehuda Sadeh-Weinraub <yehudasa@gmail.com>
Greg Farnum <gregf@hq.newdream.net>
+Contributors
+------------
+
+Loic Dachary <loic@dachary.org>
diff --git a/COPYING b/COPYING
index 20ab537172d..888e30e679f 100644
--- a/COPYING
+++ b/COPYING
@@ -98,3 +98,6 @@ License:
+Files: test/common/Throttle.cc
+Copyright: Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+License: LGPL2 or later
diff --git a/README b/README
index eb10688544a..c249abcb90b 100644
--- a/README
+++ b/README
@@ -121,9 +121,9 @@ To build the source code, you must install the following:
- libatomic-ops-dev
- libboost-program-options-dev
- libboost-thread-dev
-
+- libexpat1-dev
For example:
- $ apt-get install automake autoconf automake gcc g++ libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev libboost-program-options-dev libboost-thread-dev
+ $ apt-get install automake autoconf automake gcc g++ libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev libboost-program-options-dev libboost-thread-dev libexpat1-dev
diff --git a/doc/radosgw/config.rst b/doc/radosgw/config.rst
index 5c2c2c27216..a4dc85aff4a 100644
--- a/doc/radosgw/config.rst
+++ b/doc/radosgw/config.rst
@@ -22,6 +22,7 @@ For example::
rgw socket path = /tmp/radosgw.sock
log file = /var/log/ceph/radosgw.log
+.. note:: ``host`` must be your machine hostname, not FQDN.
Deploy ``ceph.conf``
====================
@@ -315,10 +316,9 @@ packages.
RGW's ``user:subuser`` tuple maps to the ``tenant:user`` tuple expected by Swift.
-.. important:: RGW's Swift authentication service only supports
- built-in Swift authentication (``-V 1.0``) at this point. There is
- currently no way to make RGW authenticate users via OpenStack
- Identity Service (Keystone).
+.. note:: RGW's Swift authentication service only supports built-in Swift
+ authentication (``-V 1.0``). To make RGW authenticate users via OpenStack
+ Identity Service (Keystone), see below.
Integrating with OpenStack Keystone
===================================
@@ -332,7 +332,7 @@ by RGW.
The following config options are available for Keystone integration::
[client.radosgw.gateway]
- rgw keystone url = {keystone server url}
+ rgw keystone url = {keystone server url:keystone server admin port}
rgw keystone admin token = {keystone admin token}
rgw keystone accepted roles = {accepted user roles}
rgw keystone token cache size = {number of tokens to cache}
@@ -349,7 +349,8 @@ Keystone itself needs to be configured to point to RGW as an object-storage
endpoint::
keystone service-create --name swift --type-object-store
- keystone endpoint-create --service-id <id> --public-url http://radosgw.example.com/swift/v1
+ keystone endpoint-create --service-id <id> --publicurl http://radosgw.example.com/swift/v1 \
+ --internalurl http://radosgw.example.com/swift/v1 --adminurl http://radosgw.example.com/swift/v1
The keystone url is the Keystone admin RESTful api url. The admin token is the
diff --git a/qa/workunits/mon/crush_ops.sh b/qa/workunits/mon/crush_ops.sh
new file mode 100755
index 00000000000..735646b5ca0
--- /dev/null
+++ b/qa/workunits/mon/crush_ops.sh
@@ -0,0 +1,23 @@
+#!/bin/sh -x
+
+set -e
+
+ceph osd crush dump
+ceph osd crush rule dump
+ceph osd crush rule ls
+ceph osd crush rule list
+
+ceph osd crush rule create-simple foo default host
+ceph osd crush rule create-simple foo default host
+ceph osd crush rule create-simple bar default host
+
+ceph osd crush rule ls | grep foo
+
+ceph osd crush rule rm foo
+ceph osd crush rule rm foo # idempotent
+ceph osd crush rule rm bar
+
+# can't delete in-use rules, tho:
+ceph osd crush rule rm data && exit 1 || true
+
+echo OK
diff --git a/src/Makefile.am b/src/Makefile.am
index 622834f0b0e..6a9bb147824 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -671,6 +671,12 @@ unittest_log_LDADD = libcommon.la ${UNITTEST_LDADD}
unittest_log_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} -O2
check_PROGRAMS += unittest_log
+unittest_throttle_SOURCES = test/common/Throttle.cc
+unittest_throttle_LDFLAGS = $(PTHREAD_CFLAGS) ${AM_LDFLAGS}
+unittest_throttle_LDADD = libcommon.la ${LIBGLOBAL_LDA} ${UNITTEST_LDADD}
+unittest_throttle_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} -O2
+check_PROGRAMS += unittest_throttle
+
unittest_base64_SOURCES = test/base64.cc
unittest_base64_LDFLAGS = $(PTHREAD_CFLAGS) ${AM_LDFLAGS}
unittest_base64_LDADD = libcephfs.la -lm ${UNITTEST_LDADD}
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 6cff22be9f0..4ff30797284 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -2566,7 +2566,7 @@ public:
}
};
-bool Client::_flush(Inode *in)
+bool Client::_flush(Inode *in, Context *onfinish)
{
ldout(cct, 10) << "_flush " << *in << dendl;
@@ -2575,7 +2575,9 @@ bool Client::_flush(Inode *in)
return true;
}
- Context *onfinish = new C_Client_PutInode(this, in);
+ if (!onfinish) {
+ onfinish = new C_Client_PutInode(this, in);
+ }
bool safe = objectcacher->flush_set(&in->oset, onfinish);
if (safe) {
onfinish->complete(0);
@@ -5877,11 +5879,19 @@ int Client::_fsync(Fh *f, bool syncdataonly)
Inode *in = f->inode;
tid_t wait_on_flush = 0;
bool flushed_metadata = false;
+ Mutex lock("Client::_fsync::lock");
+ Cond cond;
+ bool done = false;
+ C_SafeCond *object_cacher_completion = NULL;
ldout(cct, 3) << "_fsync(" << f << ", " << (syncdataonly ? "dataonly)":"data+metadata)") << dendl;
- if (cct->_conf->client_oc)
- _flush(in);
+ if (cct->_conf->client_oc) {
+ object_cacher_completion = new C_SafeCond(&lock, &cond, &done, &r);
+ in->get(); // take a reference; C_SafeCond doesn't and _flush won't either
+ _flush(in, object_cacher_completion);
+ ldout(cct, 15) << "using return-valued form of _fsync" << dendl;
+ }
if (!syncdataonly && (in->dirty_caps & ~CEPH_CAP_ANY_FILE_WR)) {
for (map<int, Cap*>::iterator iter = in->caps.begin(); iter != in->caps.end(); ++iter) {
@@ -5893,18 +5903,35 @@ int Client::_fsync(Fh *f, bool syncdataonly)
flushed_metadata = true;
} else ldout(cct, 10) << "no metadata needs to commit" << dendl;
- // FIXME: this can starve
- while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) {
- ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER]
- << " uncommitted, waiting" << dendl;
- wait_on_list(in->waitfor_commit);
+ if (object_cacher_completion) { // wait on a real reply instead of guessing
+ client_lock.Unlock();
+ lock.Lock();
+ ldout(cct, 15) << "waiting on data to flush" << dendl;
+ while (!done)
+ cond.Wait(lock);
+ lock.Unlock();
+ client_lock.Lock();
+ put_inode(in);
+ ldout(cct, 15) << "got " << r << " from flush writeback" << dendl;
+ } else {
+ // FIXME: this can starve
+ while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) {
+ ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER]
+ << " uncommitted, waiting" << dendl;
+ wait_on_list(in->waitfor_commit);
+ }
}
- if (!flushed_metadata) wait_sync_caps(wait_on_flush); //this could wait longer than strictly necessary,
- //but on a sync the user can put up with it
-
- ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl;
+ if (!r) {
+ if (flushed_metadata) wait_sync_caps(wait_on_flush);
+ // this could wait longer than strictly necessary,
+ // but on a sync the user can put up with it
+ ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl;
+ } else {
+ ldout(cct, 1) << "ino " << in->ino << " failed to commit to disk! "
+ << cpp_strerror(-r) << dendl;
+ }
return r;
}
diff --git a/src/client/Client.h b/src/client/Client.h
index b3b1f87cf46..3fcdf481ad1 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -451,7 +451,19 @@ protected:
void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len, bool keep_caps);
void _async_invalidate(Inode *in, int64_t off, int64_t len, bool keep_caps);
void _release(Inode *in);
- bool _flush(Inode *in);
+
+ /**
+ * Initiate a flush of the data associated with the given inode.
+ * If you specify a Context, you are responsible for holding an inode
+ * reference for the duration of the flush. If not, _flush() will
+ * take the reference for you.
+ * @param in The Inode whose data you wish to flush.
+ * @param c The Context you wish us to complete once the data is
+ * flushed. If already flushed, this will be called in-line.
+ *
+ * @returns true if the data was already flushed, false otherwise.
+ */
+ bool _flush(Inode *in, Context *c=NULL);
void _flush_range(Inode *in, int64_t off, uint64_t size);
void _flushed(Inode *in);
void flush_set_callback(ObjectCacher::ObjectSet *oset);
diff --git a/src/common/Throttle.cc b/src/common/Throttle.cc
index 844263aa111..82ffe7a9fc5 100644
--- a/src/common/Throttle.cc
+++ b/src/common/Throttle.cc
@@ -65,7 +65,7 @@ Throttle::~Throttle()
void Throttle::_reset_max(int64_t m)
{
assert(lock.is_locked());
- if (m < ((int64_t)max.read()) && !cond.empty())
+ if (!cond.empty())
cond.front()->SignalOne();
logger->set(l_throttle_max, m);
max.set((size_t)m);
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 0d2e011721a..5e0449e3606 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -434,7 +434,7 @@ OPTION(filestore_kill_at, OPT_INT, 0) // inject a failure at the n'th
OPTION(filestore_inject_stall, OPT_INT, 0) // artificially stall for N seconds in op queue thread
OPTION(filestore_fail_eio, OPT_BOOL, true) // fail/crash on EIO
OPTION(journal_dio, OPT_BOOL, true)
-OPTION(journal_aio, OPT_BOOL, false)
+OPTION(journal_aio, OPT_BOOL, true)
OPTION(journal_block_align, OPT_BOOL, true)
OPTION(journal_max_write_bytes, OPT_INT, 10 << 20)
OPTION(journal_max_write_entries, OPT_INT, 100)
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 45e4fb53de6..a22f23509c9 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -488,6 +488,61 @@ void CrushWrapper::reweight(CephContext *cct)
}
}
+int CrushWrapper::add_simple_rule(string name, string root_name, string failure_domain_name)
+{
+ if (rule_exists(name))
+ return -EEXIST;
+ if (!name_exists(root_name.c_str()))
+ return -ENOENT;
+ int root = get_item_id(root_name.c_str());
+ int type = 0;
+ if (failure_domain_name.length()) {
+ type = get_type_id(failure_domain_name.c_str());
+ if (type <= 0) // bah, returns 0 on error; but its ok, device isn't a domain really
+ return -EINVAL;
+ }
+
+ int ruleset = 0;
+ for (int i = 0; i < get_max_rules(); i++) {
+ if (rule_exists(i) &&
+ get_rule_mask_ruleset(i) >= ruleset) {
+ ruleset = get_rule_mask_ruleset(i) + 1;
+ }
+ }
+
+ crush_rule *rule = crush_make_rule(3, ruleset, 1 /* pg_pool_t::TYPE_REP */, 1, 10);
+ assert(rule);
+ crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, root, 0);
+ if (type)
+ crush_rule_set_step(rule, 1,
+ CRUSH_RULE_CHOOSE_LEAF_FIRSTN,
+ CRUSH_CHOOSE_N,
+ type);
+ else
+ crush_rule_set_step(rule, 1,
+ CRUSH_RULE_CHOOSE_FIRSTN,
+ CRUSH_CHOOSE_N,
+ 0);
+ crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0);
+ int rno = crush_add_rule(crush, rule, -1);
+ set_rule_name(rno, name.c_str());
+ have_rmaps = false;
+ return rno;
+}
+
+int CrushWrapper::remove_rule(int ruleno)
+{
+ if (ruleno >= (int)crush->max_rules)
+ return -ENOENT;
+ if (crush->rules[ruleno] == NULL)
+ return -ENOENT;
+ crush_destroy_rule(crush->rules[ruleno]);
+ crush->rules[ruleno] = NULL;
+ rule_name_map.erase(ruleno);
+ have_rmaps = false;
+ return 0;
+}
+
void CrushWrapper::encode(bufferlist& bl, bool lean) const
{
assert(crush);
@@ -817,6 +872,12 @@ void CrushWrapper::dump(Formatter *f) const
f->close_section();
f->open_array_section("rules");
+ dump_rules(f);
+ f->close_section();
+}
+
+void CrushWrapper::dump_rules(Formatter *f) const
+{
for (int i=0; i<get_max_rules(); i++) {
if (!rule_exists(i))
continue;
@@ -872,7 +933,15 @@ void CrushWrapper::dump(Formatter *f) const
f->close_section();
f->close_section();
}
- f->close_section();
+}
+
+void CrushWrapper::list_rules(Formatter *f) const
+{
+ for (int rule = 0; rule < get_max_rules(); rule++) {
+ if (!rule_exists(rule))
+ continue;
+ f->dump_string("name", get_rule_name(rule));
+ }
}
void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index 7def6e4ab34..0b919cba3ec 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -221,12 +221,15 @@ public:
}
// rule names
- int get_rule_id(const char *n) {
- string name(n);
+ bool rule_exists(string name) {
+ build_rmaps();
+ return rule_name_rmap.count(name);
+ }
+ int get_rule_id(string name) {
build_rmaps();
if (rule_name_rmap.count(name))
return rule_name_rmap[name];
- return 0; /* hrm */
+ return -ENOENT;
}
const char *get_rule_name(int t) const {
std::map<int,string>::const_iterator p = rule_name_map.find(t);
@@ -527,6 +530,9 @@ public:
return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
}
+ int add_simple_rule(string name, string root_name, string failure_domain_type);
+
+ int remove_rule(int ruleno);
/** buckets **/
@@ -735,6 +741,8 @@ public:
void decode(bufferlist::iterator &blp);
void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp);
void dump(Formatter *f) const;
+ void dump_rules(Formatter *f) const;
+ void list_rules(Formatter *f) const;
static void generate_test_instances(list<CrushWrapper*>& o);
};
WRITE_CLASS_ENCODER(CrushWrapper)
diff --git a/src/crush/crush.c b/src/crush/crush.c
index 19a765228e9..1e83eb866bb 100644
--- a/src/crush/crush.c
+++ b/src/crush/crush.c
@@ -116,7 +116,7 @@ void crush_destroy(struct crush_map *map)
if (map->rules) {
__u32 b;
for (b = 0; b < map->max_rules; b++)
- kfree(map->rules[b]);
+ crush_destroy_rule(map->rules[b]);
kfree(map->rules);
}
@@ -124,6 +124,11 @@ void crush_destroy(struct crush_map *map)
kfree(map);
}
+void crush_destroy_rule(struct crush_rule *rule)
+{
+ kfree(rule);
+}
+
// methods to check for safe arithmetic operations
int crush_addition_is_unsafe(__u32 a, __u32 b)
{
diff --git a/src/crush/crush.h b/src/crush/crush.h
index 9fd37e9e516..82d032879d9 100644
--- a/src/crush/crush.h
+++ b/src/crush/crush.h
@@ -185,6 +185,7 @@ extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
+extern void crush_destroy_rule(struct crush_rule *r);
extern void crush_destroy(struct crush_map *map);
static inline int crush_calc_tree_node(int i)
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 507eed74c42..209bb982d86 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1880,6 +1880,38 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
}
}
}
+ else if (m->cmd[1] == "find") {
+ if (m->cmd.size() < 3) {
+ ss << "usage: osd find <osd-id>";
+ r = -EINVAL;
+ goto out;
+ }
+ long osd = parse_osd_id(m->cmd[2].c_str(), &ss);
+ if (osd < 0) {
+ r = -EINVAL;
+ goto out;
+ }
+ if (!osdmap.exists(osd)) {
+ ss << "osd." << osd << " does not exist";
+ r = -ENOENT;
+ goto out;
+ }
+ JSONFormatter jf(true);
+ jf.open_object_section("osd_location");
+ jf.dump_int("osd", osd);
+ jf.dump_stream("ip") << osdmap.get_addr(osd);
+ jf.open_object_section("crush_location");
+ map<string,string> loc = osdmap.crush->get_full_location(osd);
+ for (map<string,string>::iterator p = loc.begin(); p != loc.end(); ++p)
+ jf.dump_string(p->first.c_str(), p->second);
+ jf.close_section();
+ jf.close_section();
+ ostringstream rs;
+ jf.flush(rs);
+ rs << "\n";
+ rdata.append(rs.str());
+ r = 0;
+ }
else if (m->cmd[1] == "map" && m->cmd.size() == 4) {
int64_t pool = osdmap.lookup_pg_pool_name(m->cmd[2].c_str());
if (pool < 0) {
@@ -1964,6 +1996,40 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
ss << "listed " << osdmap.blacklist.size() << " entries";
r = 0;
}
+ else if (m->cmd.size() >= 4 && m->cmd[1] == "crush" && m->cmd[2] == "rule" && (m->cmd[3] == "list" ||
+ m->cmd[3] == "ls")) {
+ JSONFormatter jf(true);
+ jf.open_array_section("rules");
+ osdmap.crush->list_rules(&jf);
+ jf.close_section();
+ ostringstream rs;
+ jf.flush(rs);
+ rs << "\n";
+ rdata.append(rs.str());
+ r = 0;
+ }
+ else if (m->cmd.size() >= 4 && m->cmd[1] == "crush" && m->cmd[2] == "rule" && m->cmd[3] == "dump") {
+ JSONFormatter jf(true);
+ jf.open_array_section("rules");
+ osdmap.crush->dump_rules(&jf);
+ jf.close_section();
+ ostringstream rs;
+ jf.flush(rs);
+ rs << "\n";
+ rdata.append(rs.str());
+ r = 0;
+ }
+ else if (m->cmd.size() == 3 && m->cmd[1] == "crush" && m->cmd[2] == "dump") {
+ JSONFormatter jf(true);
+ jf.open_object_section("crush_map");
+ osdmap.crush->dump(&jf);
+ jf.close_section();
+ ostringstream rs;
+ jf.flush(rs);
+ rs << "\n";
+ rdata.append(rs.str());
+ r = 0;
+ }
}
out:
if (r != -1) {
@@ -2380,6 +2446,94 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
return true;
}
}
+ else if (m->cmd.size() == 7 &&
+ m->cmd[1] == "crush" &&
+ m->cmd[2] == "rule" &&
+ m->cmd[3] == "create-simple") {
+ string name = m->cmd[4];
+ string root = m->cmd[5];
+ string type = m->cmd[6];
+
+ if (osdmap.crush->rule_exists(name)) {
+ ss << "rule " << name << " already exists";
+ err = 0;
+ goto out;
+ }
+
+ bufferlist bl;
+ if (pending_inc.crush.length())
+ bl = pending_inc.crush;
+ else
+ osdmap.crush->encode(bl);
+ CrushWrapper newcrush;
+ bufferlist::iterator p = bl.begin();
+ newcrush.decode(p);
+
+ if (newcrush.rule_exists(name)) {
+ ss << "rule " << name << " already exists";
+ } else {
+ int rule = newcrush.add_simple_rule(name, root, type);
+ if (rule < 0) {
+ err = rule;
+ goto out;
+ }
+
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush);
+ }
+ getline(ss, rs);
+ paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
+ return true;
+ }
+ else if (m->cmd.size() == 5 &&
+ m->cmd[1] == "crush" &&
+ m->cmd[2] == "rule" &&
+ m->cmd[3] == "rm") {
+ string name = m->cmd[4];
+
+ if (!osdmap.crush->rule_exists(name)) {
+ ss << "rule " << name << " does not exist";
+ err = 0;
+ goto out;
+ }
+
+ bufferlist bl;
+ if (pending_inc.crush.length())
+ bl = pending_inc.crush;
+ else
+ osdmap.crush->encode(bl);
+ CrushWrapper newcrush;
+ bufferlist::iterator p = bl.begin();
+ newcrush.decode(p);
+
+ if (!newcrush.rule_exists(name)) {
+ ss << "rule " << name << " does not exist";
+ } else {
+ int ruleno = newcrush.get_rule_id(name);
+ assert(ruleno >= 0);
+
+ // make sure it is not in use.
+ // FIXME: this is ok in some situations, but let's not bother with that
+ // complexity now.
+ int ruleset = newcrush.get_rule_mask_ruleset(ruleno);
+ if (osdmap.crush_ruleset_in_use(ruleset)) {
+ ss << "crush rule " << name << " ruleset " << ruleset << " is in use";
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = newcrush.remove_rule(ruleno);
+ if (err < 0) {
+ goto out;
+ }
+
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush);
+ }
+ getline(ss, rs);
+ paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
+ return true;
+ }
else if (m->cmd[1] == "setmaxosd" && m->cmd.size() > 2) {
int newmax = parse_pos_long(m->cmd[2].c_str(), &ss);
if (newmax < 0) {
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index c7d044ac6fd..6b692d407a8 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -1654,6 +1654,15 @@ void OSDMap::print_summary(ostream& out) const
out << " nearfull";
}
+bool OSDMap::crush_ruleset_in_use(int ruleset) const
+{
+ for (map<int64_t,pg_pool_t>::const_iterator p = pools.begin(); p != pools.end(); ++p) {
+ if (p->second.crush_ruleset == ruleset)
+ return true;
+ }
+ return false;
+}
+
void OSDMap::build_simple(CephContext *cct, epoch_t e, uuid_d &fsid,
int nosd, int pg_bits, int pgp_bits)
{
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index d161fa7436b..70ec263e4d8 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -553,6 +553,7 @@ public:
static void build_simple_crush_map_from_conf(CephContext *cct, CrushWrapper& crush,
map<int, const char*>& rulesets);
+ bool crush_ruleset_in_use(int ruleset) const;
private:
void print_osd_line(int cur, ostream *out, Formatter *f) const;
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 6526633b7d7..fc3f8c24b1f 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -2649,7 +2649,8 @@ void PG::read_state(ObjectStore *store, bufferlist &bl)
try {
ostringstream oss;
read_log(store, coll, log_oid, info, ondisklog, log, missing, oss, this);
- osd->clog.error() << oss;
+ if (oss.str().length())
+ osd->clog.error() << oss;
}
catch (const buffer::error &e) {
string cr_log_coll_name(get_corrupt_pg_log_name());
diff --git a/src/test/common/Throttle.cc b/src/test/common/Throttle.cc
new file mode 100644
index 00000000000..60d7daebdac
--- /dev/null
+++ b/src/test/common/Throttle.cc
@@ -0,0 +1,256 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include "common/Mutex.h"
+#include "common/Thread.h"
+#include "common/Throttle.h"
+#include "common/ceph_argparse.h"
+#include "global/global_init.h"
+#include <gtest/gtest.h>
+
+class ThrottleTest : public ::testing::Test {
+protected:
+
+ class Thread_get : public Thread {
+ public:
+ Throttle &throttle;
+ int64_t count;
+ bool waited;
+
+ Thread_get(Throttle& _throttle, int64_t _count) :
+ throttle(_throttle),
+ count(_count),
+ waited(false)
+ {
+ }
+
+ virtual void *entry() {
+ waited = throttle.get(count);
+ throttle.put(count);
+ return NULL;
+ }
+ };
+
+};
+
+TEST_F(ThrottleTest, Throttle) {
+ ASSERT_THROW({
+ Throttle throttle(g_ceph_context, "throttle", -1);
+ }, FailedAssertion);
+
+ int64_t throttle_max = 10;
+ Throttle throttle(g_ceph_context, "throttle", throttle_max);
+ ASSERT_EQ(throttle.get_max(), throttle_max);
+ ASSERT_EQ(throttle.get_current(), 0);
+}
+
+TEST_F(ThrottleTest, take) {
+ int64_t throttle_max = 10;
+ Throttle throttle(g_ceph_context, "throttle", throttle_max);
+ ASSERT_THROW(throttle.take(-1), FailedAssertion);
+ ASSERT_EQ(throttle.take(throttle_max), throttle_max);
+ ASSERT_EQ(throttle.take(throttle_max), throttle_max * 2);
+}
+
+TEST_F(ThrottleTest, get) {
+ int64_t throttle_max = 10;
+ Throttle throttle(g_ceph_context, "throttle", throttle_max);
+ ASSERT_THROW(throttle.get(-1), FailedAssertion);
+ ASSERT_FALSE(throttle.get(5));
+ ASSERT_EQ(throttle.put(5), 0);
+
+ ASSERT_FALSE(throttle.get(throttle_max));
+ ASSERT_FALSE(throttle.get_or_fail(1));
+ ASSERT_FALSE(throttle.get(1, throttle_max + 1));
+ ASSERT_EQ(throttle.put(throttle_max + 1), 0);
+ ASSERT_FALSE(throttle.get(0, throttle_max));
+ ASSERT_FALSE(throttle.get(throttle_max));
+ ASSERT_FALSE(throttle.get_or_fail(1));
+ ASSERT_EQ(throttle.put(throttle_max), 0);
+
+ useconds_t delay = 1;
+
+ bool waited;
+
+ do {
+ cout << "Trying (1) with delay " << delay << "us\n";
+
+ ASSERT_FALSE(throttle.get(throttle_max));
+ ASSERT_FALSE(throttle.get_or_fail(throttle_max));
+
+ Thread_get t(throttle, 7);
+ t.create();
+ usleep(delay);
+ ASSERT_EQ(throttle.put(throttle_max), 0);
+ t.join();
+
+ if (!(waited = t.waited))
+ delay *= 2;
+ } while(!waited);
+
+ do {
+ cout << "Trying (2) with delay " << delay << "us\n";
+
+ ASSERT_FALSE(throttle.get(throttle_max / 2));
+ ASSERT_FALSE(throttle.get_or_fail(throttle_max));
+
+ Thread_get t(throttle, throttle_max);
+ t.create();
+ usleep(delay);
+
+ Thread_get u(throttle, 1);
+ u.create();
+ usleep(delay);
+
+ throttle.put(throttle_max / 2);
+
+ t.join();
+ u.join();
+
+ if (!(waited = t.waited && u.waited))
+ delay *= 2;
+ } while(!waited);
+
+}
+
+TEST_F(ThrottleTest, get_or_fail) {
+ {
+ Throttle throttle(g_ceph_context, "throttle");
+
+ ASSERT_TRUE(throttle.get_or_fail(5));
+ ASSERT_TRUE(throttle.get_or_fail(5));
+ }
+
+ {
+ int64_t throttle_max = 10;
+ Throttle throttle(g_ceph_context, "throttle", throttle_max);
+
+ ASSERT_TRUE(throttle.get_or_fail(throttle_max));
+ ASSERT_EQ(throttle.put(throttle_max), 0);
+
+ ASSERT_TRUE(throttle.get_or_fail(throttle_max * 2));
+ ASSERT_FALSE(throttle.get_or_fail(1));
+ ASSERT_FALSE(throttle.get_or_fail(throttle_max * 2));
+ ASSERT_EQ(throttle.put(throttle_max * 2), 0);
+
+ ASSERT_TRUE(throttle.get_or_fail(throttle_max));
+ ASSERT_FALSE(throttle.get_or_fail(1));
+ ASSERT_EQ(throttle.put(throttle_max), 0);
+ }
+}
+
+TEST_F(ThrottleTest, wait) {
+ int64_t throttle_max = 10;
+ Throttle throttle(g_ceph_context, "throttle", throttle_max);
+
+ useconds_t delay = 1;
+
+ bool waited;
+
+ do {
+ cout << "Trying (3) with delay " << delay << "us\n";
+
+ ASSERT_FALSE(throttle.get(throttle_max / 2));
+ ASSERT_FALSE(throttle.get_or_fail(throttle_max));
+
+ Thread_get t(throttle, throttle_max);
+ t.create();
+ usleep(delay);
+
+ //
+ // Throttle::_reset_max(int64_t m) used to contain a test
+ // that blocked the following statement, only if
+ // the argument was greater than throttle_max.
+ // Although a value lower than throttle_max would cover
+ // the same code in _reset_max, the throttle_max * 100
+ // value is left here to demonstrate that the problem
+ // has been solved.
+ //
+ throttle.wait(throttle_max * 100);
+ usleep(delay);
+ ASSERT_EQ(throttle.get_current(), throttle_max / 2);
+
+
+ t.join();
+
+ if (!(waited = t.waited)) {
+ delay *= 2;
+ // undo the changes we made
+ throttle.put(throttle_max / 2);
+ throttle.wait(throttle_max);
+ }
+ } while(!waited);
+}
+
+TEST_F(ThrottleTest, destructor) {
+ Thread_get *t;
+ {
+ int64_t throttle_max = 10;
+ Throttle *throttle = new Throttle(g_ceph_context, "throttle", throttle_max);
+
+ ASSERT_FALSE(throttle->get(5));
+
+ t = new Thread_get(*throttle, 7);
+ t->create();
+ bool blocked;
+ useconds_t delay = 1;
+ do {
+ usleep(delay);
+ if (throttle->get_or_fail(1)) {
+ throttle->put(1);
+ blocked = false;
+ } else {
+ blocked = true;
+ }
+ delay *= 2;
+ } while(!blocked);
+ delete throttle;
+ }
+
+ { //
+ // The thread is left hanging, otherwise it will abort().
+ // Deleting the Throttle on which it is waiting creates a
+ // inconsistency that will be detected: the Throttle object that
+ // it references no longer exists.
+ //
+ pthread_t id = t->get_thread_id();
+ ASSERT_EQ(pthread_kill(id, 0), 0);
+ delete t;
+ ASSERT_EQ(pthread_kill(id, 0), 0);
+ }
+}
+
+int main(int argc, char **argv) {
+ vector<const char*> args;
+ argv_to_vec(argc, (const char **)argv, args);
+
+ global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
+ common_init_finish(g_ceph_context);
+
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
+
+// Local Variables:
+// compile-command: "cd ../.. ; make unittest_throttle ; ./unittest_throttle # --gtest_filter=ThrottleTest.destructor --log-to-stderr=true --debug-filestore=20"
+// End: