summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-06-13 15:17:05 -0700
committerSage Weil <sage@inktank.com>2013-06-13 15:17:05 -0700
commit392e86fbfffb95a0cf492f28756d71f29ed0d851 (patch)
tree1145786b2a25d3626c75a5c53fdf3bbc6e5705cc
parent51dae8ad7ce239f75bc274e3585281c5066adee7 (diff)
parent9a7ed0b3f8df5bd74133f216bad61ae71eab0816 (diff)
downloadceph-392e86fbfffb95a0cf492f28756d71f29ed0d851.tar.gz
Merge remote-tracking branch 'gh/next'
-rw-r--r--README5
-rwxr-xr-xqa/workunits/mon/crush_ops.sh5
-rwxr-xr-xsrc/ceph-disk2
-rwxr-xr-xsrc/ceph.in158
-rw-r--r--src/crush/CrushWrapper.cc8
-rw-r--r--src/crush/CrushWrapper.h2
-rw-r--r--src/crushtool.cc8
-rw-r--r--src/include/rados/librados.h30
-rw-r--r--src/librados/RadosClient.cc43
-rw-r--r--src/librados/RadosClient.h6
-rw-r--r--src/librados/librados.cc39
-rw-r--r--src/mon/MonCap.cc4
-rw-r--r--src/mon/MonClient.cc90
-rw-r--r--src/mon/MonClient.h19
-rw-r--r--src/mon/MonCommands.h25
-rw-r--r--src/mon/MonmapMonitor.cc48
-rw-r--r--src/mon/OSDMonitor.cc60
-rw-r--r--src/pybind/rados.py21
-rwxr-xr-xsrc/vstart.sh2
19 files changed, 387 insertions, 188 deletions
diff --git a/README b/README
index d8f03f52eb4..2ac68504d87 100644
--- a/README
+++ b/README
@@ -102,8 +102,10 @@ To build the source code, you must install the following:
- automake
- autoconf
+- pkg-config
- gcc
- g++
+- make
- libboost-dev
- libedit-dev
- libssl-dev
@@ -123,7 +125,8 @@ To build the source code, you must install the following:
- libexpat1-dev
- libleveldb-dev
- libsnappy-dev
+- libcurl4-gnutls-dev
For example:
- $ apt-get install automake autoconf gcc g++ libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev libboost-program-options-dev libboost-thread-dev libexpat1-dev libleveldb-dev libsnappy-dev
+ $ apt-get install automake autoconf pkg-config gcc g++ make libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev libboost-program-options-dev libboost-thread-dev libexpat1-dev libleveldb-dev libsnappy-dev libcurl4-gnutls-dev
diff --git a/qa/workunits/mon/crush_ops.sh b/qa/workunits/mon/crush_ops.sh
index 748d00bc363..4f66e552153 100755
--- a/qa/workunits/mon/crush_ops.sh
+++ b/qa/workunits/mon/crush_ops.sh
@@ -27,10 +27,15 @@ ceph osd crush add-bucket foo root
o1=`ceph osd create`
o2=`ceph osd create`
ceph osd crush add $o1 1 host=host1 root=foo
+ceph osd crush add $o1 1 host=host1 root=foo # idempotent
ceph osd crush add $o2 1 host=host2 root=foo
+ceph osd crush add $o2 1 host=host2 root=foo # idempotent
ceph osd crush add-bucket bar root
+ceph osd crush add-bucket bar root # idempotent
ceph osd crush link host1 root=bar
+ceph osd crush link host1 root=bar # idempotent
ceph osd crush link host2 root=bar
+ceph osd crush link host2 root=bar # idempotent
ceph osd tree | grep -c osd.$o1 | grep -q 2
ceph osd tree | grep -c host1 | grep -q 2
diff --git a/src/ceph-disk b/src/ceph-disk
index 6c1b3703847..0389b5ce55b 100755
--- a/src/ceph-disk
+++ b/src/ceph-disk
@@ -1085,7 +1085,7 @@ def main_prepare(args):
args.cluster_uuid = get_fsid(cluster=args.cluster)
if args.cluster_uuid is None:
raise Error(
- 'must have fsid in config or pass --cluster--uuid=',
+ 'must have fsid in config or pass --cluster-uuid=',
)
if args.fs_type is None:
diff --git a/src/ceph.in b/src/ceph.in
index 857197185f4..7d6c62a7303 100755
--- a/src/ceph.in
+++ b/src/ceph.in
@@ -345,8 +345,9 @@ class CephPgid(CephArgtype):
class CephName(CephArgtype):
"""
- Name, or type.id, where type is osd|mon|client|mds, and id is a base10 int,
- or just id.
+ Name (type.id) where:
+ type is osd|mon|client|mds
+ id is a base10 int, if type == osd, or a string otherwise
Also accept '*'
"""
@@ -357,16 +358,17 @@ class CephName(CephArgtype):
self.nameid = None
return True
if s.find('.') == -1:
- i = s
+ raise ArgumentFormat('CephName: no . in {0}'.format(s))
else:
t, i = s.split('.')
if not t in ('osd', 'mon', 'client', 'mds'):
raise ArgumentValid('unknown type ' + self.t)
if t == 'osd':
- try:
- i = int(i)
- except:
- raise ArgumentFormat('osd id ' + i + ' not integer')
+ if i != '*':
+ try:
+ i = int(i)
+ except:
+ raise ArgumentFormat('osd id ' + i + ' not integer')
self.nametype = t
self.val = s
self.nameid = i
@@ -375,6 +377,36 @@ class CephName(CephArgtype):
def __str__(self):
return '<name (type.id)>'
+class CephOsdName(CephArgtype):
+ """
+ Like CephName, but specific to osds: allow <id> alone
+
+ osd.<id>, or <id>, or *, where id is a base10 int
+ """
+ def valid(self, s, partial=False):
+ if s == '*':
+ self.val = s
+ self.nametype = None
+ self.nameid = None
+ return True
+ if s.find('.') != -1:
+ t, i = s.split('.')
+ else:
+ t = 'osd'
+ i = s
+ if t != 'osd':
+ raise ArgumentValid('unknown type ' + self.t)
+ try:
+ i = int(i)
+ except:
+ raise ArgumentFormat('osd id ' + i + ' not integer')
+ self.nametype = t
+ self.nameid = i
+ self.val = i
+ return True
+
+ def __str__(self):
+ return '<osdname (id|osd.id)>'
class CephChoices(CephArgtype):
"""
@@ -778,7 +810,7 @@ def osdids():
ret, outbuf, outs = json_command(prefix='osd ls')
if ret:
raise RuntimeError('Can\'t contact mon for osd list')
- return [i for i in outbuf.split('\n')]
+ return [i for i in outbuf.split('\n') if i != '']
def monids():
ret, outbuf, outs = json_command(prefix='mon dump',
@@ -1107,8 +1139,10 @@ def send_command(target=('mon', ''), cmd=[], inbuf='', timeout=0):
if verbose:
print >> sys.stderr, '{0} to {1}'.\
format(cmd, target[0])
- ret, outbuf, outs = cluster_handle.mon_command(cmd, inbuf,
- timeout)
+ if target[1] == '':
+ ret, outbuf, outs = cluster_handle.mon_command(cmd, inbuf, timeout)
+ else:
+ ret, outbuf, outs = cluster_handle.mon_command(cmd, inbuf, timeout, target[1])
except Exception as e:
raise RuntimeError('"{0}": exception {1}'.format(cmd, e))
@@ -1266,7 +1300,7 @@ def find_cmd_target(childargs):
if len(valid_dict) == 2:
name = CephName()
name.valid(valid_dict['target'])
- return 'osd', name.nameid
+ return name.nametype, name.nameid
sig = parse_funcsig(['pg', {'name':'pgid','type':'CephPgid'}])
valid_dict = validate(childargs, sig, partial=True);
@@ -1508,56 +1542,76 @@ def main():
# of the form 'cmdNNN' followed by an array of argument descriptors)
# as part of the validated argument JSON object
- ret, outbuf, outs = json_command(target=target,
- prefix='get_command_descriptions')
- if ret == -errno.EINVAL:
- # send command to old monitor or OSD
- if verbose:
- print '{0} to old {1}'.format(' '.join(childargs), target[0])
- ret, outbuf, outs = send_command(target, childargs, inbuf)
- # combine nonerror outbuf and outs; either may have cmd output
- if ret == 0:
- outbuf += outs
- # clear outs so generic code below doesn't print it to stderr
- outs = ''
- elif ret:
+ targets = [target]
+
+ if target[1] == '*':
+ targets = [(target[0], o) for o in osdids()]
+
+ final_ret = 0
+ for target in targets:
+ ret, outbuf, outs = json_command(target=target,
+ prefix='get_command_descriptions')
+ if ret == -errno.EINVAL:
+ # send command to old monitor or OSD
+ if verbose:
+ print '{0} to old {1}'.format(' '.join(childargs), target[0])
+ ret, outbuf, outs = send_command(target, childargs, inbuf)
+ # combine nonerror outbuf and outs; either may have cmd output
+ if ret == 0:
+ outbuf += outs
+ # clear outs so generic code below doesn't print it to stderr
+ outs = ''
+ elif ret:
+ if ret < 0:
+ ret = -ret
+ print >> sys.stderr, \
+ 'Problem getting command descriptions from {0}, {1}'.\
+ format(target, errno.errorcode[ret])
+ else:
+ sigdict = parse_json_funcsigs(outbuf)
+
+ if parsed_args.completion:
+ return complete(sigdict, childargs, target)
+
+ ret, outbuf, outs = new_style_command(parsed_args, childargs, target,
+ sigdict, inbuf, verbose)
+
if ret < 0:
ret = -ret
- print >> sys.stderr, \
- 'Problem getting command descriptions from {0}, {1}'.\
- format(target, errno.errorcode[ret])
- return ret
- else:
- sigdict = parse_json_funcsigs(outbuf)
+ if len(targets) > 1:
+ sys.stderr.write('{0}.{1}: '.format(*target))
+ print >> sys.stderr, 'Error {0}: {1}'.format(errno.errorcode[ret], outs)
+ if len(targets) > 1:
+ final_ret = ret
+ else:
+ return ret
- if parsed_args.completion:
- return complete(sigdict, childargs, target)
+ # this assumes outs never has useful command output, only status
+ if outs:
+ print >> sys.stderr, outs
- ret, outbuf, outs = new_style_command(parsed_args, childargs, target,
- sigdict, inbuf, verbose)
+ if (parsed_args.output_file):
+ outf.write(outbuf)
+ else:
+ # hack: old code printed status line before many json outputs
+ # (osd dump, etc.) that consumers know to ignore. Add blank line
+ # to satisfy consumers that skip the first line, but not annoy
+ # consumers that don't.
+ if parsed_args.output_format and \
+ parsed_args.output_format.startswith('json'):
+ sys.stdout.write('\n');
- if ret < 0:
- ret = -ret
- print >> sys.stderr, 'Error {0}: {1}'.format(errno.errorcode[ret], outs)
- return ret
+ # prefix output with target, if there was a wildcard used
+ if len(targets) > 1:
+ sys.stdout.write('{0}.{1}: '.format(*target))
- # this assumes outs never has useful command output, only status
- if outs:
- print >> sys.stderr, outs
+ sys.stdout.write(outbuf)
if (parsed_args.output_file):
- outf.write(outbuf)
outf.close()
- else:
- # hack: old code printed status line before many json outputs
- # (osd dump, etc.) that consumers know to ignore. Add blank line
- # to satisfy consumers that skip the first line, but not annoy
- # consumers that don't.
- if parsed_args.output_format and \
- parsed_args.output_format.startswith('json'):
- sys.stdout.write('\n');
-
- sys.stdout.write(outbuf)
+
+ if final_ret:
+ return ret
return 0
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index f0ae6dd9488..8a9addfb6c2 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -255,14 +255,14 @@ int CrushWrapper::get_full_location_ordered(int id, vector<pair<string, string>
string high_type_name = type_map[high_type];
path.push_back(parent_coord);
- parent_id = get_item_id( (parent_coord.second).c_str() );
+ parent_id = get_item_id(parent_coord.second);
while (parent_coord.first != high_type_name) {
parent_coord = get_immediate_parent(parent_id);
path.push_back(parent_coord);
if ( parent_coord.first != high_type_name ){
- parent_id = get_item_id( (parent_coord.second).c_str() );
+ parent_id = get_item_id(parent_coord.second);
}
}
@@ -291,7 +291,7 @@ map<int, string> CrushWrapper::get_parent_hierarchy(int id)
high_type = (*it).first;
}
- parent_id = get_item_id((parent_coord.second).c_str());
+ parent_id = get_item_id(parent_coord.second);
while (type_counter < high_type) {
type_counter++;
@@ -300,7 +300,7 @@ map<int, string> CrushWrapper::get_parent_hierarchy(int id)
if (type_counter < high_type){
// get the coordinate information for the next parent
parent_coord = get_immediate_parent(parent_id);
- parent_id = get_item_id(parent_coord.second.c_str());
+ parent_id = get_item_id(parent_coord.second);
}
}
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index c123da3d9d5..f5a88c8bdd5 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -633,7 +633,7 @@ private:
pair<string, string> bucket_location = get_immediate_parent(item);
// get the id of the parent bucket
- int parent_id = get_item_id( (bucket_location.second).c_str() );
+ int parent_id = get_item_id(bucket_location.second);
// get the parent bucket
crush_bucket *parent_bucket = get_bucket(parent_id);
diff --git a/src/crushtool.cc b/src/crushtool.cc
index 054c4df8698..75c26c098b6 100644
--- a/src/crushtool.cc
+++ b/src/crushtool.cc
@@ -609,11 +609,11 @@ int main(int argc, const char **argv)
if (!reweight_name.empty()) {
cout << me << " reweighting item " << reweight_name << " to " << reweight_weight << std::endl;
int r;
- if (!crush.name_exists(reweight_name.c_str())) {
+ if (!crush.name_exists(reweight_name)) {
cerr << " name " << reweight_name << " dne" << std::endl;
r = -ENOENT;
} else {
- int item = crush.get_item_id(reweight_name.c_str());
+ int item = crush.get_item_id(reweight_name);
r = crush.adjust_item_weightf(g_ceph_context, item, reweight_weight);
}
if (r >= 0)
@@ -627,11 +627,11 @@ int main(int argc, const char **argv)
if (!remove_name.empty()) {
cout << me << " removing item " << remove_name << std::endl;
int r;
- if (!crush.name_exists(remove_name.c_str())) {
+ if (!crush.name_exists(remove_name)) {
cerr << " name " << remove_name << " dne" << std::endl;
r = -ENOENT;
} else {
- int remove_item = crush.get_item_id(remove_name.c_str());
+ int remove_item = crush.get_item_id(remove_name);
r = crush.remove_item(g_ceph_context, remove_item, false);
}
if (r == 0)
diff --git a/src/include/rados/librados.h b/src/include/rados/librados.h
index 69eeccb323b..eb1a30632b7 100644
--- a/src/include/rados/librados.h
+++ b/src/include/rados/librados.h
@@ -1712,7 +1712,7 @@ int rados_break_lock(rados_ioctx_t io, const char *o, const char *name,
* @note Takes command string in carefully-formatted JSON; must match
* defined commands, types, etc.
*
- * The result buffers are allocated on the heapt; the caller is
+ * The result buffers are allocated on the heap; the caller is
* expected to release that memory with rados_buffer_free(). The
* buffer and length pointers can all be NULL, in which case they are
* not filled in.
@@ -1733,6 +1733,34 @@ int rados_mon_command(rados_t cluster, const char **cmd, size_t cmdlen,
char **outs, size_t *outslen);
/**
+ * Send monitor command to a specific monitor.
+ *
+ * @note Takes command string in carefully-formatted JSON; must match
+ * defined commands, types, etc.
+ *
+ * The result buffers are allocated on the heap; the caller is
+ * expected to release that memory with rados_buffer_free(). The
+ * buffer and length pointers can all be NULL, in which case they are
+ * not filled in.
+ *
+ * @param cluster cluster handle
+ * @param name target monitor's name
+ * @param cmd an array of char *'s representing the command
+ * @param cmdlen count of valid entries in cmd
+ * @param inbuf any bulk input data (crush map, etc.)
+ * @param outbuf double pointer to output buffer
+ * @param outbuflen pointer to output buffer length
+ * @param outs double pointer to status string
+ * @param outslen pointer to status string length
+ * @returns 0 on success, negative error code on failure
+ */
+int rados_mon_command_target(rados_t cluster, const char *name,
+ const char **cmd, size_t cmdlen,
+ const char *inbuf, size_t inbuflen,
+ char **outbuf, size_t *outbuflen,
+ char **outs, size_t *outslen);
+
+/**
* free a rados-allocated buffer
*
* Release memory allocated by librados calls like rados_mon_command().
diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc
index a2b54abf058..f5dccaffc09 100644
--- a/src/librados/RadosClient.cc
+++ b/src/librados/RadosClient.cc
@@ -201,11 +201,6 @@ int librados::RadosClient::connect()
objecter->init_locked();
monclient.renew_subs();
- while (osdmap.get_epoch() == 0) {
- ldout(cct, 1) << "waiting for osdmap" << dendl;
- cond.Wait(lock);
- }
-
finisher.start();
state = CONNECTED;
@@ -583,6 +578,44 @@ int librados::RadosClient::mon_command(const vector<string>& cmd,
return rval;
}
+int librados::RadosClient::mon_command(int rank, const vector<string>& cmd,
+ bufferlist &inbl,
+ bufferlist *outbl, string *outs)
+{
+ Mutex mylock("RadosClient::mon_command::mylock");
+ Cond cond;
+ bool done;
+ int rval;
+ lock.Lock();
+ monclient.start_mon_command(rank, cmd, inbl, outbl, outs,
+ new C_SafeCond(&mylock, &cond, &done, &rval));
+ lock.Unlock();
+ mylock.Lock();
+ while (!done)
+ cond.Wait(mylock);
+ mylock.Unlock();
+ return rval;
+}
+
+int librados::RadosClient::mon_command(string name, const vector<string>& cmd,
+ bufferlist &inbl,
+ bufferlist *outbl, string *outs)
+{
+ Mutex mylock("RadosClient::mon_command::mylock");
+ Cond cond;
+ bool done;
+ int rval;
+ lock.Lock();
+ monclient.start_mon_command(name, cmd, inbl, outbl, outs,
+ new C_SafeCond(&mylock, &cond, &done, &rval));
+ lock.Unlock();
+ mylock.Lock();
+ while (!done)
+ cond.Wait(mylock);
+ mylock.Unlock();
+ return rval;
+}
+
int librados::RadosClient::osd_command(int osd, vector<string>& cmd,
bufferlist& inbl,
bufferlist *poutbl, string *prs)
diff --git a/src/librados/RadosClient.h b/src/librados/RadosClient.h
index 9374553b0ad..337beff5750 100644
--- a/src/librados/RadosClient.h
+++ b/src/librados/RadosClient.h
@@ -108,6 +108,12 @@ public:
void watch_notify(MWatchNotify *m);
int mon_command(const vector<string>& cmd, bufferlist &inbl,
bufferlist *outbl, string *outs);
+ int mon_command(int rank,
+ const vector<string>& cmd, bufferlist &inbl,
+ bufferlist *outbl, string *outs);
+ int mon_command(string name,
+ const vector<string>& cmd, bufferlist &inbl,
+ bufferlist *outbl, string *outs);
int osd_command(int osd, vector<string>& cmd, bufferlist& inbl,
bufferlist *poutbl, string *prs);
int pg_command(pg_t pgid, vector<string>& cmd, bufferlist& inbl,
diff --git a/src/librados/librados.cc b/src/librados/librados.cc
index 1dda70828fd..43c2584c390 100644
--- a/src/librados/librados.cc
+++ b/src/librados/librados.cc
@@ -1857,6 +1857,45 @@ extern "C" int rados_mon_command(rados_t cluster, const char **cmd,
return ret;
}
+extern "C" int rados_mon_command_target(rados_t cluster, const char *name,
+ const char **cmd,
+ size_t cmdlen,
+ const char *inbuf, size_t inbuflen,
+ char **outbuf, size_t *outbuflen,
+ char **outs, size_t *outslen)
+{
+ librados::RadosClient *client = (librados::RadosClient *)cluster;
+ bufferlist inbl;
+ bufferlist outbl;
+ string outstring;
+ vector<string> cmdvec;
+
+ // is this a numeric id?
+ char *endptr;
+ errno = 0;
+ long rank = strtol(name, &endptr, 10);
+ if ((errno == ERANGE && (rank == LONG_MAX || rank == LONG_MIN)) ||
+ (errno != 0 && rank == 0) ||
+ endptr == name || // no digits
+ *endptr != '\0') { // extra characters
+ rank = -1;
+ }
+
+ for (size_t i = 0; i < cmdlen; i++)
+ cmdvec.push_back(cmd[i]);
+
+ inbl.append(inbuf, inbuflen);
+ int ret;
+ if (rank >= 0)
+ ret = client->mon_command(rank, cmdvec, inbl, &outbl, &outstring);
+ else
+ ret = client->mon_command(name, cmdvec, inbl, &outbl, &outstring);
+
+ do_out_buffer(outbl, outbuf, outbuflen);
+ do_out_buffer(outstring, outs, outslen);
+ return ret;
+}
+
extern "C" int rados_osd_command(rados_t cluster, int osdid, const char **cmd,
size_t cmdlen,
const char *inbuf, size_t inbuflen,
diff --git a/src/mon/MonCap.cc b/src/mon/MonCap.cc
index bf8eb6fb79d..6f1055091e0 100644
--- a/src/mon/MonCap.cc
+++ b/src/mon/MonCap.cc
@@ -143,6 +143,8 @@ void MonCapGrant::expand_profile(entity_name_t name) const
profile_grants.push_back(MonCapGrant("config-key delete", "key", StringConstraint("", prefix)));
}
if (profile == "bootstrap-osd") {
+ profile_grants.push_back(MonCapGrant("mon", MON_CAP_R)); // read monmap
+ profile_grants.push_back(MonCapGrant("osd", MON_CAP_R)); // read osdmap
profile_grants.push_back(MonCapGrant("mon getmap"));
profile_grants.push_back(MonCapGrant("osd create"));
profile_grants.push_back(MonCapGrant("osd crush set")); // FIXME: constraint this further?
@@ -152,6 +154,8 @@ void MonCapGrant::expand_profile(entity_name_t name) const
profile_grants.back().command_args["caps_osd"] = StringConstraint("allow *", "");
}
if (profile == "bootstrap-mds") {
+ profile_grants.push_back(MonCapGrant("mon", MON_CAP_R)); // read monmap
+ profile_grants.push_back(MonCapGrant("osd", MON_CAP_R)); // read osdmap
profile_grants.push_back(MonCapGrant("mon getmap"));
profile_grants.push_back(MonCapGrant("auth get-or-create")); // FIXME: this can expose other mds keys
profile_grants.back().command_args["name"] = StringConstraint("", "mds.");
diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc
index 181d0688311..c0d2b5183c5 100644
--- a/src/mon/MonClient.cc
+++ b/src/mon/MonClient.cc
@@ -479,11 +479,18 @@ string MonClient::_pick_random_mon()
}
}
-void MonClient::_pick_new_mon()
+void MonClient::_reopen_session(int rank, string name)
{
assert(monc_lock.is_locked());
+ ldout(cct, 10) << "_reopen_session rank " << rank << " name " << name << dendl;
- cur_mon = _pick_random_mon();
+ if (rank < 0 && name.length() == 0) {
+ cur_mon = _pick_random_mon();
+ } else if (name.length()) {
+ cur_mon = name;
+ } else {
+ cur_mon = monmap.get_name(rank);
+ }
if (cur_con) {
messenger->mark_down(cur_con);
@@ -491,18 +498,9 @@ void MonClient::_pick_new_mon()
}
cur_con = messenger->get_connection(monmap.get_inst(cur_mon));
- ldout(cct, 10) << "_pick_new_mon picked mon." << cur_mon << " con " << cur_con
+ ldout(cct, 10) << "picked mon." << cur_mon << " con " << cur_con
<< " addr " << cur_con->get_peer_addr()
<< dendl;
-}
-
-
-void MonClient::_reopen_session()
-{
- assert(monc_lock.is_locked());
- ldout(cct, 10) << "_reopen_session" << dendl;
-
- _pick_new_mon();
// throw out old queued messages
while (!waiting_for_session.empty()) {
@@ -724,6 +722,37 @@ int MonClient::wait_auth_rotating(double timeout)
void MonClient::_send_command(MonCommand *r)
{
version_t last_seen_version = 0;
+
+ if (r->target_rank >= 0 &&
+ r->target_rank != monmap.get_rank(cur_mon)) {
+ ldout(cct, 10) << "_send_command " << r->tid << " " << r->cmd
+ << " wants rank " << r->target_rank
+ << ", reopening session"
+ << dendl;
+ if (r->target_rank >= (int)monmap.size()) {
+ ldout(cct, 10) << " target " << r->target_rank << " >= max mon " << monmap.size() << dendl;
+ _finish_command(r, -ENOENT, "mon rank dne");
+ return;
+ }
+ _reopen_session(r->target_rank, string());
+ return;
+ }
+
+ if (r->target_name.length() &&
+ r->target_name != cur_mon) {
+ ldout(cct, 10) << "_send_command " << r->tid << " " << r->cmd
+ << " wants mon " << r->target_name
+ << ", reopening session"
+ << dendl;
+ if (!monmap.contains(r->target_name)) {
+ ldout(cct, 10) << " target " << r->target_name << " not present in monmap" << dendl;
+ _finish_command(r, -ENOENT, "mon dne");
+ return;
+ }
+ _reopen_session(-1, r->target_name);
+ return;
+ }
+
ldout(cct, 10) << "_send_command " << r->tid << " " << r->cmd << dendl;
MMonCommand *m = new MMonCommand(monmap.fsid, last_seen_version);
m->set_tid(r->tid);
@@ -798,6 +827,43 @@ int MonClient::start_mon_command(const vector<string>& cmd, bufferlist& inbl,
return 0;
}
+int MonClient::start_mon_command(string name,
+ const vector<string>& cmd, bufferlist& inbl,
+ bufferlist *outbl, string *outs,
+ Context *onfinish)
+{
+ Mutex::Locker l(monc_lock);
+ MonCommand *r = new MonCommand(++last_mon_command_tid);
+ r->target_name = name;
+ r->cmd = cmd;
+ r->inbl = inbl;
+ r->poutbl = outbl;
+ r->prs = outs;
+ r->onfinish = onfinish;
+ mon_commands[r->tid] = r;
+ _send_command(r);
+ // can't fail
+ return 0;
+}
+
+int MonClient::start_mon_command(int rank,
+ const vector<string>& cmd, bufferlist& inbl,
+ bufferlist *outbl, string *outs,
+ Context *onfinish)
+{
+ Mutex::Locker l(monc_lock);
+ MonCommand *r = new MonCommand(++last_mon_command_tid);
+ r->target_rank = rank;
+ r->cmd = cmd;
+ r->inbl = inbl;
+ r->poutbl = outbl;
+ r->prs = outs;
+ r->onfinish = onfinish;
+ mon_commands[r->tid] = r;
+ _send_command(r);
+ return 0;
+}
+
// ---------
void MonClient::get_version(string map, version_t *newest, version_t *oldest, Context *onfinish)
diff --git a/src/mon/MonClient.h b/src/mon/MonClient.h
index 04e6b0e44b5..7d4a0548f9d 100644
--- a/src/mon/MonClient.h
+++ b/src/mon/MonClient.h
@@ -129,8 +129,10 @@ private:
string _pick_random_mon();
void _finish_hunting();
- void _reopen_session();
- void _pick_new_mon();
+ void _reopen_session(int rank, string name);
+ void _reopen_session() {
+ _reopen_session(-1, string());
+ }
void _send_mon_message(Message *m, bool force=false);
public:
@@ -270,6 +272,8 @@ public:
private:
uint64_t last_mon_command_tid;
struct MonCommand {
+ string target_name;
+ int target_rank;
uint64_t tid;
vector<string> cmd;
bufferlist inbl;
@@ -279,7 +283,8 @@ private:
Context *onfinish;
MonCommand(uint64_t t)
- : tid(t),
+ : target_rank(-1),
+ tid(t),
poutbl(NULL), prs(NULL), prval(NULL), onfinish(NULL)
{}
};
@@ -294,6 +299,14 @@ public:
int start_mon_command(const vector<string>& cmd, bufferlist& inbl,
bufferlist *outbl, string *outs,
Context *onfinish);
+ int start_mon_command(int mon_rank,
+ const vector<string>& cmd, bufferlist& inbl,
+ bufferlist *outbl, string *outs,
+ Context *onfinish);
+ int start_mon_command(const string mon_name, ///< mon name, with mon. prefix
+ const vector<string>& cmd, bufferlist& inbl,
+ bufferlist *outbl, string *outs,
+ Context *onfinish);
// version requests
public:
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 4e64a0a70d4..2c805362255 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -55,6 +55,7 @@
* CephObjectname: Another plainold string
* CephPgid: n.xxx where n is an int > 0, xxx is a hex number > 0
* CephName: daemon name, '*' or '<type>.<id>' (id must be int for type osd)
+ * CephOsdName: osd name, '*' or '<id>' or 'osd.<id>' (id must be int)
* CephChoices: strings="foo|bar" means this param can be either
* CephFilepath: openable file
* CephFragment: cephfs 'fragID': val/bits, val in hex 0xnnn, bits in dec
@@ -259,10 +260,6 @@ COMMAND("mon stat", "summarize monitor status")
COMMAND("mon getmap " \
"name=epoch,type=CephInt,range=0,req=false", \
"get monmap")
-COMMAND("mon tell " \
- "name=who,type=CephString " \
- "name=args,type=CephString,n=N", \
- "send command to specific monitor(s)")
COMMAND("mon add " \
"name=name,type=CephString " \
"name=addr,type=CephIPAddr", \
@@ -289,10 +286,6 @@ COMMAND("osd getcrushmap " \
"name=epoch,type=CephInt,range=0,req=false", \
"get CRUSH map")
COMMAND("osd getmaxosd", "show largest OSD id")
-COMMAND("osd tell " \
- "name=who,type=CephString " \
- "name=args,type=CephString,n=N", \
- "send command to particular osd")
COMMAND("osd find " \
"name=id,type=CephInt,range=0", \
"find osd <id> in the CRUSH map and show its location")
@@ -324,24 +317,22 @@ COMMAND("osd crush add-bucket " \
"name=type,type=CephString", \
"add no-parent (probably root) crush bucket <name> of type <type>")
COMMAND("osd crush set " \
- "name=id,type=CephInt,range=0 " \
- "name=name,type=CephName,req=false " \
+ "name=id,type=CephOsdName " \
"name=weight,type=CephFloat,range=0.0 " \
"name=args,type=CephString,n=N", \
- "set crushmap entry for <id> to <weight> with location <args>")
+ "set crushmap entry for <name> to <weight> with location <args>")
COMMAND("osd crush add " \
- "name=id,type=CephInt,range=0 " \
- "name=name,type=CephName,req=false " \
+ "name=id,type=CephOsdName " \
"name=weight,type=CephFloat,range=0.0 " \
"name=args,type=CephString,n=N", \
- "add crushmap entry for <id> with <weight> and location <args>")
+ "add crushmap entry for <name> with <weight> and location <args>")
COMMAND("osd crush create-or-move " \
- "name=id,type=CephInt,range=0 " \
+ "name=id,type=CephOsdName " \
"name=weight,type=CephFloat,range=0.0 " \
"name=args,type=CephString,n=N", \
- "create entry or move existing entry for <id> <weight> at/to location <args>")
+ "create entry or move existing entry for <name> <weight> at/to location <args>")
COMMAND("osd crush move " \
- "name=name,type=CephString " \
+ "name=id,type=CephOsdName " \
"name=args,type=CephString,n=N", \
"move existing entry for <name> to location <args>")
COMMAND("osd crush link " \
diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc
index 90839b7706f..cbcefae104f 100644
--- a/src/mon/MonmapMonitor.cc
+++ b/src/mon/MonmapMonitor.cc
@@ -264,60 +264,12 @@ bool MonmapMonitor::preprocess_command(MMonCommand *m)
if (p != mon->monmap)
delete p;
}
- } else if (prefix == "mon tell") {
- dout(20) << "got tell: " << m->cmd << dendl;
- string whostr;
- cmd_getval(g_ceph_context, cmdmap, "who", whostr);
- vector<string> argvec;
- cmd_getval(g_ceph_context, cmdmap, "args", argvec);
-
- if (whostr == "*") { // send to all mons and do myself
- for (unsigned i = 0; i < mon->monmap->size(); ++i) {
- MMonCommand *newm = new MMonCommand(m->fsid, m->version);
- newm->cmd.insert(newm->cmd.begin(), argvec.begin(), argvec.end());
- mon->messenger->send_message(newm, mon->monmap->get_inst(i));
- }
- ss << "bcast to all mons";
- r = 0;
- } else {
- // find target. Ignore error from parsing long as we probably
- // have a string instead
- long who = parse_pos_long(whostr.c_str(), NULL);
- EntityName name;
- if (who < 0) {
-
- // not numeric; try as name or id, and see if in monmap
- if (!name.from_str(whostr))
- name.set("mon", whostr);
-
- if (mon->monmap->contains(name.get_id())) {
- who = mon->monmap->get_rank(name.get_id());
- } else {
- ss << "bad mon name \"" << whostr << "\"";
- r = -ENOENT;
- goto out;
- }
- } else if (who >= (long)mon->monmap->size()) {
- ss << "mon." << whostr << " does not exist";
- r = -ENOENT;
- goto out;
- }
-
- // send to target, or handle if it's me
- stringstream ss;
- MMonCommand *newm = new MMonCommand(m->fsid, m->version);
- newm->cmd.insert(newm->cmd.begin(), argvec.begin(), argvec.end());
- mon->messenger->send_message(newm, mon->monmap->get_inst(who));
- ss << "fw to mon." << whostr;
- r = 0;
- }
}
else if (prefix == "mon add")
return false;
else if (prefix == "mon remove")
return false;
- out:
if (r != -1) {
string rs;
getline(ss, rs);
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index d6e2e1cb560..d785cf626c3 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1697,7 +1697,7 @@ void OSDMonitor::tick()
// is this an entire large subtree down?
if (g_conf->mon_osd_down_out_subtree_limit.length()) {
- int type = osdmap.crush->get_type_id(g_conf->mon_osd_down_out_subtree_limit.c_str());
+ int type = osdmap.crush->get_type_id(g_conf->mon_osd_down_out_subtree_limit);
if (type > 0) {
if (osdmap.containing_subtree_is_down(g_ceph_context, o, type, &down_cache)) {
dout(10) << "tick entire containing " << g_conf->mon_osd_down_out_subtree_limit
@@ -2482,7 +2482,13 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
int64_t id;
+ string name;
bool osdid_present = cmd_getval(g_ceph_context, cmdmap, "id", id);
+ if (osdid_present) {
+ ostringstream oss;
+ oss << "osd." << id;
+ name = oss.str();
+ }
if (prefix == "osd setcrushmap" ||
(prefix == "osd crush set" && !osdid_present)) {
@@ -2544,23 +2550,19 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
ss << "added bucket " << name << " type " << typestr
<< " to crush map";
goto update;
- } else if (osdid_present &&
+ } else if (osdid_present &&
(prefix == "osd crush set" || prefix == "osd crush add")) {
do {
- // osd crush set <osd-id> [<osd.* name>] <weight> <loc1> [<loc2> ...]
- // osd crush add <osd-id> [<osd.* name>] <weight> <loc1> [<loc2> ...]
+ // <OsdName> is 'osd.<id>' or '<id>', passed as int64_t id
+ // osd crush set <OsdName> <weight> <loc1> [<loc2> ...]
+ // osd crush add <OsdName> <weight> <loc1> [<loc2> ...]
+
if (!osdmap.exists(id)) {
err = -ENOENT;
- ss << "osd." << id << " does not exist. create it before updating the crush map";
+ ss << name << " does not exist. create it before updating the crush map";
goto reply;
}
- string name;
- if (!cmd_getval(g_ceph_context, cmdmap, "name", name)) {
- // new usage; infer name
- name = "osd." + stringify(id);
- }
-
double weight;
cmd_getval(g_ceph_context, cmdmap, "weight", weight);
@@ -2577,7 +2579,8 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
_get_pending_crush(newcrush);
string action;
- if (prefix == "osd crush set") {
+ if (prefix == "osd crush set" ||
+ newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) {
action = "set";
err = newcrush.update_item(g_ceph_context, id, weight, name, loc);
} else {
@@ -2604,16 +2607,13 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
} else if (prefix == "osd crush create-or-move") {
do {
- // osd crush create-or-move <id> <initial_weight> <loc1> [<loc2> ...]
- int64_t id;
- cmd_getval(g_ceph_context, cmdmap, "id", id);
+ // osd crush create-or-move <OsdName> <initial_weight> <loc1> [<loc2> ...]
if (!osdmap.exists(id)) {
err = -ENOENT;
- ss << "osd." << id << " does not exist. create it before updating the crush map";
+ ss << name << " does not exist. create it before updating the crush map";
goto reply;
}
- string name = "osd." + stringify(id);
double weight;
cmd_getval(g_ceph_context, cmdmap, "weight", weight);
@@ -2623,7 +2623,7 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
map<string,string> loc;
parse_loc_map(argvec, &loc);
- dout(0) << "create-or-move crush item id " << id << " name '" << name << "' initial_weight " << weight
+ dout(0) << "create-or-move crush item name '" << name << "' initial_weight " << weight
<< " at location " << loc << dendl;
CrushWrapper newcrush;
@@ -2631,14 +2631,14 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
err = newcrush.create_or_move_item(g_ceph_context, id, weight, name, loc);
if (err == 0) {
- ss << "create-or-move updated item id " << id << " name '" << name << "' weight " << weight
+ ss << "create-or-move updated item name '" << name << "' weight " << weight
<< " at location " << loc << " to crush map";
break;
}
if (err > 0) {
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush);
- ss << "create-or-move updating item id " << id << " name '" << name << "' weight " << weight
+ ss << "create-or-move updating item name '" << name << "' weight " << weight
<< " at location " << loc << " to crush map";
getline(ss, rs);
wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_version()));
@@ -2649,8 +2649,6 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
} else if (prefix == "osd crush move") {
do {
// osd crush move <name> <loc1> [<loc2> ...]
- string name;
- cmd_getval(g_ceph_context, cmdmap, "name", name);
string args;
vector<string> argvec;
@@ -2662,12 +2660,12 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
CrushWrapper newcrush;
_get_pending_crush(newcrush);
- if (!newcrush.name_exists(name.c_str())) {
+ if (!newcrush.name_exists(name)) {
err = -ENOENT;
ss << "item " << name << " does not exist";
break;
}
- int id = newcrush.get_item_id(name.c_str());
+ int id = newcrush.get_item_id(name);
if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) {
err = newcrush.move_bucket(g_ceph_context, id, loc);
@@ -2699,12 +2697,12 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
CrushWrapper newcrush;
_get_pending_crush(newcrush);
- if (!newcrush.name_exists(name.c_str())) {
+ if (!newcrush.name_exists(name)) {
err = -ENOENT;
ss << "item " << name << " does not exist";
break;
}
- int id = newcrush.get_item_id(name.c_str());
+ int id = newcrush.get_item_id(name);
if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) {
err = newcrush.link_bucket(g_ceph_context, id, loc);
@@ -2732,12 +2730,12 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
string name;
cmd_getval(g_ceph_context, cmdmap, "name", name);
- if (!newcrush.name_exists(name.c_str())) {
+ if (!newcrush.name_exists(name)) {
err = 0;
ss << "device '" << name << "' does not appear in the crush map";
break;
}
- int id = newcrush.get_item_id(name.c_str());
+ int id = newcrush.get_item_id(name);
bool unlink_only = prefix == "osd crush unlink";
string ancestor_str;
if (cmd_getval(g_ceph_context, cmdmap, "ancestor", ancestor_str)) {
@@ -2776,13 +2774,13 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
string name;
cmd_getval(g_ceph_context, cmdmap, "name", name);
- if (!newcrush.name_exists(name.c_str())) {
+ if (!newcrush.name_exists(name)) {
err = -ENOENT;
ss << "device '" << name << "' does not appear in the crush map";
break;
}
- int id = newcrush.get_item_id(name.c_str());
+ int id = newcrush.get_item_id(name);
if (id < 0) {
ss << "device '" << name << "' is not a leaf in the crush map";
break;
@@ -3082,7 +3080,7 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
// osd already exists
err = 0;
ss << i;
- getline(ss, rs);
+ rdata.append(ss);
goto reply;
}
i = pending_inc.identify_osd(uuid);
diff --git a/src/pybind/rados.py b/src/pybind/rados.py
index 4a5ed961603..c4000465272 100644
--- a/src/pybind/rados.py
+++ b/src/pybind/rados.py
@@ -518,9 +518,9 @@ Rados object in state %s." % (self.state))
raise make_ex(ret, "error opening ioctx '%s'" % ioctx_name)
return Ioctx(ioctx_name, self.librados, ioctx)
- def mon_command(self, cmd, inbuf, timeout=0):
+ def mon_command(self, cmd, inbuf, timeout=0, target=None):
"""
- mon_command(cmd, inbuf, outbuf, outbuflen, outs, outslen)
+ mon_command[_target](cmd, inbuf, outbuf, outbuflen, outs, outslen)
returns (int ret, string outbuf, string outs)
"""
import sys
@@ -531,11 +531,18 @@ Rados object in state %s." % (self.state))
outslen = c_long()
cmdarr = (c_char_p * len(cmd))(*cmd)
- ret = run_in_thread(self.librados.rados_mon_command,
- (self.cluster, cmdarr, len(cmd),
- c_char_p(inbuf), len(inbuf),
- outbufp, byref(outbuflen), outsp, byref(outslen)),
- timeout)
+ if target:
+ ret = run_in_thread(self.librados.rados_mon_command_target,
+ (self.cluster, target, cmdarr, len(cmd),
+ c_char_p(inbuf), len(inbuf),
+ outbufp, byref(outbuflen), outsp, byref(outslen)),
+ timeout)
+ else:
+ ret = run_in_thread(self.librados.rados_mon_command,
+ (self.cluster, cmdarr, len(cmd),
+ c_char_p(inbuf), len(inbuf),
+ outbufp, byref(outbuflen), outsp, byref(outslen)),
+ timeout)
if ret == 0:
# copy returned memory (ctypes makes a copy, not a reference)
diff --git a/src/vstart.sh b/src/vstart.sh
index 27a750cbbeb..f179b0b1bf1 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -423,7 +423,7 @@ EOF
uuid=`uuidgen`
echo "add osd$osd $uuid"
$SUDO $CEPH_ADM osd create $uuid
- $SUDO $CEPH_ADM osd crush set $osd osd.$osd 1.0 host=localhost rack=localrack root=default
+ $SUDO $CEPH_ADM osd crush set osd.$osd 1.0 host=localhost rack=localrack root=default
$SUDO $CEPH_BIN/ceph-osd -i $osd $ARGS --mkfs --mkkey --osd-uuid $uuid
key_fn=dev/osd$osd/keyring