diff options
author | Sage Weil <sage@inktank.com> | 2013-06-13 15:17:05 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-06-13 15:17:05 -0700 |
commit | 392e86fbfffb95a0cf492f28756d71f29ed0d851 (patch) | |
tree | 1145786b2a25d3626c75a5c53fdf3bbc6e5705cc | |
parent | 51dae8ad7ce239f75bc274e3585281c5066adee7 (diff) | |
parent | 9a7ed0b3f8df5bd74133f216bad61ae71eab0816 (diff) | |
download | ceph-392e86fbfffb95a0cf492f28756d71f29ed0d851.tar.gz |
Merge remote-tracking branch 'gh/next'
-rw-r--r-- | README | 5 | ||||
-rwxr-xr-x | qa/workunits/mon/crush_ops.sh | 5 | ||||
-rwxr-xr-x | src/ceph-disk | 2 | ||||
-rwxr-xr-x | src/ceph.in | 158 | ||||
-rw-r--r-- | src/crush/CrushWrapper.cc | 8 | ||||
-rw-r--r-- | src/crush/CrushWrapper.h | 2 | ||||
-rw-r--r-- | src/crushtool.cc | 8 | ||||
-rw-r--r-- | src/include/rados/librados.h | 30 | ||||
-rw-r--r-- | src/librados/RadosClient.cc | 43 | ||||
-rw-r--r-- | src/librados/RadosClient.h | 6 | ||||
-rw-r--r-- | src/librados/librados.cc | 39 | ||||
-rw-r--r-- | src/mon/MonCap.cc | 4 | ||||
-rw-r--r-- | src/mon/MonClient.cc | 90 | ||||
-rw-r--r-- | src/mon/MonClient.h | 19 | ||||
-rw-r--r-- | src/mon/MonCommands.h | 25 | ||||
-rw-r--r-- | src/mon/MonmapMonitor.cc | 48 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 60 | ||||
-rw-r--r-- | src/pybind/rados.py | 21 | ||||
-rwxr-xr-x | src/vstart.sh | 2 |
19 files changed, 387 insertions, 188 deletions
@@ -102,8 +102,10 @@ To build the source code, you must install the following: - automake - autoconf +- pkg-config - gcc - g++ +- make - libboost-dev - libedit-dev - libssl-dev @@ -123,7 +125,8 @@ To build the source code, you must install the following: - libexpat1-dev - libleveldb-dev - libsnappy-dev +- libcurl4-gnutls-dev For example: - $ apt-get install automake autoconf gcc g++ libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev libboost-program-options-dev libboost-thread-dev libexpat1-dev libleveldb-dev libsnappy-dev + $ apt-get install automake autoconf pkg-config gcc g++ make libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev libboost-program-options-dev libboost-thread-dev libexpat1-dev libleveldb-dev libsnappy-dev libcurl4-gnutls-dev diff --git a/qa/workunits/mon/crush_ops.sh b/qa/workunits/mon/crush_ops.sh index 748d00bc363..4f66e552153 100755 --- a/qa/workunits/mon/crush_ops.sh +++ b/qa/workunits/mon/crush_ops.sh @@ -27,10 +27,15 @@ ceph osd crush add-bucket foo root o1=`ceph osd create` o2=`ceph osd create` ceph osd crush add $o1 1 host=host1 root=foo +ceph osd crush add $o1 1 host=host1 root=foo # idempotent ceph osd crush add $o2 1 host=host2 root=foo +ceph osd crush add $o2 1 host=host2 root=foo # idempotent ceph osd crush add-bucket bar root +ceph osd crush add-bucket bar root # idempotent ceph osd crush link host1 root=bar +ceph osd crush link host1 root=bar # idempotent ceph osd crush link host2 root=bar +ceph osd crush link host2 root=bar # idempotent ceph osd tree | grep -c osd.$o1 | grep -q 2 ceph osd tree | grep -c host1 | grep -q 2 diff --git a/src/ceph-disk b/src/ceph-disk index 6c1b3703847..0389b5ce55b 100755 --- a/src/ceph-disk +++ b/src/ceph-disk @@ -1085,7 
+1085,7 @@ def main_prepare(args): args.cluster_uuid = get_fsid(cluster=args.cluster) if args.cluster_uuid is None: raise Error( - 'must have fsid in config or pass --cluster--uuid=', + 'must have fsid in config or pass --cluster-uuid=', ) if args.fs_type is None: diff --git a/src/ceph.in b/src/ceph.in index 857197185f4..7d6c62a7303 100755 --- a/src/ceph.in +++ b/src/ceph.in @@ -345,8 +345,9 @@ class CephPgid(CephArgtype): class CephName(CephArgtype): """ - Name, or type.id, where type is osd|mon|client|mds, and id is a base10 int, - or just id. + Name (type.id) where: + type is osd|mon|client|mds + id is a base10 int, if type == osd, or a string otherwise Also accept '*' """ @@ -357,16 +358,17 @@ class CephName(CephArgtype): self.nameid = None return True if s.find('.') == -1: - i = s + raise ArgumentFormat('CephName: no . in {0}'.format(s)) else: t, i = s.split('.') if not t in ('osd', 'mon', 'client', 'mds'): raise ArgumentValid('unknown type ' + self.t) if t == 'osd': - try: - i = int(i) - except: - raise ArgumentFormat('osd id ' + i + ' not integer') + if i != '*': + try: + i = int(i) + except: + raise ArgumentFormat('osd id ' + i + ' not integer') self.nametype = t self.val = s self.nameid = i @@ -375,6 +377,36 @@ class CephName(CephArgtype): def __str__(self): return '<name (type.id)>' +class CephOsdName(CephArgtype): + """ + Like CephName, but specific to osds: allow <id> alone + + osd.<id>, or <id>, or *, where id is a base10 int + """ + def valid(self, s, partial=False): + if s == '*': + self.val = s + self.nametype = None + self.nameid = None + return True + if s.find('.') != -1: + t, i = s.split('.') + else: + t = 'osd' + i = s + if t != 'osd': + raise ArgumentValid('unknown type ' + self.t) + try: + i = int(i) + except: + raise ArgumentFormat('osd id ' + i + ' not integer') + self.nametype = t + self.nameid = i + self.val = i + return True + + def __str__(self): + return '<osdname (id|osd.id)>' class CephChoices(CephArgtype): """ @@ -778,7 +810,7 @@ 
def osdids(): ret, outbuf, outs = json_command(prefix='osd ls') if ret: raise RuntimeError('Can\'t contact mon for osd list') - return [i for i in outbuf.split('\n')] + return [i for i in outbuf.split('\n') if i != ''] def monids(): ret, outbuf, outs = json_command(prefix='mon dump', @@ -1107,8 +1139,10 @@ def send_command(target=('mon', ''), cmd=[], inbuf='', timeout=0): if verbose: print >> sys.stderr, '{0} to {1}'.\ format(cmd, target[0]) - ret, outbuf, outs = cluster_handle.mon_command(cmd, inbuf, - timeout) + if target[1] == '': + ret, outbuf, outs = cluster_handle.mon_command(cmd, inbuf, timeout) + else: + ret, outbuf, outs = cluster_handle.mon_command(cmd, inbuf, timeout, target[1]) except Exception as e: raise RuntimeError('"{0}": exception {1}'.format(cmd, e)) @@ -1266,7 +1300,7 @@ def find_cmd_target(childargs): if len(valid_dict) == 2: name = CephName() name.valid(valid_dict['target']) - return 'osd', name.nameid + return name.nametype, name.nameid sig = parse_funcsig(['pg', {'name':'pgid','type':'CephPgid'}]) valid_dict = validate(childargs, sig, partial=True); @@ -1508,56 +1542,76 @@ def main(): # of the form 'cmdNNN' followed by an array of argument descriptors) # as part of the validated argument JSON object - ret, outbuf, outs = json_command(target=target, - prefix='get_command_descriptions') - if ret == -errno.EINVAL: - # send command to old monitor or OSD - if verbose: - print '{0} to old {1}'.format(' '.join(childargs), target[0]) - ret, outbuf, outs = send_command(target, childargs, inbuf) - # combine nonerror outbuf and outs; either may have cmd output - if ret == 0: - outbuf += outs - # clear outs so generic code below doesn't print it to stderr - outs = '' - elif ret: + targets = [target] + + if target[1] == '*': + targets = [(target[0], o) for o in osdids()] + + final_ret = 0 + for target in targets: + ret, outbuf, outs = json_command(target=target, + prefix='get_command_descriptions') + if ret == -errno.EINVAL: + # send command to old 
monitor or OSD + if verbose: + print '{0} to old {1}'.format(' '.join(childargs), target[0]) + ret, outbuf, outs = send_command(target, childargs, inbuf) + # combine nonerror outbuf and outs; either may have cmd output + if ret == 0: + outbuf += outs + # clear outs so generic code below doesn't print it to stderr + outs = '' + elif ret: + if ret < 0: + ret = -ret + print >> sys.stderr, \ + 'Problem getting command descriptions from {0}, {1}'.\ + format(target, errno.errorcode[ret]) + else: + sigdict = parse_json_funcsigs(outbuf) + + if parsed_args.completion: + return complete(sigdict, childargs, target) + + ret, outbuf, outs = new_style_command(parsed_args, childargs, target, + sigdict, inbuf, verbose) + if ret < 0: ret = -ret - print >> sys.stderr, \ - 'Problem getting command descriptions from {0}, {1}'.\ - format(target, errno.errorcode[ret]) - return ret - else: - sigdict = parse_json_funcsigs(outbuf) + if len(targets) > 1: + sys.stderr.write('{0}.{1}: '.format(*target)) + print >> sys.stderr, 'Error {0}: {1}'.format(errno.errorcode[ret], outs) + if len(targets) > 1: + final_ret = ret + else: + return ret - if parsed_args.completion: - return complete(sigdict, childargs, target) + # this assumes outs never has useful command output, only status + if outs: + print >> sys.stderr, outs - ret, outbuf, outs = new_style_command(parsed_args, childargs, target, - sigdict, inbuf, verbose) + if (parsed_args.output_file): + outf.write(outbuf) + else: + # hack: old code printed status line before many json outputs + # (osd dump, etc.) that consumers know to ignore. Add blank line + # to satisfy consumers that skip the first line, but not annoy + # consumers that don't. 
+ if parsed_args.output_format and \ + parsed_args.output_format.startswith('json'): + sys.stdout.write('\n'); - if ret < 0: - ret = -ret - print >> sys.stderr, 'Error {0}: {1}'.format(errno.errorcode[ret], outs) - return ret + # prefix output with target, if there was a wildcard used + if len(targets) > 1: + sys.stdout.write('{0}.{1}: '.format(*target)) - # this assumes outs never has useful command output, only status - if outs: - print >> sys.stderr, outs + sys.stdout.write(outbuf) if (parsed_args.output_file): - outf.write(outbuf) outf.close() - else: - # hack: old code printed status line before many json outputs - # (osd dump, etc.) that consumers know to ignore. Add blank line - # to satisfy consumers that skip the first line, but not annoy - # consumers that don't. - if parsed_args.output_format and \ - parsed_args.output_format.startswith('json'): - sys.stdout.write('\n'); - - sys.stdout.write(outbuf) + + if final_ret: + return ret return 0 diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index f0ae6dd9488..8a9addfb6c2 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -255,14 +255,14 @@ int CrushWrapper::get_full_location_ordered(int id, vector<pair<string, string> string high_type_name = type_map[high_type]; path.push_back(parent_coord); - parent_id = get_item_id( (parent_coord.second).c_str() ); + parent_id = get_item_id(parent_coord.second); while (parent_coord.first != high_type_name) { parent_coord = get_immediate_parent(parent_id); path.push_back(parent_coord); if ( parent_coord.first != high_type_name ){ - parent_id = get_item_id( (parent_coord.second).c_str() ); + parent_id = get_item_id(parent_coord.second); } } @@ -291,7 +291,7 @@ map<int, string> CrushWrapper::get_parent_hierarchy(int id) high_type = (*it).first; } - parent_id = get_item_id((parent_coord.second).c_str()); + parent_id = get_item_id(parent_coord.second); while (type_counter < high_type) { type_counter++; @@ -300,7 +300,7 @@ map<int, 
string> CrushWrapper::get_parent_hierarchy(int id) if (type_counter < high_type){ // get the coordinate information for the next parent parent_coord = get_immediate_parent(parent_id); - parent_id = get_item_id(parent_coord.second.c_str()); + parent_id = get_item_id(parent_coord.second); } } diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index c123da3d9d5..f5a88c8bdd5 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -633,7 +633,7 @@ private: pair<string, string> bucket_location = get_immediate_parent(item); // get the id of the parent bucket - int parent_id = get_item_id( (bucket_location.second).c_str() ); + int parent_id = get_item_id(bucket_location.second); // get the parent bucket crush_bucket *parent_bucket = get_bucket(parent_id); diff --git a/src/crushtool.cc b/src/crushtool.cc index 054c4df8698..75c26c098b6 100644 --- a/src/crushtool.cc +++ b/src/crushtool.cc @@ -609,11 +609,11 @@ int main(int argc, const char **argv) if (!reweight_name.empty()) { cout << me << " reweighting item " << reweight_name << " to " << reweight_weight << std::endl; int r; - if (!crush.name_exists(reweight_name.c_str())) { + if (!crush.name_exists(reweight_name)) { cerr << " name " << reweight_name << " dne" << std::endl; r = -ENOENT; } else { - int item = crush.get_item_id(reweight_name.c_str()); + int item = crush.get_item_id(reweight_name); r = crush.adjust_item_weightf(g_ceph_context, item, reweight_weight); } if (r >= 0) @@ -627,11 +627,11 @@ int main(int argc, const char **argv) if (!remove_name.empty()) { cout << me << " removing item " << remove_name << std::endl; int r; - if (!crush.name_exists(remove_name.c_str())) { + if (!crush.name_exists(remove_name)) { cerr << " name " << remove_name << " dne" << std::endl; r = -ENOENT; } else { - int remove_item = crush.get_item_id(remove_name.c_str()); + int remove_item = crush.get_item_id(remove_name); r = crush.remove_item(g_ceph_context, remove_item, false); } if (r == 0) diff --git 
a/src/include/rados/librados.h b/src/include/rados/librados.h index 69eeccb323b..eb1a30632b7 100644 --- a/src/include/rados/librados.h +++ b/src/include/rados/librados.h @@ -1712,7 +1712,7 @@ int rados_break_lock(rados_ioctx_t io, const char *o, const char *name, * @note Takes command string in carefully-formatted JSON; must match * defined commands, types, etc. * - * The result buffers are allocated on the heapt; the caller is + * The result buffers are allocated on the heap; the caller is * expected to release that memory with rados_buffer_free(). The * buffer and length pointers can all be NULL, in which case they are * not filled in. @@ -1733,6 +1733,34 @@ int rados_mon_command(rados_t cluster, const char **cmd, size_t cmdlen, char **outs, size_t *outslen); /** + * Send monitor command to a specific monitor. + * + * @note Takes command string in carefully-formatted JSON; must match + * defined commands, types, etc. + * + * The result buffers are allocated on the heap; the caller is + * expected to release that memory with rados_buffer_free(). The + * buffer and length pointers can all be NULL, in which case they are + * not filled in. + * + * @param cluster cluster handle + * @param name target monitor's name + * @param cmd an array of char *'s representing the command + * @param cmdlen count of valid entries in cmd + * @param inbuf any bulk input data (crush map, etc.) + * @param outbuf double pointer to output buffer + * @param outbuflen pointer to output buffer length + * @param outs double pointer to status string + * @param outslen pointer to status string length + * @returns 0 on success, negative error code on failure + */ +int rados_mon_command_target(rados_t cluster, const char *name, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + +/** * free a rados-allocated buffer * * Release memory allocated by librados calls like rados_mon_command(). 
diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc index a2b54abf058..f5dccaffc09 100644 --- a/src/librados/RadosClient.cc +++ b/src/librados/RadosClient.cc @@ -201,11 +201,6 @@ int librados::RadosClient::connect() objecter->init_locked(); monclient.renew_subs(); - while (osdmap.get_epoch() == 0) { - ldout(cct, 1) << "waiting for osdmap" << dendl; - cond.Wait(lock); - } - finisher.start(); state = CONNECTED; @@ -583,6 +578,44 @@ int librados::RadosClient::mon_command(const vector<string>& cmd, return rval; } +int librados::RadosClient::mon_command(int rank, const vector<string>& cmd, + bufferlist &inbl, + bufferlist *outbl, string *outs) +{ + Mutex mylock("RadosClient::mon_command::mylock"); + Cond cond; + bool done; + int rval; + lock.Lock(); + monclient.start_mon_command(rank, cmd, inbl, outbl, outs, + new C_SafeCond(&mylock, &cond, &done, &rval)); + lock.Unlock(); + mylock.Lock(); + while (!done) + cond.Wait(mylock); + mylock.Unlock(); + return rval; +} + +int librados::RadosClient::mon_command(string name, const vector<string>& cmd, + bufferlist &inbl, + bufferlist *outbl, string *outs) +{ + Mutex mylock("RadosClient::mon_command::mylock"); + Cond cond; + bool done; + int rval; + lock.Lock(); + monclient.start_mon_command(name, cmd, inbl, outbl, outs, + new C_SafeCond(&mylock, &cond, &done, &rval)); + lock.Unlock(); + mylock.Lock(); + while (!done) + cond.Wait(mylock); + mylock.Unlock(); + return rval; +} + int librados::RadosClient::osd_command(int osd, vector<string>& cmd, bufferlist& inbl, bufferlist *poutbl, string *prs) diff --git a/src/librados/RadosClient.h b/src/librados/RadosClient.h index 9374553b0ad..337beff5750 100644 --- a/src/librados/RadosClient.h +++ b/src/librados/RadosClient.h @@ -108,6 +108,12 @@ public: void watch_notify(MWatchNotify *m); int mon_command(const vector<string>& cmd, bufferlist &inbl, bufferlist *outbl, string *outs); + int mon_command(int rank, + const vector<string>& cmd, bufferlist &inbl, + bufferlist 
*outbl, string *outs); + int mon_command(string name, + const vector<string>& cmd, bufferlist &inbl, + bufferlist *outbl, string *outs); int osd_command(int osd, vector<string>& cmd, bufferlist& inbl, bufferlist *poutbl, string *prs); int pg_command(pg_t pgid, vector<string>& cmd, bufferlist& inbl, diff --git a/src/librados/librados.cc b/src/librados/librados.cc index 1dda70828fd..43c2584c390 100644 --- a/src/librados/librados.cc +++ b/src/librados/librados.cc @@ -1857,6 +1857,45 @@ extern "C" int rados_mon_command(rados_t cluster, const char **cmd, return ret; } +extern "C" int rados_mon_command_target(rados_t cluster, const char *name, + const char **cmd, + size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) +{ + librados::RadosClient *client = (librados::RadosClient *)cluster; + bufferlist inbl; + bufferlist outbl; + string outstring; + vector<string> cmdvec; + + // is this a numeric id? + char *endptr; + errno = 0; + long rank = strtol(name, &endptr, 10); + if ((errno == ERANGE && (rank == LONG_MAX || rank == LONG_MIN)) || + (errno != 0 && rank == 0) || + endptr == name || // no digits + *endptr != '\0') { // extra characters + rank = -1; + } + + for (size_t i = 0; i < cmdlen; i++) + cmdvec.push_back(cmd[i]); + + inbl.append(inbuf, inbuflen); + int ret; + if (rank >= 0) + ret = client->mon_command(rank, cmdvec, inbl, &outbl, &outstring); + else + ret = client->mon_command(name, cmdvec, inbl, &outbl, &outstring); + + do_out_buffer(outbl, outbuf, outbuflen); + do_out_buffer(outstring, outs, outslen); + return ret; +} + extern "C" int rados_osd_command(rados_t cluster, int osdid, const char **cmd, size_t cmdlen, const char *inbuf, size_t inbuflen, diff --git a/src/mon/MonCap.cc b/src/mon/MonCap.cc index bf8eb6fb79d..6f1055091e0 100644 --- a/src/mon/MonCap.cc +++ b/src/mon/MonCap.cc @@ -143,6 +143,8 @@ void MonCapGrant::expand_profile(entity_name_t name) const 
profile_grants.push_back(MonCapGrant("config-key delete", "key", StringConstraint("", prefix))); } if (profile == "bootstrap-osd") { + profile_grants.push_back(MonCapGrant("mon", MON_CAP_R)); // read monmap + profile_grants.push_back(MonCapGrant("osd", MON_CAP_R)); // read osdmap profile_grants.push_back(MonCapGrant("mon getmap")); profile_grants.push_back(MonCapGrant("osd create")); profile_grants.push_back(MonCapGrant("osd crush set")); // FIXME: constraint this further? @@ -152,6 +154,8 @@ void MonCapGrant::expand_profile(entity_name_t name) const profile_grants.back().command_args["caps_osd"] = StringConstraint("allow *", ""); } if (profile == "bootstrap-mds") { + profile_grants.push_back(MonCapGrant("mon", MON_CAP_R)); // read monmap + profile_grants.push_back(MonCapGrant("osd", MON_CAP_R)); // read osdmap profile_grants.push_back(MonCapGrant("mon getmap")); profile_grants.push_back(MonCapGrant("auth get-or-create")); // FIXME: this can expose other mds keys profile_grants.back().command_args["name"] = StringConstraint("", "mds."); diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc index 181d0688311..c0d2b5183c5 100644 --- a/src/mon/MonClient.cc +++ b/src/mon/MonClient.cc @@ -479,11 +479,18 @@ string MonClient::_pick_random_mon() } } -void MonClient::_pick_new_mon() +void MonClient::_reopen_session(int rank, string name) { assert(monc_lock.is_locked()); + ldout(cct, 10) << "_reopen_session rank " << rank << " name " << name << dendl; - cur_mon = _pick_random_mon(); + if (rank < 0 && name.length() == 0) { + cur_mon = _pick_random_mon(); + } else if (name.length()) { + cur_mon = name; + } else { + cur_mon = monmap.get_name(rank); + } if (cur_con) { messenger->mark_down(cur_con); @@ -491,18 +498,9 @@ void MonClient::_pick_new_mon() } cur_con = messenger->get_connection(monmap.get_inst(cur_mon)); - ldout(cct, 10) << "_pick_new_mon picked mon." << cur_mon << " con " << cur_con + ldout(cct, 10) << "picked mon." 
<< cur_mon << " con " << cur_con << " addr " << cur_con->get_peer_addr() << dendl; -} - - -void MonClient::_reopen_session() -{ - assert(monc_lock.is_locked()); - ldout(cct, 10) << "_reopen_session" << dendl; - - _pick_new_mon(); // throw out old queued messages while (!waiting_for_session.empty()) { @@ -724,6 +722,37 @@ int MonClient::wait_auth_rotating(double timeout) void MonClient::_send_command(MonCommand *r) { version_t last_seen_version = 0; + + if (r->target_rank >= 0 && + r->target_rank != monmap.get_rank(cur_mon)) { + ldout(cct, 10) << "_send_command " << r->tid << " " << r->cmd + << " wants rank " << r->target_rank + << ", reopening session" + << dendl; + if (r->target_rank >= (int)monmap.size()) { + ldout(cct, 10) << " target " << r->target_rank << " >= max mon " << monmap.size() << dendl; + _finish_command(r, -ENOENT, "mon rank dne"); + return; + } + _reopen_session(r->target_rank, string()); + return; + } + + if (r->target_name.length() && + r->target_name != cur_mon) { + ldout(cct, 10) << "_send_command " << r->tid << " " << r->cmd + << " wants mon " << r->target_name + << ", reopening session" + << dendl; + if (!monmap.contains(r->target_name)) { + ldout(cct, 10) << " target " << r->target_name << " not present in monmap" << dendl; + _finish_command(r, -ENOENT, "mon dne"); + return; + } + _reopen_session(-1, r->target_name); + return; + } + ldout(cct, 10) << "_send_command " << r->tid << " " << r->cmd << dendl; MMonCommand *m = new MMonCommand(monmap.fsid, last_seen_version); m->set_tid(r->tid); @@ -798,6 +827,43 @@ int MonClient::start_mon_command(const vector<string>& cmd, bufferlist& inbl, return 0; } +int MonClient::start_mon_command(string name, + const vector<string>& cmd, bufferlist& inbl, + bufferlist *outbl, string *outs, + Context *onfinish) +{ + Mutex::Locker l(monc_lock); + MonCommand *r = new MonCommand(++last_mon_command_tid); + r->target_name = name; + r->cmd = cmd; + r->inbl = inbl; + r->poutbl = outbl; + r->prs = outs; + r->onfinish 
= onfinish; + mon_commands[r->tid] = r; + _send_command(r); + // can't fail + return 0; +} + +int MonClient::start_mon_command(int rank, + const vector<string>& cmd, bufferlist& inbl, + bufferlist *outbl, string *outs, + Context *onfinish) +{ + Mutex::Locker l(monc_lock); + MonCommand *r = new MonCommand(++last_mon_command_tid); + r->target_rank = rank; + r->cmd = cmd; + r->inbl = inbl; + r->poutbl = outbl; + r->prs = outs; + r->onfinish = onfinish; + mon_commands[r->tid] = r; + _send_command(r); + return 0; +} + // --------- void MonClient::get_version(string map, version_t *newest, version_t *oldest, Context *onfinish) diff --git a/src/mon/MonClient.h b/src/mon/MonClient.h index 04e6b0e44b5..7d4a0548f9d 100644 --- a/src/mon/MonClient.h +++ b/src/mon/MonClient.h @@ -129,8 +129,10 @@ private: string _pick_random_mon(); void _finish_hunting(); - void _reopen_session(); - void _pick_new_mon(); + void _reopen_session(int rank, string name); + void _reopen_session() { + _reopen_session(-1, string()); + } void _send_mon_message(Message *m, bool force=false); public: @@ -270,6 +272,8 @@ public: private: uint64_t last_mon_command_tid; struct MonCommand { + string target_name; + int target_rank; uint64_t tid; vector<string> cmd; bufferlist inbl; @@ -279,7 +283,8 @@ private: Context *onfinish; MonCommand(uint64_t t) - : tid(t), + : target_rank(-1), + tid(t), poutbl(NULL), prs(NULL), prval(NULL), onfinish(NULL) {} }; @@ -294,6 +299,14 @@ public: int start_mon_command(const vector<string>& cmd, bufferlist& inbl, bufferlist *outbl, string *outs, Context *onfinish); + int start_mon_command(int mon_rank, + const vector<string>& cmd, bufferlist& inbl, + bufferlist *outbl, string *outs, + Context *onfinish); + int start_mon_command(const string mon_name, ///< mon name, with mon. 
prefix + const vector<string>& cmd, bufferlist& inbl, + bufferlist *outbl, string *outs, + Context *onfinish); // version requests public: diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 4e64a0a70d4..2c805362255 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -55,6 +55,7 @@ * CephObjectname: Another plainold string * CephPgid: n.xxx where n is an int > 0, xxx is a hex number > 0 * CephName: daemon name, '*' or '<type>.<id>' (id must be int for type osd) + * CephOsdName: osd name, '*' or '<id> or 'osd.<id>' (id must be int) * CephChoices: strings="foo|bar" means this param can be either * CephFilepath: openable file * CephFragment: cephfs 'fragID': val/bits, val in hex 0xnnn, bits in dec @@ -259,10 +260,6 @@ COMMAND("mon stat", "summarize monitor status") COMMAND("mon getmap " \ "name=epoch,type=CephInt,range=0,req=false", \ "get monmap") -COMMAND("mon tell " \ - "name=who,type=CephString " \ - "name=args,type=CephString,n=N", \ - "send command to specific monitor(s)") COMMAND("mon add " \ "name=name,type=CephString " \ "name=addr,type=CephIPAddr", \ @@ -289,10 +286,6 @@ COMMAND("osd getcrushmap " \ "name=epoch,type=CephInt,range=0,req=false", \ "get CRUSH map") COMMAND("osd getmaxosd", "show largest OSD id") -COMMAND("osd tell " \ - "name=who,type=CephString " \ - "name=args,type=CephString,n=N", \ - "send command to particular osd") COMMAND("osd find " \ "name=id,type=CephInt,range=0", \ "find osd <id> in the CRUSH map and show its location") @@ -324,24 +317,22 @@ COMMAND("osd crush add-bucket " \ "name=type,type=CephString", \ "add no-parent (probably root) crush bucket <name> of type <type>") COMMAND("osd crush set " \ - "name=id,type=CephInt,range=0 " \ - "name=name,type=CephName,req=false " \ + "name=id,type=CephOsdName " \ "name=weight,type=CephFloat,range=0.0 " \ "name=args,type=CephString,n=N", \ - "set crushmap entry for <id> to <weight> with location <args>") + "set crushmap entry for <name> to <weight> with location 
<args>") COMMAND("osd crush add " \ - "name=id,type=CephInt,range=0 " \ - "name=name,type=CephName,req=false " \ + "name=id,type=CephOsdName " \ "name=weight,type=CephFloat,range=0.0 " \ "name=args,type=CephString,n=N", \ - "add crushmap entry for <id> with <weight> and location <args>") + "add crushmap entry for <name> with <weight> and location <args>") COMMAND("osd crush create-or-move " \ - "name=id,type=CephInt,range=0 " \ + "name=id,type=CephOsdName " \ "name=weight,type=CephFloat,range=0.0 " \ "name=args,type=CephString,n=N", \ - "create entry or move existing entry for <id> <weight> at/to location <args>") + "create entry or move existing entry for <name> <weight> at/to location <args>") COMMAND("osd crush move " \ - "name=name,type=CephString " \ + "name=id,type=CephOsdName " \ "name=args,type=CephString,n=N", \ "move existing entry for <name> to location <args>") COMMAND("osd crush link " \ diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 90839b7706f..cbcefae104f 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -264,60 +264,12 @@ bool MonmapMonitor::preprocess_command(MMonCommand *m) if (p != mon->monmap) delete p; } - } else if (prefix == "mon tell") { - dout(20) << "got tell: " << m->cmd << dendl; - string whostr; - cmd_getval(g_ceph_context, cmdmap, "who", whostr); - vector<string> argvec; - cmd_getval(g_ceph_context, cmdmap, "args", argvec); - - if (whostr == "*") { // send to all mons and do myself - for (unsigned i = 0; i < mon->monmap->size(); ++i) { - MMonCommand *newm = new MMonCommand(m->fsid, m->version); - newm->cmd.insert(newm->cmd.begin(), argvec.begin(), argvec.end()); - mon->messenger->send_message(newm, mon->monmap->get_inst(i)); - } - ss << "bcast to all mons"; - r = 0; - } else { - // find target. 
Ignore error from parsing long as we probably - // have a string instead - long who = parse_pos_long(whostr.c_str(), NULL); - EntityName name; - if (who < 0) { - - // not numeric; try as name or id, and see if in monmap - if (!name.from_str(whostr)) - name.set("mon", whostr); - - if (mon->monmap->contains(name.get_id())) { - who = mon->monmap->get_rank(name.get_id()); - } else { - ss << "bad mon name \"" << whostr << "\""; - r = -ENOENT; - goto out; - } - } else if (who >= (long)mon->monmap->size()) { - ss << "mon." << whostr << " does not exist"; - r = -ENOENT; - goto out; - } - - // send to target, or handle if it's me - stringstream ss; - MMonCommand *newm = new MMonCommand(m->fsid, m->version); - newm->cmd.insert(newm->cmd.begin(), argvec.begin(), argvec.end()); - mon->messenger->send_message(newm, mon->monmap->get_inst(who)); - ss << "fw to mon." << whostr; - r = 0; - } } else if (prefix == "mon add") return false; else if (prefix == "mon remove") return false; - out: if (r != -1) { string rs; getline(ss, rs); diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index d6e2e1cb560..d785cf626c3 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1697,7 +1697,7 @@ void OSDMonitor::tick() // is this an entire large subtree down? if (g_conf->mon_osd_down_out_subtree_limit.length()) { - int type = osdmap.crush->get_type_id(g_conf->mon_osd_down_out_subtree_limit.c_str()); + int type = osdmap.crush->get_type_id(g_conf->mon_osd_down_out_subtree_limit); if (type > 0) { if (osdmap.containing_subtree_is_down(g_ceph_context, o, type, &down_cache)) { dout(10) << "tick entire containing " << g_conf->mon_osd_down_out_subtree_limit @@ -2482,7 +2482,13 @@ bool OSDMonitor::prepare_command(MMonCommand *m) cmd_getval(g_ceph_context, cmdmap, "prefix", prefix); int64_t id; + string name; bool osdid_present = cmd_getval(g_ceph_context, cmdmap, "id", id); + if (osdid_present) { + ostringstream oss; + oss << "osd." 
<< id; + name = oss.str(); + } if (prefix == "osd setcrushmap" || (prefix == "osd crush set" && !osdid_present)) { @@ -2544,23 +2550,19 @@ bool OSDMonitor::prepare_command(MMonCommand *m) ss << "added bucket " << name << " type " << typestr << " to crush map"; goto update; - } else if (osdid_present && + } else if (osdid_present && (prefix == "osd crush set" || prefix == "osd crush add")) { do { - // osd crush set <osd-id> [<osd.* name>] <weight> <loc1> [<loc2> ...] - // osd crush add <osd-id> [<osd.* name>] <weight> <loc1> [<loc2> ...] + // <OsdName> is 'osd.<id>' or '<id>', passed as int64_t id + // osd crush set <OsdName> <weight> <loc1> [<loc2> ...] + // osd crush add <OsdName> <weight> <loc1> [<loc2> ...] + if (!osdmap.exists(id)) { err = -ENOENT; - ss << "osd." << id << " does not exist. create it before updating the crush map"; + ss << name << " does not exist. create it before updating the crush map"; goto reply; } - string name; - if (!cmd_getval(g_ceph_context, cmdmap, "name", name)) { - // new usage; infer name - name = "osd." + stringify(id); - } - double weight; cmd_getval(g_ceph_context, cmdmap, "weight", weight); @@ -2577,7 +2579,8 @@ bool OSDMonitor::prepare_command(MMonCommand *m) _get_pending_crush(newcrush); string action; - if (prefix == "osd crush set") { + if (prefix == "osd crush set" || + newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) { action = "set"; err = newcrush.update_item(g_ceph_context, id, weight, name, loc); } else { @@ -2604,16 +2607,13 @@ bool OSDMonitor::prepare_command(MMonCommand *m) } else if (prefix == "osd crush create-or-move") { do { - // osd crush create-or-move <id> <initial_weight> <loc1> [<loc2> ...] - int64_t id; - cmd_getval(g_ceph_context, cmdmap, "id", id); + // osd crush create-or-move <OsdName> <initial_weight> <loc1> [<loc2> ...] if (!osdmap.exists(id)) { err = -ENOENT; - ss << "osd." << id << " does not exist. create it before updating the crush map"; + ss << name << " does not exist. 
create it before updating the crush map"; goto reply; } - string name = "osd." + stringify(id); double weight; cmd_getval(g_ceph_context, cmdmap, "weight", weight); @@ -2623,7 +2623,7 @@ bool OSDMonitor::prepare_command(MMonCommand *m) map<string,string> loc; parse_loc_map(argvec, &loc); - dout(0) << "create-or-move crush item id " << id << " name '" << name << "' initial_weight " << weight + dout(0) << "create-or-move crush item name '" << name << "' initial_weight " << weight << " at location " << loc << dendl; CrushWrapper newcrush; @@ -2631,14 +2631,14 @@ bool OSDMonitor::prepare_command(MMonCommand *m) err = newcrush.create_or_move_item(g_ceph_context, id, weight, name, loc); if (err == 0) { - ss << "create-or-move updated item id " << id << " name '" << name << "' weight " << weight + ss << "create-or-move updated item name '" << name << "' weight " << weight << " at location " << loc << " to crush map"; break; } if (err > 0) { pending_inc.crush.clear(); newcrush.encode(pending_inc.crush); - ss << "create-or-move updating item id " << id << " name '" << name << "' weight " << weight + ss << "create-or-move updating item name '" << name << "' weight " << weight << " at location " << loc << " to crush map"; getline(ss, rs); wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_version())); @@ -2649,8 +2649,6 @@ bool OSDMonitor::prepare_command(MMonCommand *m) } else if (prefix == "osd crush move") { do { // osd crush move <name> <loc1> [<loc2> ...] 
- string name; - cmd_getval(g_ceph_context, cmdmap, "name", name); string args; vector<string> argvec; @@ -2662,12 +2660,12 @@ bool OSDMonitor::prepare_command(MMonCommand *m) CrushWrapper newcrush; _get_pending_crush(newcrush); - if (!newcrush.name_exists(name.c_str())) { + if (!newcrush.name_exists(name)) { err = -ENOENT; ss << "item " << name << " does not exist"; break; } - int id = newcrush.get_item_id(name.c_str()); + int id = newcrush.get_item_id(name); if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) { err = newcrush.move_bucket(g_ceph_context, id, loc); @@ -2699,12 +2697,12 @@ bool OSDMonitor::prepare_command(MMonCommand *m) CrushWrapper newcrush; _get_pending_crush(newcrush); - if (!newcrush.name_exists(name.c_str())) { + if (!newcrush.name_exists(name)) { err = -ENOENT; ss << "item " << name << " does not exist"; break; } - int id = newcrush.get_item_id(name.c_str()); + int id = newcrush.get_item_id(name); if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) { err = newcrush.link_bucket(g_ceph_context, id, loc); @@ -2732,12 +2730,12 @@ bool OSDMonitor::prepare_command(MMonCommand *m) string name; cmd_getval(g_ceph_context, cmdmap, "name", name); - if (!newcrush.name_exists(name.c_str())) { + if (!newcrush.name_exists(name)) { err = 0; ss << "device '" << name << "' does not appear in the crush map"; break; } - int id = newcrush.get_item_id(name.c_str()); + int id = newcrush.get_item_id(name); bool unlink_only = prefix == "osd crush unlink"; string ancestor_str; if (cmd_getval(g_ceph_context, cmdmap, "ancestor", ancestor_str)) { @@ -2776,13 +2774,13 @@ bool OSDMonitor::prepare_command(MMonCommand *m) string name; cmd_getval(g_ceph_context, cmdmap, "name", name); - if (!newcrush.name_exists(name.c_str())) { + if (!newcrush.name_exists(name)) { err = -ENOENT; ss << "device '" << name << "' does not appear in the crush map"; break; } - int id = newcrush.get_item_id(name.c_str()); + int id = newcrush.get_item_id(name); if (id 
< 0) { ss << "device '" << name << "' is not a leaf in the crush map"; break; @@ -3082,7 +3080,7 @@ bool OSDMonitor::prepare_command(MMonCommand *m) // osd already exists err = 0; ss << i; - getline(ss, rs); + rdata.append(ss); goto reply; } i = pending_inc.identify_osd(uuid); diff --git a/src/pybind/rados.py b/src/pybind/rados.py index 4a5ed961603..c4000465272 100644 --- a/src/pybind/rados.py +++ b/src/pybind/rados.py @@ -518,9 +518,9 @@ Rados object in state %s." % (self.state)) raise make_ex(ret, "error opening ioctx '%s'" % ioctx_name) return Ioctx(ioctx_name, self.librados, ioctx) - def mon_command(self, cmd, inbuf, timeout=0): + def mon_command(self, cmd, inbuf, timeout=0, target=None): """ - mon_command(cmd, inbuf, outbuf, outbuflen, outs, outslen) + mon_command[_target](cmd, inbuf, outbuf, outbuflen, outs, outslen) returns (int ret, string outbuf, string outs) """ import sys @@ -531,11 +531,18 @@ Rados object in state %s." % (self.state)) outslen = c_long() cmdarr = (c_char_p * len(cmd))(*cmd) - ret = run_in_thread(self.librados.rados_mon_command, - (self.cluster, cmdarr, len(cmd), - c_char_p(inbuf), len(inbuf), - outbufp, byref(outbuflen), outsp, byref(outslen)), - timeout) + if target: + ret = run_in_thread(self.librados.rados_mon_command_target, + (self.cluster, target, cmdarr, len(cmd), + c_char_p(inbuf), len(inbuf), + outbufp, byref(outbuflen), outsp, byref(outslen)), + timeout) + else: + ret = run_in_thread(self.librados.rados_mon_command, + (self.cluster, cmdarr, len(cmd), + c_char_p(inbuf), len(inbuf), + outbufp, byref(outbuflen), outsp, byref(outslen)), + timeout) if ret == 0: # copy returned memory (ctypes makes a copy, not a reference) diff --git a/src/vstart.sh b/src/vstart.sh index 27a750cbbeb..f179b0b1bf1 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -423,7 +423,7 @@ EOF uuid=`uuidgen` echo "add osd$osd $uuid" $SUDO $CEPH_ADM osd create $uuid - $SUDO $CEPH_ADM osd crush set $osd osd.$osd 1.0 host=localhost rack=localrack root=default + 
$SUDO $CEPH_ADM osd crush set osd.$osd 1.0 host=localhost rack=localrack root=default $SUDO $CEPH_BIN/ceph-osd -i $osd $ARGS --mkfs --mkkey --osd-uuid $uuid key_fn=dev/osd$osd/keyring |