diff options
author | Sage Weil <sage@inktank.com> | 2013-07-26 15:14:02 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-07-26 15:14:02 -0700 |
commit | c36c280a782e72e5a7fee2d1ecad1ab17f29b98e (patch) | |
tree | 6afb9591e4c7b956e38b8ea1ba01d623c9599e7c | |
parent | a08998e8dc27e34023f5937230e471adc1a7d795 (diff) | |
parent | 10959404b30aa05c9d416b2a39aaae7d5641389a (diff) | |
download | ceph-c36c280a782e72e5a7fee2d1ecad1ab17f29b98e.tar.gz |
Merge remote-tracking branch 'gh/next'
Conflicts:
src/pybind/ceph_argparse.py
32 files changed, 321 insertions, 157 deletions
diff --git a/qa/workunits/rest/test.py b/qa/workunits/rest/test.py index 60dabab9678..f0cf1f2c761 100755 --- a/qa/workunits/rest/test.py +++ b/qa/workunits/rest/test.py @@ -72,7 +72,7 @@ if __name__ == '__main__': expect('auth/export', 'GET', 200, 'xml', XMLHDR) expect('auth/add?entity=client.xx&' - 'caps=mon&caps=allow&caps=osd&caps=allow *', 'PUT', 200, 'json', + 'caps=mon&caps=allow&caps=osd&caps=allow+*', 'PUT', 200, 'json', JSONHDR) r = expect('auth/export?entity=client.xx', 'GET', 200, 'plain') @@ -95,7 +95,7 @@ if __name__ == '__main__': expect('auth/print-key?entity=client.xx', 'GET', 200, 'json', JSONHDR) expect('auth/print_key?entity=client.xx', 'GET', 200, 'json', JSONHDR) - expect('auth/caps?entity=client.xx&caps=osd&caps=allow rw', 'PUT', 200, + expect('auth/caps?entity=client.xx&caps=osd&caps=allow+rw', 'PUT', 200, 'json', JSONHDR) r = expect('auth/list.json', 'GET', 200, 'json') dictlist = r.myjson['output']['auth_dump'] diff --git a/src/ceph-disk b/src/ceph-disk index b4a9e68dad7..77a9d9a2612 100755 --- a/src/ceph-disk +++ b/src/ceph-disk @@ -182,7 +182,7 @@ class FilesystemTypeError(Error): def maybe_mkdir(*a, **kw): """ - Creates a new directory if it doesn't exist, removes + Creates a new directory if it doesn't exist, removes existing symlink before creating the directory. """ # remove any symlink, if it is there.. @@ -495,7 +495,7 @@ def _check_output(*args, **kwargs): def get_conf(cluster, variable): """ - Get the value of the given configuration variable from the + Get the value of the given configuration variable from the cluster. :raises: Error if call to ceph-conf fails. @@ -654,7 +654,7 @@ def mount( options, ): """ - Mounts a device with given filessystem type and + Mounts a device with given filessystem type and mount options to a tempfile path under /var/lib/ceph/tmp. """ # pick best-of-breed mount options based on fs type @@ -1307,18 +1307,37 @@ def auth_key( osd_id, keyring, ): - subprocess.check_call( - args=[ - '/usr/bin/ceph', - '--cluster', cluster, - '--name', 'client.bootstrap-osd', - '--keyring', keyring, - 'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id), - '-i', os.path.join(path, 'keyring'), - 'osd', 'allow *', - 'mon', 'allow rwx', - ], - ) + try: + # try dumpling+ cap scheme + subprocess.check_call( + args=[ + '/usr/bin/ceph', + '--cluster', cluster, + '--name', 'client.bootstrap-osd', + '--keyring', keyring, + 'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id), + '-i', os.path.join(path, 'keyring'), + 'osd', 'allow *', + 'mon', 'allow profile osd', + ], + ) + except subprocess.CalledProcessError as err: + if err.errno == errno.EACCES: + # try old cap scheme + subprocess.check_call( + args=[ + '/usr/bin/ceph', + '--cluster', cluster, + '--name', 'client.bootstrap-osd', + '--keyring', keyring, + 'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id), + '-i', os.path.join(path, 'keyring'), + 'osd', 'allow *', + 'mon', 'allow rwx', + ], + ) + else: + raise def move_mount( @@ -1698,7 +1717,7 @@ def main_activate(args): ) else: raise Error('%s is not a directory or block device', args.path) - + start_daemon( cluster=cluster, osd_id=osd_id, @@ -1951,7 +1970,7 @@ def list_dev(dev, uuid_map, journal_map): print '%s%s %s' % (prefix, dev, ', '.join(desc)) - + def main_list(args): partmap = list_all_partitions() diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 335a9207152..3b9d025393f 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -443,6 +443,7 @@ OPTION(osd_deep_scrub_stride, OPT_INT, 524288) OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100) OPTION(osd_auto_weight, OPT_BOOL, false) OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored +OPTION(osd_open_classes_on_start, OPT_BOOL, true) OPTION(osd_check_for_log_corruption, OPT_BOOL, false) OPTION(osd_use_stale_snap, OPT_BOOL, false) OPTION(osd_rollback_to_cluster_snap, OPT_STR, "") diff --git a/src/mon/MonCap.cc b/src/mon/MonCap.cc index 8e35b775247..7ac8d142d87 100644 --- a/src/mon/MonCap.cc +++ b/src/mon/MonCap.cc @@ -149,7 +149,7 @@ void MonCapGrant::expand_profile(entity_name_t name) const profile_grants.push_back(MonCapGrant("osd create")); profile_grants.push_back(MonCapGrant("osd crush set")); // FIXME: constraint this further? profile_grants.push_back(MonCapGrant("auth add")); - profile_grants.back().command_args["name"] = StringConstraint("", "osd."); + profile_grants.back().command_args["entity"] = StringConstraint("", "osd."); profile_grants.back().command_args["caps_mon"] = StringConstraint("allow profile osd", ""); profile_grants.back().command_args["caps_osd"] = StringConstraint("allow *", ""); } @@ -158,7 +158,7 @@ void MonCapGrant::expand_profile(entity_name_t name) const profile_grants.push_back(MonCapGrant("osd", MON_CAP_R)); // read osdmap profile_grants.push_back(MonCapGrant("mon getmap")); profile_grants.push_back(MonCapGrant("auth get-or-create")); // FIXME: this can expose other mds keys - profile_grants.back().command_args["name"] = StringConstraint("", "mds."); + profile_grants.back().command_args["entity"] = StringConstraint("", "mds."); profile_grants.back().command_args["caps_mon"] = StringConstraint("allow profile mds", ""); profile_grants.back().command_args["caps_osd"] = StringConstraint("allow rwx", ""); profile_grants.back().command_args["caps_mds"] = StringConstraint("allow", ""); diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index bd189f1430c..bf500dff218 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -1535,11 +1535,23 @@ bool Monitor::_allowed_command(MonSession *s, map<string, cmd_vartype>& cmd) map<string,string> strmap; for (map<string, cmd_vartype>::const_iterator p = cmd.begin(); p != cmd.end(); ++p) { - if (p->first != "prefix") { - strmap[p->first] = cmd_vartype_stringify(p->second); + if (p->first == "prefix") + continue; + if (p->first == "caps") { + vector<string> cv; + if (cmd_getval(g_ceph_context, cmd, "caps", cv) && + cv.size() % 2 == 0) { + for (unsigned i = 0; i < cv.size(); i += 2) { + string k = string("caps_") + cv[i]; + strmap[k] = cv[i + 1]; + } + continue; + } } + strmap[p->first] = cmd_vartype_stringify(p->second); } + dout(20) << __func__ << " strmap " << strmap << dendl; if (s->caps.is_capable(g_ceph_context, s->inst.name, "", prefix, strmap, false, false, true)) { retval = true; diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 508669deef5..a543abed7ed 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -210,7 +210,7 @@ void Paxos::handle_collect(MMonPaxos *collect) // do we have an accepted but uncommitted value? // (it'll be at last_committed+1) bufferlist bl; - if (collect->last_committed == last_committed && + if (collect->last_committed <= last_committed && get_store()->exists(get_name(), last_committed+1)) { get_store()->get(get_name(), last_committed+1, bl); assert(bl.length() > 0); diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 4e4847e6917..28f81b7547f 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -2243,6 +2243,7 @@ int FileStore::_check_global_replay_guard(coll_t cid, if (r < 0) { dout(20) << __func__ << " no xattr" << dendl; assert(!m_filestore_fail_eio || r != -EIO); + TEMP_FAILURE_RETRY(::close(fd)); return 1; // no xattr } bufferlist bl; diff --git a/src/osd/ClassHandler.cc b/src/osd/ClassHandler.cc index 5af2ac01a0f..3cc319efabe 100644 --- a/src/osd/ClassHandler.cc +++ b/src/osd/ClassHandler.cc @@ -18,6 +18,11 @@ #undef dout_prefix #define dout_prefix *_dout + +#define CLS_PREFIX "libcls_" +#define CLS_SUFFIX ".so" + + int ClassHandler::open_class(const string& cname, ClassData **pcls) { Mutex::Locker lock(mutex); @@ -31,11 +36,43 @@ int ClassHandler::open_class(const string& cname, ClassData **pcls) return 0; } +int ClassHandler::open_all_classes() +{ + dout(10) << __func__ << dendl; + DIR *dir = ::opendir(g_conf->osd_class_dir.c_str()); + if (!dir) + return -errno; + + char buf[offsetof(struct dirent, d_name) + PATH_MAX + 1]; + struct dirent *pde; + int r = 0; + while ((r = ::readdir_r(dir, (dirent *)&buf, &pde)) == 0 && pde) { + if (pde->d_name[0] == '.') + continue; + if (strlen(pde->d_name) > sizeof(CLS_PREFIX) - 1 + sizeof(CLS_SUFFIX) - 1 && + strncmp(pde->d_name, CLS_PREFIX, sizeof(CLS_PREFIX) - 1) == 0 && + strcmp(pde->d_name + strlen(pde->d_name) - (sizeof(CLS_SUFFIX) - 1), CLS_SUFFIX) == 0) { + char cname[strlen(pde->d_name)]; + strcpy(cname, pde->d_name + sizeof(CLS_PREFIX) - 1); + cname[strlen(cname) - (sizeof(CLS_SUFFIX) - 1)] = '\0'; + dout(10) << __func__ << " found " << cname << dendl; + ClassData *cls; + r = open_class(cname, &cls); + if (r < 0) + goto out; + } + } + out: + closedir(dir); + return r; +} + void ClassHandler::shutdown() { for (map<string, ClassData>::iterator p = classes.begin(); p != classes.end(); ++p) { dlclose(p->second.handle); } + classes.clear(); } ClassHandler::ClassData *ClassHandler::_get_class(const string& cname) @@ -63,7 +100,7 @@ int ClassHandler::_load_class(ClassData *cls) if (cls->status == ClassData::CLASS_UNKNOWN || cls->status == ClassData::CLASS_MISSING) { char fname[PATH_MAX]; - snprintf(fname, sizeof(fname), "%s/libcls_%s.so", + snprintf(fname, sizeof(fname), "%s/" CLS_PREFIX "%s" CLS_SUFFIX, g_conf->osd_class_dir.c_str(), cls->name.c_str()); dout(10) << "_load_class " << cls->name << " from " << fname << dendl; diff --git a/src/osd/ClassHandler.h b/src/osd/ClassHandler.h index 733ed01a35d..f7c80f9454b 100644 --- a/src/osd/ClassHandler.h +++ b/src/osd/ClassHandler.h @@ -78,6 +78,8 @@ private: public: ClassHandler() : mutex("ClassHandler") {} + int open_all_classes(); + int open_class(const string& cname, ClassData **pcls); ClassData *register_class(const char *cname); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 3f226cec95d..58020d0c322 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -291,11 +291,13 @@ void OSDService::init_splits_between(pg_t pgid, // Ok, a split happened, so we need to walk the osdmaps set<pg_t> new_pgs; // pgs to scan on each map new_pgs.insert(pgid); + OSDMapRef curmap(get_map(frommap->get_epoch())); for (epoch_t e = frommap->get_epoch() + 1; e <= tomap->get_epoch(); ++e) { - OSDMapRef curmap(get_map(e-1)); - OSDMapRef nextmap(get_map(e)); + OSDMapRef nextmap(try_get_map(e)); + if (!nextmap) + continue; set<pg_t> even_newer_pgs; // pgs added in this loop for (set<pg_t>::iterator i = new_pgs.begin(); i != new_pgs.end(); ++i) { set<pg_t> split_pgs; @@ -307,7 +309,9 @@ void OSDService::init_splits_between(pg_t pgid, } } new_pgs.insert(even_newer_pgs.begin(), even_newer_pgs.end()); + curmap = nextmap; } + assert(curmap == tomap); // we must have had both frommap and tomap } } @@ -1162,6 +1166,9 @@ int OSD::init() class_handler = new ClassHandler(); cls_initialize(class_handler); + if (g_conf->osd_open_classes_on_start) + class_handler->open_all_classes(); + // load up "current" osdmap assert_warn(!osdmap); if (osdmap) { @@ -5177,7 +5184,9 @@ void OSD::advance_pg( for (; next_epoch <= osd_epoch; ++next_epoch) { - OSDMapRef nextmap = get_map(next_epoch); + OSDMapRef nextmap = service.try_get_map(next_epoch); + if (!nextmap) + continue; vector<int> newup, newacting; nextmap->pg_to_up_acting_osds(pg->info.pgid, newup, newacting); @@ -5511,7 +5520,7 @@ OSDMapRef OSDService::_add_map(OSDMap *o) return l; } -OSDMapRef OSDService::get_map(epoch_t epoch) +OSDMapRef OSDService::try_get_map(epoch_t epoch) { Mutex::Locker l(map_cache_lock); OSDMapRef retval = map_cache.lookup(epoch); @@ -5524,7 +5533,10 @@ OSDMapRef OSDService::get_map(epoch_t epoch) if (epoch > 0) { dout(20) << "get_map " << epoch << " - loading and decoding " << map << dendl; bufferlist bl; - assert(_get_map_bl(epoch, bl)); + if (!_get_map_bl(epoch, bl)) { + delete map; + return OSDMapRef(); + } map->decode(bl); } else { dout(20) << "get_map " << epoch << " - return initial " << map << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 04ad4dcd7d7..f9ceaf81bf3 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -441,7 +441,12 @@ public: SimpleLRU<epoch_t, bufferlist> map_bl_cache; SimpleLRU<epoch_t, bufferlist> map_bl_inc_cache; - OSDMapRef get_map(epoch_t e); + OSDMapRef try_get_map(epoch_t e); + OSDMapRef get_map(epoch_t e) { + OSDMapRef ret(try_get_map(e)); + assert(ret); + return ret; + } OSDMapRef add_map(OSDMap *o) { Mutex::Locker l(map_cache_lock); return _add_map(o); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7373357db11..9f957b8e054 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5032,7 +5032,6 @@ void PG::handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap, vector<int>& newup, vector<int>& newacting, RecoveryCtx *rctx) { - assert(osdmap->get_epoch() == (lastmap->get_epoch() + 1)); assert(lastmap->get_epoch() == osdmap_ref->get_epoch()); assert(lastmap == osdmap_ref); dout(10) << "handle_advance_map " << newup << "/" << newacting << dendl; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 3a6db4d8315..ca3dcc192b0 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1141,6 +1141,13 @@ struct pg_history_t { epoch_t last_epoch_clean; // lower bound on last epoch the PG was completely clean. epoch_t last_epoch_split; // as parent + /** + * In the event of a map discontinuity, same_*_since may reflect the first + * map the osd has seen in the new map sequence rather than the actual start + * of the interval. This is ok since a discontinuity at epoch e means there + * must have been a clean interval between e and now and that we cannot be + * in the active set during the interval containing e. + */ epoch_t same_up_since; // same acting set since epoch_t same_interval_since; // same acting AND up set since epoch_t same_primary_since; // same primary at least back through this epoch. diff --git a/src/push_to_kclient.pl b/src/push_to_kclient.pl deleted file mode 100755 index f76a2bb91f4..00000000000 --- a/src/push_to_kclient.pl +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/perl - -use strict; - -my $usage = "./push_to_client.pl <path_to_kernel_git_tree>\n"; - -my $kernel = shift @ARGV || die $usage; -die $usage unless -d $kernel; -die $usage unless -e "$kernel/fs/ceph/README"; - -die "not in a git tree" unless `cd $kernel && git rev-parse HEAD`; - -my $dir = '.'; -until (-d "$dir/.git") { - $dir .= "/.."; -} - -print "pushing changed shared files from $dir to $kernel...\n"; -my @files = split(/\n/, `cat $kernel/fs/ceph/README`); -for (@files) { - next if /^#/; - my ($orig, $new) = split(/\s+/, $_); - #print "$dir/$orig -> $new\n"; - system "cp -v $dir/$orig $kernel/$new"; -} - -print "pulling changed shared files from $kernel to $dir...\n"; -system "cp -v $kernel/fs/ceph/ioctl.h $dir/src/client/ioctl.h"; -system "cp -v $kernel/fs/btrfs/ioctl.h $dir/src/os/btrfs_ioctl.h"; - -print "done.\n"; - diff --git a/src/pybind/ceph_rest_api.py b/src/pybind/ceph_rest_api.py index a379e352b1f..28a0419c33c 100755 --- a/src/pybind/ceph_rest_api.py +++ b/src/pybind/ceph_rest_api.py @@ -130,7 +130,7 @@ def api_setup(): glob.baseurl = get_conf(cfg, clientname, 'base_url') or DEFAULT_BASEURL if glob.baseurl.endswith('/'): - glob.baseurl + glob.baseurl = glob.baseurl[:-1] addr = get_conf(cfg, clientname, 'public_addr') or DEFAULT_ADDR addrport = addr.rsplit(':', 1) addr = addrport[0] @@ -322,6 +322,9 @@ def make_response(fmt, output, statusmsg, errorcode): {1} </status> </response>'''.format(response, xml.sax.saxutils.escape(statusmsg)) + else: + if not 200 <= errorcode < 300: + response = response + '\n' + statusmsg + '\n' return flask.make_response(response, errorcode) diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 67f5f1c68b3..364f60f78f7 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -1815,7 +1815,7 @@ next: cerr << "ERROR: failed to read input: " << cpp_strerror(-ret) << std::endl; return ret; } - ret = store->meta_mgr->put(metadata_key, bl); + ret = store->meta_mgr->put(metadata_key, bl, RGWMetadataHandler::APPLY_ALWAYS); if (ret < 0) { cerr << "ERROR: can't put key: " << cpp_strerror(-ret) << std::endl; return -ret; diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index 8de5a3d101f..bf8da99d616 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -1397,7 +1397,8 @@ public: return 0; } - int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { + int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t sync_type) { RGWBucketEntryPoint be, old_be; decode_json_obj(be, obj); @@ -1410,6 +1411,12 @@ public: if (ret < 0 && ret != -ENOENT) return ret; + // are we actually going to perform this put, or is it too old? + if (!check_versions(old_ot.read_version, orig_mtime, + objv_tracker.write_version, mtime, sync_type)) { + return STATUS_NO_APPLY; + } + objv_tracker.read_version = old_ot.read_version; /* maintain the obj version we just read */ ret = store->put_bucket_entrypoint_info(entry, be, false, objv_tracker, mtime, &attrs); @@ -1540,7 +1547,8 @@ public: return 0; } - int put(RGWRados *store, string& oid, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { + int put(RGWRados *store, string& oid, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t sync_type) { RGWBucketCompleteInfo bci, old_bci; decode_json_obj(bci, obj); @@ -1566,6 +1574,13 @@ public: bci.info.bucket.index_pool = old_bci.info.bucket.index_pool; } + // are we actually going to perform this put, or is it too old? + if (!check_versions(old_bci.info.objv_tracker.read_version, orig_mtime, + objv_tracker.write_version, mtime, sync_type)) { + objv_tracker.read_version = old_bci.info.objv_tracker.read_version; + return STATUS_NO_APPLY; + } + /* record the read version (if any), store the new version */ bci.info.objv_tracker.read_version = old_bci.info.objv_tracker.read_version; bci.info.objv_tracker.write_version = objv_tracker.write_version; @@ -1580,7 +1595,7 @@ public: if (ret < 0) return ret; - return 0; + return STATUS_APPLIED; } struct list_keys_info { diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 7f224a798f5..543bdf21377 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -99,6 +99,8 @@ using ceph::crypto::MD5; #define STATUS_NO_CONTENT 1902 #define STATUS_PARTIAL_CONTENT 1903 #define STATUS_REDIRECT 1904 +#define STATUS_NO_APPLY 1905 +#define STATUS_APPLIED 1906 #define ERR_INVALID_BUCKET_NAME 2000 #define ERR_INVALID_OBJECT_NAME 2001 diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc index e9094ad35d6..6da1ff5ab24 100644 --- a/src/rgw/rgw_metadata.cc +++ b/src/rgw/rgw_metadata.cc @@ -194,7 +194,8 @@ public: virtual string get_type() { return string(); } virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) { return -ENOTSUP; } - virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { return -ENOTSUP; } + virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t sync_type) { return -ENOTSUP; } virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) {} @@ -242,6 +243,7 @@ RGWMetadataManager::~RGWMetadataManager() } handlers.clear(); + delete md_log; } int RGWMetadataManager::register_handler(RGWMetadataHandler *handler) @@ -328,7 +330,9 @@ int RGWMetadataManager::get(string& metadata_key, Formatter *f) return 0; } -int RGWMetadataManager::put(string& metadata_key, bufferlist& bl) +int RGWMetadataManager::put(string& metadata_key, bufferlist& bl, + RGWMetadataHandler::sync_type_t sync_type, + obj_version *existing_version) { RGWMetadataHandler *handler; string entry; @@ -357,7 +361,11 @@ int RGWMetadataManager::put(string& metadata_key, bufferlist& bl) return -EINVAL; } - return handler->put(store, entry, objv_tracker, mtime, jo); + ret = handler->put(store, entry, objv_tracker, mtime, jo, sync_type); + if (existing_version) { + *existing_version = objv_tracker.read_version; + } + return ret; } int RGWMetadataManager::remove(string& metadata_key) diff --git a/src/rgw/rgw_metadata.h b/src/rgw/rgw_metadata.h index 3ff3b3317b8..50649b6f901 100644 --- a/src/rgw/rgw_metadata.h +++ b/src/rgw/rgw_metadata.h @@ -44,14 +44,30 @@ class RGWMetadataManager; class RGWMetadataHandler { friend class RGWMetadataManager; -protected: - virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) = 0; public: + enum sync_type_t { + APPLY_ALWAYS, + APPLY_UPDATES, + APPLY_NEWER + }; + static bool string_to_sync_type(const string& sync_string, + sync_type_t& type) { + if (sync_string.compare("update-by-version") == 0) + type = APPLY_UPDATES; + else if (sync_string.compare("update-by-timestamp") == 0) + type = APPLY_NEWER; + else if (sync_string.compare("always") == 0) + type = APPLY_ALWAYS; + else + return false; + return true; + } virtual ~RGWMetadataHandler() {} virtual string get_type() = 0; virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) = 0; - virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) = 0; + virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t type) = 0; virtual int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) = 0; virtual int list_keys_init(RGWRados *store, void **phandle) = 0; @@ -62,6 +78,33 @@ public: virtual void get_hash_key(const string& section, const string& key, string& hash_key) { hash_key = section + ":" + key; } + +protected: + virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) = 0; + /** + * Compare an incoming versus on-disk tag/version+mtime combo against + * the sync mode to see if the new one should replace the on-disk one. + * + * @return true if the update should proceed, false otherwise. + */ + bool check_versions(const obj_version& ondisk, const time_t& ondisk_time, + const obj_version& incoming, const time_t& incoming_time, + sync_type_t sync_mode) { + switch (sync_mode) { + case APPLY_UPDATES: + if ((ondisk.tag != incoming.tag) || + (ondisk.ver >= incoming.ver)) + return false; + break; + case APPLY_NEWER: + if (ondisk_time >= incoming_time) + return false; + break; + case APPLY_ALWAYS: //deliberate fall-thru -- we always apply! + default: break; + } + return true; + } }; #define META_LOG_OBJ_PREFIX "meta.log." @@ -86,9 +129,7 @@ class RGWMetadataLog { } public: - RGWMetadataLog(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store) { - prefix = META_LOG_OBJ_PREFIX; - } + RGWMetadataLog(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store), prefix(META_LOG_OBJ_PREFIX) {} int add_entry(RGWRados *store, RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl); @@ -152,7 +193,9 @@ public: map<string, bufferlist>* rmattrs, RGWObjVersionTracker *objv_tracker); int get(string& metadata_key, Formatter *f); - int put(string& metadata_key, bufferlist& bl); + int put(string& metadata_key, bufferlist& bl, + RGWMetadataHandler::sync_type_t sync_mode, + obj_version *existing_version = NULL); int remove(string& metadata_key); int list_keys_init(string& section, void **phandle); diff --git a/src/rgw/rgw_rest_log.cc b/src/rgw/rgw_rest_log.cc index 544adbe7965..7b50986cb37 100644 --- a/src/rgw/rgw_rest_log.cc +++ b/src/rgw/rgw_rest_log.cc @@ -88,6 +88,8 @@ void RGWOp_MDLog_List::execute() { if (!max_entries_str.empty()) max_entries -= entries.size(); } while (truncated && (max_entries > 0)); + + meta_log->complete_list_entries(handle); } void RGWOp_MDLog_List::send_response() { diff --git a/src/rgw/rgw_rest_metadata.cc b/src/rgw/rgw_rest_metadata.cc index 35ec0ab9b04..0705a46ed6c 100644 --- a/src/rgw/rgw_rest_metadata.cc +++ b/src/rgw/rgw_rest_metadata.cc @@ -161,12 +161,43 @@ void RGWOp_Metadata_Put::execute() { frame_metadata_key(s, metadata_key); - http_ret = store->meta_mgr->put(metadata_key, bl); + RGWMetadataHandler::sync_type_t sync_type = RGWMetadataHandler::APPLY_ALWAYS; + + bool mode_exists = false; + string mode_string = s->info.args.get("sync-type", &mode_exists); + if (mode_exists) { + bool parsed = RGWMetadataHandler::string_to_sync_type(mode_string, + sync_type); + if (!parsed) { + http_ret = -EINVAL; + return; + } + } + + http_ret = store->meta_mgr->put(metadata_key, bl, sync_type, &ondisk_version); if (http_ret < 0) { dout(5) << "ERROR: can't put key: " << cpp_strerror(http_ret) << dendl; return; } - http_ret = 0; + // translate internal codes into return header + if (http_ret == STATUS_NO_APPLY) + update_status = "skipped"; + else if (http_ret == STATUS_APPLIED) + update_status = "applied"; +} + +void RGWOp_Metadata_Put::send_response() { + int http_return_code = http_ret; + if ((http_ret == STATUS_NO_APPLY) || (http_ret == STATUS_APPLIED)) + http_return_code = STATUS_NO_CONTENT; + set_req_state_err(s, http_return_code); + dump_errno(s); + stringstream ver_stream; + ver_stream << "ver:" << ondisk_version.ver + <<",tag:" << ondisk_version.tag; + dump_pair(s, "RGWX_UPDATE_STATUS", update_status.c_str()); + dump_pair(s, "RGWX_UPDATE_VERSION", ver_stream.str().c_str()); + end_header(s); } void RGWOp_Metadata_Delete::execute() { diff --git a/src/rgw/rgw_rest_metadata.h b/src/rgw/rgw_rest_metadata.h index 59d7c5f7045..7f3cf1f2207 100644 --- a/src/rgw/rgw_rest_metadata.h +++ b/src/rgw/rgw_rest_metadata.h @@ -40,6 +40,8 @@ public: class RGWOp_Metadata_Put : public RGWRESTOp { int get_data(bufferlist& bl); + string update_status; + obj_version ondisk_version; public: RGWOp_Metadata_Put() {} ~RGWOp_Metadata_Put() {} @@ -48,6 +50,7 @@ public: return caps.check_cap("metadata", RGW_CAP_WRITE); } void execute(); + void send_response(); virtual const string name() { return "set_metadata"; } }; diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc index 6cdcaa62935..6fcecd4a98d 100644 --- a/src/rgw/rgw_user.cc +++ b/src/rgw/rgw_user.cc @@ -2295,22 +2295,29 @@ public: return 0; } - int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { + int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t sync_mode) { RGWUserInfo info; decode_json_obj(info, obj); RGWUserInfo old_info; - int ret = rgw_get_user_info_by_uid(store, entry, old_info, &objv_tracker); + time_t orig_mtime; + int ret = rgw_get_user_info_by_uid(store, entry, old_info, &objv_tracker, &orig_mtime); if (ret < 0 && ret != -ENOENT) return ret; + // are we actually going to perform this put, or is it too old? + if (!check_versions(objv_tracker.read_version, orig_mtime, + objv_tracker.write_version, mtime, sync_mode)) { + return STATUS_NO_APPLY; + } ret = rgw_store_user_info(store, info, &old_info, &objv_tracker, mtime, false); if (ret < 0) return ret; - return 0; + return STATUS_APPLIED; } struct list_keys_info { diff --git a/src/test/cls_log/test_cls_log.cc b/src/test/cls_log/test_cls_log.cc index a8d1b3d5300..ce97025f819 100644 --- a/src/test/cls_log/test_cls_log.cc +++ b/src/test/cls_log/test_cls_log.cc @@ -118,14 +118,10 @@ TEST(cls_rgw, test_log_add_same_time) /* add chains */ string oid = "obj"; - /* create object */ - ASSERT_EQ(0, ioctx.create(oid, true)); - /* generate log */ - utime_t start_time = ceph_clock_now(g_ceph_context); generate_log(ioctx, oid, 10, start_time, false); @@ -206,14 +202,10 @@ TEST(cls_rgw, test_log_add_different_time) /* add chains */ string oid = "obj"; - /* create object */ - ASSERT_EQ(0, ioctx.create(oid, true)); - /* generate log */ - utime_t start_time = ceph_clock_now(g_ceph_context); generate_log(ioctx, oid, 10, start_time, true); @@ -227,7 +219,6 @@ TEST(cls_rgw, test_log_add_different_time) string marker; /* check list */ - cls_log_list(*rop, start_time, to_time, marker, 0, entries, &marker, &truncated); bufferlist obl; @@ -258,7 +249,6 @@ TEST(cls_rgw, test_log_add_different_time) reset_rop(&rop); /* check list again with shifted time */ - utime_t next_time = get_time(start_time, 1, true); marker.clear(); @@ -289,7 +279,7 @@ TEST(cls_rgw, test_log_add_different_time) } while (truncated); ASSERT_EQ(10, i); - + delete rop; } TEST(cls_rgw, test_log_trim) @@ -305,14 +295,10 @@ TEST(cls_rgw, test_log_trim) /* add chains */ string oid = "obj"; - /* create object */ - ASSERT_EQ(0, ioctx.create(oid, true)); - /* generate log */ - utime_t start_time = ceph_clock_now(g_ceph_context); generate_log(ioctx, oid, 10, start_time, true); @@ -344,4 +330,5 @@ TEST(cls_rgw, test_log_trim) ASSERT_EQ(9 - i, (int)entries.size()); ASSERT_EQ(0, (int)truncated); } + delete rop; } diff --git a/src/test/cls_replica_log/test_cls_replica_log.cc b/src/test/cls_replica_log/test_cls_replica_log.cc index eabe0b3860d..8c204caef04 100644 --- a/src/test/cls_replica_log/test_cls_replica_log.cc +++ b/src/test/cls_replica_log/test_cls_replica_log.cc @@ -14,34 +14,42 @@ #include "cls/replica_log/cls_replica_log_client.h" #include "cls/replica_log/cls_replica_log_types.h" -#define SETUP_DATA \ - librados::Rados rados; \ - librados::IoCtx ioctx; \ - string pool_name = get_temp_pool_name(); \ - ASSERT_EQ("", create_one_pool_pp(pool_name, rados)); \ - ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx)); \ - string oid = "obj"; \ - ASSERT_EQ(0, ioctx.create(oid, true)); - -#define ADD_MARKER \ - string entity = "tester_entity"; \ - string marker = "tester_marker1"; \ - utime_t time; \ - time.set_from_double(10); \ - list<pair<string, utime_t> > entries; \ - entries.push_back(make_pair("tester_obj1", time)); \ - time.set_from_double(20); \ - cls_replica_log_progress_marker progress; \ - cls_replica_log_prepare_marker(progress, entity, marker, time, &entries); \ - librados::ObjectWriteOperation opw; \ - cls_replica_log_update_bound(opw, progress); \ - ASSERT_EQ(0, ioctx.operate(oid, &opw)); - -TEST(cls_replica_log, test_set_get_marker) +class cls_replica_log_Test : public ::testing::Test { +public: + librados::Rados rados; + librados::IoCtx ioctx; + string pool_name; + string oid; + string entity; + string marker; + utime_t time; + list<pair<string, utime_t> > entries; + cls_replica_log_progress_marker progress; + + void SetUp() { + pool_name = get_temp_pool_name(); + ASSERT_EQ("", create_one_pool_pp(pool_name, rados)); + ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx)); + oid = "obj"; + ASSERT_EQ(0, ioctx.create(oid, true)); + } + + void add_marker() { + entity = "tester_entity"; + marker = "tester_marker1"; + time.set_from_double(10); + entries.push_back(make_pair("tester_obj1", time)); + time.set_from_double(20); + cls_replica_log_prepare_marker(progress, entity, marker, time, &entries); + librados::ObjectWriteOperation opw; + cls_replica_log_update_bound(opw, progress); + ASSERT_EQ(0, ioctx.operate(oid, &opw)); + } +}; + +TEST_F(cls_replica_log_Test, test_set_get_marker) { - SETUP_DATA - - ADD_MARKER + add_marker(); string reply_position_marker; utime_t reply_time; @@ -66,11 +74,9 @@ TEST(cls_replica_log, test_set_get_marker) ASSERT_EQ("tester_obj1", response_item_list.front().first); } -TEST(cls_replica_log, test_bad_update) +TEST_F(cls_replica_log_Test, test_bad_update) { - SETUP_DATA - - ADD_MARKER + add_marker(); time.set_from_double(15); cls_replica_log_progress_marker bad_marker; @@ -80,22 +86,18 @@ TEST(cls_replica_log, test_bad_update) ASSERT_EQ(-EINVAL, ioctx.operate(oid, &badw)); } -TEST(cls_replica_log, test_bad_delete) +TEST_F(cls_replica_log_Test, test_bad_delete) { - SETUP_DATA - - ADD_MARKER + add_marker(); librados::ObjectWriteOperation badd; cls_replica_log_delete_bound(badd, entity); ASSERT_EQ(-ENOTEMPTY, ioctx.operate(oid, &badd)); } -TEST(cls_replica_log, test_good_delete) +TEST_F(cls_replica_log_Test, test_good_delete) { - SETUP_DATA - - ADD_MARKER + add_marker(); librados::ObjectWriteOperation opc; progress.items.clear(); @@ -113,10 +115,8 @@ TEST(cls_replica_log, test_good_delete) ASSERT_EQ((unsigned)0, return_progress_list.size()); } -TEST(cls_replica_log, test_bad_get) +TEST_F(cls_replica_log_Test, test_bad_get) { - SETUP_DATA - string reply_position_marker; utime_t reply_time; list<cls_replica_log_progress_marker> return_progress_list; @@ -125,11 +125,9 @@ TEST(cls_replica_log, test_bad_get) reply_time, return_progress_list)); } -TEST(cls_replica_log, test_double_delete) +TEST_F(cls_replica_log_Test, test_double_delete) { - SETUP_DATA - - ADD_MARKER + add_marker(); librados::ObjectWriteOperation opc; progress.items.clear(); diff --git a/src/test/cls_statelog/test_cls_statelog.cc b/src/test/cls_statelog/test_cls_statelog.cc index 33028f4fbad..a1b4cc34efc 100644 --- a/src/test/cls_statelog/test_cls_statelog.cc +++ b/src/test/cls_statelog/test_cls_statelog.cc @@ -70,6 +70,7 @@ static void get_entries_by_object(librados::IoCtx& ioctx, string& oid, cls_statelog_list(*rop, empty_str, op_id, object, marker, 0, entries, &marker, &truncated); ASSERT_EQ(0, ioctx.operate(oid, rop, &obl)); ASSERT_EQ(expected, (int)entries.size()); + delete rop; } static void get_entries_by_client_id(librados::IoCtx& ioctx, string& oid, diff --git a/src/test/cls_version/test_cls_version.cc b/src/test/cls_version/test_cls_version.cc index acce4f00cf4..4c2d59500df 100644 --- a/src/test/cls_version/test_cls_version.cc +++ b/src/test/cls_version/test_cls_version.cc @@ -270,14 +270,17 @@ TEST(cls_rgw, test_version_inc_check) bufferlist bl; ASSERT_EQ(0, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_GE); ASSERT_EQ(0, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_LE); ASSERT_EQ(0, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_TAG_EQ); ASSERT_EQ(0, ioctx.operate(oid, rop, &bl)); @@ -296,14 +299,17 @@ TEST(cls_rgw, test_version_inc_check) delete op; /* a bunch of conditions that should fail */ + delete rop; rop = new_rop(); cls_version_check(*rop, ver, VER_COND_LT); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_LE); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_TAG_NE); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl)); diff --git a/src/test/test_rgw_admin_log.cc b/src/test/test_rgw_admin_log.cc index 67ba0b12d96..f49a107d2f6 100644 --- a/src/test/test_rgw_admin_log.cc +++ b/src/test/test_rgw_admin_log.cc @@ -163,13 +163,13 @@ void test_helper::set_response(char *r){ } size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response((char *)ptr); return size*nmemb; } size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response_data((char *)ptr, size*nmemb); return size*nmemb; } @@ -405,7 +405,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) { int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid << " --display-name=" << display_name; @@ -426,7 +425,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { int user_rm(string& uid, string& display_name) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " metadata rm --metadata-key=user:" << uid; diff --git a/src/test/test_rgw_admin_meta.cc b/src/test/test_rgw_admin_meta.cc index 2882891e411..fb9cf863f06 100644 --- a/src/test/test_rgw_admin_meta.cc +++ b/src/test/test_rgw_admin_meta.cc @@ -157,13 +157,13 @@ void test_helper::set_response(char *r){ } size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response((char *)ptr); return size*nmemb; } size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response_data((char *)ptr, size*nmemb); return size*nmemb; } @@ -399,7 +399,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) { int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid << " --display-name=" << display_name; @@ -420,7 +419,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { int user_rm(string& uid, string& display_name) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " user rm --uid=" << uid << " --display-name=" << display_name; diff --git a/src/test/test_rgw_admin_opstate.cc b/src/test/test_rgw_admin_opstate.cc index 1cd39890d48..3f173d12932 100644 --- a/src/test/test_rgw_admin_opstate.cc +++ b/src/test/test_rgw_admin_opstate.cc @@ -160,13 +160,13 @@ void test_helper::set_response(char *r){ } size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response((char *)ptr); return size*nmemb; } size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response_data((char *)ptr, size*nmemb); return size*nmemb; } @@ -403,7 +403,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) { int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid << " --display-name=" << display_name; @@ -424,7 +423,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { int user_rm(string& uid, string& display_name) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " metadata rm --metadata-key=user:" << uid; diff --git a/src/upstart/ceph-create-keys.conf b/src/upstart/ceph-create-keys.conf index de215d98ff3..7c79e692a86 100644 --- a/src/upstart/ceph-create-keys.conf +++ b/src/upstart/ceph-create-keys.conf @@ -1,6 +1,7 @@ description "Create Ceph client.admin key when possible" start on started ceph-mon +stop on stopping ceph-mon task |