diff options
author | Sage Weil <sage@inktank.com> | 2012-11-29 15:48:54 -0800 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2012-11-29 15:48:54 -0800 |
commit | c26dc1885d62fad4c28882b023ef62c4d195b132 (patch) | |
tree | f37edb5a968a826d398cb110b0dc46a00f08cb74 | |
parent | 0b55fbdbea2b2209eea65b67adb00eea0f5cf620 (diff) | |
parent | 77711ddee37de154c0d5d452c4f84dae36eb3e3a (diff) | |
download | ceph-c26dc1885d62fad4c28882b023ef62c4d195b132.tar.gz |
Merge branch 'next'
Conflicts:
src/rgw/rgw_admin.cc
-rw-r--r-- | README | 3 | ||||
-rwxr-xr-x | qa/workunits/rbd/map-unmap.sh | 98 | ||||
-rw-r--r-- | src/ceph_mon.cc | 4 | ||||
-rw-r--r-- | src/client/Client.cc | 6 | ||||
-rw-r--r-- | src/common/ceph_context.cc | 2 | ||||
-rw-r--r-- | src/common/config.cc | 4 | ||||
-rw-r--r-- | src/common/config_opts.h | 3 | ||||
-rw-r--r-- | src/mon/AuthMonitor.cc | 2 | ||||
-rw-r--r-- | src/mon/MDSMonitor.cc | 4 | ||||
-rw-r--r-- | src/mon/Monitor.cc | 25 | ||||
-rw-r--r-- | src/mon/MonitorStore.cc | 124 | ||||
-rw-r--r-- | src/mon/MonitorStore.h | 43 | ||||
-rw-r--r-- | src/mon/MonmapMonitor.cc | 7 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 16 | ||||
-rw-r--r-- | src/mon/PGMonitor.cc | 2 | ||||
-rw-r--r-- | src/mon/Paxos.cc | 13 | ||||
-rw-r--r-- | src/os/hobject.h | 17 | ||||
-rw-r--r-- | src/osd/OSD.cc | 12 | ||||
-rw-r--r-- | src/osd/PG.cc | 23 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 16 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 11 | ||||
-rw-r--r-- | src/osd/osd_types.h | 6 | ||||
-rw-r--r-- | src/rgw/rgw_admin.cc | 13 | ||||
-rw-r--r-- | src/test/filestore/store_test.cc | 6 | ||||
-rw-r--r-- | src/test/libcephfs/test.cc | 28 | ||||
-rw-r--r-- | src/test/librados/misc.cc | 5 |
26 files changed, 359 insertions, 134 deletions
@@ -120,9 +120,10 @@ To build the source code, you must install the following: - uuid-dev - libatomic-ops-dev - libboost-program-options-dev +- libboost-thread-dev For example: - $ apt-get install automake autoconf automake gcc g++ libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev + $ apt-get install automake autoconf automake gcc g++ libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev libboost-program-options-dev libboost-thread-dev diff --git a/qa/workunits/rbd/map-unmap.sh b/qa/workunits/rbd/map-unmap.sh new file mode 100755 index 00000000000..9ecc226e5f5 --- /dev/null +++ b/qa/workunits/rbd/map-unmap.sh @@ -0,0 +1,98 @@ +#!/bin/bash -e + +RUN_TIME=300 # approximate duration of run (seconds) + +[ $# -eq 1 ] && RUN_TIME="$1" + +IMAGE_NAME="image-$$" +IMAGE_SIZE="1024" # MB + +ID_TIMEOUT="10" # seconds to wait to get rbd id after mapping +ID_DELAY=".1" # floating-point seconds to delay before rescan + +MAP_DELAY=".25" # floating-point seconds to delay before unmap + +function get_time() { + date '+%s' +} + +function times_up() { + local end_time="$1" + + test $(get_time) -ge "${end_time}" +} + +function get_id() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local id="" + local end_time=$(expr $(get_time) + ${ID_TIMEOUT}) + + cd /sys/bus/rbd/devices + + while [ -z "${id}" ]; do + if times_up "${end_time}"; then + break; + fi + for i in *; do + if [ "$(cat $i/name)" = "${image_name}" ]; then + id=$i + break + fi + done + sleep "${ID_DELAY}" + done + echo $id + test -n "${id}" # return code 0 if id was found +} + +function map_unmap() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + + rbd map "${image_name}" + RBD_ID=$(get_id "${image_name}") + + sleep "${MAP_DELAY}" + + rbd unmap "/dev/rbd${RBD_ID}" +} + +function setup() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local image_size="$2" + + [ -d /sys/bus/rbd ] || sudo modprobe rbd + + # allow ubuntu user to map/unmap rbd devices + sudo chown ubuntu /sys/bus/rbd/add + sudo chown ubuntu /sys/bus/rbd/remove + rbd create "${image_name}" --size="${image_size}" +} + +function cleanup() { + # Have to rely on globals for the trap call + # rbd unmap "/dev/rbd${RBD_ID}" || true + rbd rm "${IMAGE_NAME}" || true + sudo chown root /sys/bus/rbd/remove || true + sudo chown root /sys/bus/rbd/add || true +} +trap cleanup EXIT HUP INT + +#### Start + +setup "${IMAGE_NAME}" "${IMAGE_SIZE}" + +COUNT=0 +START_TIME=$(get_time) +END_TIME=$(expr $(get_time) + ${RUN_TIME}) +while ! times_up "${END_TIME}"; do + map_unmap "${IMAGE_NAME}" + COUNT=$(expr $COUNT + 1) +done +ELAPSED=$(expr "$(get_time)" - "${START_TIME}") + +echo "${COUNT} iterations completed in ${ELAPSED} seconds" + +exit 0 diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc index f82be18b066..3e3edf9b90a 100644 --- a/src/ceph_mon.cc +++ b/src/ceph_mon.cc @@ -283,14 +283,14 @@ int main(int argc, const char **argv) { bufferlist mapbl; bufferlist latest; - store.get_bl_ss(latest, "monmap", "latest"); + store.get_bl_ss_safe(latest, "monmap", "latest"); if (latest.length() > 0) { bufferlist::iterator p = latest.begin(); version_t v; ::decode(v, p); ::decode(mapbl, p); } else { - store.get_bl_ss(mapbl, "mkfs", "monmap"); + store.get_bl_ss_safe(mapbl, "mkfs", "monmap"); if (mapbl.length() == 0) { cerr << "mon fs missing 'monmap/latest' and 'mkfs/monmap'" << std::endl; exit(1); diff --git a/src/client/Client.cc b/src/client/Client.cc index b808f204e70..1c881828540 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -5400,6 +5400,7 @@ int Client::read(int fd, char *buf, loff_t size, loff_t offset) int Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl) { + const md_config_t *conf = cct->_conf; Inode *in = f->inode; //bool lazy = f->mode == CEPH_FILE_MODE_LAZY; @@ -5416,7 +5417,7 @@ int Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl) movepos = true; } - if (have & CEPH_CAP_FILE_CACHE) + if (!conf->client_debug_force_sync_read && have & CEPH_CAP_FILE_CACHE) r = _read_async(f, offset, size, bl); else r = _read_sync(f, offset, size, bl); @@ -5564,6 +5565,9 @@ int Client::_read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl) flock.Unlock(); client_lock.Lock(); + // if we get ENOENT from OSD, assume 0 bytes returned + if (r == -ENOENT) + r = 0; if (r < 0) return r; if (tbl.length()) { diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc index e4345e64d5b..decf86db70a 100644 --- a/src/common/ceph_context.cc +++ b/src/common/ceph_context.cc @@ -142,7 +142,7 @@ public: } if (changed.count("log_max_recent")) { - log->set_max_new(conf->log_max_recent); + log->set_max_recent(conf->log_max_recent); } } }; diff --git a/src/common/config.cc b/src/common/config.cc index 205a15e509e..0e86b792ae9 100644 --- a/src/common/config.cc +++ b/src/common/config.cc @@ -900,7 +900,7 @@ int md_config_t::set_val_raw(const char *val, const config_option *opt) } static const char *CONF_METAVARIABLES[] = - { "cluster", "type", "name", "host", "num", "id" }; + { "cluster", "type", "name", "host", "num", "id", "pid" }; static const int NUM_CONF_METAVARIABLES = (sizeof(CONF_METAVARIABLES) / sizeof(CONF_METAVARIABLES[0])); @@ -976,6 +976,8 @@ bool md_config_t::expand_meta(std::string &origval) const out += name.get_id().c_str(); else if (var == "id") out += name.get_id().c_str(); + else if (var == "pid") + out += stringify(getpid()); else assert(0); // unreachable found_meta = true; diff --git a/src/common/config_opts.h b/src/common/config_opts.h index e2a29fca509..8699d789164 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -33,7 +33,7 @@ OPTION(fatal_signal_handlers, OPT_BOOL, true) OPTION(log_file, OPT_STR, "/var/log/ceph/$cluster-$name.log") OPTION(log_max_new, OPT_INT, 1000) -OPTION(log_max_recent, OPT_INT, 1000000) +OPTION(log_max_recent, OPT_INT, 100000) OPTION(log_to_stderr, OPT_BOOL, true) OPTION(err_to_stderr, OPT_BOOL, true) OPTION(log_to_syslog, OPT_BOOL, false) @@ -181,6 +181,7 @@ OPTION(client_oc_max_dirty, OPT_INT, 1024*1024* 100) // MB * n (dirty OR tx. OPTION(client_oc_target_dirty, OPT_INT, 1024*1024* 8) // target dirty (keep this smallish) OPTION(client_oc_max_dirty_age, OPT_DOUBLE, 5.0) // max age in cache before writeback OPTION(client_oc_max_objects, OPT_INT, 1000) // max objects in cache +OPTION(client_debug_force_sync_read, OPT_BOOL, false) // always read synchronously (go to osds) // note: the max amount of "in flight" dirty data is roughly (max - target) OPTION(fuse_use_invalidate_cb, OPT_BOOL, false) // use fuse 2.8+ invalidate callback to keep page cache consistent OPTION(fuse_big_writes, OPT_BOOL, true) diff --git a/src/mon/AuthMonitor.cc b/src/mon/AuthMonitor.cc index c43d7e988db..aec2ba04d86 100644 --- a/src/mon/AuthMonitor.cc +++ b/src/mon/AuthMonitor.cc @@ -96,7 +96,7 @@ void AuthMonitor::create_initial() KeyRing keyring; bufferlist bl; - mon->store->get_bl_ss(bl, "mkfs", "keyring"); + mon->store->get_bl_ss_safe(bl, "mkfs", "keyring"); bufferlist::iterator p = bl.begin(); ::decode(keyring, p); diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index dd256b9b14a..72168ac9638 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -555,7 +555,7 @@ bool MDSMonitor::preprocess_command(MMonCommand *m) MDSMap *p = &mdsmap; if (epoch) { bufferlist b; - mon->store->get_bl_sn(b, "mdsmap", epoch); + mon->store->get_bl_sn_safe(b, "mdsmap", epoch); if (!b.length()) { p = 0; r = -ENOENT; @@ -597,7 +597,7 @@ bool MDSMonitor::preprocess_command(MMonCommand *m) } epoch_t e = l; bufferlist b; - mon->store->get_bl_sn(b,"mdsmap",e); + mon->store->get_bl_sn_safe(b,"mdsmap",e); if (!b.length()) { r = -ENOENT; } else { diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index b819fceddb0..a90ae6e3cf8 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -81,6 +81,11 @@ static ostream& _prefix(std::ostream *_dout, const Monitor *mon) { long parse_pos_long(const char *s, ostream *pss) { + if (*s == '-' || *s == '+') { + *pss << "expected numerical value, got: " << s; + return -EINVAL; + } + string err; long r = strict_strtol(s, 10, &err); if ((r == 0) && !err.empty()) { @@ -360,7 +365,7 @@ int Monitor::check_features(MonitorStore *store) CompatSet ondisk; bufferlist features; - store->get_bl_ss(features, COMPAT_SET_LOC, 0); + store->get_bl_ss_safe(features, COMPAT_SET_LOC, 0); if (features.length() == 0) { generic_dout(0) << "WARNING: mon fs missing feature list.\n" << "Assuming it is old-style and introducing one." << dendl; @@ -389,7 +394,7 @@ int Monitor::check_features(MonitorStore *store) void Monitor::read_features() { bufferlist bl; - store->get_bl_ss(bl, COMPAT_SET_LOC, 0); + store->get_bl_ss_safe(bl, COMPAT_SET_LOC, 0); assert(bl.length()); bufferlist::iterator p = bl.begin(); @@ -495,7 +500,7 @@ int Monitor::preinit() if (authmon()->paxos->get_version() == 0) { dout(10) << "loading initial keyring to bootstrap authentication for mkfs" << dendl; bufferlist bl; - store->get_bl_ss(bl, "mkfs", "keyring"); + store->get_bl_ss_safe(bl, "mkfs", "keyring"); KeyRing keyring; bufferlist::iterator p = bl.begin(); ::decode(keyring, p); @@ -1030,12 +1035,14 @@ MMonProbe *Monitor::fill_probe_data(MMonProbe *m, Paxos *pax) version_t v = MAX(pax->get_first_committed(), m->newest_version + 1); int len = 0; for (; v <= pax->get_version(); v++) { - len += store->get_bl_sn(r->paxos_values[m->machine_name][v], m->machine_name.c_str(), v); + store->get_bl_sn_safe(r->paxos_values[m->machine_name][v], m->machine_name.c_str(), v); + len += r->paxos_values[m->machine_name][v].length(); r->gv[m->machine_name][v] = store->get_global_version(m->machine_name.c_str(), v); for (list<string>::iterator p = pax->extra_state_dirs.begin(); p != pax->extra_state_dirs.end(); ++p) { - len += store->get_bl_sn(r->paxos_values[*p][v], p->c_str(), v); + store->get_bl_sn_safe(r->paxos_values[*p][v], p->c_str(), v); + len += r->paxos_values[*p][v].length(); } if (len >= g_conf->mon_slurp_bytes) break; @@ -2356,7 +2363,8 @@ int Monitor::write_fsid() bufferlist b; b.append(us); - return store->put_bl_ss(b, "cluster_uuid", 0); + store->put_bl_ss(b, "cluster_uuid", 0); + return 0; } /* @@ -2380,9 +2388,8 @@ int Monitor::mkfs(bufferlist& osdmapbl) bufferlist magicbl; magicbl.append(CEPH_MON_ONDISK_MAGIC); magicbl.append("\n"); - r = store->put_bl_ss(magicbl, "magic", 0); - if (r < 0) - return r; + store->put_bl_ss(magicbl, "magic", 0); + features = get_supported_features(); write_features(); diff --git a/src/mon/MonitorStore.cc b/src/mon/MonitorStore.cc index b9decedac9e..a74b750787c 100644 --- a/src/mon/MonitorStore.cc +++ b/src/mon/MonitorStore.cc @@ -35,7 +35,6 @@ static ostream& _prefix(std::ostream *_dout, const string& dir) { return *_dout << "store(" << dir << ") "; } - #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> @@ -89,7 +88,8 @@ int MonitorStore::mount() int MonitorStore::umount() { - ::close(lock_fd); + int close_err = TEMP_FAILURE_RETRY(::close(lock_fd)); + assert (0 == close_err); return 0; } @@ -110,7 +110,8 @@ int MonitorStore::mkfs() derr << "MonitorStore::mkfs: unable to open " << dir << ": " << cpp_strerror(err) << dendl; return err; } - ::close(fd); + int close_err = TEMP_FAILURE_RETRY(::close(fd)); + assert (0 == close_err); dout(0) << "created monfs at " << dir << " for " << g_conf->name.get_id() << dendl; @@ -134,6 +135,7 @@ version_t MonitorStore::get_int(const char *a, const char *b) } derr << "MonitorStore::get_int: failed to open '" << fn << "': " << cpp_strerror(err) << dendl; + assert(0 == "failed to open"); return 0; } @@ -143,10 +145,13 @@ version_t MonitorStore::get_int(const char *a, const char *b) if (r < 0) { derr << "MonitorStore::get_int: failed to read '" << fn << "': " << cpp_strerror(r) << dendl; - TEMP_FAILURE_RETRY(::close(fd)); + int close_err = TEMP_FAILURE_RETRY(::close(fd)); + assert(0 == close_err); + assert(0); // the file exists; so this is a different failure return 0; } - TEMP_FAILURE_RETRY(::close(fd)); + int close_err = TEMP_FAILURE_RETRY(::close(fd)); + assert (0 == close_err); version_t val = atoi(buf); @@ -196,7 +201,12 @@ void MonitorStore::put_int(version_t val, const char *a, const char *b) << cpp_strerror(r) << dendl; ceph_abort(); } - ::fsync(fd); + r = ::fsync(fd); + if (r) { + derr << "Monitor::put_int: failed to fsync fd for '" << tfn << "': " + << cpp_strerror(r) << dendl; + ceph_abort(); + } if (TEMP_FAILURE_RETRY(::close(fd))) { derr << "MonitorStore::put_int: failed to close fd for '" << tfn << "': " << cpp_strerror(r) << dendl; @@ -245,10 +255,13 @@ bool MonitorStore::exists_bl_ss(const char *a, const char *b) int r = ::stat(fn, &st); //char buf[80]; //dout(15) << "exists_bl stat " << fn << " r=" << r << " errno " << errno << " " << strerror_r(errno, buf, sizeof(buf)) << dendl; + if (r) { + assert (errno == ENOENT); + } return r == 0; } -int MonitorStore::erase_ss(const char *a, const char *b) +void MonitorStore::erase_ss(const char *a, const char *b) { char fn[1024]; char dr[1024]; @@ -261,6 +274,7 @@ int MonitorStore::erase_ss(const char *a, const char *b) strcpy(fn, dr); } int r = ::unlink(fn); + assert(0 == r || ENOENT == errno); // callers don't check for existence first if (b) { // wipe out _gv file too, if any. this is sloppy, but will work. @@ -270,7 +284,6 @@ int MonitorStore::erase_ss(const char *a, const char *b) } ::rmdir(dr); // sloppy attempt to clean up empty dirs - return r; } int MonitorStore::get_bl_ss(bufferlist& bl, const char *a, const char *b) @@ -314,7 +327,8 @@ int MonitorStore::get_bl_ss(bufferlist& bl, const char *a, const char *b) off += r; } bl.append(bp); - ::close(fd); + int close_err = TEMP_FAILURE_RETRY(::close(fd)); + assert (0 == close_err); if (b) { dout(15) << "get_bl " << a << "/" << b << " = " << bl.length() << " bytes" << dendl; @@ -325,7 +339,7 @@ int MonitorStore::get_bl_ss(bufferlist& bl, const char *a, const char *b) return len; } -int MonitorStore::write_bl_ss_impl(bufferlist& bl, const char *a, const char *b, bool append) +void MonitorStore::write_bl_ss(bufferlist& bl, const char *a, const char *b, bool append) { int err = 0; char fn[1024]; @@ -336,7 +350,7 @@ int MonitorStore::write_bl_ss_impl(bufferlist& bl, const char *a, const char *b, err = -errno; derr << __func__ << " failed to create dir " << fn << ": " << cpp_strerror(err) << dendl; - return err; + assert(0 == "failed to create dir"); } dout(15) << "put_bl " << a << "/" << b << " = " << bl.length() << " bytes" << dendl; snprintf(fn, sizeof(fn), "%s/%s/%s", dir.c_str(), a, b); @@ -352,7 +366,7 @@ int MonitorStore::write_bl_ss_impl(bufferlist& bl, const char *a, const char *b, err = -errno; derr << "failed to open " << fn << "for append: " << cpp_strerror(err) << dendl; - return err; + assert(0 == "failed to open for append"); } } else { snprintf(tfn, sizeof(tfn), "%s.new", fn); @@ -360,42 +374,34 @@ int MonitorStore::write_bl_ss_impl(bufferlist& bl, const char *a, const char *b, if (fd < 0) { err = -errno; derr << "failed to open " << tfn << ": " << cpp_strerror(err) << dendl; - return err; + assert(0 == "failed to open"); } } err = bl.write_fd(fd); - - if (!err) - ::fsync(fd); - ::close(fd); - if (!append && !err) { - int r = ::rename(tfn, fn); - if (r < 0) { + assert(!err); + err = ::fsync(fd); + assert(!err); + err = TEMP_FAILURE_RETRY(::close(fd)); + assert (!err); // this really can't fail, right? right?... + if (!append) { + err = ::rename(tfn, fn); + if (err < 0) { err = -errno; derr << __func__ << " failed to rename '" << tfn << "' -> '" << fn << "': " << cpp_strerror(err) << dendl; - return err; + assert(0 == "failed to rename"); } } - - return err; -} - -int MonitorStore::write_bl_ss(bufferlist& bl, const char *a, const char *b, bool append) -{ - int err = write_bl_ss_impl(bl, a, b, append); - if (err) - derr << "write_bl_ss " << a << "/" << b << " got error " << cpp_strerror(err) << dendl; - assert(!err); // for now - return 0; } -int MonitorStore::put_bl_sn_map(const char *a, +void MonitorStore::put_bl_sn_map(const char *a, map<version_t,bufferlist>::iterator start, map<version_t,bufferlist>::iterator end, map<version_t,version_t> *gvmap) { + int err = 0; + int close_err = 0; version_t first = start->first; map<version_t,bufferlist>::iterator lastp = end; --lastp; @@ -407,13 +413,11 @@ int MonitorStore::put_bl_sn_map(const char *a, last - first < (unsigned)g_conf->mon_sync_fs_threshold) { // just do them individually for (map<version_t,bufferlist>::iterator p = start; p != end; ++p) { - int err = put_bl_sn(p->second, a, p->first); - if (err < 0) - return err; + put_bl_sn(p->second, a, p->first); if (gvmap && gvmap->count(p->first) && (*gvmap)[p->first] > 0) put_global_version(a, p->first, (*gvmap)[p->first]); } - return 0; + return; } // make sure dir exists @@ -421,10 +425,10 @@ int MonitorStore::put_bl_sn_map(const char *a, snprintf(dfn, sizeof(dfn), "%s/%s", dir.c_str(), a); int r = ::mkdir(dfn, 0755); if ((r < 0) && (errno != EEXIST)) { - int err = -errno; + err = -errno; derr << __func__ << " failed to create dir " << dfn << ": " << cpp_strerror(err) << dendl; - return err; + assert(0 == "failed to create dir"); } for (map<version_t,bufferlist>::iterator p = start; p != end; ++p) { @@ -437,13 +441,14 @@ int MonitorStore::put_bl_sn_map(const char *a, if (fd < 0) { int err = -errno; derr << "failed to open " << tfn << ": " << cpp_strerror(err) << dendl; - return err; + assert(0 == "failed to open"); } - int err = p->second.write_fd(fd); - ::close(fd); + err = p->second.write_fd(fd); + close_err = TEMP_FAILURE_RETRY(::close(fd)); + assert (0 == close_err); if (err < 0) - return -errno; + assert(0 == "failed to write"); // this doesn't try to be efficient.. too bad for you! it may also // extend beyond commmitted, but that's okay; we only look at these @@ -455,12 +460,13 @@ int MonitorStore::put_bl_sn_map(const char *a, // sync them all int dirfd = ::open(dir.c_str(), O_RDONLY); if (dirfd < 0) { - int err = -errno; + err = -errno; derr << "failed to open " << dir << ": " << cpp_strerror(err) << dendl; - return err; + assert(0 == "failed to open temp file"); } sync_filesystem(dirfd); - ::close(dirfd); + close_err = TEMP_FAILURE_RETRY(::close(dirfd)); + assert (0 == close_err); // rename them all into place for (map<version_t,bufferlist>::iterator p = start; p != end; ++p) { @@ -469,23 +475,28 @@ int MonitorStore::put_bl_sn_map(const char *a, snprintf(fn, sizeof(fn), "%s/%llu", dfn, (long long unsigned)p->first); snprintf(tfn, sizeof(tfn), "%s.new", fn); - int err = ::rename(tfn, fn); + err = ::rename(tfn, fn); if (err < 0) - return -errno; + assert(0 == "failed to rename"); } // fsync the dir (to commit the renames) dirfd = ::open(dir.c_str(), O_RDONLY); if (dirfd < 0) { - int err = -errno; + err = -errno; derr << __func__ << " failed to open " << dir << ": " << cpp_strerror(err) << dendl; - return err; + assert(0 == "failed to open dir"); } - ::fsync(dirfd); - ::close(dirfd); - - return 0; + err = ::fsync(dirfd); + if (err < 0) { + err = -errno; + derr << __func__ << " failed to fsync " << dir + << ": " << cpp_strerror(err) << dendl; + assert(0 == "failed to fsync"); + } + close_err = TEMP_FAILURE_RETRY(::close(dirfd)); + assert (0 == close_err); } void MonitorStore::sync() @@ -495,8 +506,9 @@ void MonitorStore::sync() int err = -errno; derr << __func__ << " failed to open " << dir << ": " << cpp_strerror(err) << dendl; - return; + assert(0 == "failed to open dir for syncing"); } sync_filesystem(dirfd); - ::close(dirfd); + int close_err = TEMP_FAILURE_RETRY(::close(dirfd)); + assert (0 == close_err); } diff --git a/src/mon/MonitorStore.h b/src/mon/MonitorStore.h index cf748360456..9d1efde3d53 100644 --- a/src/mon/MonitorStore.h +++ b/src/mon/MonitorStore.h @@ -18,16 +18,17 @@ #include "include/types.h" #include "include/buffer.h" +#include "common/compiler_extensions.h" + #include <iosfwd> #include <string.h> +#include <errno.h> class MonitorStore { string dir; int lock_fd; - int write_bl_ss_impl(bufferlist& bl, const char *a, const char *b, - bool append); - int write_bl_ss(bufferlist& bl, const char *a, const char *b, + void write_bl_ss(bufferlist& bl, const char *a, const char *b, bool append); public: MonitorStore(const std::string &d) : dir(d), lock_fd(-1) { } @@ -39,36 +40,44 @@ public: void sync(); // ints (stored as ascii) - version_t get_int(const char *a, const char *b=0); + version_t get_int(const char *a, const char *b=0) WARN_UNUSED_RESULT; void put_int(version_t v, const char *a, const char *b=0); - version_t get_global_version(const char *a, version_t b); + version_t get_global_version(const char *a, version_t b) WARN_UNUSED_RESULT; void put_global_version(const char *a, version_t b, version_t gv); // buffers // ss and sn varieties. bool exists_bl_ss(const char *a, const char *b=0); - int get_bl_ss(bufferlist& bl, const char *a, const char *b); - int put_bl_ss(bufferlist& bl, const char *a, const char *b) { - return write_bl_ss(bl, a, b, false); + int get_bl_ss(bufferlist& bl, const char *a, const char *b) WARN_UNUSED_RESULT; + void get_bl_ss_safe(bufferlist& bl, const char *a, const char *b) { + int ret = get_bl_ss(bl, a, b); + assert (ret >= 0 || ret == -ENOENT); + } + void put_bl_ss(bufferlist& bl, const char *a, const char *b) { + write_bl_ss(bl, a, b, false); } - int append_bl_ss(bufferlist& bl, const char *a, const char *b) { - return write_bl_ss(bl, a, b, true); + void append_bl_ss(bufferlist& bl, const char *a, const char *b) { + write_bl_ss(bl, a, b, true); } bool exists_bl_sn(const char *a, version_t b) { char bs[20]; snprintf(bs, sizeof(bs), "%llu", (unsigned long long)b); return exists_bl_ss(a, bs); } - int get_bl_sn(bufferlist& bl, const char *a, version_t b) { + int get_bl_sn(bufferlist& bl, const char *a, version_t b) WARN_UNUSED_RESULT { char bs[20]; snprintf(bs, sizeof(bs), "%llu", (unsigned long long)b); return get_bl_ss(bl, a, bs); } - int put_bl_sn(bufferlist& bl, const char *a, version_t b) { + void get_bl_sn_safe(bufferlist& bl, const char *a, version_t b) { + int ret = get_bl_sn(bl, a, b); + assert(ret >= 0 || ret == -ENOENT); + } + void put_bl_sn(bufferlist& bl, const char *a, version_t b) { char bs[20]; snprintf(bs, sizeof(bs), "%llu", (unsigned long long)b); - return put_bl_ss(bl, a, bs); + put_bl_ss(bl, a, bs); } /** * Put a whole set of values efficiently and safely. @@ -77,16 +86,16 @@ public: * @param vals - map of int name -> values * @return 0 for success or negative error code */ - int put_bl_sn_map(const char *a, + void put_bl_sn_map(const char *a, map<version_t,bufferlist>::iterator start, map<version_t,bufferlist>::iterator end, map<version_t,version_t> *gvmap); - int erase_ss(const char *a, const char *b); - int erase_sn(const char *a, version_t b) { + void erase_ss(const char *a, const char *b); + void erase_sn(const char *a, version_t b) { char bs[20]; snprintf(bs, sizeof(bs), "%llu", (unsigned long long)b); - return erase_ss(a, bs); + erase_ss(a, bs); } /* diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 24624bbc268..196edeee476 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -257,9 +257,14 @@ bool MonmapMonitor::preprocess_command(MMonCommand *m) r = -EINVAL; goto out; } - stringstream ss; + if (target >= (long)mon->monmap->size()) { + ss << "mon." << target << " does not exist"; + r = -ENOENT; + goto out; + } // send to target, or handle if it's me + stringstream ss; MMonCommand *newm = new MMonCommand(m->fsid, m->version); newm->cmd.insert(newm->cmd.begin(), m->cmd.begin() + 3, m->cmd.end()); mon->messenger->send_message(newm, mon->monmap->get_inst(target)); diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index e72929b7d43..c845c2b600c 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -75,7 +75,7 @@ void OSDMonitor::create_initial() OSDMap newmap; bufferlist bl; - mon->store->get_bl_ss(bl, "mkfs", "osdmap"); + mon->store->get_bl_ss_safe(bl, "mkfs", "osdmap"); if (bl.length()) { newmap.decode(bl); newmap.set_fsid(mon->monmap->fsid); @@ -1309,7 +1309,8 @@ void OSDMonitor::send_incremental(PaxosServiceMessage *req, epoch_t first) if (first < paxos->get_first_committed()) { first = paxos->get_first_committed(); bufferlist bl; - mon->store->get_bl_sn(bl, "osdmap_full", first); + mon->store->get_bl_sn_safe(bl, "osdmap_full", first); + assert(bl.length()); dout(20) << "send_incremental starting with base full " << first << " " << bl.length() << " bytes" << dendl; MOSDMap *m = new MOSDMap(osdmap.get_fsid()); m->oldest_map = paxos->get_first_committed(); @@ -1336,7 +1337,8 @@ void OSDMonitor::send_incremental(epoch_t first, entity_inst_t& dest, bool oneti if (first < paxos->get_first_committed()) { first = paxos->get_first_committed(); bufferlist bl; - mon->store->get_bl_sn(bl, "osdmap_full", first); + mon->store->get_bl_sn_safe(bl, "osdmap_full", first); + assert(bl.length()); dout(20) << "send_incremental starting with base full " << first << " " << bl.length() << " bytes" << dendl; MOSDMap *m = new MOSDMap(osdmap.get_fsid()); m->oldest_map = paxos->get_first_committed(); @@ -1701,7 +1703,7 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) OSDMap *p = &osdmap; if (epoch) { bufferlist b; - mon->store->get_bl_sn(b,"osdmap_full", epoch); + mon->store->get_bl_sn_safe(b,"osdmap_full", epoch); if (!b.length()) { p = 0; r = -ENOENT; @@ -2445,7 +2447,11 @@ bool OSDMonitor::prepare_command(MMonCommand *m) // optional uuid provided? uuid_d uuid; - if (m->cmd.size() > 2 && uuid.parse(m->cmd[2].c_str())) { + if (m->cmd.size() > 2) { + if (!uuid.parse(m->cmd[2].c_str())) { + err = -EINVAL; + goto out; + } dout(10) << " osd create got uuid " << uuid << dendl; i = osdmap.identify_osd(uuid); if (i >= 0) { diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index dcf9380613b..6a6dd60b84b 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -579,7 +579,7 @@ void PGMonitor::check_osd_map(epoch_t epoch) e++) { dout(10) << "check_osd_map applying osdmap e" << e << " to pg_map" << dendl; bufferlist bl; - mon->store->get_bl_sn(bl, "osdmap", e); + mon->store->get_bl_sn_safe(bl, "osdmap", e); assert(bl.length()); OSDMap::Incremental inc(bl); for (map<int32_t,uint32_t>::iterator p = inc.new_weight.begin(); diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 349908789e3..fbe141e3a84 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -73,7 +73,8 @@ void Paxos::collect(version_t oldpn) if (mon->store->exists_bl_sn(machine_name, last_committed+1)) { uncommitted_v = last_committed+1; uncommitted_pn = accepted_pn; - mon->store->get_bl_sn(uncommitted_value, machine_name, last_committed+1); + mon->store->get_bl_sn_safe(uncommitted_value, machine_name, last_committed+1); + assert(uncommitted_value.length()); dout(10) << "learned uncommitted " << (last_committed+1) << " (" << uncommitted_value.length() << " bytes) from myself" << dendl; @@ -146,7 +147,7 @@ void Paxos::handle_collect(MMonPaxos *collect) // (it'll be at last_committed+1) bufferlist bl; if (mon->store->exists_bl_sn(machine_name, last_committed+1)) { - mon->store->get_bl_sn(bl, machine_name, last_committed+1); + mon->store->get_bl_sn_safe(bl, machine_name, last_committed+1); assert(bl.length() > 0); dout(10) << " sharing our accepted but uncommitted value for " << last_committed+1 << " (" << bl.length() << " bytes)" << dendl; @@ -184,7 +185,8 @@ void Paxos::share_state(MMonPaxos *m, version_t peer_first_committed, // include incrementals for ( ; v <= last_committed; v++) { if (mon->store->exists_bl_sn(machine_name, v)) { - mon->store->get_bl_sn(m->values[v], machine_name, v); + mon->store->get_bl_sn_safe(m->values[v], machine_name, v); + assert(m->values[v].length()); m->gv[v] = mon->store->get_global_version(machine_name, v); dout(10) << " sharing " << v << " (" << m->values[v].length() << " bytes)" << dendl; @@ -968,7 +970,7 @@ bool Paxos::is_readable(version_t v) bool Paxos::read(version_t v, bufferlist &bl) { - if (!mon->store->get_bl_sn(bl, machine_name, v)) + if (mon->store->get_bl_sn(bl, machine_name, v) <= 0) return false; return true; } @@ -1051,7 +1053,8 @@ void Paxos::stash_latest(version_t v, bufferlist& bl) version_t Paxos::get_stashed(bufferlist& bl) { bufferlist full; - if (mon->store->get_bl_ss(full, machine_name, "latest") <= 0) { + mon->store->get_bl_ss_safe(full, machine_name, "latest"); + if (!full.length()) { dout(10) << "get_stashed not found" << dendl; return 0; } diff --git a/src/os/hobject.h b/src/os/hobject.h index 9a1c207e796..d75ae8570c4 100644 --- a/src/os/hobject.h +++ b/src/os/hobject.h @@ -31,7 +31,9 @@ struct hobject_t { object_t oid; snapid_t snap; uint32_t hash; +private: bool max; +public: int64_t pool; string nspace; @@ -57,6 +59,15 @@ public: pool(pool), key(soid.oid.name == key ? string() : key) {} + /// @return min hobject_t ret s.t. ret.hash == this->hash + hobject_t get_boundary() const { + if (is_max()) + return *this; + hobject_t ret; + ret.hash = hash; + return ret; + } + /* Do not use when a particular hash function is needed */ explicit hobject_t(const sobject_t &o) : oid(o.oid), snap(o.snap), max(false), pool(-1) { @@ -108,6 +119,12 @@ public: void decode(json_spirit::Value& v); void dump(Formatter *f) const; static void generate_test_instances(list<hobject_t*>& o); + friend bool operator<(const hobject_t&, const hobject_t&); + friend bool operator>(const hobject_t&, const hobject_t&); + friend bool operator<=(const hobject_t&, const hobject_t&); + friend bool operator>=(const hobject_t&, const hobject_t&); + friend bool operator==(const hobject_t&, const hobject_t&); + friend bool operator!=(const hobject_t&, const hobject_t&); }; WRITE_CLASS_ENCODER(hobject_t) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 96052820a15..913157a8508 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1307,15 +1307,6 @@ PG *OSD::_create_lock_pg( t.create_collection(coll_t(pgid)); - if (newly_created) { - /* This is weird, but all the peering code needs last_epoch_start - * to be less than same_interval_since. Make it so! - * This is easier to deal with if you remember that the PG, while - * now created in memory, still hasn't peered and started -- and - * the map epoch could change before that happens! */ - history.last_epoch_started = history.epoch_created - 1; - } - pg->init(role, up, acting, history, pi, &t); dout(7) << "_create_lock_pg " << *pg << dendl; @@ -1586,6 +1577,8 @@ PG *OSD::get_or_create_pg(const pg_info_t& info, pg_interval_map_t& pi, if (!_have_pg(info.pgid)) { // same primary? + if (!osdmap->have_pg_pool(info.pgid.pool())) + return 0; vector<int> up, acting; osdmap->pg_to_up_acting_osds(info.pgid, up, acting); int role = osdmap->calc_pg_role(whoami, acting, acting.size()); @@ -4502,6 +4495,7 @@ void OSD::handle_pg_create(OpRequestRef op) 0, creating_pgs[pgid].acting, creating_pgs[pgid].acting, history, pi, *rctx.transaction); + pg->info.last_epoch_started = pg->info.history.last_epoch_started; creating_pgs.erase(pgid); wake_pg_waiters(pg->info.pgid); pg->handle_create(&rctx); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 53e9ae4bb19..b81bfed36f4 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1042,11 +1042,11 @@ map<int, pg_info_t>::const_iterator PG::find_best_info(const map<int, pg_info_t> for (map<int, pg_info_t>::const_iterator i = infos.begin(); i != infos.end(); ++i) { - if (max_last_epoch_started_found < i->second.history.last_epoch_started) { + if (max_last_epoch_started_found < i->second.last_epoch_started) { min_last_update_acceptable = eversion_t::max(); - max_last_epoch_started_found = i->second.history.last_epoch_started; + max_last_epoch_started_found = i->second.last_epoch_started; } - if (max_last_epoch_started_found == i->second.history.last_epoch_started) { + if (max_last_epoch_started_found == i->second.last_epoch_started) { if (min_last_update_acceptable > i->second.last_update) min_last_update_acceptable = i->second.last_update; } @@ -1381,6 +1381,8 @@ void PG::activate(ObjectStore::Transaction& t, send_notify = false; + info.last_epoch_started = query_epoch; + if (is_primary()) { // If necessary, create might_have_unfound to help us find our unfound objects. // NOTE: It's important that we build might_have_unfound before trimming the @@ -1774,7 +1776,8 @@ void PG::all_activated_and_committed() assert(is_primary()); assert(peer_activated.size() == acting.size()); - info.history.last_epoch_started = get_osdmap()->get_epoch(); + // info.last_epoch_started is set during activate() + info.history.last_epoch_started = info.last_epoch_started; share_pg_info(); update_stats(); @@ -3691,7 +3694,7 @@ void PG::chunky_scrub() { // search backward from the end looking for a boundary objects.push_back(scrubber.end); while (!boundary_found && objects.size() > 1) { - hobject_t end = objects.back(); + hobject_t end = objects.back().get_boundary(); objects.pop_back(); if (objects.back().get_filestore_key() != end.get_filestore_key()) { @@ -4134,6 +4137,10 @@ void PG::share_pg_info() // share new pg_info_t with replicas for (unsigned i=1; i<acting.size(); i++) { int peer = acting[i]; + if (peer_info.count(i)) { + peer_info[i].last_epoch_started = info.last_epoch_started; + peer_info[i].history.merge(info.history); + } MOSDPGInfo *m = new MOSDPGInfo(get_osdmap()->get_epoch()); m->pg_list.push_back( make_pair( @@ -4523,6 +4530,10 @@ void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo) dirty_info = true; osd->reg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp); + assert(oinfo.last_epoch_started == info.last_epoch_started); + assert(info.history.last_epoch_started == oinfo.last_epoch_started); + assert(oinfo.history.last_epoch_started == oinfo.last_epoch_started); + // Handle changes to purged_snaps ONLY IF we have caught up if (last_complete_ondisk.epoch >= info.history.last_epoch_started) { interval_set<snapid_t> p; @@ -6451,7 +6462,7 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx) // We pull the log from the peer's last_epoch_started to ensure we // get enough log to detect divergent updates. - eversion_t since(pi.history.last_epoch_started, 0); + eversion_t since(pi.last_epoch_started, 0); assert(pi.last_update >= pg->info.log_tail); // or else choose_acting() did a bad thing if (pi.log_tail <= since) { dout(10) << " requesting log+missing since " << since << " from osd." << *i << dendl; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 2513d9d6fe4..388751e8e8b 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -7168,10 +7168,15 @@ boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&) dout(10) << "NotTrimming: obs_to_trim empty!" << dendl; dout(10) << "purged_snaps now " << pg->info.purged_snaps << ", snap_trimq now " << pg->snap_trimq << dendl; - ObjectStore::Transaction *t = new ObjectStore::Transaction; - t->remove_collection(col_to_trim); - int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t)); - assert(r == 0); + if (pg->snap_collections.contains(snap_to_trim)) { + ObjectStore::Transaction *t = new ObjectStore::Transaction; + pg->snap_collections.erase(snap_to_trim); + t->remove_collection(col_to_trim); + pg->write_info(*t); + int r = pg->osd->store->queue_transaction( + NULL, t, new ObjectStore::C_DeleteTransaction(t)); + assert(r == 0); + } post_event(SnapTrim()); return discard_event(); } else { @@ -7222,9 +7227,10 @@ boost::statechart::result ReplicatedPG::RepColTrim::react(const SnapTrim&) t->collection_remove(col_to_trim, *i); } t->remove_collection(col_to_trim); + pg->snap_collections.erase(snap_to_trim); + pg->write_info(*t); int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t)); assert(r == 0); - pg->snap_collections.erase(snap_to_trim); return discard_event(); } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 1ca5adc70be..4a1b3fcf2ef 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1286,7 +1286,7 @@ void pg_history_t::generate_test_instances(list<pg_history_t*>& o) void pg_info_t::encode(bufferlist &bl) const { - ENCODE_START(26, 26, bl); + ENCODE_START(27, 26, bl); ::encode(pgid, bl); ::encode(last_update, bl); ::encode(last_complete, bl); @@ -1295,12 +1295,13 @@ void pg_info_t::encode(bufferlist &bl) const ::encode(stats, bl); history.encode(bl); ::encode(purged_snaps, bl); + ::encode(last_epoch_started, bl); ENCODE_FINISH(bl); } void pg_info_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(26, 26, 26, bl); + DECODE_START_LEGACY_COMPAT_LEN(27, 26, 26, bl); if (struct_v < 23) { old_pg_t opgid; ::decode(opgid, bl); @@ -1325,6 +1326,11 @@ void pg_info_t::decode(bufferlist::iterator &bl) set<snapid_t> snap_trimq; ::decode(snap_trimq, bl); } + if (struct_v < 27) { + last_epoch_started = history.last_epoch_started; + } else { + ::decode(last_epoch_started, bl); + } DECODE_FINISH(bl); } @@ -1348,6 +1354,7 @@ void pg_info_t::dump(Formatter *f) const f->dump_int("empty", is_empty()); f->dump_int("dne", dne()); f->dump_int("incomplete", is_incomplete()); + f->dump_int("last_epoch_started", last_epoch_started); } void pg_info_t::generate_test_instances(list<pg_info_t*>& o) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index bb2ed253ce6..da2b2abf319 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1039,6 +1039,7 @@ struct pg_info_t { pg_t pgid; eversion_t last_update; // last object version applied to store. eversion_t last_complete; // last version pg was complete through. + epoch_t last_epoch_started;// last epoch at which this pg started on this osd eversion_t log_tail; // oldest log entry. @@ -1051,11 +1052,11 @@ struct pg_info_t { pg_history_t history; pg_info_t() - : last_backfill(hobject_t::get_max()) + : last_epoch_started(0), last_backfill(hobject_t::get_max()) { } pg_info_t(pg_t p) : pgid(p), - last_backfill(hobject_t::get_max()) + last_epoch_started(0), last_backfill(hobject_t::get_max()) { } bool is_empty() const { return last_update.version == 0; } @@ -1086,6 +1087,7 @@ inline ostream& operator<<(ostream& out, const pg_info_t& pgi) out << " lb " << pgi.last_backfill; } //out << " c " << pgi.epoch_created; + out << " local-les=" << pgi.last_epoch_started; out << " n=" << pgi.stats.stats.sum.num_objects; out << " " << pgi.history << ")"; diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 6a7d7915a69..fb419d93b4c 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -29,7 +29,7 @@ using namespace std; #define SECRET_KEY_LEN 40 #define PUBLIC_ID_LEN 20 -RGWRados *store; +static RGWRados *store = NULL; void _usage() { @@ -657,6 +657,15 @@ static bool bucket_object_check_filter(const string& name) return rgw_obj::translate_raw_obj_to_obj_in_ns(obj, ns); } +class StoreDestructor { + RGWRados *store; +public: + StoreDestructor(RGWRados *_s) : store(_s) {} + ~StoreDestructor() { + RGWStoreManager::close_storage(store); + } +}; + int main(int argc, char **argv) { vector<const char*> args; @@ -873,6 +882,8 @@ int main(int argc, char **argv) return 5; //EIO } + StoreDestructor store_destructor(store); + if (opt_cmd != OPT_USER_CREATE && opt_cmd != OPT_LOG_SHOW && opt_cmd != OPT_LOG_LIST && opt_cmd != OPT_LOG_RM && user_id.empty()) { diff --git a/src/test/filestore/store_test.cc b/src/test/filestore/store_test.cc index 55fde77f0fc..3a41fa10e4c 100644 --- a/src/test/filestore/store_test.cc +++ b/src/test/filestore/store_test.cc @@ -212,7 +212,7 @@ TEST_F(StoreTest, ManyObjectTest) { ASSERT_EQ(r, 0); listed.insert(objects.begin(), objects.end()); if (objects.size() < 50) { - ASSERT_TRUE(next.max); + ASSERT_TRUE(next.is_max()); break; } objects.clear(); @@ -385,7 +385,7 @@ public: ASSERT_TRUE(sorted(objects)); objects_set.insert(objects.begin(), objects.end()); objects.clear(); - if (next.max) break; + if (next.is_max()) break; current = next; } ASSERT_EQ(objects_set.size(), available_objects.size()); @@ -529,7 +529,7 @@ TEST_F(StoreTest, HashCollisionTest) { listed.insert(*i); } if (objects.size() < 50) { - ASSERT_TRUE(next.max); + ASSERT_TRUE(next.is_max()); break; } objects.clear(); diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index 42fb411362d..644873b1a46 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -754,3 +754,31 @@ TEST(LibCephFS, BadFileDesc) { ASSERT_EQ(ceph_get_file_pool(cmount, -1), -EBADF); ASSERT_EQ(ceph_get_file_replication(cmount, -1), -EBADF); } + +TEST(LibCephFS, ReadEmptyFile) { + struct ceph_mount_info *cmount; + ASSERT_EQ(ceph_create(&cmount, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 0); + ASSERT_EQ(ceph_mount(cmount, NULL), 0); + + // test the read_sync path in the client for zero files + ASSERT_EQ(ceph_conf_set(cmount, "client_debug_force_sync_read", "true"), 0); + + int mypid = getpid(); + char testf[256]; + + sprintf(testf, "test_reademptyfile%d", mypid); + int fd = ceph_open(cmount, testf, O_CREAT|O_TRUNC|O_WRONLY, 0644); + ASSERT_GT(fd, 0); + + ceph_close(cmount, fd); + + fd = ceph_open(cmount, testf, O_RDONLY, 0); + ASSERT_GT(fd, 0); + + char buf[4096]; + ASSERT_EQ(ceph_read(cmount, fd, buf, 4096, 0), 0); + + ceph_close(cmount, fd); + ceph_shutdown(cmount); +} diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc index 50c83701491..0868fac3173 100644 --- a/src/test/librados/misc.cc +++ b/src/test/librados/misc.cc @@ -235,12 +235,13 @@ TEST(LibRadosMisc, TmapUpdateMisorderedPutPP) { ::encode(string("aval"), bl); ::encode(string("c"), bl); ::encode(string("cval"), bl); + bufferlist orig = bl; // tmap_put steals bl content ASSERT_EQ(0, ioctx.tmap_put("foo", bl)); // check bufferlist newbl; - ASSERT_EQ(0, ioctx.read("foo", bl, 0, 0)); - ASSERT_EQ(bl.contents_equal(newbl), false); + ioctx.read("foo", newbl, orig.length(), 0); + ASSERT_EQ(orig.contents_equal(newbl), false); ioctx.close(); ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster)); |