summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2012-11-29 15:48:54 -0800
committerSage Weil <sage@inktank.com>2012-11-29 15:48:54 -0800
commitc26dc1885d62fad4c28882b023ef62c4d195b132 (patch)
treef37edb5a968a826d398cb110b0dc46a00f08cb74
parent0b55fbdbea2b2209eea65b67adb00eea0f5cf620 (diff)
parent77711ddee37de154c0d5d452c4f84dae36eb3e3a (diff)
downloadceph-c26dc1885d62fad4c28882b023ef62c4d195b132.tar.gz
Merge branch 'next'
Conflicts: src/rgw/rgw_admin.cc
-rw-r--r--README3
-rwxr-xr-xqa/workunits/rbd/map-unmap.sh98
-rw-r--r--src/ceph_mon.cc4
-rw-r--r--src/client/Client.cc6
-rw-r--r--src/common/ceph_context.cc2
-rw-r--r--src/common/config.cc4
-rw-r--r--src/common/config_opts.h3
-rw-r--r--src/mon/AuthMonitor.cc2
-rw-r--r--src/mon/MDSMonitor.cc4
-rw-r--r--src/mon/Monitor.cc25
-rw-r--r--src/mon/MonitorStore.cc124
-rw-r--r--src/mon/MonitorStore.h43
-rw-r--r--src/mon/MonmapMonitor.cc7
-rw-r--r--src/mon/OSDMonitor.cc16
-rw-r--r--src/mon/PGMonitor.cc2
-rw-r--r--src/mon/Paxos.cc13
-rw-r--r--src/os/hobject.h17
-rw-r--r--src/osd/OSD.cc12
-rw-r--r--src/osd/PG.cc23
-rw-r--r--src/osd/ReplicatedPG.cc16
-rw-r--r--src/osd/osd_types.cc11
-rw-r--r--src/osd/osd_types.h6
-rw-r--r--src/rgw/rgw_admin.cc13
-rw-r--r--src/test/filestore/store_test.cc6
-rw-r--r--src/test/libcephfs/test.cc28
-rw-r--r--src/test/librados/misc.cc5
26 files changed, 359 insertions, 134 deletions
diff --git a/README b/README
index 1c6514c3844..eb10688544a 100644
--- a/README
+++ b/README
@@ -120,9 +120,10 @@ To build the source code, you must install the following:
- uuid-dev
- libatomic-ops-dev
- libboost-program-options-dev
+- libboost-thread-dev
For example:
- $ apt-get install automake autoconf automake gcc g++ libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev
+ $ apt-get install automake autoconf automake gcc g++ libboost-dev libedit-dev libssl-dev libtool libfcgi libfcgi-dev libfuse-dev linux-kernel-headers libcrypto++-dev libaio-dev libgoogle-perftools-dev libkeyutils-dev uuid-dev libatomic-ops-dev libboost-program-options-dev libboost-thread-dev
diff --git a/qa/workunits/rbd/map-unmap.sh b/qa/workunits/rbd/map-unmap.sh
new file mode 100755
index 00000000000..9ecc226e5f5
--- /dev/null
+++ b/qa/workunits/rbd/map-unmap.sh
@@ -0,0 +1,98 @@
+#!/bin/bash -e
+
+RUN_TIME=300 # approximate duration of run (seconds)
+
+[ $# -eq 1 ] && RUN_TIME="$1"
+
+IMAGE_NAME="image-$$"
+IMAGE_SIZE="1024" # MB
+
+ID_TIMEOUT="10" # seconds to wait to get rbd id after mapping
+ID_DELAY=".1" # floating-point seconds to delay before rescan
+
+MAP_DELAY=".25" # floating-point seconds to delay before unmap
+
+function get_time() {
+ date '+%s'
+}
+
+function times_up() {
+ local end_time="$1"
+
+ test $(get_time) -ge "${end_time}"
+}
+
+function get_id() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+ local id=""
+ local end_time=$(expr $(get_time) + ${ID_TIMEOUT})
+
+ cd /sys/bus/rbd/devices
+
+ while [ -z "${id}" ]; do
+ if times_up "${end_time}"; then
+ break;
+ fi
+ for i in *; do
+ if [ "$(cat $i/name)" = "${image_name}" ]; then
+ id=$i
+ break
+ fi
+ done
+ sleep "${ID_DELAY}"
+ done
+ echo $id
+ test -n "${id}" # return code 0 if id was found
+}
+
+function map_unmap() {
+ [ $# -eq 1 ] || exit 99
+ local image_name="$1"
+
+ rbd map "${image_name}"
+ RBD_ID=$(get_id "${image_name}")
+
+ sleep "${MAP_DELAY}"
+
+ rbd unmap "/dev/rbd${RBD_ID}"
+}
+
+function setup() {
+ [ $# -eq 2 ] || exit 99
+ local image_name="$1"
+ local image_size="$2"
+
+ [ -d /sys/bus/rbd ] || sudo modprobe rbd
+
+ # allow ubuntu user to map/unmap rbd devices
+ sudo chown ubuntu /sys/bus/rbd/add
+ sudo chown ubuntu /sys/bus/rbd/remove
+ rbd create "${image_name}" --size="${image_size}"
+}
+
+function cleanup() {
+ # Have to rely on globals for the trap call
+ # rbd unmap "/dev/rbd${RBD_ID}" || true
+ rbd rm "${IMAGE_NAME}" || true
+ sudo chown root /sys/bus/rbd/remove || true
+ sudo chown root /sys/bus/rbd/add || true
+}
+trap cleanup EXIT HUP INT
+
+#### Start
+
+setup "${IMAGE_NAME}" "${IMAGE_SIZE}"
+
+COUNT=0
+START_TIME=$(get_time)
+END_TIME=$(expr $(get_time) + ${RUN_TIME})
+while ! times_up "${END_TIME}"; do
+ map_unmap "${IMAGE_NAME}"
+ COUNT=$(expr $COUNT + 1)
+done
+ELAPSED=$(expr "$(get_time)" - "${START_TIME}")
+
+echo "${COUNT} iterations completed in ${ELAPSED} seconds"
+
+exit 0
diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc
index f82be18b066..3e3edf9b90a 100644
--- a/src/ceph_mon.cc
+++ b/src/ceph_mon.cc
@@ -283,14 +283,14 @@ int main(int argc, const char **argv)
{
bufferlist mapbl;
bufferlist latest;
- store.get_bl_ss(latest, "monmap", "latest");
+ store.get_bl_ss_safe(latest, "monmap", "latest");
if (latest.length() > 0) {
bufferlist::iterator p = latest.begin();
version_t v;
::decode(v, p);
::decode(mapbl, p);
} else {
- store.get_bl_ss(mapbl, "mkfs", "monmap");
+ store.get_bl_ss_safe(mapbl, "mkfs", "monmap");
if (mapbl.length() == 0) {
cerr << "mon fs missing 'monmap/latest' and 'mkfs/monmap'" << std::endl;
exit(1);
diff --git a/src/client/Client.cc b/src/client/Client.cc
index b808f204e70..1c881828540 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -5400,6 +5400,7 @@ int Client::read(int fd, char *buf, loff_t size, loff_t offset)
int Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
{
+ const md_config_t *conf = cct->_conf;
Inode *in = f->inode;
//bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
@@ -5416,7 +5417,7 @@ int Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
movepos = true;
}
- if (have & CEPH_CAP_FILE_CACHE)
+ if (!conf->client_debug_force_sync_read && have & CEPH_CAP_FILE_CACHE)
r = _read_async(f, offset, size, bl);
else
r = _read_sync(f, offset, size, bl);
@@ -5564,6 +5565,9 @@ int Client::_read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl)
flock.Unlock();
client_lock.Lock();
+ // if we get ENOENT from OSD, assume 0 bytes returned
+ if (r == -ENOENT)
+ r = 0;
if (r < 0)
return r;
if (tbl.length()) {
diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc
index e4345e64d5b..decf86db70a 100644
--- a/src/common/ceph_context.cc
+++ b/src/common/ceph_context.cc
@@ -142,7 +142,7 @@ public:
}
if (changed.count("log_max_recent")) {
- log->set_max_new(conf->log_max_recent);
+ log->set_max_recent(conf->log_max_recent);
}
}
};
diff --git a/src/common/config.cc b/src/common/config.cc
index 205a15e509e..0e86b792ae9 100644
--- a/src/common/config.cc
+++ b/src/common/config.cc
@@ -900,7 +900,7 @@ int md_config_t::set_val_raw(const char *val, const config_option *opt)
}
static const char *CONF_METAVARIABLES[] =
- { "cluster", "type", "name", "host", "num", "id" };
+ { "cluster", "type", "name", "host", "num", "id", "pid" };
static const int NUM_CONF_METAVARIABLES =
(sizeof(CONF_METAVARIABLES) / sizeof(CONF_METAVARIABLES[0]));
@@ -976,6 +976,8 @@ bool md_config_t::expand_meta(std::string &origval) const
out += name.get_id().c_str();
else if (var == "id")
out += name.get_id().c_str();
+ else if (var == "pid")
+ out += stringify(getpid());
else
assert(0); // unreachable
found_meta = true;
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index e2a29fca509..8699d789164 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -33,7 +33,7 @@ OPTION(fatal_signal_handlers, OPT_BOOL, true)
OPTION(log_file, OPT_STR, "/var/log/ceph/$cluster-$name.log")
OPTION(log_max_new, OPT_INT, 1000)
-OPTION(log_max_recent, OPT_INT, 1000000)
+OPTION(log_max_recent, OPT_INT, 100000)
OPTION(log_to_stderr, OPT_BOOL, true)
OPTION(err_to_stderr, OPT_BOOL, true)
OPTION(log_to_syslog, OPT_BOOL, false)
@@ -181,6 +181,7 @@ OPTION(client_oc_max_dirty, OPT_INT, 1024*1024* 100) // MB * n (dirty OR tx.
OPTION(client_oc_target_dirty, OPT_INT, 1024*1024* 8) // target dirty (keep this smallish)
OPTION(client_oc_max_dirty_age, OPT_DOUBLE, 5.0) // max age in cache before writeback
OPTION(client_oc_max_objects, OPT_INT, 1000) // max objects in cache
+OPTION(client_debug_force_sync_read, OPT_BOOL, false) // always read synchronously (go to osds)
// note: the max amount of "in flight" dirty data is roughly (max - target)
OPTION(fuse_use_invalidate_cb, OPT_BOOL, false) // use fuse 2.8+ invalidate callback to keep page cache consistent
OPTION(fuse_big_writes, OPT_BOOL, true)
diff --git a/src/mon/AuthMonitor.cc b/src/mon/AuthMonitor.cc
index c43d7e988db..aec2ba04d86 100644
--- a/src/mon/AuthMonitor.cc
+++ b/src/mon/AuthMonitor.cc
@@ -96,7 +96,7 @@ void AuthMonitor::create_initial()
KeyRing keyring;
bufferlist bl;
- mon->store->get_bl_ss(bl, "mkfs", "keyring");
+ mon->store->get_bl_ss_safe(bl, "mkfs", "keyring");
bufferlist::iterator p = bl.begin();
::decode(keyring, p);
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index dd256b9b14a..72168ac9638 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -555,7 +555,7 @@ bool MDSMonitor::preprocess_command(MMonCommand *m)
MDSMap *p = &mdsmap;
if (epoch) {
bufferlist b;
- mon->store->get_bl_sn(b, "mdsmap", epoch);
+ mon->store->get_bl_sn_safe(b, "mdsmap", epoch);
if (!b.length()) {
p = 0;
r = -ENOENT;
@@ -597,7 +597,7 @@ bool MDSMonitor::preprocess_command(MMonCommand *m)
}
epoch_t e = l;
bufferlist b;
- mon->store->get_bl_sn(b,"mdsmap",e);
+ mon->store->get_bl_sn_safe(b,"mdsmap",e);
if (!b.length()) {
r = -ENOENT;
} else {
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index b819fceddb0..a90ae6e3cf8 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -81,6 +81,11 @@ static ostream& _prefix(std::ostream *_dout, const Monitor *mon) {
long parse_pos_long(const char *s, ostream *pss)
{
+ if (*s == '-' || *s == '+') {
+ *pss << "expected numerical value, got: " << s;
+ return -EINVAL;
+ }
+
string err;
long r = strict_strtol(s, 10, &err);
if ((r == 0) && !err.empty()) {
@@ -360,7 +365,7 @@ int Monitor::check_features(MonitorStore *store)
CompatSet ondisk;
bufferlist features;
- store->get_bl_ss(features, COMPAT_SET_LOC, 0);
+ store->get_bl_ss_safe(features, COMPAT_SET_LOC, 0);
if (features.length() == 0) {
generic_dout(0) << "WARNING: mon fs missing feature list.\n"
<< "Assuming it is old-style and introducing one." << dendl;
@@ -389,7 +394,7 @@ int Monitor::check_features(MonitorStore *store)
void Monitor::read_features()
{
bufferlist bl;
- store->get_bl_ss(bl, COMPAT_SET_LOC, 0);
+ store->get_bl_ss_safe(bl, COMPAT_SET_LOC, 0);
assert(bl.length());
bufferlist::iterator p = bl.begin();
@@ -495,7 +500,7 @@ int Monitor::preinit()
if (authmon()->paxos->get_version() == 0) {
dout(10) << "loading initial keyring to bootstrap authentication for mkfs" << dendl;
bufferlist bl;
- store->get_bl_ss(bl, "mkfs", "keyring");
+ store->get_bl_ss_safe(bl, "mkfs", "keyring");
KeyRing keyring;
bufferlist::iterator p = bl.begin();
::decode(keyring, p);
@@ -1030,12 +1035,14 @@ MMonProbe *Monitor::fill_probe_data(MMonProbe *m, Paxos *pax)
version_t v = MAX(pax->get_first_committed(), m->newest_version + 1);
int len = 0;
for (; v <= pax->get_version(); v++) {
- len += store->get_bl_sn(r->paxos_values[m->machine_name][v], m->machine_name.c_str(), v);
+ store->get_bl_sn_safe(r->paxos_values[m->machine_name][v], m->machine_name.c_str(), v);
+ len += r->paxos_values[m->machine_name][v].length();
r->gv[m->machine_name][v] = store->get_global_version(m->machine_name.c_str(), v);
for (list<string>::iterator p = pax->extra_state_dirs.begin();
p != pax->extra_state_dirs.end();
++p) {
- len += store->get_bl_sn(r->paxos_values[*p][v], p->c_str(), v);
+ store->get_bl_sn_safe(r->paxos_values[*p][v], p->c_str(), v);
+ len += r->paxos_values[*p][v].length();
}
if (len >= g_conf->mon_slurp_bytes)
break;
@@ -2356,7 +2363,8 @@ int Monitor::write_fsid()
bufferlist b;
b.append(us);
- return store->put_bl_ss(b, "cluster_uuid", 0);
+ store->put_bl_ss(b, "cluster_uuid", 0);
+ return 0;
}
/*
@@ -2380,9 +2388,8 @@ int Monitor::mkfs(bufferlist& osdmapbl)
bufferlist magicbl;
magicbl.append(CEPH_MON_ONDISK_MAGIC);
magicbl.append("\n");
- r = store->put_bl_ss(magicbl, "magic", 0);
- if (r < 0)
- return r;
+ store->put_bl_ss(magicbl, "magic", 0);
+
features = get_supported_features();
write_features();
diff --git a/src/mon/MonitorStore.cc b/src/mon/MonitorStore.cc
index b9decedac9e..a74b750787c 100644
--- a/src/mon/MonitorStore.cc
+++ b/src/mon/MonitorStore.cc
@@ -35,7 +35,6 @@ static ostream& _prefix(std::ostream *_dout, const string& dir) {
return *_dout << "store(" << dir << ") ";
}
-
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -89,7 +88,8 @@ int MonitorStore::mount()
int MonitorStore::umount()
{
- ::close(lock_fd);
+ int close_err = TEMP_FAILURE_RETRY(::close(lock_fd));
+ assert (0 == close_err);
return 0;
}
@@ -110,7 +110,8 @@ int MonitorStore::mkfs()
derr << "MonitorStore::mkfs: unable to open " << dir << ": " << cpp_strerror(err) << dendl;
return err;
}
- ::close(fd);
+ int close_err = TEMP_FAILURE_RETRY(::close(fd));
+ assert (0 == close_err);
dout(0) << "created monfs at " << dir << " for "
<< g_conf->name.get_id() << dendl;
@@ -134,6 +135,7 @@ version_t MonitorStore::get_int(const char *a, const char *b)
}
derr << "MonitorStore::get_int: failed to open '" << fn << "': "
<< cpp_strerror(err) << dendl;
+ assert(0 == "failed to open");
return 0;
}
@@ -143,10 +145,13 @@ version_t MonitorStore::get_int(const char *a, const char *b)
if (r < 0) {
derr << "MonitorStore::get_int: failed to read '" << fn << "': "
<< cpp_strerror(r) << dendl;
- TEMP_FAILURE_RETRY(::close(fd));
+ int close_err = TEMP_FAILURE_RETRY(::close(fd));
+ assert(0 == close_err);
+ assert(0); // the file exists; so this is a different failure
return 0;
}
- TEMP_FAILURE_RETRY(::close(fd));
+ int close_err = TEMP_FAILURE_RETRY(::close(fd));
+ assert (0 == close_err);
version_t val = atoi(buf);
@@ -196,7 +201,12 @@ void MonitorStore::put_int(version_t val, const char *a, const char *b)
<< cpp_strerror(r) << dendl;
ceph_abort();
}
- ::fsync(fd);
+ r = ::fsync(fd);
+ if (r) {
+ derr << "Monitor::put_int: failed to fsync fd for '" << tfn << "': "
+ << cpp_strerror(r) << dendl;
+ ceph_abort();
+ }
if (TEMP_FAILURE_RETRY(::close(fd))) {
derr << "MonitorStore::put_int: failed to close fd for '" << tfn << "': "
<< cpp_strerror(r) << dendl;
@@ -245,10 +255,13 @@ bool MonitorStore::exists_bl_ss(const char *a, const char *b)
int r = ::stat(fn, &st);
//char buf[80];
//dout(15) << "exists_bl stat " << fn << " r=" << r << " errno " << errno << " " << strerror_r(errno, buf, sizeof(buf)) << dendl;
+ if (r) {
+ assert (errno == ENOENT);
+ }
return r == 0;
}
-int MonitorStore::erase_ss(const char *a, const char *b)
+void MonitorStore::erase_ss(const char *a, const char *b)
{
char fn[1024];
char dr[1024];
@@ -261,6 +274,7 @@ int MonitorStore::erase_ss(const char *a, const char *b)
strcpy(fn, dr);
}
int r = ::unlink(fn);
+ assert(0 == r || ENOENT == errno); // callers don't check for existence first
if (b) {
// wipe out _gv file too, if any. this is sloppy, but will work.
@@ -270,7 +284,6 @@ int MonitorStore::erase_ss(const char *a, const char *b)
}
::rmdir(dr); // sloppy attempt to clean up empty dirs
- return r;
}
int MonitorStore::get_bl_ss(bufferlist& bl, const char *a, const char *b)
@@ -314,7 +327,8 @@ int MonitorStore::get_bl_ss(bufferlist& bl, const char *a, const char *b)
off += r;
}
bl.append(bp);
- ::close(fd);
+ int close_err = TEMP_FAILURE_RETRY(::close(fd));
+ assert (0 == close_err);
if (b) {
dout(15) << "get_bl " << a << "/" << b << " = " << bl.length() << " bytes" << dendl;
@@ -325,7 +339,7 @@ int MonitorStore::get_bl_ss(bufferlist& bl, const char *a, const char *b)
return len;
}
-int MonitorStore::write_bl_ss_impl(bufferlist& bl, const char *a, const char *b, bool append)
+void MonitorStore::write_bl_ss(bufferlist& bl, const char *a, const char *b, bool append)
{
int err = 0;
char fn[1024];
@@ -336,7 +350,7 @@ int MonitorStore::write_bl_ss_impl(bufferlist& bl, const char *a, const char *b,
err = -errno;
derr << __func__ << " failed to create dir " << fn
<< ": " << cpp_strerror(err) << dendl;
- return err;
+ assert(0 == "failed to create dir");
}
dout(15) << "put_bl " << a << "/" << b << " = " << bl.length() << " bytes" << dendl;
snprintf(fn, sizeof(fn), "%s/%s/%s", dir.c_str(), a, b);
@@ -352,7 +366,7 @@ int MonitorStore::write_bl_ss_impl(bufferlist& bl, const char *a, const char *b,
err = -errno;
derr << "failed to open " << fn << "for append: "
<< cpp_strerror(err) << dendl;
- return err;
+ assert(0 == "failed to open for append");
}
} else {
snprintf(tfn, sizeof(tfn), "%s.new", fn);
@@ -360,42 +374,34 @@ int MonitorStore::write_bl_ss_impl(bufferlist& bl, const char *a, const char *b,
if (fd < 0) {
err = -errno;
derr << "failed to open " << tfn << ": " << cpp_strerror(err) << dendl;
- return err;
+ assert(0 == "failed to open");
}
}
err = bl.write_fd(fd);
-
- if (!err)
- ::fsync(fd);
- ::close(fd);
- if (!append && !err) {
- int r = ::rename(tfn, fn);
- if (r < 0) {
+ assert(!err);
+ err = ::fsync(fd);
+ assert(!err);
+ err = TEMP_FAILURE_RETRY(::close(fd));
+ assert (!err); // this really can't fail, right? right?...
+ if (!append) {
+ err = ::rename(tfn, fn);
+ if (err < 0) {
err = -errno;
derr << __func__ << " failed to rename '" << tfn << "' -> '"
<< fn << "': " << cpp_strerror(err) << dendl;
- return err;
+ assert(0 == "failed to rename");
}
}
-
- return err;
-}
-
-int MonitorStore::write_bl_ss(bufferlist& bl, const char *a, const char *b, bool append)
-{
- int err = write_bl_ss_impl(bl, a, b, append);
- if (err)
- derr << "write_bl_ss " << a << "/" << b << " got error " << cpp_strerror(err) << dendl;
- assert(!err); // for now
- return 0;
}
-int MonitorStore::put_bl_sn_map(const char *a,
+void MonitorStore::put_bl_sn_map(const char *a,
map<version_t,bufferlist>::iterator start,
map<version_t,bufferlist>::iterator end,
map<version_t,version_t> *gvmap)
{
+ int err = 0;
+ int close_err = 0;
version_t first = start->first;
map<version_t,bufferlist>::iterator lastp = end;
--lastp;
@@ -407,13 +413,11 @@ int MonitorStore::put_bl_sn_map(const char *a,
last - first < (unsigned)g_conf->mon_sync_fs_threshold) {
// just do them individually
for (map<version_t,bufferlist>::iterator p = start; p != end; ++p) {
- int err = put_bl_sn(p->second, a, p->first);
- if (err < 0)
- return err;
+ put_bl_sn(p->second, a, p->first);
if (gvmap && gvmap->count(p->first) && (*gvmap)[p->first] > 0)
put_global_version(a, p->first, (*gvmap)[p->first]);
}
- return 0;
+ return;
}
// make sure dir exists
@@ -421,10 +425,10 @@ int MonitorStore::put_bl_sn_map(const char *a,
snprintf(dfn, sizeof(dfn), "%s/%s", dir.c_str(), a);
int r = ::mkdir(dfn, 0755);
if ((r < 0) && (errno != EEXIST)) {
- int err = -errno;
+ err = -errno;
derr << __func__ << " failed to create dir " << dfn << ": "
<< cpp_strerror(err) << dendl;
- return err;
+ assert(0 == "failed to create dir");
}
for (map<version_t,bufferlist>::iterator p = start; p != end; ++p) {
@@ -437,13 +441,14 @@ int MonitorStore::put_bl_sn_map(const char *a,
if (fd < 0) {
int err = -errno;
derr << "failed to open " << tfn << ": " << cpp_strerror(err) << dendl;
- return err;
+ assert(0 == "failed to open");
}
- int err = p->second.write_fd(fd);
- ::close(fd);
+ err = p->second.write_fd(fd);
+ close_err = TEMP_FAILURE_RETRY(::close(fd));
+ assert (0 == close_err);
if (err < 0)
- return -errno;
+ assert(0 == "failed to write");
// this doesn't try to be efficient.. too bad for you! it may also
// extend beyond commmitted, but that's okay; we only look at these
@@ -455,12 +460,13 @@ int MonitorStore::put_bl_sn_map(const char *a,
// sync them all
int dirfd = ::open(dir.c_str(), O_RDONLY);
if (dirfd < 0) {
- int err = -errno;
+ err = -errno;
derr << "failed to open " << dir << ": " << cpp_strerror(err) << dendl;
- return err;
+ assert(0 == "failed to open temp file");
}
sync_filesystem(dirfd);
- ::close(dirfd);
+ close_err = TEMP_FAILURE_RETRY(::close(dirfd));
+ assert (0 == close_err);
// rename them all into place
for (map<version_t,bufferlist>::iterator p = start; p != end; ++p) {
@@ -469,23 +475,28 @@ int MonitorStore::put_bl_sn_map(const char *a,
snprintf(fn, sizeof(fn), "%s/%llu", dfn, (long long unsigned)p->first);
snprintf(tfn, sizeof(tfn), "%s.new", fn);
- int err = ::rename(tfn, fn);
+ err = ::rename(tfn, fn);
if (err < 0)
- return -errno;
+ assert(0 == "failed to rename");
}
// fsync the dir (to commit the renames)
dirfd = ::open(dir.c_str(), O_RDONLY);
if (dirfd < 0) {
- int err = -errno;
+ err = -errno;
derr << __func__ << " failed to open " << dir
<< ": " << cpp_strerror(err) << dendl;
- return err;
+ assert(0 == "failed to open dir");
}
- ::fsync(dirfd);
- ::close(dirfd);
-
- return 0;
+ err = ::fsync(dirfd);
+ if (err < 0) {
+ err = -errno;
+ derr << __func__ << " failed to fsync " << dir
+ << ": " << cpp_strerror(err) << dendl;
+ assert(0 == "failed to fsync");
+ }
+ close_err = TEMP_FAILURE_RETRY(::close(dirfd));
+ assert (0 == close_err);
}
void MonitorStore::sync()
@@ -495,8 +506,9 @@ void MonitorStore::sync()
int err = -errno;
derr << __func__ << " failed to open " << dir
<< ": " << cpp_strerror(err) << dendl;
- return;
+ assert(0 == "failed to open dir for syncing");
}
sync_filesystem(dirfd);
- ::close(dirfd);
+ int close_err = TEMP_FAILURE_RETRY(::close(dirfd));
+ assert (0 == close_err);
}
diff --git a/src/mon/MonitorStore.h b/src/mon/MonitorStore.h
index cf748360456..9d1efde3d53 100644
--- a/src/mon/MonitorStore.h
+++ b/src/mon/MonitorStore.h
@@ -18,16 +18,17 @@
#include "include/types.h"
#include "include/buffer.h"
+#include "common/compiler_extensions.h"
+
#include <iosfwd>
#include <string.h>
+#include <errno.h>
class MonitorStore {
string dir;
int lock_fd;
- int write_bl_ss_impl(bufferlist& bl, const char *a, const char *b,
- bool append);
- int write_bl_ss(bufferlist& bl, const char *a, const char *b,
+ void write_bl_ss(bufferlist& bl, const char *a, const char *b,
bool append);
public:
MonitorStore(const std::string &d) : dir(d), lock_fd(-1) { }
@@ -39,36 +40,44 @@ public:
void sync();
// ints (stored as ascii)
- version_t get_int(const char *a, const char *b=0);
+ version_t get_int(const char *a, const char *b=0) WARN_UNUSED_RESULT;
void put_int(version_t v, const char *a, const char *b=0);
- version_t get_global_version(const char *a, version_t b);
+ version_t get_global_version(const char *a, version_t b) WARN_UNUSED_RESULT;
void put_global_version(const char *a, version_t b, version_t gv);
// buffers
// ss and sn varieties.
bool exists_bl_ss(const char *a, const char *b=0);
- int get_bl_ss(bufferlist& bl, const char *a, const char *b);
- int put_bl_ss(bufferlist& bl, const char *a, const char *b) {
- return write_bl_ss(bl, a, b, false);
+ int get_bl_ss(bufferlist& bl, const char *a, const char *b) WARN_UNUSED_RESULT;
+ void get_bl_ss_safe(bufferlist& bl, const char *a, const char *b) {
+ int ret = get_bl_ss(bl, a, b);
+ assert (ret >= 0 || ret == -ENOENT);
+ }
+ void put_bl_ss(bufferlist& bl, const char *a, const char *b) {
+ write_bl_ss(bl, a, b, false);
}
- int append_bl_ss(bufferlist& bl, const char *a, const char *b) {
- return write_bl_ss(bl, a, b, true);
+ void append_bl_ss(bufferlist& bl, const char *a, const char *b) {
+ write_bl_ss(bl, a, b, true);
}
bool exists_bl_sn(const char *a, version_t b) {
char bs[20];
snprintf(bs, sizeof(bs), "%llu", (unsigned long long)b);
return exists_bl_ss(a, bs);
}
- int get_bl_sn(bufferlist& bl, const char *a, version_t b) {
+ int get_bl_sn(bufferlist& bl, const char *a, version_t b) WARN_UNUSED_RESULT {
char bs[20];
snprintf(bs, sizeof(bs), "%llu", (unsigned long long)b);
return get_bl_ss(bl, a, bs);
}
- int put_bl_sn(bufferlist& bl, const char *a, version_t b) {
+ void get_bl_sn_safe(bufferlist& bl, const char *a, version_t b) {
+ int ret = get_bl_sn(bl, a, b);
+ assert(ret >= 0 || ret == -ENOENT);
+ }
+ void put_bl_sn(bufferlist& bl, const char *a, version_t b) {
char bs[20];
snprintf(bs, sizeof(bs), "%llu", (unsigned long long)b);
- return put_bl_ss(bl, a, bs);
+ put_bl_ss(bl, a, bs);
}
/**
* Put a whole set of values efficiently and safely.
@@ -77,16 +86,16 @@ public:
* @param vals - map of int name -> values
* @return 0 for success or negative error code
*/
- int put_bl_sn_map(const char *a,
+ void put_bl_sn_map(const char *a,
map<version_t,bufferlist>::iterator start,
map<version_t,bufferlist>::iterator end,
map<version_t,version_t> *gvmap);
- int erase_ss(const char *a, const char *b);
- int erase_sn(const char *a, version_t b) {
+ void erase_ss(const char *a, const char *b);
+ void erase_sn(const char *a, version_t b) {
char bs[20];
snprintf(bs, sizeof(bs), "%llu", (unsigned long long)b);
- return erase_ss(a, bs);
+ erase_ss(a, bs);
}
/*
diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc
index 24624bbc268..196edeee476 100644
--- a/src/mon/MonmapMonitor.cc
+++ b/src/mon/MonmapMonitor.cc
@@ -257,9 +257,14 @@ bool MonmapMonitor::preprocess_command(MMonCommand *m)
r = -EINVAL;
goto out;
}
- stringstream ss;
+ if (target >= (long)mon->monmap->size()) {
+ ss << "mon." << target << " does not exist";
+ r = -ENOENT;
+ goto out;
+ }
// send to target, or handle if it's me
+ stringstream ss;
MMonCommand *newm = new MMonCommand(m->fsid, m->version);
newm->cmd.insert(newm->cmd.begin(), m->cmd.begin() + 3, m->cmd.end());
mon->messenger->send_message(newm, mon->monmap->get_inst(target));
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index e72929b7d43..c845c2b600c 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -75,7 +75,7 @@ void OSDMonitor::create_initial()
OSDMap newmap;
bufferlist bl;
- mon->store->get_bl_ss(bl, "mkfs", "osdmap");
+ mon->store->get_bl_ss_safe(bl, "mkfs", "osdmap");
if (bl.length()) {
newmap.decode(bl);
newmap.set_fsid(mon->monmap->fsid);
@@ -1309,7 +1309,8 @@ void OSDMonitor::send_incremental(PaxosServiceMessage *req, epoch_t first)
if (first < paxos->get_first_committed()) {
first = paxos->get_first_committed();
bufferlist bl;
- mon->store->get_bl_sn(bl, "osdmap_full", first);
+ mon->store->get_bl_sn_safe(bl, "osdmap_full", first);
+ assert(bl.length());
dout(20) << "send_incremental starting with base full " << first << " " << bl.length() << " bytes" << dendl;
MOSDMap *m = new MOSDMap(osdmap.get_fsid());
m->oldest_map = paxos->get_first_committed();
@@ -1336,7 +1337,8 @@ void OSDMonitor::send_incremental(epoch_t first, entity_inst_t& dest, bool oneti
if (first < paxos->get_first_committed()) {
first = paxos->get_first_committed();
bufferlist bl;
- mon->store->get_bl_sn(bl, "osdmap_full", first);
+ mon->store->get_bl_sn_safe(bl, "osdmap_full", first);
+ assert(bl.length());
dout(20) << "send_incremental starting with base full " << first << " " << bl.length() << " bytes" << dendl;
MOSDMap *m = new MOSDMap(osdmap.get_fsid());
m->oldest_map = paxos->get_first_committed();
@@ -1701,7 +1703,7 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
OSDMap *p = &osdmap;
if (epoch) {
bufferlist b;
- mon->store->get_bl_sn(b,"osdmap_full", epoch);
+ mon->store->get_bl_sn_safe(b,"osdmap_full", epoch);
if (!b.length()) {
p = 0;
r = -ENOENT;
@@ -2445,7 +2447,11 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
// optional uuid provided?
uuid_d uuid;
- if (m->cmd.size() > 2 && uuid.parse(m->cmd[2].c_str())) {
+ if (m->cmd.size() > 2) {
+ if (!uuid.parse(m->cmd[2].c_str())) {
+ err = -EINVAL;
+ goto out;
+ }
dout(10) << " osd create got uuid " << uuid << dendl;
i = osdmap.identify_osd(uuid);
if (i >= 0) {
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index dcf9380613b..6a6dd60b84b 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -579,7 +579,7 @@ void PGMonitor::check_osd_map(epoch_t epoch)
e++) {
dout(10) << "check_osd_map applying osdmap e" << e << " to pg_map" << dendl;
bufferlist bl;
- mon->store->get_bl_sn(bl, "osdmap", e);
+ mon->store->get_bl_sn_safe(bl, "osdmap", e);
assert(bl.length());
OSDMap::Incremental inc(bl);
for (map<int32_t,uint32_t>::iterator p = inc.new_weight.begin();
diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc
index 349908789e3..fbe141e3a84 100644
--- a/src/mon/Paxos.cc
+++ b/src/mon/Paxos.cc
@@ -73,7 +73,8 @@ void Paxos::collect(version_t oldpn)
if (mon->store->exists_bl_sn(machine_name, last_committed+1)) {
uncommitted_v = last_committed+1;
uncommitted_pn = accepted_pn;
- mon->store->get_bl_sn(uncommitted_value, machine_name, last_committed+1);
+ mon->store->get_bl_sn_safe(uncommitted_value, machine_name, last_committed+1);
+ assert(uncommitted_value.length());
dout(10) << "learned uncommitted " << (last_committed+1)
<< " (" << uncommitted_value.length() << " bytes) from myself"
<< dendl;
@@ -146,7 +147,7 @@ void Paxos::handle_collect(MMonPaxos *collect)
// (it'll be at last_committed+1)
bufferlist bl;
if (mon->store->exists_bl_sn(machine_name, last_committed+1)) {
- mon->store->get_bl_sn(bl, machine_name, last_committed+1);
+ mon->store->get_bl_sn_safe(bl, machine_name, last_committed+1);
assert(bl.length() > 0);
dout(10) << " sharing our accepted but uncommitted value for "
<< last_committed+1 << " (" << bl.length() << " bytes)" << dendl;
@@ -184,7 +185,8 @@ void Paxos::share_state(MMonPaxos *m, version_t peer_first_committed,
// include incrementals
for ( ; v <= last_committed; v++) {
if (mon->store->exists_bl_sn(machine_name, v)) {
- mon->store->get_bl_sn(m->values[v], machine_name, v);
+ mon->store->get_bl_sn_safe(m->values[v], machine_name, v);
+ assert(m->values[v].length());
m->gv[v] = mon->store->get_global_version(machine_name, v);
dout(10) << " sharing " << v << " ("
<< m->values[v].length() << " bytes)" << dendl;
@@ -968,7 +970,7 @@ bool Paxos::is_readable(version_t v)
bool Paxos::read(version_t v, bufferlist &bl)
{
- if (!mon->store->get_bl_sn(bl, machine_name, v))
+ if (mon->store->get_bl_sn(bl, machine_name, v) <= 0)
return false;
return true;
}
@@ -1051,7 +1053,8 @@ void Paxos::stash_latest(version_t v, bufferlist& bl)
version_t Paxos::get_stashed(bufferlist& bl)
{
bufferlist full;
- if (mon->store->get_bl_ss(full, machine_name, "latest") <= 0) {
+ mon->store->get_bl_ss_safe(full, machine_name, "latest");
+ if (!full.length()) {
dout(10) << "get_stashed not found" << dendl;
return 0;
}
diff --git a/src/os/hobject.h b/src/os/hobject.h
index 9a1c207e796..d75ae8570c4 100644
--- a/src/os/hobject.h
+++ b/src/os/hobject.h
@@ -31,7 +31,9 @@ struct hobject_t {
object_t oid;
snapid_t snap;
uint32_t hash;
+private:
bool max;
+public:
int64_t pool;
string nspace;
@@ -57,6 +59,15 @@ public:
pool(pool),
key(soid.oid.name == key ? string() : key) {}
+ /// @return min hobject_t ret s.t. ret.hash == this->hash
+ hobject_t get_boundary() const {
+ if (is_max())
+ return *this;
+ hobject_t ret;
+ ret.hash = hash;
+ return ret;
+ }
+
/* Do not use when a particular hash function is needed */
explicit hobject_t(const sobject_t &o) :
oid(o.oid), snap(o.snap), max(false), pool(-1) {
@@ -108,6 +119,12 @@ public:
void decode(json_spirit::Value& v);
void dump(Formatter *f) const;
static void generate_test_instances(list<hobject_t*>& o);
+ friend bool operator<(const hobject_t&, const hobject_t&);
+ friend bool operator>(const hobject_t&, const hobject_t&);
+ friend bool operator<=(const hobject_t&, const hobject_t&);
+ friend bool operator>=(const hobject_t&, const hobject_t&);
+ friend bool operator==(const hobject_t&, const hobject_t&);
+ friend bool operator!=(const hobject_t&, const hobject_t&);
};
WRITE_CLASS_ENCODER(hobject_t)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 96052820a15..913157a8508 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1307,15 +1307,6 @@ PG *OSD::_create_lock_pg(
t.create_collection(coll_t(pgid));
- if (newly_created) {
- /* This is weird, but all the peering code needs last_epoch_start
- * to be less than same_interval_since. Make it so!
- * This is easier to deal with if you remember that the PG, while
- * now created in memory, still hasn't peered and started -- and
- * the map epoch could change before that happens! */
- history.last_epoch_started = history.epoch_created - 1;
- }
-
pg->init(role, up, acting, history, pi, &t);
dout(7) << "_create_lock_pg " << *pg << dendl;
@@ -1586,6 +1577,8 @@ PG *OSD::get_or_create_pg(const pg_info_t& info, pg_interval_map_t& pi,
if (!_have_pg(info.pgid)) {
// same primary?
+ if (!osdmap->have_pg_pool(info.pgid.pool()))
+ return 0;
vector<int> up, acting;
osdmap->pg_to_up_acting_osds(info.pgid, up, acting);
int role = osdmap->calc_pg_role(whoami, acting, acting.size());
@@ -4502,6 +4495,7 @@ void OSD::handle_pg_create(OpRequestRef op)
0, creating_pgs[pgid].acting, creating_pgs[pgid].acting,
history, pi,
*rctx.transaction);
+ pg->info.last_epoch_started = pg->info.history.last_epoch_started;
creating_pgs.erase(pgid);
wake_pg_waiters(pg->info.pgid);
pg->handle_create(&rctx);
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 53e9ae4bb19..b81bfed36f4 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1042,11 +1042,11 @@ map<int, pg_info_t>::const_iterator PG::find_best_info(const map<int, pg_info_t>
for (map<int, pg_info_t>::const_iterator i = infos.begin();
i != infos.end();
++i) {
- if (max_last_epoch_started_found < i->second.history.last_epoch_started) {
+ if (max_last_epoch_started_found < i->second.last_epoch_started) {
min_last_update_acceptable = eversion_t::max();
- max_last_epoch_started_found = i->second.history.last_epoch_started;
+ max_last_epoch_started_found = i->second.last_epoch_started;
}
- if (max_last_epoch_started_found == i->second.history.last_epoch_started) {
+ if (max_last_epoch_started_found == i->second.last_epoch_started) {
if (min_last_update_acceptable > i->second.last_update)
min_last_update_acceptable = i->second.last_update;
}
@@ -1381,6 +1381,8 @@ void PG::activate(ObjectStore::Transaction& t,
send_notify = false;
+ info.last_epoch_started = query_epoch;
+
if (is_primary()) {
// If necessary, create might_have_unfound to help us find our unfound objects.
// NOTE: It's important that we build might_have_unfound before trimming the
@@ -1774,7 +1776,8 @@ void PG::all_activated_and_committed()
assert(is_primary());
assert(peer_activated.size() == acting.size());
- info.history.last_epoch_started = get_osdmap()->get_epoch();
+ // info.last_epoch_started is set during activate()
+ info.history.last_epoch_started = info.last_epoch_started;
share_pg_info();
update_stats();
@@ -3691,7 +3694,7 @@ void PG::chunky_scrub() {
// search backward from the end looking for a boundary
objects.push_back(scrubber.end);
while (!boundary_found && objects.size() > 1) {
- hobject_t end = objects.back();
+ hobject_t end = objects.back().get_boundary();
objects.pop_back();
if (objects.back().get_filestore_key() != end.get_filestore_key()) {
@@ -4134,6 +4137,10 @@ void PG::share_pg_info()
// share new pg_info_t with replicas
for (unsigned i=1; i<acting.size(); i++) {
int peer = acting[i];
+ if (peer_info.count(i)) {
+ peer_info[i].last_epoch_started = info.last_epoch_started;
+ peer_info[i].history.merge(info.history);
+ }
MOSDPGInfo *m = new MOSDPGInfo(get_osdmap()->get_epoch());
m->pg_list.push_back(
make_pair(
@@ -4523,6 +4530,10 @@ void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo)
dirty_info = true;
osd->reg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp);
+ assert(oinfo.last_epoch_started == info.last_epoch_started);
+ assert(info.history.last_epoch_started == oinfo.last_epoch_started);
+ assert(oinfo.history.last_epoch_started == oinfo.last_epoch_started);
+
// Handle changes to purged_snaps ONLY IF we have caught up
if (last_complete_ondisk.epoch >= info.history.last_epoch_started) {
interval_set<snapid_t> p;
@@ -6451,7 +6462,7 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx)
// We pull the log from the peer's last_epoch_started to ensure we
// get enough log to detect divergent updates.
- eversion_t since(pi.history.last_epoch_started, 0);
+ eversion_t since(pi.last_epoch_started, 0);
assert(pi.last_update >= pg->info.log_tail); // or else choose_acting() did a bad thing
if (pi.log_tail <= since) {
dout(10) << " requesting log+missing since " << since << " from osd." << *i << dendl;
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 2513d9d6fe4..388751e8e8b 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -7168,10 +7168,15 @@ boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&)
dout(10) << "NotTrimming: obs_to_trim empty!" << dendl;
dout(10) << "purged_snaps now " << pg->info.purged_snaps << ", snap_trimq now "
<< pg->snap_trimq << dendl;
- ObjectStore::Transaction *t = new ObjectStore::Transaction;
- t->remove_collection(col_to_trim);
- int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t));
- assert(r == 0);
+ if (pg->snap_collections.contains(snap_to_trim)) {
+ ObjectStore::Transaction *t = new ObjectStore::Transaction;
+ pg->snap_collections.erase(snap_to_trim);
+ t->remove_collection(col_to_trim);
+ pg->write_info(*t);
+ int r = pg->osd->store->queue_transaction(
+ NULL, t, new ObjectStore::C_DeleteTransaction(t));
+ assert(r == 0);
+ }
post_event(SnapTrim());
return discard_event();
} else {
@@ -7222,9 +7227,10 @@ boost::statechart::result ReplicatedPG::RepColTrim::react(const SnapTrim&)
t->collection_remove(col_to_trim, *i);
}
t->remove_collection(col_to_trim);
+ pg->snap_collections.erase(snap_to_trim);
+ pg->write_info(*t);
int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t));
assert(r == 0);
- pg->snap_collections.erase(snap_to_trim);
return discard_event();
}
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 1ca5adc70be..4a1b3fcf2ef 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -1286,7 +1286,7 @@ void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
void pg_info_t::encode(bufferlist &bl) const
{
- ENCODE_START(26, 26, bl);
+ ENCODE_START(27, 26, bl);
::encode(pgid, bl);
::encode(last_update, bl);
::encode(last_complete, bl);
@@ -1295,12 +1295,13 @@ void pg_info_t::encode(bufferlist &bl) const
::encode(stats, bl);
history.encode(bl);
::encode(purged_snaps, bl);
+ ::encode(last_epoch_started, bl);
ENCODE_FINISH(bl);
}
void pg_info_t::decode(bufferlist::iterator &bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(26, 26, 26, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(27, 26, 26, bl);
if (struct_v < 23) {
old_pg_t opgid;
::decode(opgid, bl);
@@ -1325,6 +1326,11 @@ void pg_info_t::decode(bufferlist::iterator &bl)
set<snapid_t> snap_trimq;
::decode(snap_trimq, bl);
}
+ if (struct_v < 27) {
+ last_epoch_started = history.last_epoch_started;
+ } else {
+ ::decode(last_epoch_started, bl);
+ }
DECODE_FINISH(bl);
}
@@ -1348,6 +1354,7 @@ void pg_info_t::dump(Formatter *f) const
f->dump_int("empty", is_empty());
f->dump_int("dne", dne());
f->dump_int("incomplete", is_incomplete());
+ f->dump_int("last_epoch_started", last_epoch_started);
}
void pg_info_t::generate_test_instances(list<pg_info_t*>& o)
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index bb2ed253ce6..da2b2abf319 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -1039,6 +1039,7 @@ struct pg_info_t {
pg_t pgid;
eversion_t last_update; // last object version applied to store.
eversion_t last_complete; // last version pg was complete through.
+ epoch_t last_epoch_started;// last epoch at which this pg started on this osd
eversion_t log_tail; // oldest log entry.
@@ -1051,11 +1052,11 @@ struct pg_info_t {
pg_history_t history;
pg_info_t()
- : last_backfill(hobject_t::get_max())
+ : last_epoch_started(0), last_backfill(hobject_t::get_max())
{ }
pg_info_t(pg_t p)
: pgid(p),
- last_backfill(hobject_t::get_max())
+ last_epoch_started(0), last_backfill(hobject_t::get_max())
{ }
bool is_empty() const { return last_update.version == 0; }
@@ -1086,6 +1087,7 @@ inline ostream& operator<<(ostream& out, const pg_info_t& pgi)
out << " lb " << pgi.last_backfill;
}
//out << " c " << pgi.epoch_created;
+ out << " local-les=" << pgi.last_epoch_started;
out << " n=" << pgi.stats.stats.sum.num_objects;
out << " " << pgi.history
<< ")";
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 6a7d7915a69..fb419d93b4c 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -29,7 +29,7 @@ using namespace std;
#define SECRET_KEY_LEN 40
#define PUBLIC_ID_LEN 20
-RGWRados *store;
+static RGWRados *store = NULL;
void _usage()
{
@@ -657,6 +657,15 @@ static bool bucket_object_check_filter(const string& name)
return rgw_obj::translate_raw_obj_to_obj_in_ns(obj, ns);
}
+class StoreDestructor {
+ RGWRados *store;
+public:
+ StoreDestructor(RGWRados *_s) : store(_s) {}
+ ~StoreDestructor() {
+ RGWStoreManager::close_storage(store);
+ }
+};
+
int main(int argc, char **argv)
{
vector<const char*> args;
@@ -873,6 +882,8 @@ int main(int argc, char **argv)
return 5; //EIO
}
+ StoreDestructor store_destructor(store);
+
if (opt_cmd != OPT_USER_CREATE &&
opt_cmd != OPT_LOG_SHOW && opt_cmd != OPT_LOG_LIST && opt_cmd != OPT_LOG_RM &&
user_id.empty()) {
diff --git a/src/test/filestore/store_test.cc b/src/test/filestore/store_test.cc
index 55fde77f0fc..3a41fa10e4c 100644
--- a/src/test/filestore/store_test.cc
+++ b/src/test/filestore/store_test.cc
@@ -212,7 +212,7 @@ TEST_F(StoreTest, ManyObjectTest) {
ASSERT_EQ(r, 0);
listed.insert(objects.begin(), objects.end());
if (objects.size() < 50) {
- ASSERT_TRUE(next.max);
+ ASSERT_TRUE(next.is_max());
break;
}
objects.clear();
@@ -385,7 +385,7 @@ public:
ASSERT_TRUE(sorted(objects));
objects_set.insert(objects.begin(), objects.end());
objects.clear();
- if (next.max) break;
+ if (next.is_max()) break;
current = next;
}
ASSERT_EQ(objects_set.size(), available_objects.size());
@@ -529,7 +529,7 @@ TEST_F(StoreTest, HashCollisionTest) {
listed.insert(*i);
}
if (objects.size() < 50) {
- ASSERT_TRUE(next.max);
+ ASSERT_TRUE(next.is_max());
break;
}
objects.clear();
diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc
index 42fb411362d..644873b1a46 100644
--- a/src/test/libcephfs/test.cc
+++ b/src/test/libcephfs/test.cc
@@ -754,3 +754,31 @@ TEST(LibCephFS, BadFileDesc) {
ASSERT_EQ(ceph_get_file_pool(cmount, -1), -EBADF);
ASSERT_EQ(ceph_get_file_replication(cmount, -1), -EBADF);
}
+
+TEST(LibCephFS, ReadEmptyFile) {
+ struct ceph_mount_info *cmount;
+ ASSERT_EQ(ceph_create(&cmount, NULL), 0);
+ ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 0);
+ ASSERT_EQ(ceph_mount(cmount, NULL), 0);
+
+ // test the read_sync path in the client for zero files
+ ASSERT_EQ(ceph_conf_set(cmount, "client_debug_force_sync_read", "true"), 0);
+
+ int mypid = getpid();
+ char testf[256];
+
+ sprintf(testf, "test_reademptyfile%d", mypid);
+ int fd = ceph_open(cmount, testf, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+ ASSERT_GT(fd, 0);
+
+ ceph_close(cmount, fd);
+
+ fd = ceph_open(cmount, testf, O_RDONLY, 0);
+ ASSERT_GT(fd, 0);
+
+ char buf[4096];
+ ASSERT_EQ(ceph_read(cmount, fd, buf, 4096, 0), 0);
+
+ ceph_close(cmount, fd);
+ ceph_shutdown(cmount);
+}
diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc
index 50c83701491..0868fac3173 100644
--- a/src/test/librados/misc.cc
+++ b/src/test/librados/misc.cc
@@ -235,12 +235,13 @@ TEST(LibRadosMisc, TmapUpdateMisorderedPutPP) {
::encode(string("aval"), bl);
::encode(string("c"), bl);
::encode(string("cval"), bl);
+ bufferlist orig = bl; // tmap_put steals bl content
ASSERT_EQ(0, ioctx.tmap_put("foo", bl));
// check
bufferlist newbl;
- ASSERT_EQ(0, ioctx.read("foo", bl, 0, 0));
- ASSERT_EQ(bl.contents_equal(newbl), false);
+ ioctx.read("foo", newbl, orig.length(), 0);
+ ASSERT_EQ(orig.contents_equal(newbl), false);
ioctx.close();
ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));