summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Samuel Just <sam.just@inktank.com> 2013-07-18 10:12:17 -0700
committer: Sage Weil <sage@inktank.com> 2013-07-24 16:20:35 -0700
commit: 95b1b5da439f1b7e2fb1886aaeec2d61532183f0 (patch)
tree: e039da815e5e4f0fd91dc89f93539881ed8d9d40
parent: d92a43d8ff0123b234e47a94c2ce73fcaae7f625 (diff)
download: ceph-95b1b5da439f1b7e2fb1886aaeec2d61532183f0.tar.gz
FileStore: add global replay guard for split, collection_rename
In the event of a split or collection rename, we need to ensure that we don't replay any operations on objects within those collections prior to that point. Thus, we mark a global replay guard on the collection after doing a syncfs and make sure to check that in _check_replay_guard() for all object operations.

Fixes: #5154
Signed-off-by: Samuel Just <sam.just@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
(cherry picked from commit f3f92fe21061e21c8b259df5ef283a61782a44db)

Conflicts:
src/os/FileStore.cc
-rw-r--r-- src/os/FileStore.cc 81
-rw-r--r-- src/os/FileStore.h 3
2 files changed, 84 insertions, 0 deletions
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index a91c252e531..82185dc9d7c 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -96,6 +96,7 @@ static const __SWORD_TYPE BTRFS_SUPER_MAGIC(0x9123683E);
#define CLUSTER_SNAP_ITEM "clustersnap_%s"
#define REPLAY_GUARD_XATTR "user.cephos.seq"
+// xattr written on a collection directory by _set_global_replay_guard();
+// its value is an encoded SequencerPosition.
+#define GLOBAL_REPLAY_GUARD_XATTR "user.cephos.gseq"
/*
* long file names will have the following format:
@@ -2158,6 +2159,78 @@ int FileStore::_do_transactions(
return r;
}
+// Record a collection-wide replay guard for cid at position spos.
+//
+// In order: sync the filesystem, write the encoded spos into
+// GLOBAL_REPLAY_GUARD_XATTR on the collection directory, then fsync that
+// directory.  Returns immediately when btrfs_stable_commits is set.
+// Failing to open the directory or to set the xattr trips an assert.
+void FileStore::_set_global_replay_guard(coll_t cid,
+                                         const SequencerPosition &spos)
+{
+  if (btrfs_stable_commits) {
+    return;
+  }
+
+  // sync all previous operations on this sequencer
+  sync_filesystem(basedir_fd);
+
+  char coll_path[PATH_MAX];
+  get_cdir(cid, coll_path, sizeof(coll_path));
+  const int coll_fd = ::open(coll_path, O_RDONLY);
+  if (coll_fd < 0) {
+    // grab errno before the logging below has a chance to touch it
+    const int open_errno = errno;
+    derr << __func__ << ": " << cid << " error " << cpp_strerror(open_errno) << dendl;
+    assert(0 == "_set_global_replay_guard failed");
+  }
+
+  _inject_failure();
+
+  // stamp the directory with the position we synced at
+  bufferlist encoded_pos;
+  ::encode(spos, encoded_pos);
+  const int rc = chain_fsetxattr(coll_fd, GLOBAL_REPLAY_GUARD_XATTR,
+                                 encoded_pos.c_str(), encoded_pos.length());
+  if (rc < 0) {
+    derr << __func__ << ": fsetxattr " << GLOBAL_REPLAY_GUARD_XATTR
+         << " got " << cpp_strerror(rc) << dendl;
+    assert(0 == "fsetxattr failed");
+  }
+
+  // the xattr itself has to be durable as well
+  ::fsync(coll_fd);
+
+  _inject_failure();
+
+  TEMP_FAILURE_RETRY(::close(coll_fd));
+  dout(10) << __func__ << ": " << spos << " done" << dendl;
+}
+
+// Decide whether an operation at position spos may be applied to
+// collection cid, based on the collection's global replay guard.
+//
+// Reads GLOBAL_REPLAY_GUARD_XATTR from the collection directory and
+// compares the SequencerPosition stored there against spos.
+//
+// Returns 1 (go ahead) when:
+//   - we are not replaying, or btrfs_stable_commits is set;
+//   - the collection directory cannot be opened;
+//   - the xattr cannot be read;
+//   - spos >= the recorded position.
+// Returns -1 when spos is older than the recorded position.
+int FileStore::_check_global_replay_guard(coll_t cid,
+                                          const SequencerPosition& spos)
+{
+  if (!replaying || btrfs_stable_commits)
+    return 1;
+
+  char fn[PATH_MAX];
+  get_cdir(cid, fn, sizeof(fn));
+  int fd = ::open(fn, O_RDONLY);
+  if (fd < 0) {
+    dout(10) << __func__ << ": " << cid << " dne" << dendl;
+    return 1; // if collection does not exist, there is no guard, and we can replay.
+  }
+
+  char buf[100];
+  int r = chain_fgetxattr(fd, GLOBAL_REPLAY_GUARD_XATTR, buf, sizeof(buf));
+
+  // The xattr value now lives in buf, so the descriptor is no longer
+  // needed.  Close it here, before any branch, so that neither the
+  // "no xattr" return nor a failing decode below can leak it.
+  TEMP_FAILURE_RETRY(::close(fd));
+
+  if (r < 0) {
+    dout(20) << __func__ << " no xattr" << dendl;
+    assert(!m_filestore_fail_eio || r != -EIO);
+    return 1; // no xattr
+  }
+  bufferlist bl;
+  bl.append(buf, r);
+
+  SequencerPosition opos;
+  bufferlist::iterator p = bl.begin();
+  ::decode(opos, p);
+
+  return spos >= opos ? 1 : -1;
+}
+
+
void FileStore::_set_replay_guard(coll_t cid,
const SequencerPosition &spos,
bool in_progress=false)
@@ -2261,6 +2334,10 @@ int FileStore::_check_replay_guard(coll_t cid, hobject_t oid, const SequencerPos
if (!replaying || btrfs_stable_commits)
return 1;
+ int r = _check_global_replay_guard(cid, spos);
+ if (r < 0)
+ return r;
+
int fd = lfn_open(cid, oid, 0);
if (fd < 0) {
dout(10) << "_check_replay_guard " << cid << " " << oid << " dne" << dendl;
@@ -4275,6 +4352,9 @@ int FileStore::_collection_rename(const coll_t &cid, const coll_t &ncid,
get_cdir(cid, old_coll, sizeof(old_coll));
get_cdir(ncid, new_coll, sizeof(new_coll));
+ _set_global_replay_guard(cid, spos);
+ _set_replay_guard(cid, spos);
+
if (_check_replay_guard(cid, spos) < 0) {
return 0;
}
@@ -4782,6 +4862,7 @@ int FileStore::_split_collection(coll_t cid,
if (srccmp < 0)
return 0;
+ _set_global_replay_guard(cid, spos);
_set_replay_guard(cid, spos, true);
_set_replay_guard(dest, spos, true);
diff --git a/src/os/FileStore.h b/src/os/FileStore.h
index d5ca2a4c237..e4f7e81a502 100644
--- a/src/os/FileStore.h
+++ b/src/os/FileStore.h
@@ -333,6 +333,8 @@ public:
void _set_replay_guard(coll_t cid,
const SequencerPosition& spos,
bool in_progress);
+ /// sync the filesystem, then record spos in the GLOBAL_REPLAY_GUARD_XATTR
+ /// of cid's collection directory (does nothing when btrfs_stable_commits is set)
+ void _set_global_replay_guard(coll_t cid,
+ const SequencerPosition &spos);
/// close a replay guard opened with in_progress=true
void _close_replay_guard(int fd, const SequencerPosition& spos);
@@ -357,6 +359,7 @@ public:
int _check_replay_guard(int fd, const SequencerPosition& spos);
int _check_replay_guard(coll_t cid, const SequencerPosition& spos);
int _check_replay_guard(coll_t cid, hobject_t oid, const SequencerPosition& pos);
+ /// returns 1 if an op at spos may be applied to cid, -1 if spos is older
+ /// than the position stored in the collection's global replay guard
+ int _check_global_replay_guard(coll_t cid, const SequencerPosition& spos);
// ------------------
// objects