diff options
author | Samuel Just <sam.just@inktank.com> | 2013-07-18 10:12:17 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-07-24 16:20:35 -0700 |
commit | 95b1b5da439f1b7e2fb1886aaeec2d61532183f0 (patch) | |
tree | e039da815e5e4f0fd91dc89f93539881ed8d9d40 | |
parent | d92a43d8ff0123b234e47a94c2ce73fcaae7f625 (diff) | |
download | ceph-95b1b5da439f1b7e2fb1886aaeec2d61532183f0.tar.gz |
FileStore: add global replay guard for split, collection_rename
In the event of a split or collection rename, we need to ensure that
we don't replay any operations on objects within those collections
prior to that point. Thus, we mark a global replay guard on the
collection after doing a syncfs and make sure to check that in
_check_replay_guard() for all object operations.
Fixes: #5154
Signed-off-by: Samuel Just <sam.just@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
(cherry picked from commit f3f92fe21061e21c8b259df5ef283a61782a44db)
Conflicts:
src/os/FileStore.cc
-rw-r--r-- | src/os/FileStore.cc | 81 | ||||
-rw-r--r-- | src/os/FileStore.h | 3 |
2 files changed, 84 insertions, 0 deletions
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index a91c252e531..82185dc9d7c 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -96,6 +96,7 @@ static const __SWORD_TYPE BTRFS_SUPER_MAGIC(0x9123683E); #define CLUSTER_SNAP_ITEM "clustersnap_%s" #define REPLAY_GUARD_XATTR "user.cephos.seq" +#define GLOBAL_REPLAY_GUARD_XATTR "user.cephos.gseq" /* * long file names will have the following format: @@ -2158,6 +2159,78 @@ int FileStore::_do_transactions( return r; } +void FileStore::_set_global_replay_guard(coll_t cid, + const SequencerPosition &spos) +{ + if (btrfs_stable_commits) + return; + + // sync all previous operations on this sequencer + sync_filesystem(basedir_fd); + + char fn[PATH_MAX]; + get_cdir(cid, fn, sizeof(fn)); + int fd = ::open(fn, O_RDONLY); + if (fd < 0) { + int err = errno; + derr << __func__ << ": " << cid << " error " << cpp_strerror(err) << dendl; + assert(0 == "_set_global_replay_guard failed"); + } + + _inject_failure(); + + // then record that we did it + bufferlist v; + ::encode(spos, v); + int r = chain_fsetxattr(fd, GLOBAL_REPLAY_GUARD_XATTR, v.c_str(), v.length()); + if (r < 0) { + derr << __func__ << ": fsetxattr " << GLOBAL_REPLAY_GUARD_XATTR + << " got " << cpp_strerror(r) << dendl; + assert(0 == "fsetxattr failed"); + } + + // and make sure our xattr is durable. + ::fsync(fd); + + _inject_failure(); + + TEMP_FAILURE_RETRY(::close(fd)); + dout(10) << __func__ << ": " << spos << " done" << dendl; +} + +int FileStore::_check_global_replay_guard(coll_t cid, + const SequencerPosition& spos) +{ + if (!replaying || btrfs_stable_commits) + return 1; + + char fn[PATH_MAX]; + get_cdir(cid, fn, sizeof(fn)); + int fd = ::open(fn, O_RDONLY); + if (fd < 0) { + dout(10) << __func__ << ": " << cid << " dne" << dendl; + return 1; // if collection does not exist, there is no guard, and we can replay. + } + + char buf[100]; + int r = chain_fgetxattr(fd, GLOBAL_REPLAY_GUARD_XATTR, buf, sizeof(buf)); + if (r < 0) { + dout(20) << __func__ << " no xattr" << dendl; + assert(!m_filestore_fail_eio || r != -EIO); + return 1; // no xattr + } + bufferlist bl; + bl.append(buf, r); + + SequencerPosition opos; + bufferlist::iterator p = bl.begin(); + ::decode(opos, p); + + TEMP_FAILURE_RETRY(::close(fd)); + return spos >= opos ? 1 : -1; +} + + void FileStore::_set_replay_guard(coll_t cid, const SequencerPosition &spos, bool in_progress=false) @@ -2261,6 +2334,10 @@ int FileStore::_check_replay_guard(coll_t cid, hobject_t oid, const SequencerPos if (!replaying || btrfs_stable_commits) return 1; + int r = _check_global_replay_guard(cid, spos); + if (r < 0) + return r; + int fd = lfn_open(cid, oid, 0); if (fd < 0) { dout(10) << "_check_replay_guard " << cid << " " << oid << " dne" << dendl; @@ -4275,6 +4352,9 @@ int FileStore::_collection_rename(const coll_t &cid, const coll_t &ncid, get_cdir(cid, old_coll, sizeof(old_coll)); get_cdir(ncid, new_coll, sizeof(new_coll)); + _set_global_replay_guard(cid, spos); + _set_replay_guard(cid, spos); + if (_check_replay_guard(cid, spos) < 0) { return 0; } @@ -4782,6 +4862,7 @@ int FileStore::_split_collection(coll_t cid, if (srccmp < 0) return 0; + _set_global_replay_guard(cid, spos); _set_replay_guard(cid, spos, true); _set_replay_guard(dest, spos, true); diff --git a/src/os/FileStore.h b/src/os/FileStore.h index d5ca2a4c237..e4f7e81a502 100644 --- a/src/os/FileStore.h +++ b/src/os/FileStore.h @@ -333,6 +333,8 @@ public: void _set_replay_guard(coll_t cid, const SequencerPosition& spos, bool in_progress); + void _set_global_replay_guard(coll_t cid, + const SequencerPosition &spos); /// close a replay guard opened with in_progress=true void _close_replay_guard(int fd, const SequencerPosition& spos); @@ -357,6 +359,7 @@ public: int _check_replay_guard(int fd, const SequencerPosition& spos); int _check_replay_guard(coll_t cid, const SequencerPosition& spos); int _check_replay_guard(coll_t cid, hobject_t oid, const SequencerPosition& pos); + int _check_global_replay_guard(coll_t cid, const SequencerPosition& spos); // ------------------ // objects |