72 files changed, 1975 insertions, 1001 deletions
diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index e7fcd7201bb..7d667f3ff59 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -1,2 +1,3 @@
-v0.69
+v0.70
 ~~~~~
+mds: disable adding snapshots by default. (re-enable them with "ceph mds set allow_new_snaps --yes-i-really-mean-it")
\ No newline at end of file
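The rest of the commit wires that default through the monitor, MDS, and QA suite: snapshot creation is now gated on a new MDSMap flag, and each snapshot workunit below first enables it. A minimal sketch of the new knob in use (a sketch only, assuming a running cluster with an active MDS and a CephFS mount in the current directory; "mydir" and the snapshot names are illustrative):

    # snapshot creation is now off by default; turn it on cluster-wide
    ceph mds set allow_new_snaps --yes-i-really-mean-it
    mkdir mydir/.snap/snap1      # succeeds while the flag is set
    # turn the gate back off; existing snapshots stay, new ones are refused with EPERM
    ceph mds unset allow_new_snaps --yes-i-really-mean-it
    mkdir mydir/.snap/snap2      # fails
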
diff --git a/qa/workunits/mon/crush_ops.sh b/qa/workunits/mon/crush_ops.sh
index c2744fa279d..f1770e171eb 100755
--- a/qa/workunits/mon/crush_ops.sh
+++ b/qa/workunits/mon/crush_ops.sh
@@ -70,7 +70,7 @@ ceph osd crush rm foo
 
 # test reweight
 o3=`ceph osd create`
-ceph osd crush add $o3 123 root=foo
+ceph osd crush add $o3 123 root=default
 ceph osd tree | grep osd.$o3 | grep 123
 ceph osd crush reweight osd.$o3 113
 ceph osd tree | grep osd.$o3 | grep 113
diff --git a/qa/workunits/snaps/snap-rm-diff.sh b/qa/workunits/snaps/snap-rm-diff.sh
index 8dff54f58b8..3d30dc7937a 100755
--- a/qa/workunits/snaps/snap-rm-diff.sh
+++ b/qa/workunits/snaps/snap-rm-diff.sh
@@ -1,5 +1,6 @@
 #!/bin/sh -ex
+ceph mds set allow_new_snaps --yes-i-really-mean-it
 
 wget -q http://ceph.com/qa/linux-2.6.33.tar.bz2
 mkdir foo
 cp linux* foo
diff --git a/qa/workunits/snaps/snaptest-0.sh b/qa/workunits/snaps/snaptest-0.sh
index 93e747af7dd..366249e7d25 100755
--- a/qa/workunits/snaps/snaptest-0.sh
+++ b/qa/workunits/snaps/snaptest-0.sh
@@ -1,7 +1,16 @@
 #!/bin/sh -x
 
+expect_failure() {
+    if "$@"; then
+        return 1
+    fi
+    return 0
+}
 set -e
 
+expect_failure mkdir .snap/foo
+ceph mds set allow_new_snaps --yes-i-really-mean-it
+
 echo asdf > foo
 mkdir .snap/foo
 grep asdf .snap/foo/foo
@@ -14,4 +23,7 @@ grep asdf .snap/bar/bar
 rmdir .snap/bar
 rm foo
 
+ceph mds unset allow_new_snaps --yes-i-really-mean-it
+expect_failure mkdir .snap/baz
+
 echo OK
\ No newline at end of file
diff --git a/qa/workunits/snaps/snaptest-1.sh b/qa/workunits/snaps/snaptest-1.sh
index 59d41ef688f..7c528dd432a 100755
--- a/qa/workunits/snaps/snaptest-1.sh
+++ b/qa/workunits/snaps/snaptest-1.sh
@@ -2,6 +2,8 @@
 
 set -e
 
+ceph mds set allow_new_snaps --yes-i-really-mean-it
+
 echo 1 > file1
 echo 2 > file2
 echo 3 > file3
diff --git a/qa/workunits/snaps/snaptest-2.sh b/qa/workunits/snaps/snaptest-2.sh
index 4b67999921c..b73bf9cb97f 100755
--- a/qa/workunits/snaps/snaptest-2.sh
+++ b/qa/workunits/snaps/snaptest-2.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+ceph mds set allow_new_snaps --yes-i-really-mean-it
+
 echo "Create dir 100 to 199 ..."
 for i in $(seq 100 199); do
     echo " create dir $i"
diff --git a/qa/workunits/snaps/snaptest-authwb.sh b/qa/workunits/snaps/snaptest-authwb.sh
index 128efb70d19..acbb599bda9 100755
--- a/qa/workunits/snaps/snaptest-authwb.sh
+++ b/qa/workunits/snaps/snaptest-authwb.sh
@@ -2,6 +2,8 @@
 
 set -e
 
+ceph mds set allow_new_snaps --yes-i-really-mean-it
+
 touch foo
 chmod +x foo
 mkdir .snap/s
diff --git a/qa/workunits/snaps/snaptest-capwb.sh b/qa/workunits/snaps/snaptest-capwb.sh
index 8c5a1333b69..9d0568cb6db 100755
--- a/qa/workunits/snaps/snaptest-capwb.sh
+++ b/qa/workunits/snaps/snaptest-capwb.sh
@@ -4,6 +4,8 @@ set -e
 
 mkdir foo
 
+ceph mds set allow_new_snaps --yes-i-really-mean-it
+
 # make sure mds handles it when the client does not send flushsnap
 echo x > foo/x
 sync
diff --git a/qa/workunits/snaps/snaptest-dir-rename.sh b/qa/workunits/snaps/snaptest-dir-rename.sh
index e81edf9c47f..6995f537a47 100755
--- a/qa/workunits/snaps/snaptest-dir-rename.sh
+++ b/qa/workunits/snaps/snaptest-dir-rename.sh
@@ -2,6 +2,8 @@
 
 set -e
 
+ceph mds set allow_new_snaps --yes-i-really-mean-it
+
 #
 # make sure we keep an existing dn's seq
 #
diff --git a/qa/workunits/snaps/snaptest-double-null.sh b/qa/workunits/snaps/snaptest-double-null.sh
index cdf32e4f0ef..5a673ff9c0d 100755
--- a/qa/workunits/snaps/snaptest-double-null.sh
+++ b/qa/workunits/snaps/snaptest-double-null.sh
@@ -2,6 +2,8 @@
 
 set -e
 
+ceph mds set allow_new_snaps --yes-i-really-mean-it
+
 # multiple intervening snapshots with no modifications, and thus no
 # snapflush client_caps messages. make sure the mds can handle this.
 
diff --git a/qa/workunits/snaps/snaptest-estale.sh b/qa/workunits/snaps/snaptest-estale.sh
index a4fb94368d4..31ba5a87659 100755
--- a/qa/workunits/snaps/snaptest-estale.sh
+++ b/qa/workunits/snaps/snaptest-estale.sh
@@ -1,5 +1,7 @@
 #!/bin/sh -x
 
+ceph mds set allow_new_snaps --yes-i-really-mean-it
+
 mkdir .snap/foo
 
 echo "We want ENOENT, not ESTALE, here."
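Of these workunits, snaptest-0.sh above is the one that also checks the default-off path: its expect_failure helper inverts a command's exit status so that, under set -e, the script can assert that snapshot creation is rejected until the flag is set. A minimal standalone sketch of that pattern (assuming a fresh cluster where allow_new_snaps has not yet been enabled; the snapshot names are illustrative):

    expect_failure() {
        if "$@"; then
            return 1
        fi
        return 0
    }
    set -e
    expect_failure mkdir .snap/should_fail   # mksnap returns EPERM while snaps are disallowed
    ceph mds set allow_new_snaps --yes-i-really-mean-it
    mkdir .snap/now_allowed                  # succeeds once the flag is set
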
diff --git a/qa/workunits/snaps/snaptest-git-ceph.sh b/qa/workunits/snaps/snaptest-git-ceph.sh index 11532d8b14b..71a71e1d469 100755 --- a/qa/workunits/snaps/snaptest-git-ceph.sh +++ b/qa/workunits/snaps/snaptest-git-ceph.sh @@ -2,6 +2,8 @@ set -e +ceph mds set allow_new_snaps --yes-i-really-mean-it + git clone git://ceph.com/git/ceph.git cd ceph diff --git a/qa/workunits/snaps/snaptest-intodir.sh b/qa/workunits/snaps/snaptest-intodir.sh index 3cbbe01718e..d022cfd479e 100755 --- a/qa/workunits/snaps/snaptest-intodir.sh +++ b/qa/workunits/snaps/snaptest-intodir.sh @@ -1,5 +1,7 @@ #!/bin/sh -ex +ceph mds set allow_new_snaps --yes-i-really-mean-it + # this tests fix for #1399 mkdir foo mkdir foo/.snap/one diff --git a/qa/workunits/snaps/snaptest-multiple-capsnaps.sh b/qa/workunits/snaps/snaptest-multiple-capsnaps.sh index 5ebc852cf6c..d88722bde09 100755 --- a/qa/workunits/snaps/snaptest-multiple-capsnaps.sh +++ b/qa/workunits/snaps/snaptest-multiple-capsnaps.sh @@ -2,6 +2,8 @@ set -e +ceph mds set allow_new_snaps --yes-i-really-mean-it + echo asdf > a mkdir .snap/1 chmod 777 a diff --git a/qa/workunits/snaps/snaptest-parents.sh b/qa/workunits/snaps/snaptest-parents.sh index 7e5241a27c0..8963f628dc8 100644 --- a/qa/workunits/snaps/snaptest-parents.sh +++ b/qa/workunits/snaps/snaptest-parents.sh @@ -2,6 +2,8 @@ set -e +ceph mds set allow_new_snaps --yes-i-really-mean-it + echo "making directory tree and files" mkdir -p 1/a/b/c/ echo "i'm file1" > 1/a/file1 diff --git a/qa/workunits/snaps/snaptest-snap-rm-cmp.sh b/qa/workunits/snaps/snaptest-snap-rm-cmp.sh index aa094e70789..68ecf37b73e 100755 --- a/qa/workunits/snaps/snaptest-snap-rm-cmp.sh +++ b/qa/workunits/snaps/snaptest-snap-rm-cmp.sh @@ -2,6 +2,8 @@ set -e +ceph mds set allow_new_snaps --yes-i-really-mean-it + file=linux-2.6.33.tar.bz2 wget -q http://ceph.com/qa/$file diff --git a/qa/workunits/snaps/snaptest-upchildrealms.sh b/qa/workunits/snaps/snaptest-upchildrealms.sh index 63b7167b42d..b5b8830e9f0 100755 --- a/qa/workunits/snaps/snaptest-upchildrealms.sh +++ b/qa/workunits/snaps/snaptest-upchildrealms.sh @@ -2,6 +2,8 @@ set -e +ceph mds set allow_new_snaps --yes-i-really-mean-it + # # verify that a snap update on a parent realm will induce # snap cap writeback for inodes child realms diff --git a/qa/workunits/snaps/snaptest-xattrwb.sh b/qa/workunits/snaps/snaptest-xattrwb.sh index b2dd7bc748a..c36e2575845 100755 --- a/qa/workunits/snaps/snaptest-xattrwb.sh +++ b/qa/workunits/snaps/snaptest-xattrwb.sh @@ -2,6 +2,8 @@ set -e +ceph mds set allow_new_snaps --yes-i-really-mean-it + echo "testing simple xattr wb" touch x setfattr -n user.foo x diff --git a/qa/workunits/snaps/untar_snap_rm.sh b/qa/workunits/snaps/untar_snap_rm.sh index 5c71212df75..89e2db0cd10 100755 --- a/qa/workunits/snaps/untar_snap_rm.sh +++ b/qa/workunits/snaps/untar_snap_rm.sh @@ -2,6 +2,8 @@ set -e +ceph mds set allow_new_snaps --yes-i-really-mean-it + do_tarball() { wget http://ceph.com/qa/$1 tar xvf$2 $1 diff --git a/src/ceph_osd.cc b/src/ceph_osd.cc index 3d517da1f89..2388762f1df 100644 --- a/src/ceph_osd.cc +++ b/src/ceph_osd.cc @@ -423,7 +423,7 @@ int main(int argc, const char **argv) global_init_daemonize(g_ceph_context, 0); common_init_finish(g_ceph_context); - if (g_conf->filestore_update_to >= (int)FileStore::on_disk_version) { + if (g_conf->filestore_update_to >= (int)FileStore::target_version) { int err = OSD::convertfs(g_conf->osd_data, g_conf->osd_journal); if (err < 0) { derr << TEXT_RED << " ** ERROR: error converting store " << g_conf->osd_data diff 
--git a/src/common/hobject.cc b/src/common/hobject.cc index d6273693c62..b68baedd524 100644 --- a/src/common/hobject.cc +++ b/src/common/hobject.cc @@ -191,3 +191,90 @@ ostream& operator<<(ostream& out, const hobject_t& o) out << "/" << o.nspace << "/" << o.pool; return out; } + +// This is compatible with decode for hobject_t prior to +// version 5. +void ghobject_t::encode(bufferlist& bl) const +{ + ENCODE_START(5, 3, bl); + ::encode(hobj.key, bl); + ::encode(hobj.oid, bl); + ::encode(hobj.snap, bl); + ::encode(hobj.hash, bl); + ::encode(hobj.max, bl); + ::encode(hobj.nspace, bl); + ::encode(hobj.pool, bl); + ::encode(generation, bl); + ::encode(shard_id, bl); + ENCODE_FINISH(bl); +} + +void ghobject_t::decode(bufferlist::iterator& bl) +{ + DECODE_START_LEGACY_COMPAT_LEN(5, 3, 3, bl); + if (struct_v >= 1) + ::decode(hobj.key, bl); + ::decode(hobj.oid, bl); + ::decode(hobj.snap, bl); + ::decode(hobj.hash, bl); + if (struct_v >= 2) + ::decode(hobj.max, bl); + else + hobj.max = false; + if (struct_v >= 4) { + ::decode(hobj.nspace, bl); + ::decode(hobj.pool, bl); + } + if (struct_v >= 5) { + ::decode(generation, bl); + ::decode(shard_id, bl); + } else { + generation = ghobject_t::NO_GEN; + shard_id = ghobject_t::NO_SHARD; + } + DECODE_FINISH(bl); +} + +void ghobject_t::dump(Formatter *f) const +{ + hobj.dump(f); + if (generation != NO_GEN) { + f->dump_int("generation", generation); + f->dump_int("shard_id", shard_id); + } +} + +void ghobject_t::generate_test_instances(list<ghobject_t*>& o) +{ + o.push_back(new ghobject_t); + o.push_back(new ghobject_t); + o.back()->hobj.max = true; + o.push_back(new ghobject_t(hobject_t(object_t("oname"), string(), 1, 234, -1, ""))); + + o.push_back(new ghobject_t(hobject_t(object_t("oname2"), string("okey"), CEPH_NOSNAP, + 67, 0, "n1"), 1, 0)); + o.push_back(new ghobject_t(hobject_t(object_t("oname2"), string("okey"), CEPH_NOSNAP, + 67, 0, "n1"), 1, 1)); + o.push_back(new ghobject_t(hobject_t(object_t("oname2"), string("okey"), CEPH_NOSNAP, + 67, 0, "n1"), 1, 2)); + o.push_back(new ghobject_t(hobject_t(object_t("oname3"), string("oname3"), + CEPH_SNAPDIR, 910, 1, "n2"), 1, 0)); + o.push_back(new ghobject_t(hobject_t(object_t("oname3"), string("oname3"), + CEPH_SNAPDIR, 910, 1, "n2"), 2, 0)); + o.push_back(new ghobject_t(hobject_t(object_t("oname3"), string("oname3"), + CEPH_SNAPDIR, 910, 1, "n2"), 3, 0)); + o.push_back(new ghobject_t(hobject_t(object_t("oname3"), string("oname3"), + CEPH_SNAPDIR, 910, 1, "n2"), 3, 1)); + o.push_back(new ghobject_t(hobject_t(object_t("oname3"), string("oname3"), + CEPH_SNAPDIR, 910, 1, "n2"), 3, 2)); +} + +ostream& operator<<(ostream& out, const ghobject_t& o) +{ + out << o.hobj; + if (o.generation != ghobject_t::NO_GEN) { + assert(o.shard_id != ghobject_t::NO_SHARD); + out << "/" << o.generation << "/" << o.shard_id; + } + return out; +} diff --git a/src/common/hobject.h b/src/common/hobject.h index a2e7e5a9215..e483b664347 100644 --- a/src/common/hobject.h +++ b/src/common/hobject.h @@ -162,7 +162,7 @@ public: (*this) = temp; } - string get_namespace() const { + const string &get_namespace() const { return nspace; } @@ -177,6 +177,7 @@ public: friend bool operator>=(const hobject_t&, const hobject_t&); friend bool operator==(const hobject_t&, const hobject_t&); friend bool operator!=(const hobject_t&, const hobject_t&); + friend class ghobject_t; }; WRITE_CLASS_ENCODER(hobject_t) @@ -203,4 +204,98 @@ WRITE_CMP_OPERATORS_7(hobject_t, oid, snap) +typedef uint64_t gen_t; +typedef uint8_t shard_t; + +#ifndef UINT8_MAX +#define 
UINT8_MAX (255) +#endif +#ifndef UINT64_MAX +#define UINT64_MAX (18446744073709551615ULL) +#endif + +struct ghobject_t { + hobject_t hobj; + gen_t generation; + shard_t shard_id; + +public: + static const shard_t NO_SHARD = UINT8_MAX; + static const gen_t NO_GEN = UINT64_MAX; + + ghobject_t() : generation(NO_GEN), shard_id(NO_SHARD) {} + + ghobject_t(const hobject_t &obj) : hobj(obj), generation(NO_GEN), shard_id(NO_SHARD) {} + + ghobject_t(const hobject_t &obj, gen_t gen, shard_t shard) : hobj(obj), generation(gen), shard_id(shard) {} + + bool match(uint32_t bits, uint32_t match) const { + return hobj.match_hash(hobj.hash, bits, match); + } + /// @return min ghobject_t ret s.t. ret.hash == this->hash + ghobject_t get_boundary() const { + if (hobj.is_max()) + return *this; + ghobject_t ret; + ret.hobj.hash = hobj.hash; + return ret; + } + filestore_hobject_key_t get_filestore_key_u32() const { + assert(!hobj.max); + return hobj._reverse_nibbles(hobj.hash); + } + filestore_hobject_key_t get_filestore_key() const { + if (hobj.max) + return 0x100000000ull; + else + return get_filestore_key_u32(); + } + + // maximum sorted value. + static ghobject_t get_max() { + ghobject_t h(hobject_t::get_max()); + return h; + } + bool is_max() const { + return hobj.is_max(); + } + + void swap(ghobject_t &o) { + ghobject_t temp(o); + o = (*this); + (*this) = temp; + } + + void encode(bufferlist& bl) const; + void decode(bufferlist::iterator& bl); + void decode(json_spirit::Value& v); + void dump(Formatter *f) const; + static void generate_test_instances(list<ghobject_t*>& o); + friend bool operator<(const ghobject_t&, const ghobject_t&); + friend bool operator>(const ghobject_t&, const ghobject_t&); + friend bool operator<=(const ghobject_t&, const ghobject_t&); + friend bool operator>=(const ghobject_t&, const ghobject_t&); + friend bool operator==(const ghobject_t&, const ghobject_t&); + friend bool operator!=(const ghobject_t&, const ghobject_t&); +}; +WRITE_CLASS_ENCODER(ghobject_t) + +namespace __gnu_cxx { + template<> struct hash<ghobject_t> { + size_t operator()(const ghobject_t &r) const { + static hash<object_t> H; + static rjhash<uint64_t> I; + return H(r.hobj.oid) ^ I(r.hobj.snap); + } + }; +} + +ostream& operator<<(ostream& out, const ghobject_t& o); + +WRITE_EQ_OPERATORS_3(ghobject_t, hobj, generation, shard_id) +// sort ghobject_t's by <hobj, generation, shard_id> +WRITE_CMP_OPERATORS_3(ghobject_t, + hobj, + shard_id, + generation) #endif diff --git a/src/include/CompatSet.h b/src/include/CompatSet.h index 26c438c05f2..b23883093ac 100644 --- a/src/include/CompatSet.h +++ b/src/include/CompatSet.h @@ -36,8 +36,8 @@ struct CompatSet { FeatureSet() : mask(1), names() {} void insert(Feature f) { assert(f.id > 0); - assert(f.id < 63); - mask |= (1<<f.id); + assert(f.id < 64); + mask |= ((uint64_t)1<<f.id); names[f.id] = f.name; } @@ -50,7 +50,7 @@ struct CompatSet { void remove(uint64_t f) { if (names.count(f)) { names.erase(f); - mask &= ~(1<<f); + mask &= ~((uint64_t)1<<f); } } void remove(Feature f) { @@ -156,24 +156,48 @@ struct CompatSet { ((other.ro_compat.mask ^ ro_compat.mask) & other.ro_compat.mask); uint64_t other_incompat = ((other.incompat.mask ^ incompat.mask) & other.incompat.mask); - for (int i = 0; i < 64; ++i) { - int mask = 1 << i; + for (int id = 1; id < 64; ++id) { + uint64_t mask = (uint64_t)1 << id; if (mask & other_compat) { - diff.compat.insert( Feature(mask & other_compat, - other.compat.names[mask&other_compat])); + diff.compat.insert( Feature(id, other.compat.names[id])); 
} if (mask & other_ro_compat) { - diff.ro_compat.insert(Feature(mask & other_ro_compat, - other.compat.names[mask&other_ro_compat])); + diff.ro_compat.insert(Feature(id, other.ro_compat.names[id])); } if (mask & other_incompat) { - diff.incompat.insert( Feature(mask & other_incompat, - other.incompat.names[mask&other_incompat])); + diff.incompat.insert( Feature(id, other.incompat.names[id])); } } return diff; } + /* Merge features supported by other CompatSet into this one. + * Return: true if some features were merged + */ + bool merge(CompatSet& other) { + uint64_t other_compat = + ((other.compat.mask ^ compat.mask) & other.compat.mask); + uint64_t other_ro_compat = + ((other.ro_compat.mask ^ ro_compat.mask) & other.ro_compat.mask); + uint64_t other_incompat = + ((other.incompat.mask ^ incompat.mask) & other.incompat.mask); + if (!other_compat && !other_ro_compat && !other_incompat) + return false; + for (int id = 1; id < 64; ++id) { + uint64_t mask = (uint64_t)1 << id; + if (mask & other_compat) { + compat.insert( Feature(id, other.compat.names[id])); + } + if (mask & other_ro_compat) { + ro_compat.insert(Feature(id, other.ro_compat.names[id])); + } + if (mask & other_incompat) { + incompat.insert( Feature(id, other.incompat.names[id])); + } + } + return true; + } + void encode(bufferlist& bl) const { compat.encode(bl); ro_compat.encode(bl); diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 6c41d14f5da..ba0b5eb0f19 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -224,6 +224,7 @@ struct ceph_mon_subscribe_ack { * mdsmap flags */ #define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ +#define CEPH_MDSMAP_ALLOW_SNAPS (1<<1) /* cluster allowed to create snapshots */ /* * mds states diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp index 94d3d23a824..358142c8cb4 100644 --- a/src/include/rados/librados.hpp +++ b/src/include/rados/librados.hpp @@ -133,11 +133,16 @@ namespace librados * BALANCE_READS and LOCALIZE_READS should only be used * when reading from data you're certain won't change, * like a snapshot, or where eventual consistency is ok. + * + * ORDER_READS_WRITES will order reads the same way writes are + * ordered (e.g., waiting for degraded objects). In particular, it + * will make a write followed by a read sequence be preserved. */ enum ObjectOperationGlobalFlags { OPERATION_NOFLAG = 0, OPERATION_BALANCE_READS = 1, OPERATION_LOCALIZE_READS = 2, + OPERATION_ORDER_READS_WRITES = 4, }; /* diff --git a/src/init-ceph.in b/src/init-ceph.in index 3a404a46c6f..46877d75558 100644 --- a/src/init-ceph.in +++ b/src/init-ceph.in @@ -80,7 +80,7 @@ stop_daemon() { action=$5 [ -z "$action" ] && action="Stopping" echo -n "$action Ceph $name on $host..." - do_cmd "while [ 1 ]; do + do_cmd "while [ 1 ]; do [ -e $pidfile ] || break pid=\`cat $pidfile\` while [ -e /proc/\$pid ] && grep -q $daemon /proc/\$pid/cmdline ; do @@ -172,6 +172,14 @@ command=$1 get_local_name_list get_name_list "$@" +# Reverse the order if we are stopping +if [ "$command" = "stop" ]; then + for f in $what; do + new_order="$f $new_order" + done + what="$new_order" +fi + for name in $what; do type=`echo $name | cut -c 1-3` # e.g. 
'mon', if $item is 'mon1' id=`echo $name | cut -c 4- | sed 's/^\\.//'` @@ -251,18 +259,18 @@ for name in $what; do wrap="" runmode="" runarg="" - + [ -z "$docrun" ] && get_conf_bool docrun "0" "restart on core dump" [ "$docrun" -eq 1 ] && wrap="$BINDIR/ceph-run" - + [ -z "$dovalgrind" ] && get_conf_bool valgrind "" "valgrind" [ -n "$valgrind" ] && wrap="$wrap valgrind $valgrind" - + [ -n "$wrap" ] && runmode="-f &" && runarg="-f" [ -n "$max_open_files" ] && files="ulimit -n $max_open_files;" cmd="$files $wrap $cmd $runmode" - + if [ $dofsmount -eq 1 ] && [ -n "$fs_devs" ]; then get_conf pre_mount "true" "pre mount command" get_conf fs_type "" "osd mkfs type" @@ -361,7 +369,7 @@ for name in $what; do [ -n "$post_start" ] && do_cmd "$post_start" [ -n "$lockfile" ] && [ "$?" -eq 0 ] && touch $lockfile ;; - + stop) get_conf pre_stop "" "pre stop command" get_conf post_stop "" "post stop command" @@ -402,13 +410,13 @@ for name in $what; do [ -n "$post_forcestop" ] && do_cmd "$post_forcestop" [ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile ;; - + killall) echo "killall ceph-$type on $host" do_cmd "pkill ^ceph-$type || true" [ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile ;; - + force-reload | reload) signal_daemon $name ceph-$type $pid_file -1 "Reloading" ;; diff --git a/src/librados/librados.cc b/src/librados/librados.cc index 852228ed383..63092d1093d 100644 --- a/src/librados/librados.cc +++ b/src/librados/librados.cc @@ -958,6 +958,8 @@ int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, op_flags |= CEPH_OSD_FLAG_BALANCE_READS; if (flags & OPERATION_LOCALIZE_READS) op_flags |= CEPH_OSD_FLAG_LOCALIZE_READS; + if (flags & OPERATION_ORDER_READS_WRITES) + op_flags |= CEPH_OSD_FLAG_RWORDERED; return io_ctx_impl->aio_operate_read(obj, (::ObjectOperation*)o->impl, c->pc, op_flags, pbl); diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 1ace72e0ac3..bd89da71495 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -605,7 +605,7 @@ void MDLog::_replay_thread() } dout(10) << "_replay_thread kicking waiters" << dendl; - finish_contexts(g_ceph_context, waitfor_replay, 0); + finish_contexts(g_ceph_context, waitfor_replay, r); dout(10) << "_replay_thread finish" << dendl; mds->mds_lock.Unlock(); diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index 1646a134ad5..f1ab9b112d8 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -470,7 +470,7 @@ void MDSMap::encode(bufferlist& bl, uint64_t features) const ::encode(cas_pool, bl); // kclient ignores everything from here - __u16 ev = 5; + __u16 ev = 6; ::encode(ev, bl); ::encode(compat, bl); ::encode(metadata_pool, bl); @@ -483,6 +483,8 @@ void MDSMap::encode(bufferlist& bl, uint64_t features) const ::encode(failed, bl); ::encode(stopped, bl); ::encode(last_failure_osd_epoch, bl); + ::encode(ever_allowed_snaps, bl); + ::encode(explicitly_allowed_snaps, bl); ENCODE_FINISH(bl); } } @@ -540,5 +542,12 @@ void MDSMap::decode(bufferlist::iterator& p) ::decode(stopped, p); if (ev >= 4) ::decode(last_failure_osd_epoch, p); + if (ev >= 6) { + ::decode(ever_allowed_snaps, p); + ::decode(explicitly_allowed_snaps, p); + } else { + ever_allowed_snaps = true; + explicitly_allowed_snaps = false; + } DECODE_FINISH(p); } diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index 5bfc7cc20d5..5eadf156a95 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -175,6 +175,9 @@ protected: map<int32_t,uint64_t> up; // who is in those roles map<uint64_t,mds_info_t> mds_info; + bool ever_allowed_snaps; //< the cluster has ever allowed 
snap creation + bool explicitly_allowed_snaps; //< the user has explicitly enabled snap creation + public: CompatSet compat; @@ -188,7 +191,9 @@ public: max_file_size(0), cas_pool(-1), metadata_pool(0), - max_mds(0) + max_mds(0), + ever_allowed_snaps(false), + explicitly_allowed_snaps(false) { } utime_t get_session_timeout() { @@ -201,6 +206,14 @@ public: void set_flag(int f) { flags |= f; } void clear_flag(int f) { flags &= ~f; } + void set_snaps_allowed() { + set_flag(CEPH_MDSMAP_ALLOW_SNAPS); + ever_allowed_snaps = true; + explicitly_allowed_snaps = true; + } + bool allows_snaps() { return test_flag(CEPH_MDSMAP_ALLOW_SNAPS); } + void clear_snaps_allowed() { clear_flag(CEPH_MDSMAP_ALLOW_SNAPS); } + epoch_t get_epoch() const { return epoch; } void inc_epoch() { epoch++; } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index c6f48c2a1f7..869f3773441 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1167,10 +1167,11 @@ void Server::dispatch_client_request(MDRequest *mdr) // inodes ops. case CEPH_MDS_OP_LOOKUP: - case CEPH_MDS_OP_LOOKUPSNAP: handle_client_getattr(mdr, true); break; + case CEPH_MDS_OP_LOOKUPSNAP: + // lookupsnap does not reference a CDentry; treat it as a getattr case CEPH_MDS_OP_GETATTR: handle_client_getattr(mdr, false); break; @@ -7161,6 +7162,12 @@ struct C_MDS_mksnap_finish : public Context { /* This function takes responsibility for the passed mdr*/ void Server::handle_client_mksnap(MDRequest *mdr) { + if (!mds->mdsmap->allows_snaps()) { + // you can't make snapshots until you set an option right now + reply_request(mdr, -EPERM); + return; + } + MClientRequest *req = mdr->client_request; CInode *diri = mdcache->get_inode(req->get_filepath().get_ino()); if (!diri || diri->state_test(CInode::STATE_PURGING)) { diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index b2273274521..48c1c99d584 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -920,6 +920,36 @@ bool MDSMonitor::prepare_command(MMonCommand *m) r = 0; } + } else if (prefix == "mds set") { + string key; + cmd_getval(g_ceph_context, cmdmap, "key", key); + string sure; + cmd_getval(g_ceph_context, cmdmap, "sure", sure); + if (key == "allow_new_snaps") { + if (sure != "--yes-i-really-mean-it") { + ss << "Snapshots are unstable and will probably break your FS! Add --yes-i-really-mean-it if you are sure"; + r = -EPERM; + } else { + pending_mdsmap.set_snaps_allowed(); + ss << "turned on snaps"; + r = 0; + } + } + } else if (prefix == "mds unset") { + string key; + cmd_getval(g_ceph_context, cmdmap, "key", key); + string sure; + cmd_getval(g_ceph_context, cmdmap, "sure", sure); + if (key == "allow_new_snaps") { + if (sure != "--yes-i-really-mean-it") { + ss << "this won't get rid of snapshots or restore the cluster if it's broken. 
Add --yes-i-really-mean-it if you are sure"; + r = -EPERM; + } else { + pending_mdsmap.clear_snaps_allowed(); + ss << "disabled new snapshots"; + r = 0; + } + } } else if (prefix == "mds add_data_pool") { int64_t poolid; cmd_getval(g_ceph_context, cmdmap, "poolid", poolid); diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 482ea91ea02..752745a0c22 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -210,8 +210,8 @@ COMMAND("quorum_status", "report status of monitor quorum", \ "mon", "r", "cli,rest") COMMAND("mon_status", "report status of monitors", "mon", "r", "cli,rest") COMMAND("sync force " \ - "name=validate1,type=CephChoices,strings=--yes-i-really-mean-it " \ - "name=validate2,type=CephChoices,strings=--i-know-what-i-am-doing", \ + "name=validate1,type=CephChoices,strings=--yes-i-really-mean-it,req=false " \ + "name=validate2,type=CephChoices,strings=--i-know-what-i-am-doing,req=false", \ "force sync of and clear monitor store", "mon", "rw", "cli,rest") COMMAND("heap " \ "name=heapcmd,type=CephChoices,strings=dump|start_profiler|stop_profiler|release|stats", \ @@ -274,6 +274,15 @@ COMMAND("mds compat rm_compat " \ COMMAND("mds compat rm_incompat " \ "name=feature,type=CephInt,range=0", \ "remove incompatible feature", "mds", "rw", "cli,rest") +COMMAND("mds set " \ + "name=key,type=CephChoices,strings=allow_new_snaps " \ + "name=sure,type=CephString,req=false", \ + "set <key>", \ + "mds", "w", "cli,rest") +COMMAND("mds unset " \ + "name=key,type=CephChoices,strings=allow_new_snaps " \ + "name=sure,type=CephString,req=false", \ + "unset <key>", "mds", "w", "cli,rest") COMMAND("mds add_data_pool " \ "name=poolid,type=CephInt,range=0", \ "add data pool <poolid>", "mds", "rw", "cli,rest") @@ -283,7 +292,7 @@ COMMAND("mds remove_data_pool " \ COMMAND("mds newfs " \ "name=metadata,type=CephInt,range=0 " \ "name=data,type=CephInt,range=0 " \ - "name=sure,type=CephChoices,strings=--yes-i-really-mean-it", \ + "name=sure,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \ "make new filesystom using pools <metadata> and <data>", \ "mds", "rw", "cli,rest") /* @@ -456,7 +465,7 @@ COMMAND("osd reweight " \ "reweight osd to 0.0 < <weight> < 1.0", "osd", "rw", "cli,rest") COMMAND("osd lost " \ "name=id,type=CephInt,range=0 " \ - "name=sure,type=CephChoices,strings=--yes-i-really-mean-it", \ + "name=sure,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \ "mark osd as permanently lost. 
THIS DESTROYS DATA IF NO MORE REPLICAS EXIST, BE CAREFUL", \ "osd", "rw", "cli,rest") COMMAND("osd create " \ @@ -484,9 +493,9 @@ COMMAND("osd pool create " \ "create pool", "osd", "rw", "cli,rest") COMMAND("osd pool delete " \ "name=pool,type=CephPoolname " \ - "name=pool2,type=CephPoolname " \ - "name=sure,type=CephChoices,strings=--yes-i-really-really-mean-it", \ - "delete pool (say pool twice, add --yes-i-really-really-mean-it)", \ + "name=pool2,type=CephPoolname,req=false " \ + "name=sure,type=CephChoices,strings=--yes-i-really-really-mean-it,req=false", \ + "delete pool", \ "osd", "rw", "cli,rest") COMMAND("osd pool rename " \ "name=srcpool,type=CephPoolname " \ diff --git a/src/msg/msg_types.cc b/src/msg/msg_types.cc index 38416abd4f2..b02db768bfb 100644 --- a/src/msg/msg_types.cc +++ b/src/msg/msg_types.cc @@ -135,7 +135,7 @@ bool entity_addr_t::parse(const char *s, const char **end) ostream& operator<<(ostream& out, const sockaddr_storage &ss) { char buf[NI_MAXHOST] = { 0 }; - char serv[20] = { 0 }; + char serv[NI_MAXSERV] = { 0 }; size_t hostlen; if (ss.ss_family == AF_INET) diff --git a/src/os/CollectionIndex.h b/src/os/CollectionIndex.h index 9b1ceae8c46..89b7b862632 100644 --- a/src/os/CollectionIndex.h +++ b/src/os/CollectionIndex.h @@ -23,7 +23,7 @@ #include "include/object.h" /** - * CollectionIndex provides an interface for manipulating indexed colelctions + * CollectionIndex provides an interface for manipulating indexed collections */ class CollectionIndex { protected: @@ -127,26 +127,26 @@ protected: * @return Error Code, 0 for success */ virtual int created( - const hobject_t &hoid, ///< [in] Created object. + const ghobject_t &oid, ///< [in] Created object. const char *path ///< [in] Path to created object. ) = 0; /** - * Removes hoid from the collection + * Removes oid from the collection * * @return Error Code, 0 for success */ virtual int unlink( - const hobject_t &hoid ///< [in] Object to remove + const ghobject_t &oid ///< [in] Object to remove ) = 0; /** - * Gets the IndexedPath for hoid. + * Gets the IndexedPath for oid. * * @return Error Code, 0 for success */ virtual int lookup( - const hobject_t &hoid, ///< [in] Object to lookup + const ghobject_t &oid, ///< [in] Object to lookup IndexedPath *path, ///< [out] Path to object int *exist ///< [out] True if the object exists, else false ) = 0; @@ -167,17 +167,17 @@ protected: /// List contents of collection by hash virtual int collection_list_partial( - const hobject_t &start, ///< [in] object at which to start + const ghobject_t &start, ///< [in] object at which to start int min_count, ///< [in] get at least min_count objects int max_count, ///< [in] return at most max_count objects snapid_t seq, ///< [in] list only objects with snap >= seq - vector<hobject_t> *ls, ///< [out] Listed objects - hobject_t *next ///< [out] Next object to list + vector<ghobject_t> *ls, ///< [out] Listed objects + ghobject_t *next ///< [out] Next object to list ) = 0; /// List contents of collection. 
virtual int collection_list( - vector<hobject_t> *ls ///< [out] Listed Objects + vector<ghobject_t> *ls ///< [out] Listed Objects ) = 0; /// Call prior to removing directory diff --git a/src/os/DBObjectMap.cc b/src/os/DBObjectMap.cc index 90c840bbe9c..635870b0db5 100644 --- a/src/os/DBObjectMap.cc +++ b/src/os/DBObjectMap.cc @@ -130,61 +130,68 @@ bool DBObjectMap::check(std::ostream &out) return retval; } -string DBObjectMap::hobject_key(const hobject_t &hoid) +string DBObjectMap::ghobject_key(const ghobject_t &oid) { string out; - append_escaped(hoid.oid.name, &out); + append_escaped(oid.hobj.oid.name, &out); out.push_back('.'); - append_escaped(hoid.get_key(), &out); + append_escaped(oid.hobj.get_key(), &out); out.push_back('.'); - append_escaped(hoid.nspace, &out); + append_escaped(oid.hobj.nspace, &out); out.push_back('.'); char snap_with_hash[1000]; char *t = snap_with_hash; char *end = t + sizeof(snap_with_hash); - if (hoid.snap == CEPH_NOSNAP) + if (oid.hobj.snap == CEPH_NOSNAP) t += snprintf(t, end - t, "head"); - else if (hoid.snap == CEPH_SNAPDIR) + else if (oid.hobj.snap == CEPH_SNAPDIR) t += snprintf(t, end - t, "snapdir"); else - t += snprintf(t, end - t, "%llx", (long long unsigned)hoid.snap); + t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.snap); - if (hoid.pool == -1) + if (oid.hobj.pool == -1) t += snprintf(t, end - t, ".none"); else - t += snprintf(t, end - t, ".%llx", (long long unsigned)hoid.pool); - snprintf(t, end - t, ".%.*X", (int)(sizeof(hoid.hash)*2), hoid.hash); + t += snprintf(t, end - t, ".%llx", (long long unsigned)oid.hobj.pool); + snprintf(t, end - t, ".%.*X", (int)(sizeof(oid.hobj.hash)*2), oid.hobj.hash); + + if (oid.generation != ghobject_t::NO_GEN) { + assert(oid.shard_id != ghobject_t::NO_SHARD); + + t += snprintf(t, end - t, ".%llx", (long long unsigned)oid.generation); + t += snprintf(t, end - t, ".%x", (int)oid.shard_id); + } out += string(snap_with_hash); return out; } -string DBObjectMap::hobject_key_v0(coll_t c, const hobject_t &hoid) +string DBObjectMap::ghobject_key_v0(coll_t c, const ghobject_t &oid) { string out; append_escaped(c.to_str(), &out); out.push_back('.'); - append_escaped(hoid.oid.name, &out); + append_escaped(oid.hobj.oid.name, &out); out.push_back('.'); - append_escaped(hoid.get_key(), &out); + append_escaped(oid.hobj.get_key(), &out); out.push_back('.'); char snap_with_hash[1000]; char *t = snap_with_hash; char *end = t + sizeof(snap_with_hash); - if (hoid.snap == CEPH_NOSNAP) + if (oid.hobj.snap == CEPH_NOSNAP) t += snprintf(t, end - t, ".head"); - else if (hoid.snap == CEPH_SNAPDIR) + else if (oid.hobj.snap == CEPH_SNAPDIR) t += snprintf(t, end - t, ".snapdir"); else - t += snprintf(t, end - t, ".%llx", (long long unsigned)hoid.snap); - snprintf(t, end - t, ".%.*X", (int)(sizeof(hoid.hash)*2), hoid.hash); + t += snprintf(t, end - t, ".%llx", (long long unsigned)oid.hobj.snap); + snprintf(t, end - t, ".%.*X", (int)(sizeof(oid.hobj.hash)*2), oid.hobj.hash); out += string(snap_with_hash); return out; } -bool DBObjectMap::parse_hobject_key_v0(const string &in, coll_t *c, - hobject_t *hoid) +bool DBObjectMap::parse_ghobject_key_v0(const string &in, coll_t *c, + ghobject_t *oid) { string coll; string name; @@ -244,13 +251,13 @@ bool DBObjectMap::parse_hobject_key_v0(const string &in, coll_t *c, pg_t pg; if (c->is_pg_prefix(pg)) pool = (int64_t)pg.pool(); - (*hoid) = hobject_t(name, key, snap, hash, pool, ""); + (*oid) = ghobject_t(hobject_t(name, key, snap, hash, pool, "")); return true; } -string 
DBObjectMap::map_header_key(const hobject_t &hoid) +string DBObjectMap::map_header_key(const ghobject_t &oid) { - return hobject_key(hoid); + return ghobject_key(oid); } string DBObjectMap::header_key(uint64_t seq) @@ -311,9 +318,9 @@ int DBObjectMap::DBObjectMapIteratorImpl::init() } ObjectMap::ObjectMapIterator DBObjectMap::get_iterator( - const hobject_t &hoid) + const ghobject_t &oid) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return ObjectMapIterator(new EmptyIteratorImpl()); return _get_iterator(header); @@ -496,15 +503,15 @@ int DBObjectMap::DBObjectMapIteratorImpl::status() return r; } -int DBObjectMap::set_keys(const hobject_t &hoid, +int DBObjectMap::set_keys(const ghobject_t &oid, const map<string, bufferlist> &set, const SequencerPosition *spos) { KeyValueDB::Transaction t = db->get_transaction(); - Header header = lookup_create_map_header(hoid, t); + Header header = lookup_create_map_header(oid, t); if (!header) return -EINVAL; - if (check_spos(hoid, header, spos)) + if (check_spos(oid, header, spos)) return 0; t->set(user_prefix(header), set); @@ -512,15 +519,15 @@ int DBObjectMap::set_keys(const hobject_t &hoid, return db->submit_transaction(t); } -int DBObjectMap::set_header(const hobject_t &hoid, +int DBObjectMap::set_header(const ghobject_t &oid, const bufferlist &bl, const SequencerPosition *spos) { KeyValueDB::Transaction t = db->get_transaction(); - Header header = lookup_create_map_header(hoid, t); + Header header = lookup_create_map_header(oid, t); if (!header) return -EINVAL; - if (check_spos(hoid, header, spos)) + if (check_spos(oid, header, spos)) return 0; _set_header(header, bl, t); return db->submit_transaction(t); @@ -534,10 +541,10 @@ void DBObjectMap::_set_header(Header header, const bufferlist &bl, t->set(sys_prefix(header), to_set); } -int DBObjectMap::get_header(const hobject_t &hoid, +int DBObjectMap::get_header(const ghobject_t &oid, bufferlist *bl) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) { return 0; } @@ -568,16 +575,16 @@ int DBObjectMap::_get_header(Header header, return 0; } -int DBObjectMap::clear(const hobject_t &hoid, +int DBObjectMap::clear(const ghobject_t &oid, const SequencerPosition *spos) { KeyValueDB::Transaction t = db->get_transaction(); - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; - if (check_spos(hoid, header, spos)) + if (check_spos(oid, header, spos)) return 0; - remove_map_header(hoid, header, t); + remove_map_header(oid, header, t); assert(header->num_children > 0); header->num_children--; int r = _clear(header, t); @@ -688,15 +695,15 @@ int DBObjectMap::need_parent(DBObjectMapIterator iter) return 1; } -int DBObjectMap::rm_keys(const hobject_t &hoid, +int DBObjectMap::rm_keys(const ghobject_t &oid, const set<string> &to_clear, const SequencerPosition *spos) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; KeyValueDB::Transaction t = db->get_transaction(); - if (check_spos(hoid, header, spos)) + if (check_spos(oid, header, spos)) return 0; t->rmkeys(user_prefix(header), to_clear); if (!header->parent) { @@ -756,17 +763,17 @@ int DBObjectMap::rm_keys(const hobject_t &hoid, parent->num_children--; _clear(parent, t); header->parent = 0; - set_map_header(hoid, *header, t); + set_map_header(oid, *header, t); t->rmkeys_by_prefix(complete_prefix(header)); } return db->submit_transaction(t); 
} -int DBObjectMap::get(const hobject_t &hoid, +int DBObjectMap::get(const ghobject_t &oid, bufferlist *_header, map<string, bufferlist> *out) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; _get_header(header, _header); @@ -779,13 +786,13 @@ int DBObjectMap::get(const hobject_t &hoid, return 0; } -int DBObjectMap::get_keys(const hobject_t &hoid, +int DBObjectMap::get_keys(const ghobject_t &oid, set<string> *keys) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; - ObjectMapIterator iter = get_iterator(hoid); + ObjectMapIterator iter = get_iterator(oid); for (; iter->valid(); iter->next()) { if (iter->status()) return iter->status(); @@ -816,40 +823,40 @@ int DBObjectMap::scan(Header header, return 0; } -int DBObjectMap::get_values(const hobject_t &hoid, +int DBObjectMap::get_values(const ghobject_t &oid, const set<string> &keys, map<string, bufferlist> *out) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; return scan(header, keys, 0, out); } -int DBObjectMap::check_keys(const hobject_t &hoid, +int DBObjectMap::check_keys(const ghobject_t &oid, const set<string> &keys, set<string> *out) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; return scan(header, keys, out, 0); } -int DBObjectMap::get_xattrs(const hobject_t &hoid, +int DBObjectMap::get_xattrs(const ghobject_t &oid, const set<string> &to_get, map<string, bufferlist> *out) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; return db->get(xattr_prefix(header), to_get, out); } -int DBObjectMap::get_all_xattrs(const hobject_t &hoid, +int DBObjectMap::get_all_xattrs(const ghobject_t &oid, set<string> *out) { - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; KeyValueDB::Iterator iter = db->get_iterator(xattr_prefix(header)); @@ -860,39 +867,39 @@ int DBObjectMap::get_all_xattrs(const hobject_t &hoid, return iter->status(); } -int DBObjectMap::set_xattrs(const hobject_t &hoid, +int DBObjectMap::set_xattrs(const ghobject_t &oid, const map<string, bufferlist> &to_set, const SequencerPosition *spos) { KeyValueDB::Transaction t = db->get_transaction(); - Header header = lookup_create_map_header(hoid, t); + Header header = lookup_create_map_header(oid, t); if (!header) return -EINVAL; - if (check_spos(hoid, header, spos)) + if (check_spos(oid, header, spos)) return 0; t->set(xattr_prefix(header), to_set); return db->submit_transaction(t); } -int DBObjectMap::remove_xattrs(const hobject_t &hoid, +int DBObjectMap::remove_xattrs(const ghobject_t &oid, const set<string> &to_remove, const SequencerPosition *spos) { KeyValueDB::Transaction t = db->get_transaction(); - Header header = lookup_map_header(hoid); + Header header = lookup_map_header(oid); if (!header) return -ENOENT; - if (check_spos(hoid, header, spos)) + if (check_spos(oid, header, spos)) return 0; t->rmkeys(xattr_prefix(header), to_remove); return db->submit_transaction(t); } -int DBObjectMap::clone(const hobject_t &hoid, - const hobject_t &target, +int DBObjectMap::clone(const ghobject_t &oid, + const ghobject_t &target, const SequencerPosition *spos) { - if (hoid == target) + if (oid == target) return 0; KeyValueDB::Transaction t = db->get_transaction(); @@ -907,18 +914,18 @@ int 
DBObjectMap::clone(const hobject_t &hoid, } } - Header parent = lookup_map_header(hoid); + Header parent = lookup_map_header(oid); if (!parent) return db->submit_transaction(t); - Header source = generate_new_header(hoid, parent); + Header source = generate_new_header(oid, parent); Header destination = generate_new_header(target, parent); if (spos) destination->spos = *spos; parent->num_children = 2; set_header(parent, t); - set_map_header(hoid, *source, t); + set_map_header(oid, *source, t); set_map_header(target, *destination, t); map<string, bufferlist> to_set; @@ -973,9 +980,9 @@ int DBObjectMap::upgrade() to_get); coll_t coll; - hobject_t hoid; - assert(parse_hobject_key_v0(iter->key(), &coll, &hoid)); - new_map_headers[hobject_key(hoid)] = got.begin()->second; + ghobject_t oid; + assert(parse_ghobject_key_v0(iter->key(), &coll, &oid)); + new_map_headers[ghobject_key(oid)] = got.begin()->second; } t->rmkeys(LEAF_PREFIX, legacy_to_remove); @@ -1038,18 +1045,18 @@ int DBObjectMap::init(bool do_upgrade) return 0; } -int DBObjectMap::sync(const hobject_t *hoid, +int DBObjectMap::sync(const ghobject_t *oid, const SequencerPosition *spos) { KeyValueDB::Transaction t = db->get_transaction(); write_state(t); - if (hoid) { + if (oid) { assert(spos); - Header header = lookup_map_header(*hoid); + Header header = lookup_map_header(*oid); if (header) { - dout(10) << "hoid: " << *hoid << " setting spos to " + dout(10) << "oid: " << *oid << " setting spos to " << *spos << dendl; header->spos = *spos; - set_map_header(*hoid, *header, t); + set_map_header(*oid, *header, t); } } return db->submit_transaction_sync(t); @@ -1067,27 +1074,27 @@ int DBObjectMap::write_state(KeyValueDB::Transaction _t) { } -DBObjectMap::Header DBObjectMap::_lookup_map_header(const hobject_t &hoid) +DBObjectMap::Header DBObjectMap::_lookup_map_header(const ghobject_t &oid) { - while (map_header_in_use.count(hoid)) + while (map_header_in_use.count(oid)) header_cond.Wait(header_lock); map<string, bufferlist> out; set<string> to_get; - to_get.insert(map_header_key(hoid)); + to_get.insert(map_header_key(oid)); int r = db->get(HOBJECT_TO_SEQ, to_get, &out); if (r < 0) return Header(); if (out.empty()) return Header(); - Header ret(new _Header(), RemoveMapHeaderOnDelete(this, hoid)); + Header ret(new _Header(), RemoveMapHeaderOnDelete(this, oid)); bufferlist::iterator iter = out.begin()->second.begin(); ret->decode(iter); return ret; } -DBObjectMap::Header DBObjectMap::_generate_new_header(const hobject_t &hoid, +DBObjectMap::Header DBObjectMap::_generate_new_header(const ghobject_t &oid, Header parent) { Header header = Header(new _Header(), RemoveOnDelete(this)); @@ -1097,7 +1104,7 @@ DBObjectMap::Header DBObjectMap::_generate_new_header(const hobject_t &hoid, header->spos = parent->spos; } header->num_children = 1; - header->hoid = hoid; + header->oid = oid; assert(!in_use.count(header->seq)); in_use.insert(header->seq); @@ -1137,14 +1144,14 @@ DBObjectMap::Header DBObjectMap::lookup_parent(Header input) } DBObjectMap::Header DBObjectMap::lookup_create_map_header( - const hobject_t &hoid, + const ghobject_t &oid, KeyValueDB::Transaction t) { Mutex::Locker l(header_lock); - Header header = _lookup_map_header(hoid); + Header header = _lookup_map_header(oid); if (!header) { - header = _generate_new_header(hoid, Header()); - set_map_header(hoid, *header, t); + header = _generate_new_header(oid, Header()); + set_map_header(oid, *header, t); } return header; } @@ -1169,50 +1176,50 @@ void DBObjectMap::set_header(Header header, 
KeyValueDB::Transaction t) t->set(sys_prefix(header), to_write); } -void DBObjectMap::remove_map_header(const hobject_t &hoid, +void DBObjectMap::remove_map_header(const ghobject_t &oid, Header header, KeyValueDB::Transaction t) { dout(20) << "remove_map_header: removing " << header->seq - << " hoid " << hoid << dendl; + << " oid " << oid << dendl; set<string> to_remove; - to_remove.insert(map_header_key(hoid)); + to_remove.insert(map_header_key(oid)); t->rmkeys(HOBJECT_TO_SEQ, to_remove); } -void DBObjectMap::set_map_header(const hobject_t &hoid, _Header header, +void DBObjectMap::set_map_header(const ghobject_t &oid, _Header header, KeyValueDB::Transaction t) { dout(20) << "set_map_header: setting " << header.seq - << " hoid " << hoid << " parent seq " + << " oid " << oid << " parent seq " << header.parent << dendl; map<string, bufferlist> to_set; - header.encode(to_set[map_header_key(hoid)]); + header.encode(to_set[map_header_key(oid)]); t->set(HOBJECT_TO_SEQ, to_set); } -bool DBObjectMap::check_spos(const hobject_t &hoid, +bool DBObjectMap::check_spos(const ghobject_t &oid, Header header, const SequencerPosition *spos) { if (!spos || *spos > header->spos) { stringstream out; if (spos) - dout(10) << "hoid: " << hoid << " not skipping op, *spos " + dout(10) << "oid: " << oid << " not skipping op, *spos " << *spos << dendl; else - dout(10) << "hoid: " << hoid << " not skipping op, *spos " + dout(10) << "oid: " << oid << " not skipping op, *spos " << "empty" << dendl; dout(10) << " > header.spos " << header->spos << dendl; return false; } else { - dout(10) << "hoid: " << hoid << " skipping op, *spos " << *spos + dout(10) << "oid: " << oid << " skipping op, *spos " << *spos << " <= header.spos " << header->spos << dendl; return true; } } -int DBObjectMap::list_objects(vector<hobject_t> *out) +int DBObjectMap::list_objects(vector<ghobject_t> *out) { KeyValueDB::Iterator iter = db->get_iterator(HOBJECT_TO_SEQ); for (iter->seek_to_first(); iter->valid(); iter->next()) { @@ -1220,7 +1227,7 @@ int DBObjectMap::list_objects(vector<hobject_t> *out) bufferlist::iterator bliter = bl.begin(); _Header header; header.decode(bliter); - out->push_back(header.hoid); + out->push_back(header.oid); } return 0; } diff --git a/src/os/DBObjectMap.h b/src/os/DBObjectMap.h index ba05dff6c6f..459447f9c97 100644 --- a/src/os/DBObjectMap.h +++ b/src/os/DBObjectMap.h @@ -26,7 +26,7 @@ * @see user_prefix * @see sys_prefix * - * - HOBJECT_TO_SEQ: Contains leaf mapping from hobject_t->seq and + * - GHOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->hobj.seq and * corresponding omap header * - SYS_PREFIX: GLOBAL_STATE_KEY - contains next seq number * @see State @@ -66,89 +66,89 @@ public: * Set of headers currently in use */ set<uint64_t> in_use; - set<hobject_t> map_header_in_use; + set<ghobject_t> map_header_in_use; DBObjectMap(KeyValueDB *db) : db(db), header_lock("DBOBjectMap") {} int set_keys( - const hobject_t &hoid, + const ghobject_t &oid, const map<string, bufferlist> &set, const SequencerPosition *spos=0 ); int set_header( - const hobject_t &hoid, + const ghobject_t &oid, const bufferlist &bl, const SequencerPosition *spos=0 ); int get_header( - const hobject_t &hoid, + const ghobject_t &oid, bufferlist *bl ); int clear( - const hobject_t &hoid, + const ghobject_t &oid, const SequencerPosition *spos=0 ); int rm_keys( - const hobject_t &hoid, + const ghobject_t &oid, const set<string> &to_clear, const SequencerPosition *spos=0 ); int get( - const hobject_t &hoid, + const ghobject_t &oid, bufferlist *header, 
map<string, bufferlist> *out ); int get_keys( - const hobject_t &hoid, + const ghobject_t &oid, set<string> *keys ); int get_values( - const hobject_t &hoid, + const ghobject_t &oid, const set<string> &keys, map<string, bufferlist> *out ); int check_keys( - const hobject_t &hoid, + const ghobject_t &oid, const set<string> &keys, set<string> *out ); int get_xattrs( - const hobject_t &hoid, + const ghobject_t &oid, const set<string> &to_get, map<string, bufferlist> *out ); int get_all_xattrs( - const hobject_t &hoid, + const ghobject_t &oid, set<string> *out ); int set_xattrs( - const hobject_t &hoid, + const ghobject_t &oid, const map<string, bufferlist> &to_set, const SequencerPosition *spos=0 ); int remove_xattrs( - const hobject_t &hoid, + const ghobject_t &oid, const set<string> &to_remove, const SequencerPosition *spos=0 ); int clone( - const hobject_t &hoid, - const hobject_t &target, + const ghobject_t &oid, + const ghobject_t &target, const SequencerPosition *spos=0 ); @@ -162,13 +162,13 @@ public: bool check(std::ostream &out); /// Ensure that all previous operations are durable - int sync(const hobject_t *hoid=0, const SequencerPosition *spos=0); + int sync(const ghobject_t *oid=0, const SequencerPosition *spos=0); /// Util, list all objects, there must be no other concurrent access - int list_objects(vector<hobject_t> *objs ///< [out] objects + int list_objects(vector<ghobject_t> *objs ///< [out] objects ); - ObjectMapIterator get_iterator(const hobject_t &hoid); + ObjectMapIterator get_iterator(const ghobject_t &oid); static const string USER_PREFIX; static const string XATTR_PREFIX; @@ -223,7 +223,7 @@ public: uint64_t num_children; coll_t c; - hobject_t hoid; + ghobject_t oid; SequencerPosition spos; @@ -233,7 +233,7 @@ public: ::encode(parent, bl); ::encode(num_children, bl); ::encode(c, bl); - ::encode(hoid, bl); + ::encode(oid, bl); ::encode(spos, bl); ENCODE_FINISH(bl); } @@ -244,7 +244,7 @@ public: ::decode(parent, bl); ::decode(num_children, bl); ::decode(c, bl); - ::decode(hoid, bl); + ::decode(oid, bl); if (struct_v >= 2) ::decode(spos, bl); DECODE_FINISH(bl); @@ -255,7 +255,7 @@ public: f->dump_unsigned("parent", parent); f->dump_unsigned("num_children", num_children); f->dump_stream("coll") << c; - f->dump_stream("oid") << hoid; + f->dump_stream("oid") << oid; } static void generate_test_instances(list<_Header*> &o) { @@ -269,15 +269,15 @@ public: }; /// String munging (public for testing) - static string hobject_key(const hobject_t &hoid); - static string hobject_key_v0(coll_t c, const hobject_t &hoid); - static bool parse_hobject_key_v0(const string &in, - coll_t *c, hobject_t *hoid); + static string ghobject_key(const ghobject_t &oid); + static string ghobject_key_v0(coll_t c, const ghobject_t &oid); + static bool parse_ghobject_key_v0(const string &in, + coll_t *c, ghobject_t *oid); private: /// Implicit lock on Header->seq typedef std::tr1::shared_ptr<_Header> Header; - string map_header_key(const hobject_t &hoid); + string map_header_key(const ghobject_t &oid); string header_key(uint64_t seq); string complete_prefix(Header header); string user_prefix(Header header); @@ -368,40 +368,40 @@ private: /// Set node containing input to new contents void set_header(Header input, KeyValueDB::Transaction t); - /// Remove leaf node corresponding to hoid in c - void remove_map_header(const hobject_t &hoid, + /// Remove leaf node corresponding to oid in c + void remove_map_header(const ghobject_t &oid, Header header, KeyValueDB::Transaction t); - /// Set leaf node for c and 
hoid to the value of header - void set_map_header(const hobject_t &hoid, _Header header, + /// Set leaf node for c and oid to the value of header + void set_map_header(const ghobject_t &oid, _Header header, KeyValueDB::Transaction t); - /// Set leaf node for c and hoid to the value of header - bool check_spos(const hobject_t &hoid, + /// Set leaf node for c and oid to the value of header + bool check_spos(const ghobject_t &oid, Header header, const SequencerPosition *spos); - /// Lookup or create header for c hoid - Header lookup_create_map_header(const hobject_t &hoid, + /// Lookup or create header for c oid + Header lookup_create_map_header(const ghobject_t &oid, KeyValueDB::Transaction t); /** - * Generate new header for c hoid with new seq number + * Generate new header for c oid with new seq number * * Has the side effect of syncronously saving the new DBObjectMap state */ - Header _generate_new_header(const hobject_t &hoid, Header parent); - Header generate_new_header(const hobject_t &hoid, Header parent) { + Header _generate_new_header(const ghobject_t &oid, Header parent); + Header generate_new_header(const ghobject_t &oid, Header parent) { Mutex::Locker l(header_lock); - return _generate_new_header(hoid, parent); + return _generate_new_header(oid, parent); } - /// Lookup leaf header for c hoid - Header _lookup_map_header(const hobject_t &hoid); - Header lookup_map_header(const hobject_t &hoid) { + /// Lookup leaf header for c oid + Header _lookup_map_header(const ghobject_t &oid); + Header lookup_map_header(const ghobject_t &oid) { Mutex::Locker l(header_lock); - return _lookup_map_header(hoid); + return _lookup_map_header(oid); } /// Lookup header node for input @@ -448,12 +448,12 @@ private: class RemoveMapHeaderOnDelete { public: DBObjectMap *db; - hobject_t obj; - RemoveMapHeaderOnDelete(DBObjectMap *db, const hobject_t &obj) : - db(db), obj(obj) {} + ghobject_t oid; + RemoveMapHeaderOnDelete(DBObjectMap *db, const ghobject_t &oid) : + db(db), oid(oid) {} void operator() (_Header *header) { Mutex::Locker l(db->header_lock); - db->map_header_in_use.erase(obj); + db->map_header_in_use.erase(oid); db->map_header_cond.Signal(); delete header; } diff --git a/src/os/FDCache.h b/src/os/FDCache.h index 00e632f3e0f..93557d43c47 100644 --- a/src/os/FDCache.h +++ b/src/os/FDCache.h @@ -49,7 +49,7 @@ public: }; private: - SharedLRU<hobject_t, FD> registry; + SharedLRU<ghobject_t, FD> registry; CephContext *cct; public: @@ -63,16 +63,16 @@ public: } typedef std::tr1::shared_ptr<FD> FDRef; - FDRef lookup(const hobject_t &hoid) { + FDRef lookup(const ghobject_t &hoid) { return registry.lookup(hoid); } - FDRef add(const hobject_t &hoid, int fd) { + FDRef add(const ghobject_t &hoid, int fd) { return registry.add(hoid, new FD(fd)); } /// clear cached fd for hoid, subsequent lookups will get an empty FD - void clear(const hobject_t &hoid) { + void clear(const ghobject_t &hoid) { registry.clear(hoid); assert(!registry.lookup(hoid)); } diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 68a922bc1e0..343fb25c0e4 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -86,6 +86,23 @@ using ceph::crypto::SHA1; #define REPLAY_GUARD_XATTR "user.cephos.seq" #define GLOBAL_REPLAY_GUARD_XATTR "user.cephos.gseq" +//Initial features in new superblock. 
+static CompatSet get_fs_initial_compat_set() { + CompatSet::FeatureSet ceph_osd_feature_compat; + CompatSet::FeatureSet ceph_osd_feature_ro_compat; + CompatSet::FeatureSet ceph_osd_feature_incompat; + return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat, + ceph_osd_feature_incompat); +} + +//Features are added here that this FileStore supports. +static CompatSet get_fs_supported_compat_set() { + CompatSet compat = get_fs_initial_compat_set(); + //Any features here can be set in code, but not in initial superblock + compat.incompat.insert(CEPH_FS_FEATURE_INCOMPAT_SHARDS); + return compat; +} + void FileStore::FSPerfTracker::update_from_perfcounters( PerfCounters &logger) @@ -124,12 +141,12 @@ int FileStore::init_index(coll_t cid) { char path[PATH_MAX]; get_cdir(cid, path, sizeof(path)); - int r = index_manager.init_index(cid, path, on_disk_version); + int r = index_manager.init_index(cid, path, target_version); assert(!m_filestore_fail_eio || r != -EIO); return r; } -int FileStore::lfn_find(coll_t cid, const hobject_t& oid, IndexedPath *path) +int FileStore::lfn_find(coll_t cid, const ghobject_t& oid, IndexedPath *path) { Index index; int r, exist; @@ -147,7 +164,7 @@ int FileStore::lfn_find(coll_t cid, const hobject_t& oid, IndexedPath *path) return 0; } -int FileStore::lfn_truncate(coll_t cid, const hobject_t& oid, off_t length) +int FileStore::lfn_truncate(coll_t cid, const ghobject_t& oid, off_t length) { IndexedPath path; int r = lfn_find(cid, oid, &path); @@ -160,7 +177,7 @@ int FileStore::lfn_truncate(coll_t cid, const hobject_t& oid, off_t length) return r; } -int FileStore::lfn_stat(coll_t cid, const hobject_t& oid, struct stat *buf) +int FileStore::lfn_stat(coll_t cid, const ghobject_t& oid, struct stat *buf) { IndexedPath path; int r = lfn_find(cid, oid, &path); @@ -173,12 +190,13 @@ int FileStore::lfn_stat(coll_t cid, const hobject_t& oid, struct stat *buf) } int FileStore::lfn_open(coll_t cid, - const hobject_t& oid, + const ghobject_t& oid, bool create, FDRef *outfd, IndexedPath *path, Index *index) { + assert(get_allow_sharded_objects() || oid.shard_id == ghobject_t::NO_SHARD); assert(outfd); int flags = O_RDWR; if (create) @@ -246,7 +264,7 @@ void FileStore::lfn_close(FDRef fd) { } -int FileStore::lfn_link(coll_t c, coll_t newcid, const hobject_t& o, const hobject_t& newoid) +int FileStore::lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghobject_t& newoid) { Index index_new, index_old; IndexedPath path_new, path_old; @@ -298,7 +316,7 @@ int FileStore::lfn_link(coll_t c, coll_t newcid, const hobject_t& o, const hobje return 0; } -int FileStore::lfn_unlink(coll_t cid, const hobject_t& o, +int FileStore::lfn_unlink(coll_t cid, const ghobject_t& o, const SequencerPosition &spos, bool force_clear_omap) { @@ -447,6 +465,8 @@ FileStore::FileStore(const std::string &base, const std::string &jdev, const cha generic_backend = new GenericFileStoreBackend(this); backend = generic_backend; + + superblock.compat_features = get_fs_initial_compat_set(); } FileStore::~FileStore() @@ -592,6 +612,13 @@ int FileStore::mkfs() goto close_fsid_fd; } + ret = write_superblock(); + if (ret < 0) { + derr << "mkfs: write_superblock() failed: " + << cpp_strerror(ret) << dendl; + goto close_fsid_fd; + } + struct statfs basefs; ret = ::fstatfs(basedir_fd, &basefs); if (ret < 0) { @@ -917,6 +944,67 @@ int FileStore::_sanity_check_fs() return 0; } +int FileStore::write_superblock() +{ + char fn[PATH_MAX]; + snprintf(fn, sizeof(fn), "%s/superblock", basedir.c_str()); + int fd = 
::open(fn, O_WRONLY|O_CREAT|O_TRUNC, 0644); + if (fd < 0) + return -errno; + bufferlist bl; + ::encode(superblock, bl); + + int ret = safe_write(fd, bl.c_str(), bl.length()); + if (ret < 0) + goto out; + ret = ::fsync(fd); + if (ret < 0) + ret = -errno; + // XXX: fsync() man page says I need to sync containing directory +out: + TEMP_FAILURE_RETRY(::close(fd)); + return ret; +} + +int FileStore::read_superblock() +{ + char fn[PATH_MAX]; + snprintf(fn, sizeof(fn), "%s/superblock", basedir.c_str()); + int fd = ::open(fn, O_RDONLY, 0644); + if (fd < 0) { + if (errno == ENOENT) { + // If the file doesn't exist write initial CompatSet + return write_superblock(); + } else + return -errno; + } + bufferptr bp(PATH_MAX); + int ret = safe_read(fd, bp.c_str(), bp.length()); + TEMP_FAILURE_RETRY(::close(fd)); + if (ret < 0) + return ret; + bufferlist bl; + bl.push_back(bp); + bufferlist::iterator i = bl.begin(); + ::decode(superblock, i); + return 0; +} + +void FileStore::set_allow_sharded_objects() +{ + if (!get_allow_sharded_objects()) { + superblock.compat_features.incompat.insert(CEPH_FS_FEATURE_INCOMPAT_SHARDS); + int ret = write_superblock(); + assert(ret == 0); //Should we return error and make caller handle it? + } + return; +} + +bool FileStore::get_allow_sharded_objects() +{ + return superblock.compat_features.incompat.contains(CEPH_FS_FEATURE_INCOMPAT_SHARDS); +} + int FileStore::update_version_stamp() { return write_version_stamp(); @@ -937,12 +1025,12 @@ int FileStore::version_stamp_is_valid(uint32_t *version) int ret = safe_read(fd, bp.c_str(), bp.length()); TEMP_FAILURE_RETRY(::close(fd)); if (ret < 0) - return -errno; + return ret; bufferlist bl; bl.push_back(bp); bufferlist::iterator i = bl.begin(); ::decode(*version, i); - if (*version == on_disk_version) + if (*version == target_version) return 1; else return 0; @@ -956,13 +1044,11 @@ int FileStore::write_version_stamp() if (fd < 0) return -errno; bufferlist bl; - ::encode(on_disk_version, bl); + ::encode(target_version, bl); int ret = safe_write(fd, bl.c_str(), bl.length()); TEMP_FAILURE_RETRY(::close(fd)); - if (ret < 0) - return -errno; - return 0; + return ret; } int FileStore::read_op_seq(uint64_t *seq) @@ -1004,6 +1090,7 @@ int FileStore::mount() char buf[PATH_MAX]; uint64_t initial_op_seq; set<string> cluster_snaps; + CompatSet supported_compat_set = get_fs_supported_compat_set(); dout(5) << "basedir " << basedir << " journal " << journalpath << dendl; @@ -1058,12 +1145,26 @@ int FileStore::mount() ret = -EINVAL; derr << "FileStore::mount : stale version stamp " << version_stamp << ". 
Please run the FileStore update script before starting the " - << "OSD, or set filestore_update_to to " << on_disk_version + << "OSD, or set filestore_update_to to " << target_version << dendl; goto close_fsid_fd; } } + ret = read_superblock(); + if (ret < 0) { + ret = -EINVAL; + goto close_fsid_fd; + } + + // Check if this FileStore supports all the necessary features to mount + if (supported_compat_set.compare(superblock.compat_features) == -1) { + derr << "FileStore::mount : Incompatible features set " + << superblock.compat_features << dendl; + ret = -EINVAL; + goto close_fsid_fd; + } + // open some dir handles basedir_fd = ::open(basedir.c_str(), O_RDONLY); if (basedir_fd < 0) { @@ -1813,7 +1914,7 @@ void FileStore::_set_replay_guard(coll_t cid, void FileStore::_set_replay_guard(int fd, const SequencerPosition& spos, - const hobject_t *hoid, + const ghobject_t *hoid, bool in_progress) { if (backend->can_checkpoint()) @@ -1894,7 +1995,7 @@ void FileStore::_close_replay_guard(int fd, const SequencerPosition& spos) dout(10) << "_close_replay_guard " << spos << " done" << dendl; } -int FileStore::_check_replay_guard(coll_t cid, hobject_t oid, const SequencerPosition& spos) +int FileStore::_check_replay_guard(coll_t cid, ghobject_t oid, const SequencerPosition& spos) { if (!replaying || backend->can_checkpoint()) return 1; @@ -1997,7 +2098,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_TOUCH: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); if (_check_replay_guard(cid, oid, spos) > 0) r = _touch(cid, oid); } @@ -2006,7 +2107,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_WRITE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); uint64_t off = i.get_length(); uint64_t len = i.get_length(); bool replica = i.get_replica(); @@ -2020,7 +2121,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_ZERO: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); uint64_t off = i.get_length(); uint64_t len = i.get_length(); if (_check_replay_guard(cid, oid, spos) > 0) @@ -2041,7 +2142,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_TRUNCATE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); uint64_t off = i.get_length(); if (_check_replay_guard(cid, oid, spos) > 0) r = _truncate(cid, oid, off); @@ -2051,7 +2152,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_REMOVE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); if (_check_replay_guard(cid, oid, spos) > 0) r = _remove(cid, oid, spos); } @@ -2060,7 +2161,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_SETATTR: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); string name = i.get_attrname(); bufferlist bl; i.get_bl(bl); @@ -2078,7 +2179,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_SETATTRS: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); map<string, bufferptr> aset; i.get_attrset(aset); if (_check_replay_guard(cid, oid, spos) > 0) @@ -2091,7 +2192,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_RMATTR: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); string name = i.get_attrname(); if (_check_replay_guard(cid, oid, spos) > 0) r = _rmattr(cid, oid, name.c_str(), spos); @@ -2101,7 +2202,7 @@ unsigned 
FileStore::_do_transaction( case Transaction::OP_RMATTRS: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); if (_check_replay_guard(cid, oid, spos) > 0) r = _rmattrs(cid, oid, spos); } @@ -2110,8 +2211,8 @@ unsigned FileStore::_do_transaction( case Transaction::OP_CLONE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); - hobject_t noid = i.get_oid(); + ghobject_t oid = i.get_oid(); + ghobject_t noid = i.get_oid(); r = _clone(cid, oid, noid, spos); } break; @@ -2119,8 +2220,8 @@ unsigned FileStore::_do_transaction( case Transaction::OP_CLONERANGE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); - hobject_t noid = i.get_oid(); + ghobject_t oid = i.get_oid(); + ghobject_t noid = i.get_oid(); uint64_t off = i.get_length(); uint64_t len = i.get_length(); r = _clone_range(cid, oid, noid, off, len, off, spos); @@ -2130,8 +2231,8 @@ unsigned FileStore::_do_transaction( case Transaction::OP_CLONERANGE2: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); - hobject_t noid = i.get_oid(); + ghobject_t oid = i.get_oid(); + ghobject_t noid = i.get_oid(); uint64_t srcoff = i.get_length(); uint64_t len = i.get_length(); uint64_t dstoff = i.get_length(); @@ -2159,7 +2260,7 @@ unsigned FileStore::_do_transaction( { coll_t ncid = i.get_cid(); coll_t ocid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); r = _collection_add(ncid, ocid, oid, spos); } break; @@ -2167,7 +2268,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_COLL_REMOVE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); if (_check_replay_guard(cid, oid, spos) > 0) r = _remove(cid, oid, spos); } @@ -2178,7 +2279,7 @@ unsigned FileStore::_do_transaction( // WARNING: this is deprecated and buggy; only here to replay old journals. 
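// Note: the hunks above and below mechanically widen every per-object op in
// _do_transaction from hobject_t to ghobject_t. As a rough sketch of the shape
// this implies, with names inferred only from usages visible elsewhere in this
// patch (oid.hobj, oid.shard_id, ghobject_t::NO_SHARD, and the
// ghobject_t(hobject_t(...)) wrapping); the authoritative definition lives in
// the Ceph headers and carries more than this, so treat it as illustrative only:
//
//   struct ghobject_sketch {
//     hobject_t hobj;        // the legacy object id, unchanged
//     int8_t    shard_id;    // NO_SHARD (-1) for unsharded objects
//     explicit ghobject_sketch(const hobject_t &h)
//       : hobj(h), shard_id(-1 /* NO_SHARD */) {}
//   };
//
// Because an unsharded object simply keeps shard_id == NO_SHARD, lfn_open() can
// assert (get_allow_sharded_objects() || oid.shard_id == ghobject_t::NO_SHARD)
// without changing the behavior of any existing caller.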
coll_t ocid = i.get_cid(); coll_t ncid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); r = _collection_add(ocid, ncid, oid, spos); if (r == 0 && (_check_replay_guard(ocid, oid, spos) > 0)) @@ -2189,9 +2290,9 @@ unsigned FileStore::_do_transaction( case Transaction::OP_COLL_MOVE_RENAME: { coll_t oldcid = i.get_cid(); - hobject_t oldoid = i.get_oid(); + ghobject_t oldoid = i.get_oid(); coll_t newcid = i.get_cid(); - hobject_t newoid = i.get_oid(); + ghobject_t newoid = i.get_oid(); r = _collection_move_rename(oldcid, oldoid, newcid, newoid, spos); } break; @@ -2231,14 +2332,14 @@ unsigned FileStore::_do_transaction( case Transaction::OP_OMAP_CLEAR: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); r = _omap_clear(cid, oid, spos); } break; case Transaction::OP_OMAP_SETKEYS: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); map<string, bufferlist> aset; i.get_attrset(aset); r = _omap_setkeys(cid, oid, aset, spos); @@ -2247,7 +2348,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_OMAP_RMKEYS: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); set<string> keys; i.get_keyset(keys); r = _omap_rmkeys(cid, oid, keys, spos); @@ -2256,7 +2357,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_OMAP_RMKEYRANGE: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); string first, last; first = i.get_key(); last = i.get_key(); @@ -2266,7 +2367,7 @@ unsigned FileStore::_do_transaction( case Transaction::OP_OMAP_SETHEADER: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); bufferlist bl; i.get_bl(bl); r = _omap_setheader(cid, oid, bl, spos); @@ -2386,7 +2487,7 @@ unsigned FileStore::_do_transaction( // -------------------- // objects -bool FileStore::exists(coll_t cid, const hobject_t& oid) +bool FileStore::exists(coll_t cid, const ghobject_t& oid) { struct stat st; if (stat(cid, oid, &st) == 0) @@ -2396,7 +2497,7 @@ bool FileStore::exists(coll_t cid, const hobject_t& oid) } int FileStore::stat( - coll_t cid, const hobject_t& oid, struct stat *st, bool allow_eio) + coll_t cid, const ghobject_t& oid, struct stat *st, bool allow_eio) { int r = lfn_stat(cid, oid, st); assert(allow_eio || !m_filestore_fail_eio || r != -EIO); @@ -2418,7 +2519,7 @@ int FileStore::stat( int FileStore::read( coll_t cid, - const hobject_t& oid, + const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl, @@ -2466,7 +2567,7 @@ int FileStore::read( } } -int FileStore::fiemap(coll_t cid, const hobject_t& oid, +int FileStore::fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl) { @@ -2544,7 +2645,7 @@ done: } -int FileStore::_remove(coll_t cid, const hobject_t& oid, +int FileStore::_remove(coll_t cid, const ghobject_t& oid, const SequencerPosition &spos) { dout(15) << "remove " << cid << "/" << oid << dendl; @@ -2553,7 +2654,7 @@ int FileStore::_remove(coll_t cid, const hobject_t& oid, return r; } -int FileStore::_truncate(coll_t cid, const hobject_t& oid, uint64_t size) +int FileStore::_truncate(coll_t cid, const ghobject_t& oid, uint64_t size) { dout(15) << "truncate " << cid << "/" << oid << " size " << size << dendl; int r = lfn_truncate(cid, oid, size); @@ -2562,7 +2663,7 @@ int FileStore::_truncate(coll_t cid, const hobject_t& oid, uint64_t size) } -int FileStore::_touch(coll_t cid, const hobject_t& oid) +int FileStore::_touch(coll_t cid, const 
ghobject_t& oid) { dout(15) << "touch " << cid << "/" << oid << dendl; @@ -2577,7 +2678,7 @@ int FileStore::_touch(coll_t cid, const hobject_t& oid) return r; } -int FileStore::_write(coll_t cid, const hobject_t& oid, +int FileStore::_write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl, bool replica) { @@ -2626,7 +2727,7 @@ int FileStore::_write(coll_t cid, const hobject_t& oid, return r; } -int FileStore::_zero(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len) +int FileStore::_zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len) { dout(15) << "zero " << cid << "/" << oid << " " << offset << "~" << len << dendl; int ret = 0; @@ -2669,7 +2770,7 @@ int FileStore::_zero(coll_t cid, const hobject_t& oid, uint64_t offset, size_t l return ret; } -int FileStore::_clone(coll_t cid, const hobject_t& oldoid, const hobject_t& newoid, +int FileStore::_clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid, const SequencerPosition& spos) { dout(15) << "clone " << cid << "/" << oldoid << " -> " << cid << "/" << newoid << dendl; @@ -2803,7 +2904,7 @@ int FileStore::_do_copy_range(int from, int to, uint64_t srcoff, uint64_t len, u return r; } -int FileStore::_clone_range(coll_t cid, const hobject_t& oldoid, const hobject_t& newoid, +int FileStore::_clone_range(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid, uint64_t srcoff, uint64_t len, uint64_t dstoff, const SequencerPosition& spos) { @@ -3244,23 +3345,23 @@ int FileStore::_fsetattrs(int fd, map<string, bufferptr> &aset) } // debug EIO injection -void FileStore::inject_data_error(const hobject_t &oid) { +void FileStore::inject_data_error(const ghobject_t &oid) { Mutex::Locker l(read_error_lock); dout(10) << __func__ << ": init error on " << oid << dendl; data_error_set.insert(oid); } -void FileStore::inject_mdata_error(const hobject_t &oid) { +void FileStore::inject_mdata_error(const ghobject_t &oid) { Mutex::Locker l(read_error_lock); dout(10) << __func__ << ": init error on " << oid << dendl; mdata_error_set.insert(oid); } -void FileStore::debug_obj_on_delete(const hobject_t &oid) { +void FileStore::debug_obj_on_delete(const ghobject_t &oid) { Mutex::Locker l(read_error_lock); dout(10) << __func__ << ": clear error on " << oid << dendl; data_error_set.erase(oid); mdata_error_set.erase(oid); } -bool FileStore::debug_data_eio(const hobject_t &oid) { +bool FileStore::debug_data_eio(const ghobject_t &oid) { Mutex::Locker l(read_error_lock); if (data_error_set.count(oid)) { dout(10) << __func__ << ": inject error on " << oid << dendl; @@ -3269,7 +3370,7 @@ bool FileStore::debug_data_eio(const hobject_t &oid) { return false; } } -bool FileStore::debug_mdata_eio(const hobject_t &oid) { +bool FileStore::debug_mdata_eio(const ghobject_t &oid) { Mutex::Locker l(read_error_lock); if (mdata_error_set.count(oid)) { dout(10) << __func__ << ": inject error on " << oid << dendl; @@ -3282,7 +3383,7 @@ bool FileStore::debug_mdata_eio(const hobject_t &oid) { // objects -int FileStore::getattr(coll_t cid, const hobject_t& oid, const char *name, bufferptr &bp) +int FileStore::getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr &bp) { dout(15) << "getattr " << cid << "/" << oid << " '" << name << "'" << dendl; FDRef fd; @@ -3328,7 +3429,7 @@ int FileStore::getattr(coll_t cid, const hobject_t& oid, const char *name, buffe } } -int FileStore::getattrs(coll_t cid, const hobject_t& oid, map<string,bufferptr>& aset, bool user_only) +int FileStore::getattrs(coll_t 
cid, const ghobject_t& oid, map<string,bufferptr>& aset, bool user_only) { dout(15) << "getattrs " << cid << "/" << oid << dendl; FDRef fd; @@ -3387,7 +3488,7 @@ int FileStore::getattrs(coll_t cid, const hobject_t& oid, map<string,bufferptr>& } } -int FileStore::_setattrs(coll_t cid, const hobject_t& oid, map<string,bufferptr>& aset, +int FileStore::_setattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset, const SequencerPosition &spos) { map<string, bufferlist> omap_set; @@ -3472,7 +3573,7 @@ int FileStore::_setattrs(coll_t cid, const hobject_t& oid, map<string,bufferptr> } -int FileStore::_rmattr(coll_t cid, const hobject_t& oid, const char *name, +int FileStore::_rmattr(coll_t cid, const ghobject_t& oid, const char *name, const SequencerPosition &spos) { dout(15) << "rmattr " << cid << "/" << oid << " '" << name << "'" << dendl; @@ -3507,7 +3608,7 @@ int FileStore::_rmattr(coll_t cid, const hobject_t& oid, const char *name, return r; } -int FileStore::_rmattrs(coll_t cid, const hobject_t& oid, +int FileStore::_rmattrs(coll_t cid, const ghobject_t& oid, const SequencerPosition &spos) { dout(15) << "rmattrs " << cid << "/" << oid << dendl; @@ -3703,14 +3804,14 @@ int FileStore::_collection_remove_recursive(const coll_t &cid, return r; } - vector<hobject_t> objects; - hobject_t max; + vector<ghobject_t> objects; + ghobject_t max; r = 0; while (!max.is_max()) { r = collection_list_partial(cid, max, 200, 300, 0, &objects, &max); if (r < 0) return r; - for (vector<hobject_t>::iterator i = objects.begin(); + for (vector<ghobject_t>::iterator i = objects.begin(); i != objects.end(); ++i) { assert(_check_replay_guard(cid, *i, spos)); @@ -3782,7 +3883,7 @@ int FileStore::collection_version_current(coll_t c, uint32_t *version) if (r < 0) return r; *version = index->collection_version(); - if (*version == on_disk_version) + if (*version == target_version) return 1; else return 0; @@ -3875,9 +3976,9 @@ bool FileStore::collection_empty(coll_t c) int r = get_index(c, &index); if (r < 0) return false; - vector<hobject_t> ls; + vector<ghobject_t> ls; collection_list_handle_t handle; - r = index->collection_list_partial(hobject_t(), 1, 1, 0, &ls, NULL); + r = index->collection_list_partial(ghobject_t(), 1, 1, 0, &ls, NULL); if (r < 0) { assert(!m_filestore_fail_eio || r != -EIO); return false; @@ -3885,14 +3986,14 @@ bool FileStore::collection_empty(coll_t c) return ls.empty(); } -int FileStore::collection_list_range(coll_t c, hobject_t start, hobject_t end, - snapid_t seq, vector<hobject_t> *ls) +int FileStore::collection_list_range(coll_t c, ghobject_t start, ghobject_t end, + snapid_t seq, vector<ghobject_t> *ls) { bool done = false; - hobject_t next = start; + ghobject_t next = start; while (!done) { - vector<hobject_t> next_objects; + vector<ghobject_t> next_objects; int r = collection_list_partial(c, next, get_ideal_list_min(), get_ideal_list_max(), seq, &next_objects, &next); @@ -3919,9 +4020,9 @@ int FileStore::collection_list_range(coll_t c, hobject_t start, hobject_t end, return 0; } -int FileStore::collection_list_partial(coll_t c, hobject_t start, +int FileStore::collection_list_partial(coll_t c, ghobject_t start, int min, int max, snapid_t seq, - vector<hobject_t> *ls, hobject_t *next) + vector<ghobject_t> *ls, ghobject_t *next) { dout(10) << "collection_list_partial: " << c << dendl; Index index; @@ -3940,7 +4041,7 @@ int FileStore::collection_list_partial(coll_t c, hobject_t start, return 0; } -int FileStore::collection_list(coll_t c, vector<hobject_t>& ls) +int 
FileStore::collection_list(coll_t c, vector<ghobject_t>& ls) { Index index; int r = get_index(c, &index); @@ -3951,7 +4052,7 @@ int FileStore::collection_list(coll_t c, vector<hobject_t>& ls) return r; } -int FileStore::omap_get(coll_t c, const hobject_t &hoid, +int FileStore::omap_get(coll_t c, const ghobject_t &hoid, bufferlist *header, map<string, bufferlist> *out) { @@ -3970,7 +4071,7 @@ int FileStore::omap_get(coll_t c, const hobject_t &hoid, int FileStore::omap_get_header( coll_t c, - const hobject_t &hoid, + const ghobject_t &hoid, bufferlist *bl, bool allow_eio) { @@ -3987,7 +4088,7 @@ int FileStore::omap_get_header( return 0; } -int FileStore::omap_get_keys(coll_t c, const hobject_t &hoid, set<string> *keys) +int FileStore::omap_get_keys(coll_t c, const ghobject_t &hoid, set<string> *keys) { dout(15) << __func__ << " " << c << "/" << hoid << dendl; IndexedPath path; @@ -4002,7 +4103,7 @@ int FileStore::omap_get_keys(coll_t c, const hobject_t &hoid, set<string> *keys) return 0; } -int FileStore::omap_get_values(coll_t c, const hobject_t &hoid, +int FileStore::omap_get_values(coll_t c, const ghobject_t &hoid, const set<string> &keys, map<string, bufferlist> *out) { @@ -4019,7 +4120,7 @@ int FileStore::omap_get_values(coll_t c, const hobject_t &hoid, return 0; } -int FileStore::omap_check_keys(coll_t c, const hobject_t &hoid, +int FileStore::omap_check_keys(coll_t c, const ghobject_t &hoid, const set<string> &keys, set<string> *out) { @@ -4037,7 +4138,7 @@ int FileStore::omap_check_keys(coll_t c, const hobject_t &hoid, } ObjectMap::ObjectMapIterator FileStore::get_omap_iterator(coll_t c, - const hobject_t &hoid) + const ghobject_t &hoid) { dout(15) << __func__ << " " << c << "/" << hoid << dendl; IndexedPath path; @@ -4108,7 +4209,7 @@ int FileStore::_destroy_collection(coll_t c) } -int FileStore::_collection_add(coll_t c, coll_t oldcid, const hobject_t& o, +int FileStore::_collection_add(coll_t c, coll_t oldcid, const ghobject_t& o, const SequencerPosition& spos) { dout(15) << "collection_add " << c << "/" << o << " from " << oldcid << "/" << o << dendl; @@ -4156,8 +4257,8 @@ int FileStore::_collection_add(coll_t c, coll_t oldcid, const hobject_t& o, return r; } -int FileStore::_collection_move_rename(coll_t oldcid, const hobject_t& oldoid, - coll_t c, const hobject_t& o, +int FileStore::_collection_move_rename(coll_t oldcid, const ghobject_t& oldoid, + coll_t c, const ghobject_t& o, const SequencerPosition& spos) { dout(15) << __func__ << " " << c << "/" << o << " from " << oldcid << "/" << oldoid << dendl; @@ -4236,7 +4337,7 @@ void FileStore::_inject_failure() } } -int FileStore::_omap_clear(coll_t cid, const hobject_t &hoid, +int FileStore::_omap_clear(coll_t cid, const ghobject_t &hoid, const SequencerPosition &spos) { dout(15) << __func__ << " " << cid << "/" << hoid << dendl; IndexedPath path; @@ -4249,7 +4350,7 @@ int FileStore::_omap_clear(coll_t cid, const hobject_t &hoid, return 0; } -int FileStore::_omap_setkeys(coll_t cid, const hobject_t &hoid, +int FileStore::_omap_setkeys(coll_t cid, const ghobject_t &hoid, const map<string, bufferlist> &aset, const SequencerPosition &spos) { dout(15) << __func__ << " " << cid << "/" << hoid << dendl; @@ -4260,7 +4361,7 @@ int FileStore::_omap_setkeys(coll_t cid, const hobject_t &hoid, return object_map->set_keys(hoid, aset, &spos); } -int FileStore::_omap_rmkeys(coll_t cid, const hobject_t &hoid, +int FileStore::_omap_rmkeys(coll_t cid, const ghobject_t &hoid, const set<string> &keys, const SequencerPosition &spos) { dout(15) << 
__func__ << " " << cid << "/" << hoid << dendl; @@ -4274,7 +4375,7 @@ int FileStore::_omap_rmkeys(coll_t cid, const hobject_t &hoid, return 0; } -int FileStore::_omap_rmkeyrange(coll_t cid, const hobject_t &hoid, +int FileStore::_omap_rmkeyrange(coll_t cid, const ghobject_t &hoid, const string& first, const string& last, const SequencerPosition &spos) { dout(15) << __func__ << " " << cid << "/" << hoid << " [" << first << "," << last << "]" << dendl; @@ -4291,7 +4392,7 @@ int FileStore::_omap_rmkeyrange(coll_t cid, const hobject_t &hoid, return _omap_rmkeys(cid, hoid, keys, spos); } -int FileStore::_omap_setheader(coll_t cid, const hobject_t &hoid, +int FileStore::_omap_setheader(coll_t cid, const ghobject_t &hoid, const bufferlist &bl, const SequencerPosition &spos) { @@ -4351,8 +4452,8 @@ int FileStore::_split_collection(coll_t cid, _close_replay_guard(dest, spos); } if (g_conf->filestore_debug_verify_split) { - vector<hobject_t> objects; - hobject_t next; + vector<ghobject_t> objects; + ghobject_t next; while (1) { collection_list_partial( cid, @@ -4362,7 +4463,7 @@ int FileStore::_split_collection(coll_t cid, &next); if (objects.empty()) break; - for (vector<hobject_t>::iterator i = objects.begin(); + for (vector<ghobject_t>::iterator i = objects.begin(); i != objects.end(); ++i) { dout(20) << __func__ << ": " << *i << " still in source " @@ -4371,7 +4472,7 @@ int FileStore::_split_collection(coll_t cid, } objects.clear(); } - next = hobject_t(); + next = ghobject_t(); while (1) { collection_list_partial( dest, @@ -4381,7 +4482,7 @@ int FileStore::_split_collection(coll_t cid, &next); if (objects.empty()) break; - for (vector<hobject_t>::iterator i = objects.begin(); + for (vector<ghobject_t>::iterator i = objects.begin(); i != objects.end(); ++i) { dout(20) << __func__ << ": " << *i << " now in dest " @@ -4529,3 +4630,39 @@ void FileStore::dump_transactions(list<ObjectStore::Transaction*>& ls, uint64_t m_filestore_dump_fmt.flush(m_filestore_dump); m_filestore_dump.flush(); } + +// -- FSSuperblock -- + +void FSSuperblock::encode(bufferlist &bl) const +{ + ENCODE_START(1, 1, bl); + compat_features.encode(bl); + ENCODE_FINISH(bl); +} + +void FSSuperblock::decode(bufferlist::iterator &bl) +{ + DECODE_START(1, bl); + compat_features.decode(bl); + DECODE_FINISH(bl); +} + +void FSSuperblock::dump(Formatter *f) const +{ + f->open_object_section("compat"); + compat_features.dump(f); + f->close_section(); +} + +void FSSuperblock::generate_test_instances(list<FSSuperblock*>& o) +{ + FSSuperblock z; + o.push_back(new FSSuperblock(z)); + CompatSet::FeatureSet feature_compat; + CompatSet::FeatureSet feature_ro_compat; + CompatSet::FeatureSet feature_incompat; + feature_incompat.insert(CEPH_FS_FEATURE_INCOMPAT_SHARDS); + z.compat_features = CompatSet(feature_compat, feature_ro_compat, + feature_incompat); + o.push_back(new FSSuperblock(z)); +} diff --git a/src/os/FileStore.h b/src/os/FileStore.h index 9ba4a866a4b..b9017985a34 100644 --- a/src/os/FileStore.h +++ b/src/os/FileStore.h @@ -66,6 +66,26 @@ static const __SWORD_TYPE ZFS_SUPER_MAGIC(0x2fc12fc1); class FileStoreBackend; +#define CEPH_FS_FEATURE_INCOMPAT_SHARDS CompatSet::Feature(1, "sharded objects") + +class FSSuperblock { +public: + CompatSet compat_features; + + FSSuperblock() { } + + void encode(bufferlist &bl) const; + void decode(bufferlist::iterator &bl); + void dump(Formatter *f) const; + static void generate_test_instances(list<FSSuperblock*>& o); +}; +WRITE_CLASS_ENCODER(FSSuperblock) + +inline ostream& operator<<(ostream& out, 
const FSSuperblock& sb) +{ + return out << "sb(" << sb.compat_features << ")"; +} + class FileStore : public JournalingObjectStore, public md_config_obs_t { @@ -89,7 +109,7 @@ public: return perf_tracker.get_cur_stats(); } - static const uint32_t on_disk_version = 3; + static const uint32_t target_version = 3; private: string internal_name; ///< internal name, used to name the perfcounter instance string basedir, journalpath; @@ -281,25 +301,26 @@ private: void op_queue_release_throttle(Op *o); void _journaled_ahead(OpSequencer *osr, Op *o, Context *ondisk); friend struct C_JournaledAhead; + int write_version_stamp(); int open_journal(); PerfCounters *logger; public: - int lfn_find(coll_t cid, const hobject_t& oid, IndexedPath *path); - int lfn_truncate(coll_t cid, const hobject_t& oid, off_t length); - int lfn_stat(coll_t cid, const hobject_t& oid, struct stat *buf); + int lfn_find(coll_t cid, const ghobject_t& oid, IndexedPath *path); + int lfn_truncate(coll_t cid, const ghobject_t& oid, off_t length); + int lfn_stat(coll_t cid, const ghobject_t& oid, struct stat *buf); int lfn_open( coll_t cid, - const hobject_t& oid, + const ghobject_t& oid, bool create, FDRef *outfd, IndexedPath *path = 0, Index *index = 0); void lfn_close(FDRef fd); - int lfn_link(coll_t c, coll_t newcid, const hobject_t& o, const hobject_t& newoid) ; - int lfn_unlink(coll_t cid, const hobject_t& o, const SequencerPosition &spos, + int lfn_link(coll_t c, coll_t newcid, const ghobject_t& o, const ghobject_t& newoid) ; + int lfn_unlink(coll_t cid, const ghobject_t& o, const SequencerPosition &spos, bool force_clear_omap=false); public: @@ -310,7 +331,6 @@ public: int _sanity_check_fs(); bool test_mount_in_use(); - int write_version_stamp(); int version_stamp_is_valid(uint32_t *version); int update_version_stamp(); int read_op_seq(uint64_t *seq); @@ -321,6 +341,22 @@ public: int mkfs(); int mkjournal(); + /** + * set_allow_sharded_objects() + * + * Before sharded ghobject_t can be specified this function must be called + * + * Once this function is called the FileStore is not mountable by prior releases + */ + void set_allow_sharded_objects(); + + /** + * get_allow_sharded_objects() + * + * return value: true if set_allow_sharded_objects() called, otherwise false + */ + bool get_allow_sharded_objects(); + int statfs(struct statfs *buf); int _do_transactions( @@ -347,7 +383,7 @@ public: */ void _set_replay_guard(int fd, const SequencerPosition& spos, - const hobject_t *hoid=0, + const ghobject_t *oid=0, bool in_progress=false); void _set_replay_guard(coll_t cid, const SequencerPosition& spos, @@ -377,42 +413,42 @@ public: */ int _check_replay_guard(int fd, const SequencerPosition& spos); int _check_replay_guard(coll_t cid, const SequencerPosition& spos); - int _check_replay_guard(coll_t cid, hobject_t oid, const SequencerPosition& pos); + int _check_replay_guard(coll_t cid, ghobject_t oid, const SequencerPosition& pos); int _check_global_replay_guard(coll_t cid, const SequencerPosition& spos); // ------------------ // objects - int pick_object_revision_lt(hobject_t& oid) { + int pick_object_revision_lt(ghobject_t& oid) { return 0; } - bool exists(coll_t cid, const hobject_t& oid); + bool exists(coll_t cid, const ghobject_t& oid); int stat( coll_t cid, - const hobject_t& oid, + const ghobject_t& oid, struct stat *st, bool allow_eio = false); int read( coll_t cid, - const hobject_t& oid, + const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl, bool allow_eio = false); - int fiemap(coll_t cid, const hobject_t& 
oid, uint64_t offset, size_t len, bufferlist& bl); + int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl); - int _touch(coll_t cid, const hobject_t& oid); - int _write(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl, + int _touch(coll_t cid, const ghobject_t& oid); + int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl, bool replica = false); - int _zero(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len); - int _truncate(coll_t cid, const hobject_t& oid, uint64_t size); - int _clone(coll_t cid, const hobject_t& oldoid, const hobject_t& newoid, + int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len); + int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size); + int _clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid, const SequencerPosition& spos); - int _clone_range(coll_t cid, const hobject_t& oldoid, const hobject_t& newoid, + int _clone_range(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid, uint64_t srcoff, uint64_t len, uint64_t dstoff, const SequencerPosition& spos); int _do_clone_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff); int _do_copy_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff); - int _remove(coll_t cid, const hobject_t& oid, const SequencerPosition &spos); + int _remove(coll_t cid, const ghobject_t& oid, const SequencerPosition &spos); int _fgetattr(int fd, const char *name, bufferptr& bp); int _fgetattrs(int fd, map<string,bufferptr>& aset, bool user_only); @@ -436,25 +472,25 @@ public: // DEBUG read error injection, an object is removed from both on delete() Mutex read_error_lock; - set<hobject_t> data_error_set; // read() will return -EIO - set<hobject_t> mdata_error_set; // getattr(),stat() will return -EIO - void inject_data_error(const hobject_t &oid); - void inject_mdata_error(const hobject_t &oid); - void debug_obj_on_delete(const hobject_t &oid); - bool debug_data_eio(const hobject_t &oid); - bool debug_mdata_eio(const hobject_t &oid); + set<ghobject_t> data_error_set; // read() will return -EIO + set<ghobject_t> mdata_error_set; // getattr(),stat() will return -EIO + void inject_data_error(const ghobject_t &oid); + void inject_mdata_error(const ghobject_t &oid); + void debug_obj_on_delete(const ghobject_t &oid); + bool debug_data_eio(const ghobject_t &oid); + bool debug_mdata_eio(const ghobject_t &oid); int snapshot(const string& name); // attrs - int getattr(coll_t cid, const hobject_t& oid, const char *name, bufferptr &bp); - int getattrs(coll_t cid, const hobject_t& oid, map<string,bufferptr>& aset, bool user_only = false); + int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr &bp); + int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset, bool user_only = false); - int _setattrs(coll_t cid, const hobject_t& oid, map<string,bufferptr>& aset, + int _setattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset, const SequencerPosition &spos); - int _rmattr(coll_t cid, const hobject_t& oid, const char *name, + int _rmattr(coll_t cid, const ghobject_t& oid, const char *name, const SequencerPosition &spos); - int _rmattrs(coll_t cid, const hobject_t& oid, + int _rmattrs(coll_t cid, const ghobject_t& oid, const SequencerPosition &spos); int collection_getattr(coll_t c, const char *name, void *value, size_t size); @@ -475,35 +511,35 @@ public: int collection_stat(coll_t c, struct 
stat *st); bool collection_exists(coll_t c); bool collection_empty(coll_t c); - int collection_list(coll_t c, vector<hobject_t>& o); - int collection_list_partial(coll_t c, hobject_t start, + int collection_list(coll_t c, vector<ghobject_t>& oid); + int collection_list_partial(coll_t c, ghobject_t start, int min, int max, snapid_t snap, - vector<hobject_t> *ls, hobject_t *next); - int collection_list_range(coll_t c, hobject_t start, hobject_t end, - snapid_t seq, vector<hobject_t> *ls); + vector<ghobject_t> *ls, ghobject_t *next); + int collection_list_range(coll_t c, ghobject_t start, ghobject_t end, + snapid_t seq, vector<ghobject_t> *ls); // omap (see ObjectStore.h for documentation) - int omap_get(coll_t c, const hobject_t &hoid, bufferlist *header, + int omap_get(coll_t c, const ghobject_t &oid, bufferlist *header, map<string, bufferlist> *out); int omap_get_header( coll_t c, - const hobject_t &hoid, + const ghobject_t &oid, bufferlist *out, bool allow_eio = false); - int omap_get_keys(coll_t c, const hobject_t &hoid, set<string> *keys); - int omap_get_values(coll_t c, const hobject_t &hoid, const set<string> &keys, + int omap_get_keys(coll_t c, const ghobject_t &oid, set<string> *keys); + int omap_get_values(coll_t c, const ghobject_t &oid, const set<string> &keys, map<string, bufferlist> *out); - int omap_check_keys(coll_t c, const hobject_t &hoid, const set<string> &keys, + int omap_check_keys(coll_t c, const ghobject_t &oid, const set<string> &keys, set<string> *out); - ObjectMap::ObjectMapIterator get_omap_iterator(coll_t c, const hobject_t &hoid); + ObjectMap::ObjectMapIterator get_omap_iterator(coll_t c, const ghobject_t &oid); int _create_collection(coll_t c); int _create_collection(coll_t c, const SequencerPosition &spos); int _destroy_collection(coll_t c); - int _collection_add(coll_t c, coll_t ocid, const hobject_t& o, + int _collection_add(coll_t c, coll_t ocid, const ghobject_t& oid, const SequencerPosition& spos); - int _collection_move_rename(coll_t oldcid, const hobject_t& oldoid, - coll_t c, const hobject_t& o, + int _collection_move_rename(coll_t oldcid, const ghobject_t& oldoid, + coll_t c, const ghobject_t& o, const SequencerPosition& spos); void dump_start(const std::string& file); void dump_stop(); @@ -513,17 +549,17 @@ private: void _inject_failure(); // omap - int _omap_clear(coll_t cid, const hobject_t &hoid, + int _omap_clear(coll_t cid, const ghobject_t &oid, const SequencerPosition &spos); - int _omap_setkeys(coll_t cid, const hobject_t &hoid, + int _omap_setkeys(coll_t cid, const ghobject_t &oid, const map<string, bufferlist> &aset, const SequencerPosition &spos); - int _omap_rmkeys(coll_t cid, const hobject_t &hoid, const set<string> &keys, + int _omap_rmkeys(coll_t cid, const ghobject_t &oid, const set<string> &keys, const SequencerPosition &spos); - int _omap_rmkeyrange(coll_t cid, const hobject_t &hoid, + int _omap_rmkeyrange(coll_t cid, const ghobject_t &oid, const string& first, const string& last, const SequencerPosition &spos); - int _omap_setheader(coll_t cid, const hobject_t &hoid, const bufferlist &bl, + int _omap_setheader(coll_t cid, const ghobject_t &oid, const bufferlist &bl, const SequencerPosition &spos); int _split_collection(coll_t cid, uint32_t bits, uint32_t rem, coll_t dest, const SequencerPosition &spos); @@ -555,6 +591,25 @@ private: std::ofstream m_filestore_dump; JSONFormatter m_filestore_dump_fmt; atomic_t m_filestore_kill_at; + FSSuperblock superblock; + + /** + * write_superblock() + * + * Write superblock to persistent storage 
+ * + * return value: 0 on success, otherwise negative errno + */ + int write_superblock(); + + /** + * read_superblock() + * + * Fill in FileStore::superblock by reading persistent storage + * + * return value: 0 on success, otherwise negative errno + */ + int read_superblock(); friend class FileStoreBackend; }; diff --git a/src/os/FlatIndex.cc b/src/os/FlatIndex.cc index db46750e411..d4644abc627 100644 --- a/src/os/FlatIndex.cc +++ b/src/os/FlatIndex.cc @@ -134,18 +134,18 @@ static void lfn_translate(const char *path, const char *name, char *new_name, in return; } -static int append_oname(const hobject_t &oid, char *s, int len) +static int append_oname(const ghobject_t &oid, char *s, int len) { //assert(sizeof(oid) == 28); char *end = s + len; char *t = s + strlen(s); - const char *i = oid.oid.name.c_str(); + const char *i = oid.hobj.oid.name.c_str(); while (*i && t < end) { if (*i == '\\') { *t++ = '\\'; *t++ = '\\'; - } else if (*i == '.' && i == oid.oid.name.c_str()) { // only escape leading . + } else if (*i == '.' && i == oid.hobj.oid.name.c_str()) { // only escape leading . *t++ = '\\'; *t++ = '.'; } else if (*i == '/') { @@ -158,17 +158,17 @@ static int append_oname(const hobject_t &oid, char *s, int len) int size = t - s; - if (oid.snap == CEPH_NOSNAP) + if (oid.hobj.snap == CEPH_NOSNAP) size += snprintf(t, end - t, "_head"); - else if (oid.snap == CEPH_SNAPDIR) + else if (oid.hobj.snap == CEPH_SNAPDIR) size += snprintf(t, end - t, "_snapdir"); else - size += snprintf(t, end - t, "_%llx", (long long unsigned)oid.snap); + size += snprintf(t, end - t, "_%llx", (long long unsigned)oid.hobj.snap); return size; } -static bool parse_object(char *s, hobject_t& oid) +static bool parse_object(char *s, ghobject_t& oid) { sobject_t o; char *bar = s + strlen(s) - 1; @@ -201,13 +201,13 @@ static bool parse_object(char *s, hobject_t& oid) o.snap = CEPH_SNAPDIR; else o.snap = strtoull(bar+1, &s, 16); - oid = hobject_t(o); + oid = ghobject_t(hobject_t(o)); return true; } return false; } -static int lfn_get(const char *coll_path, const hobject_t& oid, char *pathname, int len, char *lfn, int lfn_len, int *exist, int *is_lfn) +static int lfn_get(const char *coll_path, const ghobject_t& oid, char *pathname, int len, char *lfn, int lfn_len, int *exist, int *is_lfn) { int i = 0; strncpy(pathname, coll_path, len); @@ -277,7 +277,7 @@ int FlatIndex::init() { return 0; } -int FlatIndex::created(const hobject_t &hoid, const char *path) { +int FlatIndex::created(const ghobject_t &hoid, const char *path) { char long_name[PATH_MAX]; long_name[0] = '\0'; int actual_len = append_oname(hoid, long_name, sizeof(long_name)); @@ -292,7 +292,7 @@ int FlatIndex::created(const hobject_t &hoid, const char *path) { return 0; } -int FlatIndex::unlink(const hobject_t &o) { +int FlatIndex::unlink(const ghobject_t &o) { char long_fn[PATH_MAX]; char short_fn[PATH_MAX]; char short_fn2[PATH_MAX]; @@ -346,7 +346,7 @@ int FlatIndex::unlink(const hobject_t &o) { return 0; } -int FlatIndex::lookup(const hobject_t &hoid, IndexedPath *path, int *exist) { +int FlatIndex::lookup(const ghobject_t &hoid, IndexedPath *path, int *exist) { char long_fn[PATH_MAX]; char short_fn[PATH_MAX]; int r; @@ -361,7 +361,7 @@ int FlatIndex::lookup(const hobject_t &hoid, IndexedPath *path, int *exist) { } static int get_hobject_from_oinfo(const char *dir, const char *file, - hobject_t *o) { + ghobject_t *o) { char path[PATH_MAX]; bufferptr bp(PATH_MAX); snprintf(path, sizeof(path), "%s/%s", dir, file); @@ -376,17 +376,17 @@ static int 
get_hobject_from_oinfo(const char *dir, const char *file, return 0; } -int FlatIndex::collection_list_partial(const hobject_t &start, +int FlatIndex::collection_list_partial(const ghobject_t &start, int min_count, int max_count, snapid_t seq, - vector<hobject_t> *ls, - hobject_t *next) { + vector<ghobject_t> *ls, + ghobject_t *next) { assert(0); // Should not be called return 0; } -int FlatIndex::collection_list(vector<hobject_t> *ls) { +int FlatIndex::collection_list(vector<ghobject_t> *ls) { char buf[offsetof(struct dirent, d_name) + PATH_MAX + 1]; char dir_name[PATH_MAX], new_name[PATH_MAX]; strncpy(dir_name, base_path.c_str(), sizeof(dir_name)); @@ -397,7 +397,7 @@ int FlatIndex::collection_list(vector<hobject_t> *ls) { return -errno; // first, build (ino, object) list - vector< pair<ino_t,hobject_t> > inolist; + vector< pair<ino_t,ghobject_t> > inolist; struct dirent *de; while (::readdir_r(dir, (struct dirent *)buf, &de) == 0) { @@ -407,11 +407,11 @@ int FlatIndex::collection_list(vector<hobject_t> *ls) { if (de->d_name[0] == '.') continue; //cout << " got object " << de->d_name << std::endl; - hobject_t o; + ghobject_t o; lfn_translate(dir_name, de->d_name, new_name, sizeof(new_name)); if (parse_object(new_name, o)) { get_hobject_from_oinfo(dir_name, de->d_name, &o); - inolist.push_back(pair<ino_t,hobject_t>(de->d_ino, o)); + inolist.push_back(pair<ino_t,ghobject_t>(de->d_ino, o)); ls->push_back(o); } } @@ -422,7 +422,7 @@ int FlatIndex::collection_list(vector<hobject_t> *ls) { // build final list ls->resize(inolist.size()); int i = 0; - for (vector< pair<ino_t,hobject_t> >::iterator p = inolist.begin(); p != inolist.end(); ++p) + for (vector< pair<ino_t,ghobject_t> >::iterator p = inolist.begin(); p != inolist.end(); ++p) (*ls)[i++].swap(p->second); ::closedir(dir); diff --git a/src/os/FlatIndex.h b/src/os/FlatIndex.h index 7a10912dc28..657c273468b 100644 --- a/src/os/FlatIndex.h +++ b/src/os/FlatIndex.h @@ -52,35 +52,35 @@ public: /// @see CollectionIndex int created( - const hobject_t &hoid, + const ghobject_t &oid, const char *path ); /// @see CollectionIndex int unlink( - const hobject_t &hoid + const ghobject_t &oid ); /// @see CollectionIndex int lookup( - const hobject_t &hoid, + const ghobject_t &oid, IndexedPath *path, int *exist ); /// @see CollectionIndex int collection_list( - vector<hobject_t> *ls + vector<ghobject_t> *ls ); /// @see CollectionIndex int collection_list_partial( - const hobject_t &start, + const ghobject_t &start, int min_count, int max_count, snapid_t seq, - vector<hobject_t> *ls, - hobject_t *next + vector<ghobject_t> *ls, + ghobject_t *next ); }; diff --git a/src/os/HashIndex.cc b/src/os/HashIndex.cc index c279bab3a60..ea50cd038ca 100644 --- a/src/os/HashIndex.cc +++ b/src/os/HashIndex.cc @@ -66,7 +66,7 @@ int HashIndex::reset_attr( return r; if (!exists) return 0; - map<string, hobject_t> objects; + map<string, ghobject_t> objects; set<string> subdirs; r = list_objects(path, 0, 0, &objects); if (r < 0) @@ -98,7 +98,7 @@ int HashIndex::col_split_level( int r = from.list_subdirs(path, &subdirs); if (r < 0) return r; - map<string, hobject_t> objects; + map<string, ghobject_t> objects; r = from.list_objects(path, 0, 0, &objects); if (r < 0) return r; @@ -134,8 +134,8 @@ int HashIndex::col_split_level( } /* Then, do the same for each object */ - map<string, hobject_t> objs_to_move; - for (map<string, hobject_t>::iterator i = objects.begin(); + map<string, ghobject_t> objs_to_move; + for (map<string, ghobject_t>::iterator i = objects.begin(); i != 
objects.end(); ++i) { if (i->second.match(inbits, match)) { @@ -199,7 +199,7 @@ int HashIndex::col_split_level( return r; } - for (map<string, hobject_t>::iterator i = objs_to_move.begin(); + for (map<string, ghobject_t>::iterator i = objs_to_move.begin(); i != objs_to_move.end(); ++i) { from_info.objs--; @@ -244,7 +244,7 @@ int HashIndex::_init() { /* LFNIndex virtual method implementations */ int HashIndex::_created(const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &oid, const string &mangled_name) { subdir_info_s info; int r; @@ -267,10 +267,10 @@ int HashIndex::_created(const vector<string> &path, } int HashIndex::_remove(const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &oid, const string &mangled_name) { int r; - r = remove_object(path, hoid); + r = remove_object(path, oid); if (r < 0) return r; subdir_info_s info; @@ -291,12 +291,12 @@ int HashIndex::_remove(const vector<string> &path, } } -int HashIndex::_lookup(const hobject_t &hoid, +int HashIndex::_lookup(const ghobject_t &oid, vector<string> *path, string *mangled_name, int *exists_out) { vector<string> path_comp; - get_path_components(hoid, &path_comp); + get_path_components(oid, &path_comp); vector<string>::iterator next = path_comp.begin(); int exists; while (1) { @@ -313,22 +313,22 @@ int HashIndex::_lookup(const hobject_t &hoid, break; path->push_back(*(next++)); } - return get_mangled_name(*path, hoid, mangled_name, exists_out); + return get_mangled_name(*path, oid, mangled_name, exists_out); } -int HashIndex::_collection_list(vector<hobject_t> *ls) { +int HashIndex::_collection_list(vector<ghobject_t> *ls) { vector<string> path; return list_by_hash(path, 0, 0, 0, 0, ls); } -int HashIndex::_collection_list_partial(const hobject_t &start, +int HashIndex::_collection_list_partial(const ghobject_t &start, int min_count, int max_count, snapid_t seq, - vector<hobject_t> *ls, - hobject_t *next) { + vector<ghobject_t> *ls, + ghobject_t *next) { vector<string> path; - hobject_t _next; + ghobject_t _next; if (!next) next = &_next; *next = start; @@ -345,7 +345,7 @@ int HashIndex::recursive_remove(const vector<string> &path) { int r = list_subdirs(path, &subdirs); if (r < 0) return r; - map<string, hobject_t> objects; + map<string, ghobject_t> objects; r = list_objects(path, 0, 0, &objects); if (r < 0) return r; @@ -475,7 +475,7 @@ int HashIndex::initiate_split(const vector<string> &path, subdir_info_s info) { int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) { int level = info.hash_level; - map<string, hobject_t> objects; + map<string, ghobject_t> objects; vector<string> dst = path; int r; dst.push_back(""); @@ -486,17 +486,17 @@ int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) { r = list_subdirs(path, &subdirs); if (r < 0) return r; - map<string, map<string, hobject_t> > mapped; - map<string, hobject_t> moved; + map<string, map<string, ghobject_t> > mapped; + map<string, ghobject_t> moved; int num_moved = 0; - for (map<string, hobject_t>::iterator i = objects.begin(); + for (map<string, ghobject_t>::iterator i = objects.begin(); i != objects.end(); ++i) { vector<string> new_path; get_path_components(i->second, &new_path); mapped[new_path[level]][i->first] = i->second; } - for (map<string, map<string, hobject_t> >::iterator i = mapped.begin(); + for (map<string, map<string, ghobject_t> >::iterator i = mapped.begin(); i != mapped.end(); ) { dst[level] = i->first; @@ -505,7 +505,7 @@ int HashIndex::complete_split(const vector<string> 
&path, subdir_info_s info) { subdir_info_s temp; // subdir has already been fully copied if (subdirs.count(i->first) && !get_info(dst, &temp)) { - for (map<string, hobject_t>::iterator j = i->second.begin(); + for (map<string, ghobject_t>::iterator j = i->second.begin(); j != i->second.end(); ++j) { moved[j->first] = j->second; @@ -533,7 +533,7 @@ int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) { return r; } // else subdir has been created but only partially copied - for (map<string, hobject_t>::iterator j = i->second.begin(); + for (map<string, ghobject_t>::iterator j = i->second.begin(); j != i->second.end(); ++j) { moved[j->first] = j->second; @@ -574,12 +574,12 @@ int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) { return end_split_or_merge(path); } -void HashIndex::get_path_components(const hobject_t &hoid, +void HashIndex::get_path_components(const ghobject_t &oid, vector<string> *path) { char buf[MAX_HASH_LEVEL + 1]; - snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, (uint32_t)hoid.get_filestore_key()); + snprintf(buf, sizeof(buf), "%.*X", MAX_HASH_LEVEL, (uint32_t)oid.hobj.get_filestore_key()); - // Path components are the hex characters of hoid.hash, least + // Path components are the hex characters of oid.hobj.hash, least // significant first for (int i = 0; i < MAX_HASH_LEVEL; ++i) { path->push_back(string(&buf[i], 1)); @@ -596,9 +596,9 @@ string HashIndex::get_hash_str(uint32_t hash) { return retval; } -string HashIndex::get_path_str(const hobject_t &hoid) { - assert(!hoid.is_max()); - return get_hash_str(hoid.hash); +string HashIndex::get_path_str(const ghobject_t &oid) { + assert(!oid.is_max()); + return get_hash_str(oid.hobj.hash); } uint32_t HashIndex::hash_prefix_to_hash(string prefix) { @@ -616,12 +616,12 @@ uint32_t HashIndex::hash_prefix_to_hash(string prefix) { int HashIndex::get_path_contents_by_hash(const vector<string> &path, const string *lower_bound, - const hobject_t *next_object, + const ghobject_t *next_object, const snapid_t *seq, set<string> *hash_prefixes, - set<pair<string, hobject_t> > *objects) { + set<pair<string, ghobject_t> > *objects) { set<string> subdirs; - map<string, hobject_t> rev_objects; + map<string, ghobject_t> rev_objects; int r; string cur_prefix; for (vector<string>::const_iterator i = path.begin(); @@ -632,7 +632,7 @@ int HashIndex::get_path_contents_by_hash(const vector<string> &path, r = list_objects(path, 0, 0, &rev_objects); if (r < 0) return r; - for (map<string, hobject_t>::iterator i = rev_objects.begin(); + for (map<string, ghobject_t>::iterator i = rev_objects.begin(); i != rev_objects.end(); ++i) { string hash_prefix = get_path_str(i->second); @@ -640,10 +640,10 @@ int HashIndex::get_path_contents_by_hash(const vector<string> &path, continue; if (next_object && i->second < *next_object) continue; - if (seq && i->second.snap < *seq) + if (seq && i->second.hobj.snap < *seq) continue; hash_prefixes->insert(hash_prefix); - objects->insert(pair<string, hobject_t>(hash_prefix, i->second)); + objects->insert(pair<string, ghobject_t>(hash_prefix, i->second)); } r = list_subdirs(path, &subdirs); if (r < 0) @@ -667,13 +667,13 @@ int HashIndex::list_by_hash(const vector<string> &path, int min_count, int max_count, snapid_t seq, - hobject_t *next, - vector<hobject_t> *out) { + ghobject_t *next, + vector<ghobject_t> *out) { assert(out); vector<string> next_path = path; next_path.push_back(""); set<string> hash_prefixes; - set<pair<string, hobject_t> > objects; + set<pair<string, 
ghobject_t> > objects; int r = get_path_contents_by_hash(path, NULL, next, @@ -686,16 +686,16 @@ int HashIndex::list_by_hash(const vector<string> &path, for (set<string>::iterator i = hash_prefixes.begin(); i != hash_prefixes.end(); ++i) { - set<pair<string, hobject_t> >::iterator j = objects.lower_bound( - make_pair(*i, hobject_t())); + set<pair<string, ghobject_t> >::iterator j = objects.lower_bound( + make_pair(*i, ghobject_t())); if (j == objects.end() || j->first != *i) { if (min_count > 0 && out->size() > (unsigned)min_count) { if (next) - *next = hobject_t("", "", CEPH_NOSNAP, hash_prefix_to_hash(*i), -1, ""); + *next = ghobject_t(hobject_t("", "", CEPH_NOSNAP, hash_prefix_to_hash(*i), -1, "")); return 0; } *(next_path.rbegin()) = *(i->rbegin()); - hobject_t next_recurse; + ghobject_t next_recurse; if (next) next_recurse = *next; r = list_by_hash(next_path, @@ -727,6 +727,6 @@ int HashIndex::list_by_hash(const vector<string> &path, } } if (next) - *next = hobject_t::get_max(); + *next = ghobject_t(hobject_t::get_max()); return 0; } diff --git a/src/os/HashIndex.h b/src/os/HashIndex.h index fcabd9f7198..6f5bca077d4 100644 --- a/src/os/HashIndex.h +++ b/src/os/HashIndex.h @@ -39,7 +39,7 @@ * given by the hex characters in the hash beginning with the least * significant. * - * ex: hobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2) + * ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2) * would be located in (root)/2/D/0/ * * Subdirectories are created when the number of objects in a directory @@ -163,30 +163,30 @@ protected: int _created( const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &oid, const string &mangled_name ); int _remove( const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &oid, const string &mangled_name ); int _lookup( - const hobject_t &hoid, + const ghobject_t &oid, vector<string> *path, string *mangled_name, int *exists ); int _collection_list( - vector<hobject_t> *ls + vector<ghobject_t> *ls ); int _collection_list_partial( - const hobject_t &start, + const ghobject_t &start, int min_count, int max_count, snapid_t seq, - vector<hobject_t> *ls, - hobject_t *next + vector<ghobject_t> *ls, + ghobject_t *next ); private: /// Recursively remove path and its subdirs @@ -262,7 +262,7 @@ private: /// Determine path components from hoid hash void get_path_components( - const hobject_t &hoid, ///< [in] Object for which to get path components + const ghobject_t &oid, ///< [in] Object for which to get path components vector<string> *path ///< [out] Path components for hoid. ); @@ -278,12 +278,12 @@ private: /** - * Get string representation of hobject_t/hash + * Get string representation of ghobject_t/hash * * e.g: 0x01234567 -> "76543210" */ static string get_path_str( - const hobject_t &hoid ///< [in] Object to get hash string for + const ghobject_t &oid ///< [in] Object to get hash string for ); ///< @return Hash string for hoid. 
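The get_path_str()/get_hash_str() helpers declared here implement the nibble reversal documented above ("0x01234567 -> 76543210"): path components are the hex digits of the object hash, least significant first. A minimal standalone sketch of that transformation, assuming nothing beyond the documented examples (the function name below is hypothetical; the real implementation is HashIndex::get_hash_str/get_path_str in HashIndex.cc):

    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Hex digits of a 32-bit hash, least significant nibble first, matching the
    // "0x01234567 -> 76543210" example in the HashIndex.h comment above.
    static std::string hash_to_path_str(uint32_t hash) {
      std::string out;
      for (int i = 0; i < 8; ++i) {
        out += "0123456789ABCDEF"[hash & 0xF];
        hash >>= 4;
      }
      return out;
    }

    int main() {
      std::printf("%s\n", hash_to_path_str(0x01234567).c_str());  // 76543210
      // 0xA4CEE0D2 yields "2D0EEC4A"; its first three digits give the
      // (root)/2/D/0/ placement cited earlier in this header.
      std::printf("%s\n", hash_to_path_str(0xA4CEE0D2).c_str());
      return 0;
    }

Listing in ghobject_t order then amounts to walking these hash prefixes in sorted order, which appears to be what list_by_hash() (declared just below) does.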
/// Get string from hash, @see get_path_str @@ -319,20 +319,20 @@ private: int get_path_contents_by_hash( const vector<string> &path, /// [in] Path to list const string *lower_bound, /// [in] list > *lower_bound - const hobject_t *next_object, /// [in] list > *next_object + const ghobject_t *next_object, /// [in] list > *next_object const snapid_t *seq, /// [in] list >= *seq set<string> *hash_prefixes, /// [out] prefixes in dir - set<pair<string, hobject_t> > *objects /// [out] objects + set<pair<string, ghobject_t> > *objects /// [out] objects ); - /// List objects in collection in hobject_t order + /// List objects in collection in ghobject_t order int list_by_hash( const vector<string> &path, /// [in] Path to list int min_count, /// [in] List at least min_count int max_count, /// [in] List at most max_count snapid_t seq, /// [in] list only objects where snap >= seq - hobject_t *next, /// [in,out] List objects >= *next - vector<hobject_t> *out /// [out] Listed objects + ghobject_t *next, /// [in,out] List objects >= *next + vector<ghobject_t> *out /// [out] Listed objects ); ///< @return Error Code, 0 on success }; diff --git a/src/os/IndexManager.cc b/src/os/IndexManager.cc index 412721a04c8..83bbfc9703e 100644 --- a/src/os/IndexManager.cc +++ b/src/os/IndexManager.cc @@ -75,7 +75,7 @@ int IndexManager::init_index(coll_t c, const char *path, uint32_t version) { return r; HashIndex index(c, path, g_conf->filestore_merge_threshold, g_conf->filestore_split_multiple, - CollectionIndex::HASH_INDEX_TAG_2, + version, g_conf->filestore_index_retry_probability); return index.init(); } diff --git a/src/os/LFNIndex.cc b/src/os/LFNIndex.cc index 029e8ad8197..83e1c144754 100644 --- a/src/os/LFNIndex.cc +++ b/src/os/LFNIndex.cc @@ -73,7 +73,7 @@ int LFNIndex::init() return _init(); } -int LFNIndex::created(const hobject_t &hoid, const char *path) +int LFNIndex::created(const ghobject_t &oid, const char *path) { WRAP_RETRY( vector<string> path_comp; @@ -81,38 +81,39 @@ int LFNIndex::created(const hobject_t &hoid, const char *path) r = decompose_full_path(path, &path_comp, 0, &short_name); if (r < 0) goto out; - r = lfn_created(path_comp, hoid, short_name); + r = lfn_created(path_comp, oid, short_name); if (r < 0) goto out; - r = _created(path_comp, hoid, short_name); + r = _created(path_comp, oid, short_name); if (r < 0) goto out; ); } -int LFNIndex::unlink(const hobject_t &hoid) +int LFNIndex::unlink(const ghobject_t &oid) { WRAP_RETRY( vector<string> path; string short_name; - r = _lookup(hoid, &path, &short_name, NULL); + r = _lookup(oid, &path, &short_name, NULL); if (r < 0) { goto out; } - r = _remove(path, hoid, short_name); + r = _remove(path, oid, short_name); if (r < 0) { goto out; } ); } -int LFNIndex::lookup(const hobject_t &hoid, +int LFNIndex::lookup(const ghobject_t &oid, IndexedPath *out_path, - int *exist) { + int *exist) +{ WRAP_RETRY( vector<string> path; string short_name; - r = _lookup(hoid, &path, &short_name, exist); + r = _lookup(oid, &path, &short_name, exist); if (r < 0) goto out; string full_path = get_full_path(path, short_name); @@ -135,18 +136,18 @@ int LFNIndex::lookup(const hobject_t &hoid, ); } -int LFNIndex::collection_list(vector<hobject_t> *ls) +int LFNIndex::collection_list(vector<ghobject_t> *ls) { return _collection_list(ls); } -int LFNIndex::collection_list_partial(const hobject_t &start, +int LFNIndex::collection_list_partial(const ghobject_t &start, int min_count, int max_count, snapid_t seq, - vector<hobject_t> *ls, - hobject_t *next) + vector<ghobject_t> *ls, 
+ ghobject_t *next) { return _collection_list_partial(start, min_count, max_count, seq, ls, next); } @@ -171,13 +172,14 @@ int LFNIndex::fsync_dir(const vector<string> &path) int LFNIndex::link_object(const vector<string> &from, const vector<string> &to, - const hobject_t &hoid, - const string &from_short_name) { + const ghobject_t &oid, + const string &from_short_name) +{ int r; string from_path = get_full_path(from, from_short_name); string to_path; maybe_inject_failure(); - r = lfn_get_name(to, hoid, 0, &to_path, 0); + r = lfn_get_name(to, oid, 0, &to_path, 0); if (r < 0) return r; maybe_inject_failure(); @@ -190,10 +192,11 @@ int LFNIndex::link_object(const vector<string> &from, } int LFNIndex::remove_objects(const vector<string> &dir, - const map<string, hobject_t> &to_remove, - map<string, hobject_t> *remaining) { + const map<string, ghobject_t> &to_remove, + map<string, ghobject_t> *remaining) +{ set<string> clean_chains; - for (map<string, hobject_t>::const_iterator to_clean = to_remove.begin(); + for (map<string, ghobject_t>::const_iterator to_clean = to_remove.begin(); to_clean != to_remove.end(); ++to_clean) { if (!lfn_is_hashed_filename(to_clean->first)) { @@ -207,7 +210,7 @@ int LFNIndex::remove_objects(const vector<string> &dir, if (clean_chains.count(lfn_get_short_name(to_clean->second, 0))) continue; set<int> holes; - map<int, pair<string, hobject_t> > chain; + map<int, pair<string, ghobject_t> > chain; for (int i = 0; ; ++i) { string short_name = lfn_get_short_name(to_clean->second, i); if (remaining->count(short_name)) { @@ -219,7 +222,7 @@ int LFNIndex::remove_objects(const vector<string> &dir, } } - map<int, pair<string, hobject_t > >::reverse_iterator candidate = chain.rbegin(); + map<int, pair<string, ghobject_t > >::reverse_iterator candidate = chain.rbegin(); for (set<int>::iterator i = holes.begin(); i != holes.end(); ++i) { @@ -241,7 +244,7 @@ int LFNIndex::remove_objects(const vector<string> &dir, if (r < 0) return -errno; remaining->erase(candidate->second.first); - remaining->insert(pair<string, hobject_t>( + remaining->insert(pair<string, ghobject_t>( lfn_get_short_name(candidate->second.second, *i), candidate->second.second)); ++candidate; @@ -253,13 +256,14 @@ int LFNIndex::remove_objects(const vector<string> &dir, } int LFNIndex::move_objects(const vector<string> &from, - const vector<string> &to) { - map<string, hobject_t> to_move; + const vector<string> &to) +{ + map<string, ghobject_t> to_move; int r; r = list_objects(from, 0, NULL, &to_move); if (r < 0) return r; - for (map<string,hobject_t>::iterator i = to_move.begin(); + for (map<string,ghobject_t>::iterator i = to_move.begin(); i != to_move.end(); ++i) { string from_path = get_full_path(from, i->first); @@ -280,7 +284,7 @@ int LFNIndex::move_objects(const vector<string> &from, r = fsync_dir(to); if (r < 0) return r; - for (map<string,hobject_t>::iterator i = to_move.begin(); + for (map<string,ghobject_t>::iterator i = to_move.begin(); i != to_move.end(); ++i) { maybe_inject_failure(); @@ -293,21 +297,23 @@ int LFNIndex::move_objects(const vector<string> &from, } int LFNIndex::remove_object(const vector<string> &from, - const hobject_t &hoid) { + const ghobject_t &oid) +{ string short_name; int r, exist; maybe_inject_failure(); - r = get_mangled_name(from, hoid, &short_name, &exist); + r = get_mangled_name(from, oid, &short_name, &exist); maybe_inject_failure(); if (r < 0) return r; - return lfn_unlink(from, hoid, short_name); + return lfn_unlink(from, oid, short_name); } int 
LFNIndex::get_mangled_name(const vector<string> &from, - const hobject_t &hoid, - string *mangled_name, int *exists) { - return lfn_get_name(from, hoid, mangled_name, 0, exists); + const ghobject_t &oid, + string *mangled_name, int *exists) +{ + return lfn_get_name(from, oid, mangled_name, 0, exists); } int LFNIndex::move_subdir( @@ -315,7 +321,8 @@ int LFNIndex::move_subdir( LFNIndex &dest, const vector<string> &path, string dir - ) { + ) +{ vector<string> sub_path(path.begin(), path.end()); sub_path.push_back(dir); string from_path(from.get_full_path_subdir(sub_path)); @@ -330,8 +337,9 @@ int LFNIndex::move_object( LFNIndex &from, LFNIndex &dest, const vector<string> &path, - const pair<string, hobject_t> &obj - ) { + const pair<string, ghobject_t> &obj + ) +{ string from_path(from.get_full_path(path, obj.first)); string to_path; string to_name; @@ -358,7 +366,8 @@ int LFNIndex::move_object( static int get_hobject_from_oinfo(const char *dir, const char *file, - hobject_t *o) { + ghobject_t *o) +{ char path[PATH_MAX]; bufferptr bp(PATH_MAX); snprintf(path, sizeof(path), "%s/%s", dir, file); @@ -375,7 +384,8 @@ static int get_hobject_from_oinfo(const char *dir, const char *file, int LFNIndex::list_objects(const vector<string> &to_list, int max_objs, - long *handle, map<string, hobject_t> *out) { + long *handle, map<string, ghobject_t> *out) +{ string to_list_path = get_full_path_subdir(to_list); DIR *dir = ::opendir(to_list_path.c_str()); char buf[offsetof(struct dirent, d_name) + PATH_MAX + 1]; @@ -402,7 +412,7 @@ int LFNIndex::list_objects(const vector<string> &to_list, int max_objs, if (de->d_name[0] == '.') continue; string short_name(de->d_name); - hobject_t obj; + ghobject_t obj; if (lfn_is_object(short_name)) { r = lfn_translate(to_list, short_name, &obj); if (r < 0) { @@ -416,7 +426,7 @@ int LFNIndex::list_objects(const vector<string> &to_list, int max_objs, if (index_version == HASH_INDEX_TAG) get_hobject_from_oinfo(to_list_path.c_str(), short_name.c_str(), &obj); - out->insert(pair<string, hobject_t>(short_name, obj)); + out->insert(pair<string, ghobject_t>(short_name, obj)); ++listed; } else { continue; @@ -435,7 +445,8 @@ int LFNIndex::list_objects(const vector<string> &to_list, int max_objs, } int LFNIndex::list_subdirs(const vector<string> &to_list, - set<string> *out) { + set<string> *out) +{ string to_list_path = get_full_path_subdir(to_list); DIR *dir = ::opendir(to_list_path.c_str()); char buf[offsetof(struct dirent, d_name) + PATH_MAX + 1]; @@ -449,7 +460,7 @@ int LFNIndex::list_subdirs(const vector<string> &to_list, } string short_name(de->d_name); string demangled_name; - hobject_t obj; + ghobject_t obj; if (lfn_is_subdir(short_name, &demangled_name)) { out->insert(demangled_name); } @@ -501,7 +512,8 @@ int LFNIndex::path_exists(const vector<string> &to_check, int *exists) int LFNIndex::add_attr_path(const vector<string> &path, const string &attr_name, - bufferlist &attr_value) { + bufferlist &attr_value) +{ string full_path = get_full_path_subdir(path); maybe_inject_failure(); return chain_setxattr(full_path.c_str(), mangle_attr_name(attr_name).c_str(), @@ -511,7 +523,8 @@ int LFNIndex::add_attr_path(const vector<string> &path, int LFNIndex::get_attr_path(const vector<string> &path, const string &attr_name, - bufferlist &attr_value) { + bufferlist &attr_value) +{ string full_path = get_full_path_subdir(path); size_t size = 1024; // Initial while (1) { @@ -536,22 +549,24 @@ int LFNIndex::get_attr_path(const vector<string> &path, } int LFNIndex::remove_attr_path(const 
vector<string> &path, - const string &attr_name) { + const string &attr_name) +{ string full_path = get_full_path_subdir(path); string mangled_attr_name = mangle_attr_name(attr_name); maybe_inject_failure(); return chain_removexattr(full_path.c_str(), mangled_attr_name.c_str()); } -string LFNIndex::lfn_generate_object_name_keyless(const hobject_t &hoid) +string LFNIndex::lfn_generate_object_name_keyless(const ghobject_t &oid) { char s[FILENAME_MAX_LEN]; char *end = s + sizeof(s); char *t = s; - const char *i = hoid.oid.name.c_str(); + assert(oid.generation == ghobject_t::NO_GEN); + const char *i = oid.hobj.oid.name.c_str(); // Escape subdir prefix - if (hoid.oid.name.substr(0, 4) == "DIR_") { + if (oid.hobj.oid.name.substr(0, 4) == "DIR_") { *t++ = '\\'; *t++ = 'd'; i += 4; @@ -560,7 +575,7 @@ string LFNIndex::lfn_generate_object_name_keyless(const hobject_t &hoid) if (*i == '\\') { *t++ = '\\'; *t++ = '\\'; - } else if (*i == '.' && i == hoid.oid.name.c_str()) { // only escape leading . + } else if (*i == '.' && i == oid.hobj.oid.name.c_str()) { // only escape leading . *t++ = '\\'; *t++ = '.'; } else if (*i == '/') { @@ -571,13 +586,13 @@ string LFNIndex::lfn_generate_object_name_keyless(const hobject_t &hoid) i++; } - if (hoid.snap == CEPH_NOSNAP) + if (oid.hobj.snap == CEPH_NOSNAP) t += snprintf(t, end - t, "_head"); - else if (hoid.snap == CEPH_SNAPDIR) + else if (oid.hobj.snap == CEPH_SNAPDIR) t += snprintf(t, end - t, "_snapdir"); else - t += snprintf(t, end - t, "_%llx", (long long unsigned)hoid.snap); - snprintf(t, end - t, "_%.*X", (int)(sizeof(hoid.hash)*2), hoid.hash); + t += snprintf(t, end - t, "_%llx", (long long unsigned)oid.hobj.snap); + snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.hash)*2), oid.hobj.hash); return string(s); } @@ -601,94 +616,112 @@ static void append_escaped(string::const_iterator begin, } } -string LFNIndex::lfn_generate_object_name(const hobject_t &hoid) +string LFNIndex::lfn_generate_object_name(const ghobject_t &oid) { if (index_version == HASH_INDEX_TAG) - return lfn_generate_object_name_keyless(hoid); + return lfn_generate_object_name_keyless(oid); if (index_version == HASH_INDEX_TAG_2) - return lfn_generate_object_name_poolless(hoid); + return lfn_generate_object_name_poolless(oid); string full_name; - string::const_iterator i = hoid.oid.name.begin(); - if (hoid.oid.name.substr(0, 4) == "DIR_") { + string::const_iterator i = oid.hobj.oid.name.begin(); + if (oid.hobj.oid.name.substr(0, 4) == "DIR_") { full_name.append("\\d"); i += 4; - } else if (hoid.oid.name[0] == '.') { + } else if (oid.hobj.oid.name[0] == '.') { full_name.append("\\."); ++i; } - append_escaped(i, hoid.oid.name.end(), &full_name); + append_escaped(i, oid.hobj.oid.name.end(), &full_name); full_name.append("_"); - append_escaped(hoid.get_key().begin(), hoid.get_key().end(), &full_name); + append_escaped(oid.hobj.get_key().begin(), oid.hobj.get_key().end(), &full_name); full_name.append("_"); char buf[PATH_MAX]; char *t = buf; char *end = t + sizeof(buf); - if (hoid.snap == CEPH_NOSNAP) + if (oid.hobj.snap == CEPH_NOSNAP) t += snprintf(t, end - t, "head"); - else if (hoid.snap == CEPH_SNAPDIR) + else if (oid.hobj.snap == CEPH_SNAPDIR) t += snprintf(t, end - t, "snapdir"); else - t += snprintf(t, end - t, "%llx", (long long unsigned)hoid.snap); - snprintf(t, end - t, "_%.*X", (int)(sizeof(hoid.hash)*2), hoid.hash); + t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.snap); + snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.hash)*2), oid.hobj.hash); full_name += 
string(buf); full_name.append("_"); - append_escaped(hoid.nspace.begin(), hoid.nspace.end(), &full_name); + append_escaped(oid.hobj.nspace.begin(), oid.hobj.nspace.end(), &full_name); full_name.append("_"); t = buf; end = t + sizeof(buf); - if (hoid.pool == -1) + if (oid.hobj.pool == -1) t += snprintf(t, end - t, "none"); else - t += snprintf(t, end - t, "%llx", (long long unsigned)hoid.pool); + t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.pool); full_name += string(buf); + if (oid.generation != ghobject_t::NO_GEN) { + assert(oid.shard_id != ghobject_t::NO_SHARD); + full_name.append("_"); + + t = buf; + end = t + sizeof(buf); + t += snprintf(t, end - t, "%llx", (long long unsigned)oid.generation); + full_name += string(buf); + + full_name.append("_"); + + t = buf; + end = t + sizeof(buf); + t += snprintf(t, end - t, "%x", (int)oid.shard_id); + full_name += string(buf); + } + return full_name; } -string LFNIndex::lfn_generate_object_name_poolless(const hobject_t &hoid) +string LFNIndex::lfn_generate_object_name_poolless(const ghobject_t &oid) { if (index_version == HASH_INDEX_TAG) - return lfn_generate_object_name_keyless(hoid); + return lfn_generate_object_name_keyless(oid); + assert(oid.generation == ghobject_t::NO_GEN); string full_name; - string::const_iterator i = hoid.oid.name.begin(); - if (hoid.oid.name.substr(0, 4) == "DIR_") { + string::const_iterator i = oid.hobj.oid.name.begin(); + if (oid.hobj.oid.name.substr(0, 4) == "DIR_") { full_name.append("\\d"); i += 4; - } else if (hoid.oid.name[0] == '.') { + } else if (oid.hobj.oid.name[0] == '.') { full_name.append("\\."); ++i; } - append_escaped(i, hoid.oid.name.end(), &full_name); + append_escaped(i, oid.hobj.oid.name.end(), &full_name); full_name.append("_"); - append_escaped(hoid.get_key().begin(), hoid.get_key().end(), &full_name); + append_escaped(oid.hobj.get_key().begin(), oid.hobj.get_key().end(), &full_name); full_name.append("_"); char snap_with_hash[PATH_MAX]; char *t = snap_with_hash; char *end = t + sizeof(snap_with_hash); - if (hoid.snap == CEPH_NOSNAP) + if (oid.hobj.snap == CEPH_NOSNAP) t += snprintf(t, end - t, "head"); - else if (hoid.snap == CEPH_SNAPDIR) + else if (oid.hobj.snap == CEPH_SNAPDIR) t += snprintf(t, end - t, "snapdir"); else - t += snprintf(t, end - t, "%llx", (long long unsigned)hoid.snap); - snprintf(t, end - t, "_%.*X", (int)(sizeof(hoid.hash)*2), hoid.hash); + t += snprintf(t, end - t, "%llx", (long long unsigned)oid.hobj.snap); + snprintf(t, end - t, "_%.*X", (int)(sizeof(oid.hobj.hash)*2), oid.hobj.hash); full_name += string(snap_with_hash); return full_name; } int LFNIndex::lfn_get_name(const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &oid, string *mangled_name, string *out_path, int *exists) { string subdir_path = get_full_path_subdir(path); - string full_name = lfn_generate_object_name(hoid); + string full_name = lfn_generate_object_name(oid); int r; if (!lfn_must_hash(full_name)) { @@ -718,7 +751,7 @@ int LFNIndex::lfn_get_name(const vector<string> &path, string candidate_path; char buf[FILENAME_MAX_LEN + 1]; for ( ; ; ++i) { - candidate = lfn_get_short_name(hoid, i); + candidate = lfn_get_short_name(oid, i); candidate_path = get_full_path(path, candidate); r = chain_getxattr(candidate_path.c_str(), get_lfn_attr().c_str(), buf, sizeof(buf)); if (r < 0) { @@ -757,20 +790,20 @@ int LFNIndex::lfn_get_name(const vector<string> &path, } int LFNIndex::lfn_created(const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &oid, const string 
&mangled_name) { if (!lfn_is_hashed_filename(mangled_name)) return 0; string full_path = get_full_path(path, mangled_name); - string full_name = lfn_generate_object_name(hoid); + string full_name = lfn_generate_object_name(oid); maybe_inject_failure(); return chain_setxattr(full_path.c_str(), get_lfn_attr().c_str(), full_name.c_str(), full_name.size()); } int LFNIndex::lfn_unlink(const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &oid, const string &mangled_name) { if (!lfn_is_hashed_filename(mangled_name)) { @@ -787,7 +820,7 @@ int LFNIndex::lfn_unlink(const vector<string> &path, int i = 0; for ( ; ; ++i) { - string candidate = lfn_get_short_name(hoid, i); + string candidate = lfn_get_short_name(oid, i); if (candidate == mangled_name) break; } @@ -795,7 +828,7 @@ int LFNIndex::lfn_unlink(const vector<string> &path, ++i; for ( ; ; ++i) { struct stat buf; - string to_check = lfn_get_short_name(hoid, i); + string to_check = lfn_get_short_name(oid, i); string to_check_path = get_full_path(path, to_check); int r = ::stat(to_check_path.c_str(), &buf); if (r < 0) { @@ -817,7 +850,7 @@ int LFNIndex::lfn_unlink(const vector<string> &path, return 0; } else { string rename_to = get_full_path(path, mangled_name); - string rename_from = get_full_path(path, lfn_get_short_name(hoid, i - 1)); + string rename_from = get_full_path(path, lfn_get_short_name(oid, i - 1)); maybe_inject_failure(); int r = ::rename(rename_from.c_str(), rename_to.c_str()); maybe_inject_failure(); @@ -830,7 +863,7 @@ int LFNIndex::lfn_unlink(const vector<string> &path, int LFNIndex::lfn_translate(const vector<string> &path, const string &short_name, - hobject_t *out) + ghobject_t *out) { if (!lfn_is_hashed_filename(short_name)) { return lfn_parse_object_name(short_name, out); @@ -863,7 +896,7 @@ bool LFNIndex::lfn_is_subdir(const string &name, string *demangled) return 0; } -static int parse_object(const char *s, hobject_t& o) +static int parse_object(const char *s, ghobject_t& o) { const char *hash = s + strlen(s) - 1; while (*hash != '_' && @@ -899,28 +932,28 @@ static int parse_object(const char *s, hobject_t& o) i++; } *t = 0; - o.oid.name = string(buf, t-buf); + o.hobj.oid.name = string(buf, t-buf); if (strncmp(bar+1, "head", 4) == 0) - o.snap = CEPH_NOSNAP; + o.hobj.snap = CEPH_NOSNAP; else if (strncmp(bar+1, "snapdir", 7) == 0) - o.snap = CEPH_SNAPDIR; + o.hobj.snap = CEPH_SNAPDIR; else - o.snap = strtoull(bar+1, NULL, 16); - sscanf(hash, "_%X", &o.hash); + o.hobj.snap = strtoull(bar+1, NULL, 16); + sscanf(hash, "_%X", &o.hobj.hash); return 1; } return 0; } -bool LFNIndex::lfn_parse_object_name_keyless(const string &long_name, hobject_t *out) +bool LFNIndex::lfn_parse_object_name_keyless(const string &long_name, ghobject_t *out) { bool r = parse_object(long_name.c_str(), *out); int64_t pool = -1; pg_t pg; if (coll().is_pg_prefix(pg)) pool = (int64_t)pg.pool(); - out->pool = pool; + out->hobj.pool = pool; if (!r) return r; string temp = lfn_generate_object_name(*out); return r; @@ -928,7 +961,8 @@ bool LFNIndex::lfn_parse_object_name_keyless(const string &long_name, hobject_t static bool append_unescaped(string::const_iterator begin, string::const_iterator end, - string *out) { + string *out) +{ for (string::const_iterator i = begin; i != end; ++i) { if (*i == '\\') { ++i; @@ -950,7 +984,8 @@ static bool append_unescaped(string::const_iterator begin, } bool LFNIndex::lfn_parse_object_name_poolless(const string &long_name, - hobject_t *out) { + ghobject_t *out) +{ string name; string key; uint32_t hash; 
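[Illustration, not part of the patch] The lfn_generate_object_name() hunk above appends an extra pair of hex fields, "_<generation>_<shard>", but only when the ghobject_t actually carries a generation; plain objects keep their old long names unchanged. A minimal standalone sketch of that formatting rule follows; the helper name, the DEMO_* constants, and the example base name are invented stand-ins for the real ghobject_t fields.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <string>

// Stand-ins for ghobject_t::NO_GEN / ghobject_t::NO_SHARD (illustrative only).
static const uint64_t DEMO_NO_GEN = UINT64_MAX;
static const int DEMO_NO_SHARD = -1;

// Append the optional "_<generation hex>_<shard hex>" suffix to an already
// formatted long object name, mirroring the new branch added to
// lfn_generate_object_name(): no suffix at all unless a generation is set.
static std::string append_gen_shard(const std::string &base,
                                    uint64_t generation, int shard_id)
{
  if (generation == DEMO_NO_GEN)
    return base;                      // legacy, non-sharded objects keep their old name
  assert(shard_id != DEMO_NO_SHARD);  // same invariant the patch asserts
  char buf[64];
  std::string out = base;
  snprintf(buf, sizeof(buf), "_%llx", (long long unsigned)generation);
  out += buf;
  snprintf(buf, sizeof(buf), "_%x", (unsigned)shard_id);
  out += buf;
  return out;
}

int main()
{
  // hypothetical base name as produced by the pre-existing hobject_t formatting
  const std::string base = "foo__head_0A137D2E_ns_3";
  printf("%s\n", append_gen_shard(base, DEMO_NO_GEN, DEMO_NO_SHARD).c_str());
  printf("%s\n", append_gen_shard(base, 0x2a, 1).c_str());  // -> ..._2a_1
  return 0;
}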
@@ -1011,12 +1046,12 @@ bool LFNIndex::lfn_parse_object_name_poolless(const string &long_name, pg_t pg; if (coll().is_pg_prefix(pg)) pool = (int64_t)pg.pool(); - (*out) = hobject_t(name, key, snap, hash, pool, ""); + (*out) = ghobject_t(hobject_t(name, key, snap, hash, pool, "")); return true; } -bool LFNIndex::lfn_parse_object_name(const string &long_name, hobject_t *out) +bool LFNIndex::lfn_parse_object_name(const string &long_name, ghobject_t *out) { string name; string key; @@ -1024,6 +1059,8 @@ bool LFNIndex::lfn_parse_object_name(const string &long_name, hobject_t *out) uint32_t hash; snapid_t snap; uint64_t pool; + gen_t generation = ghobject_t::NO_GEN; + shard_t shard_id = ghobject_t::NO_SHARD; if (index_version == HASH_INDEX_TAG) return lfn_parse_object_name_keyless(long_name, out); @@ -1081,10 +1118,28 @@ bool LFNIndex::lfn_parse_object_name(const string &long_name, hobject_t *out) current = ++end; for ( ; end != long_name.end() && *end != '_'; ++end) ; - if (end != long_name.end()) - return false; string pstring(current, end); + // Optional generation/shard_id + string genstring, shardstring; + if (end != long_name.end()) { + current = ++end; + for ( ; end != long_name.end() && *end != '_'; ++end) ; + if (end == long_name.end()) + return false; + genstring = string(current, end); + + generation = (gen_t)strtoull(genstring.c_str(), NULL, 16); + + current = ++end; + for ( ; end != long_name.end() && *end != '_'; ++end) ; + if (end != long_name.end()) + return false; + shardstring = string(current, end); + + shard_id = (shard_t)strtoul(shardstring.c_str(), NULL, 16); + } + if (snap_str == "head") snap = CEPH_NOSNAP; else if (snap_str == "snapdir") @@ -1098,7 +1153,7 @@ bool LFNIndex::lfn_parse_object_name(const string &long_name, hobject_t *out) else pool = strtoull(pstring.c_str(), NULL, 16); - (*out) = hobject_t(name, key, snap, hash, (int64_t)pool, ns); + (*out) = ghobject_t(hobject_t(name, key, snap, hash, (int64_t)pool, ns), generation, shard_id); return true; } @@ -1170,9 +1225,9 @@ void LFNIndex::build_filename(const char *old_filename, int i, char *filename, i } } -string LFNIndex::lfn_get_short_name(const hobject_t &hoid, int i) +string LFNIndex::lfn_get_short_name(const ghobject_t &oid, int i) { - string long_name = lfn_generate_object_name(hoid); + string long_name = lfn_generate_object_name(oid); assert(lfn_must_hash(long_name)); char buf[FILENAME_SHORT_LEN + 4]; build_filename(long_name.c_str(), i, buf, sizeof(buf)); @@ -1212,7 +1267,7 @@ string LFNIndex::demangle_path_component(const string &component) } int LFNIndex::decompose_full_path(const char *in, vector<string> *out, - hobject_t *hoid, string *shortname) + ghobject_t *oid, string *shortname) { const char *beginning = in + get_base_path().size(); const char *end = beginning; @@ -1228,8 +1283,8 @@ int LFNIndex::decompose_full_path(const char *in, vector<string> *out, } } *shortname = string(beginning, end - beginning); - if (hoid) { - int r = lfn_translate(*out, *shortname, hoid); + if (oid) { + int r = lfn_translate(*out, *shortname, oid); if (r < 0) return r; } diff --git a/src/os/LFNIndex.h b/src/os/LFNIndex.h index b73ff4db268..f436446bf0f 100644 --- a/src/os/LFNIndex.h +++ b/src/os/LFNIndex.h @@ -165,35 +165,35 @@ public: /// @see CollectionIndex int created( - const hobject_t &hoid, + const ghobject_t &oid, const char *path ); /// @see CollectionIndex int unlink( - const hobject_t &hoid + const ghobject_t &oid ); /// @see CollectionIndex int lookup( - const hobject_t &hoid, + const ghobject_t &oid, 
IndexedPath *path, int *exist ); /// @see CollectionIndex int collection_list( - vector<hobject_t> *ls + vector<ghobject_t> *ls ); /// @see CollectionIndex int collection_list_partial( - const hobject_t &start, + const ghobject_t &start, int min_count, int max_count, snapid_t seq, - vector<hobject_t> *ls, - hobject_t *next + vector<ghobject_t> *ls, + ghobject_t *next ); virtual int _split( @@ -221,20 +221,20 @@ protected: /// Will be called upon object creation virtual int _created( const vector<string> &path, ///< [in] Path to subdir. - const hobject_t &hoid, ///< [in] Object created. + const ghobject_t &oid, ///< [in] Object created. const string &mangled_name ///< [in] Mangled filename. ) = 0; /// Will be called to remove an object virtual int _remove( const vector<string> &path, ///< [in] Path to subdir. - const hobject_t &hoid, ///< [in] Object to remove. + const ghobject_t &oid, ///< [in] Object to remove. const string &mangled_name ///< [in] Mangled filename. ) = 0; - /// Return the path and mangled_name for hoid. + /// Return the path and mangled_name for oid. virtual int _lookup( - const hobject_t &hoid,///< [in] Object for lookup. + const ghobject_t &oid,///< [in] Object for lookup. vector<string> *path, ///< [out] Path to the object. string *mangled_name, ///< [out] Mangled filename. int *exists ///< [out] True if the object exists. @@ -252,17 +252,17 @@ protected: */ /// List contents of collection. virtual int _collection_list( - vector<hobject_t> *ls ///< [out] Listed objects. + vector<ghobject_t> *ls ///< [out] Listed objects. ) = 0; /// @see CollectionIndex virtual int _collection_list_partial( - const hobject_t &start, + const ghobject_t &start, int min_count, int max_count, snapid_t seq, - vector<hobject_t> *ls, - hobject_t *next + vector<ghobject_t> *ls, + ghobject_t *next ) = 0; protected: @@ -278,8 +278,8 @@ protected: int link_object( const vector<string> &from, ///< [in] Source subdirectory. const vector<string> &to, ///< [in] Dest subdirectory. - const hobject_t &hoid, ///< [in] Object to move. - const string &from_short_name ///< [in] Mangled filename of hoid. + const ghobject_t &oid, ///< [in] Object to move. + const string &from_short_name ///< [in] Mangled filename of oid. ); ///< @return Error Code, 0 on success /** @@ -296,8 +296,8 @@ */ int remove_objects( const vector<string> &dir, - const map<string, hobject_t> &to_remove, - map<string, hobject_t> *remaining + const map<string, ghobject_t> &to_remove, + map<string, ghobject_t> *remaining ); @@ -322,11 +322,11 @@ */ int remove_object( const vector<string> &from, ///< [in] Directory from which to remove. - const hobject_t &to_remove ///< [in] Object to remove. + const ghobject_t &to_remove ///< [in] Object to remove. ); /** - * Gets the filename corresponding to hoid in from. + * Gets the filename corresponding to oid in from. * * The filename may differ between subdirectories. Furthermore, * file creations or removals in from may invalidate the name.
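[Illustration, not part of the patch] On the decode side, the lfn_parse_object_name() hunk earlier in this patch treats the new fields as optional: a name with nothing after the pool field parses exactly as before, while a trailing "_<gen>_<shard>" pair fills in the new members, so pre-existing on-disk names remain readable. A rough sketch of that tail handling, using an invented parse_gen_shard_tail() helper and simplified error handling:

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>

// Parse whatever follows the pool field of a long object name. An empty tail
// is an old-style name and keeps the defaults; otherwise exactly two more
// '_'-separated hex fields (generation, then shard) are required, as in the
// patched lfn_parse_object_name().
static bool parse_gen_shard_tail(const std::string &tail,
                                 uint64_t *generation, int *shard_id)
{
  *generation = UINT64_MAX;   // stand-in for ghobject_t::NO_GEN
  *shard_id = -1;             // stand-in for ghobject_t::NO_SHARD
  if (tail.empty())
    return true;              // pre-sharding name: nothing after the pool
  if (tail[0] != '_')
    return false;
  std::string::size_type sep = tail.find('_', 1);
  if (sep == std::string::npos)
    return false;             // generation without a shard field: reject
  *generation = strtoull(tail.substr(1, sep - 1).c_str(), NULL, 16);
  *shard_id = (int)strtoul(tail.substr(sep + 1).c_str(), NULL, 16);
  return true;
}

int main()
{
  uint64_t gen;
  int shard;
  parse_gen_shard_tail("", &gen, &shard);       // old name -> NO_GEN / NO_SHARD
  std::cout << std::hex << gen << std::dec << " " << shard << std::endl;
  parse_gen_shard_tail("_2a_1", &gen, &shard);  // new name -> generation 0x2a, shard 1
  std::cout << std::hex << gen << std::dec << " " << shard << std::endl;
  return 0;
}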
@@ -334,7 +334,7 @@ protected: */ int get_mangled_name( const vector<string> &from, ///< [in] Subdirectory - const hobject_t &hoid, ///< [in] Object + const ghobject_t &oid, ///< [in] Object string *mangled_name, ///< [out] Filename int *exists ///< [out] 1 if the file exists, else 0 ); @@ -352,7 +352,7 @@ protected: LFNIndex &from, ///< [in] from index LFNIndex &dest, ///< [in] to index const vector<string> &path, ///< [in] path to split - const pair<string, hobject_t> &obj ///< [in] obj to move + const pair<string, ghobject_t> &obj ///< [in] obj to move ); /** @@ -369,7 +369,7 @@ const vector<string> &to_list, int max_objects, long *handle, - map<string, hobject_t> *out + map<string, ghobject_t> *out ); /// Lists subdirectories. @@ -425,43 +425,43 @@ private: } /** - * Gets the filename corresponsing to hoid in path. * - * @param [in] path Path in which to get filename for hoid. - * @param [in] hoid Object for which to get filename. - * @param [out] mangled_name Filename for hoid, pass NULL if not needed. - * @param [out] full_path Fullpath for hoid, pass NULL if not needed. + * Gets the filename corresponding to oid in path. + * + * @param [in] path Path in which to get filename for oid. + * @param [in] oid Object for which to get filename. + * @param [out] mangled_name Filename for oid, pass NULL if not needed. + * @param [out] full_path Fullpath for oid, pass NULL if not needed. * @param [out] exists 1 if the file exists, 0 otherwise, pass NULL if * not needed * @return Error Code, 0 on success. */ int lfn_get_name( const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &oid, string *mangled_name, string *full_path, int *exists ); - /// Adjusts path contents when hoid is created at name mangled_name. + /// Adjusts path contents when oid is created at name mangled_name. int lfn_created( const vector<string> &path, ///< [in] Path to adjust. - const hobject_t &hoid, ///< [in] Object created. + const ghobject_t &oid, ///< [in] Object created. const string &mangled_name ///< [in] Filename of created object. ); - /// Removes hoid from path while adjusting path contents + /// Removes oid from path while adjusting path contents int lfn_unlink( - const vector<string> &path, ///< [in] Path containing hoid. - const hobject_t &hoid, ///< [in] Object to remove. + const vector<string> &path, ///< [in] Path containing oid. + const ghobject_t &oid, ///< [in] Object to remove. const string &mangled_name ///< [in] Filename of object to remove. ); - ///Transate a file into and hobject_t. + /// Translate a file into a ghobject_t. int lfn_translate( const vector<string> &path, ///< [in] Path containing the file. const string &short_name, ///< [in] Filename to translate. - hobject_t *out ///< [out] Object found. + ghobject_t *out ///< [out] Object found. ); ///< @return Negative error code on error, 0 if not an object, 1 else /* manglers/demanglers */ @@ -478,35 +478,35 @@ private: /// Generate object name string lfn_generate_object_name_keyless( - const hobject_t &hoid ///< [in] Object for which to generate. + const ghobject_t &oid ///< [in] Object for which to generate. ); ///< @return Generated object name. /// Generate object name string lfn_generate_object_name_poolless( - const hobject_t &hoid ///< [in] Object for which to generate. + const ghobject_t &oid ///< [in] Object for which to generate. ); ///< @return Generated object name. /// Generate object name string lfn_generate_object_name( - const hobject_t &hoid ///< [in] Object for which to generate.
+ const ghobject_t &oid ///< [in] Object for which to generate. ); ///< @return Generated object name. /// Parse object name bool lfn_parse_object_name_keyless( const string &long_name, ///< [in] Name to parse - hobject_t *out ///< [out] Resulting Object + ghobject_t *out ///< [out] Resulting Object ); ///< @return True if successful, False otherwise. /// Parse object name bool lfn_parse_object_name_poolless( const string &long_name, ///< [in] Name to parse - hobject_t *out ///< [out] Resulting Object + ghobject_t *out ///< [out] Resulting Object ); ///< @return True if successful, False otherwise. /// Parse object name bool lfn_parse_object_name( const string &long_name, ///< [in] Name to parse - hobject_t *out ///< [out] Resulting Object + ghobject_t *out ///< [out] Resulting Object ); ///< @return True if successful, False otherwise. /// Checks whether short_name is a hashed filename. @@ -521,7 +521,7 @@ private: /// Generate hashed name. string lfn_get_short_name( - const hobject_t &hoid, ///< [in] Object for which to generate. + const ghobject_t &oid, ///< [in] Object for which to generate. int i ///< [in] Index of hashed name to generate. ); ///< @return Hashed filename. @@ -554,7 +554,7 @@ private: int decompose_full_path( const char *in, ///< [in] Full path to object. vector<string> *out, ///< [out] Path to object at in. - hobject_t *hoid, ///< [out] Object at in. + ghobject_t *oid, ///< [out] Object at in. string *shortname ///< [out] Filename of object at in. ); ///< @return Error Code, 0 on success. diff --git a/src/os/ObjectMap.h b/src/os/ObjectMap.h index 5cc1e495de1..7717aac7437 100644 --- a/src/os/ObjectMap.h +++ b/src/os/ObjectMap.h @@ -30,102 +30,102 @@ class ObjectMap { public: /// Set keys and values from specified map virtual int set_keys( - const hobject_t &hoid, ///< [in] object containing map + const ghobject_t &oid, ///< [in] object containing map const map<string, bufferlist> &set, ///< [in] key to value map to set const SequencerPosition *spos=0 ///< [in] sequencer position ) = 0; /// Set header virtual int set_header( - const hobject_t &hoid, ///< [in] object containing map + const ghobject_t &oid, ///< [in] object containing map const bufferlist &bl, ///< [in] header to set const SequencerPosition *spos=0 ///< [in] sequencer position ) = 0; /// Retrieve header virtual int get_header( - const hobject_t &hoid, ///< [in] object containing map + const ghobject_t &oid, ///< [in] object containing map bufferlist *bl ///< [out] header to set ) = 0; - /// Clear all map keys and values from hoid + /// Clear all map keys and values from oid virtual int clear( - const hobject_t &hoid, ///< [in] object containing map + const ghobject_t &oid, ///< [in] object containing map const SequencerPosition *spos=0 ///< [in] sequencer position ) = 0; - /// Clear all map keys and values from hoid + /// Clear all map keys and values from oid virtual int rm_keys( - const hobject_t &hoid, ///< [in] object containing map + const ghobject_t &oid, ///< [in] object containing map const set<string> &to_clear, ///< [in] Keys to clear const SequencerPosition *spos=0 ///< [in] sequencer position ) = 0; /// Get all keys and values virtual int get( - const hobject_t &hoid, ///< [in] object containing map + const ghobject_t &oid, ///< [in] object containing map bufferlist *header, ///< [out] Returned Header map<string, bufferlist> *out ///< [out] Returned keys and values ) = 0; /// Get values for supplied keys virtual int get_keys( - const hobject_t &hoid, ///< [in] object containing map -
set<string> *keys ///< [out] Keys defined on hoid + const ghobject_t &oid, ///< [in] object containing map + set<string> *keys ///< [out] Keys defined on oid ) = 0; /// Get values for supplied keys virtual int get_values( - const hobject_t &hoid, ///< [in] object containing map + const ghobject_t &oid, ///< [in] object containing map const set<string> &keys, ///< [in] Keys to get map<string, bufferlist> *out ///< [out] Returned keys and values ) = 0; /// Check key existence virtual int check_keys( - const hobject_t &hoid, ///< [in] object containing map + const ghobject_t &oid, ///< [in] object containing map const set<string> &keys, ///< [in] Keys to check - set<string> *out ///< [out] Subset of keys defined on hoid + set<string> *out ///< [out] Subset of keys defined on oid ) = 0; /// Get xattrs virtual int get_xattrs( - const hobject_t &hoid, ///< [in] object + const ghobject_t &oid, ///< [in] object const set<string> &to_get, ///< [in] keys to get map<string, bufferlist> *out ///< [out] subset of attrs/vals defined ) = 0; /// Get all xattrs virtual int get_all_xattrs( - const hobject_t &hoid, ///< [in] object + const ghobject_t &oid, ///< [in] object set<string> *out ///< [out] attrs and values ) = 0; /// set xattrs in to_set virtual int set_xattrs( - const hobject_t &hoid, ///< [in] object + const ghobject_t &oid, ///< [in] object const map<string, bufferlist> &to_set,///< [in] attrs/values to set const SequencerPosition *spos=0 ///< [in] sequencer position ) = 0; /// remove xattrs in to_remove virtual int remove_xattrs( - const hobject_t &hoid, ///< [in] object + const ghobject_t &oid, ///< [in] object const set<string> &to_remove, ///< [in] attrs to remove const SequencerPosition *spos=0 ///< [in] sequencer position ) = 0; - /// Clone keys efficiently from hoid map to target map + /// Clone keys efficiently from oid map to target map virtual int clone( - const hobject_t &hoid, ///< [in] object containing map - const hobject_t &target, ///< [in] target of clone + const ghobject_t &oid, ///< [in] object containing map + const ghobject_t &target, ///< [in] target of clone const SequencerPosition *spos=0 ///< [in] sequencer position ) { return 0; } /// Ensure all previous writes are durable virtual int sync( - const hobject_t *hoid=0, ///< [in] object + const ghobject_t *oid=0, ///< [in] object const SequencerPosition *spos=0 ///< [in] Sequencer ) { return 0; } @@ -144,7 +144,7 @@ public: virtual ~ObjectMapIteratorImpl() {} }; typedef std::tr1::shared_ptr<ObjectMapIteratorImpl> ObjectMapIterator; - virtual ObjectMapIterator get_iterator(const hobject_t &hoid) { + virtual ObjectMapIterator get_iterator(const ghobject_t &oid) { return ObjectMapIterator(); } diff --git a/src/os/ObjectStore.cc b/src/os/ObjectStore.cc index 9d8b989225b..84549821aff 100644 --- a/src/os/ObjectStore.cc +++ b/src/os/ObjectStore.cc @@ -15,6 +15,7 @@ #include <tr1/memory> #include "ObjectStore.h" #include "common/Formatter.h" +#include "FileStore.h" ostream& operator<<(ostream& out, const ObjectStore::Sequencer& s) { @@ -77,7 +78,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_TOUCH: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); f->dump_string("op_name", "touch"); f->dump_stream("collection") << cid; f->dump_stream("oid") << oid; @@ -87,7 +88,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_WRITE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); uint64_t off = 
i.get_length(); uint64_t len = i.get_length(); bufferlist bl; @@ -104,7 +105,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_ZERO: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); uint64_t off = i.get_length(); uint64_t len = i.get_length(); f->dump_string("op_name", "zero"); @@ -118,7 +119,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_TRIMCACHE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); uint64_t off = i.get_length(); uint64_t len = i.get_length(); f->dump_string("op_name", "trim_cache"); @@ -132,7 +133,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_TRUNCATE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); uint64_t off = i.get_length(); f->dump_string("op_name", "truncate"); f->dump_stream("collection") << cid; @@ -144,7 +145,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_REMOVE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); f->dump_string("op_name", "remove"); f->dump_stream("collection") << cid; f->dump_stream("oid") << oid; @@ -154,7 +155,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_SETATTR: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); string name = i.get_attrname(); bufferlist bl; i.get_bl(bl); @@ -169,7 +170,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_SETATTRS: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); map<string, bufferptr> aset; i.get_attrset(aset); f->dump_string("op_name", "setattrs"); @@ -187,7 +188,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_RMATTR: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); string name = i.get_attrname(); f->dump_string("op_name", "rmattr"); f->dump_stream("collection") << cid; @@ -199,7 +200,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_RMATTRS: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); f->dump_string("op_name", "rmattrs"); f->dump_stream("collection") << cid; f->dump_stream("oid") << oid; @@ -209,8 +210,8 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_CLONE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); - hobject_t noid = i.get_oid(); + ghobject_t oid = i.get_oid(); + ghobject_t noid = i.get_oid(); f->dump_string("op_name", "clone"); f->dump_stream("collection") << cid; f->dump_stream("src_oid") << oid; @@ -221,8 +222,8 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_CLONERANGE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); - hobject_t noid = i.get_oid(); + ghobject_t oid = i.get_oid(); + ghobject_t noid = i.get_oid(); uint64_t off = i.get_length(); uint64_t len = i.get_length(); f->dump_string("op_name", "clonerange"); @@ -237,8 +238,8 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_CLONERANGE2: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); - hobject_t noid = i.get_oid(); + ghobject_t oid = i.get_oid(); + ghobject_t noid = i.get_oid(); uint64_t srcoff = i.get_length(); uint64_t len = i.get_length(); uint64_t dstoff = i.get_length(); @@ -272,7 +273,7 @@ void 
ObjectStore::Transaction::dump(ceph::Formatter *f) { coll_t ocid = i.get_cid(); coll_t ncid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); f->dump_string("op_name", "collection_add"); f->dump_stream("src_collection") << ocid; f->dump_stream("dst_collection") << ncid; @@ -283,7 +284,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_COLL_REMOVE: { coll_t cid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); f->dump_string("op_name", "collection_remove"); f->dump_stream("collection") << cid; f->dump_stream("oid") << oid; @@ -294,7 +295,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) { coll_t ocid = i.get_cid(); coll_t ncid = i.get_cid(); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); f->open_object_section("collection_move"); f->dump_stream("src_collection") << ocid; f->dump_stream("dst_collection") << ncid; @@ -344,7 +345,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_OMAP_CLEAR: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); f->dump_string("op_name", "omap_clear"); f->dump_stream("collection") << cid; f->dump_stream("oid") << oid; @@ -354,7 +355,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_OMAP_SETKEYS: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); map<string, bufferlist> aset; i.get_attrset(aset); f->dump_string("op_name", "omap_setkeys"); @@ -372,7 +373,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_OMAP_RMKEYS: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); set<string> keys; i.get_keyset(keys); f->dump_string("op_name", "omap_rmkeys"); @@ -384,7 +385,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_OMAP_SETHEADER: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); bufferlist bl; i.get_bl(bl); f->dump_string("op_name", "omap_setheader"); @@ -425,7 +426,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) case Transaction::OP_OMAP_RMKEYRANGE: { coll_t cid(i.get_cid()); - hobject_t oid = i.get_oid(); + ghobject_t oid = i.get_oid(); string first, last; first = i.get_key(); last = i.get_key(); @@ -460,9 +461,9 @@ void ObjectStore::Transaction::generate_test_instances(list<ObjectStore::Transac t = new Transaction; coll_t c("foocoll"); coll_t c2("foocoll2"); - hobject_t o1("obj", "", 123, 456, -1, ""); - hobject_t o2("obj2", "", 123, 456, -1, ""); - hobject_t o3("obj3", "", 123, 456, -1, ""); + ghobject_t o1(hobject_t("obj", "", 123, 456, -1, "")); + ghobject_t o2(hobject_t("obj2", "", 123, 456, -1, "")); + ghobject_t o3(hobject_t("obj3", "", 123, 456, -1, "")); t->touch(c, o1); bufferlist bl; bl.append("some data"); @@ -497,3 +498,47 @@ void ObjectStore::Transaction::generate_test_instances(list<ObjectStore::Transac o.push_back(t); } +int ObjectStore::collection_list(coll_t c, vector<hobject_t>& o) +{ + vector<ghobject_t> go; + FileStore *fs = dynamic_cast<FileStore * >(this); + int ret = fs->collection_list(c, go); + if (ret == 0) { + o.reserve(go.size()); + for (vector<ghobject_t>::iterator i = go.begin(); i != go.end() ; i++) + o.push_back(i->hobj); + } + return ret; +} + +int ObjectStore::collection_list_partial(coll_t c, hobject_t start, + int min, int max, snapid_t snap, + vector<hobject_t> *ls, hobject_t *next) +{ + vector<ghobject_t> go; + ghobject_t gnext, 
gstart(start); + FileStore *fs = dynamic_cast<FileStore * >(this); + int ret = fs->collection_list_partial(c, gstart, min, max, snap, &go, &gnext); + if (ret == 0) { + *next = gnext.hobj; + ls->reserve(go.size()); + for (vector<ghobject_t>::iterator i = go.begin(); i != go.end() ; i++) + ls->push_back(i->hobj); + } + return ret; +} + +int ObjectStore::collection_list_range(coll_t c, hobject_t start, hobject_t end, + snapid_t seq, vector<hobject_t> *ls) +{ + vector<ghobject_t> go; + ghobject_t gstart(start), gend(end); + FileStore *fs = dynamic_cast<FileStore * >(this); + int ret = fs->collection_list_range(c, gstart, gend, seq, &go); + if (ret == 0) { + ls->reserve(go.size()); + for (vector<ghobject_t>::iterator i = go.begin(); i != go.end() ; i++) + ls->push_back(i->hobj); + } + return ret; +} diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h index 7e8f6ce43bf..07473b344f5 100644 --- a/src/os/ObjectStore.h +++ b/src/os/ObjectStore.h @@ -340,21 +340,23 @@ public: void get_bl(bufferlist& bl) { ::decode(bl, p); } - hobject_t get_oid() { - hobject_t hoid; + ghobject_t get_oid() { + ghobject_t oid; if (sobject_encoding) { sobject_t soid; ::decode(soid, p); - hoid.snap = soid.snap; - hoid.oid = soid.oid; + oid.hobj.snap = soid.snap; + oid.hobj.oid = soid.oid; + oid.generation = ghobject_t::NO_GEN; + oid.shard_id = ghobject_t::NO_SHARD; } else { - ::decode(hoid, p); + ::decode(oid, p); if (use_pool_override && pool_override != -1 && - hoid.pool == -1) { - hoid.pool = pool_override; + oid.hobj.pool == -1) { + oid.hobj.pool = pool_override; } } - return hoid; + return oid; } coll_t get_cid() { coll_t c; @@ -408,14 +410,14 @@ public: ::encode(op, tbl); ops++; } - void touch(coll_t cid, const hobject_t& oid) { + void touch(coll_t cid, const ghobject_t& oid) { __u32 op = OP_TOUCH; ::encode(op, tbl); ::encode(cid, tbl); ::encode(oid, tbl); ops++; } - void write(coll_t cid, const hobject_t& oid, uint64_t off, uint64_t len, const bufferlist& data) { + void write(coll_t cid, const ghobject_t& oid, uint64_t off, uint64_t len, const bufferlist& data) { __u32 op = OP_WRITE; ::encode(op, tbl); ::encode(cid, tbl); @@ -431,7 +433,7 @@ public: ::encode(data, tbl); ops++; } - void zero(coll_t cid, const hobject_t& oid, uint64_t off, uint64_t len) { + void zero(coll_t cid, const ghobject_t& oid, uint64_t off, uint64_t len) { __u32 op = OP_ZERO; ::encode(op, tbl); ::encode(cid, tbl); @@ -440,7 +442,7 @@ public: ::encode(len, tbl); ops++; } - void truncate(coll_t cid, const hobject_t& oid, uint64_t off) { + void truncate(coll_t cid, const ghobject_t& oid, uint64_t off) { __u32 op = OP_TRUNCATE; ::encode(op, tbl); ::encode(cid, tbl); @@ -448,18 +450,18 @@ public: ::encode(off, tbl); ops++; } - void remove(coll_t cid, const hobject_t& oid) { + void remove(coll_t cid, const ghobject_t& oid) { __u32 op = OP_REMOVE; ::encode(op, tbl); ::encode(cid, tbl); ::encode(oid, tbl); ops++; } - void setattr(coll_t cid, const hobject_t& oid, const char* name, bufferlist& val) { + void setattr(coll_t cid, const ghobject_t& oid, const char* name, bufferlist& val) { string n(name); setattr(cid, oid, n, val); } - void setattr(coll_t cid, const hobject_t& oid, const string& s, bufferlist& val) { + void setattr(coll_t cid, const ghobject_t& oid, const string& s, bufferlist& val) { __u32 op = OP_SETATTR; ::encode(op, tbl); ::encode(cid, tbl); @@ -468,7 +470,7 @@ public: ::encode(val, tbl); ops++; } - void setattrs(coll_t cid, const hobject_t& oid, map<string,bufferptr>& attrset) { + void setattrs(coll_t cid, const ghobject_t& oid, 
map<string,bufferptr>& attrset) { __u32 op = OP_SETATTRS; ::encode(op, tbl); ::encode(cid, tbl); @@ -484,11 +486,11 @@ public: ::encode(attrset, tbl); ops++; } - void rmattr(coll_t cid, const hobject_t& oid, const char *name) { + void rmattr(coll_t cid, const ghobject_t& oid, const char *name) { string n(name); rmattr(cid, oid, n); } - void rmattr(coll_t cid, const hobject_t& oid, const string& s) { + void rmattr(coll_t cid, const ghobject_t& oid, const string& s) { __u32 op = OP_RMATTR; ::encode(op, tbl); ::encode(cid, tbl); @@ -496,14 +498,14 @@ public: ::encode(s, tbl); ops++; } - void rmattrs(coll_t cid, const hobject_t& oid) { + void rmattrs(coll_t cid, const ghobject_t& oid) { __u32 op = OP_RMATTR; ::encode(op, tbl); ::encode(cid, tbl); ::encode(oid, tbl); ops++; } - void clone(coll_t cid, const hobject_t& oid, hobject_t noid) { + void clone(coll_t cid, const ghobject_t& oid, ghobject_t noid) { __u32 op = OP_CLONE; ::encode(op, tbl); ::encode(cid, tbl); @@ -511,7 +513,7 @@ public: ::encode(noid, tbl); ops++; } - void clone_range(coll_t cid, const hobject_t& oid, hobject_t noid, + void clone_range(coll_t cid, const ghobject_t& oid, ghobject_t noid, uint64_t srcoff, uint64_t srclen, uint64_t dstoff) { __u32 op = OP_CLONERANGE2; ::encode(op, tbl); @@ -535,7 +537,7 @@ public: ::encode(cid, tbl); ops++; } - void collection_add(coll_t cid, coll_t ocid, const hobject_t& oid) { + void collection_add(coll_t cid, coll_t ocid, const ghobject_t& oid) { __u32 op = OP_COLL_ADD; ::encode(op, tbl); ::encode(cid, tbl); @@ -543,20 +545,20 @@ public: ::encode(oid, tbl); ops++; } - void collection_remove(coll_t cid, const hobject_t& oid) { + void collection_remove(coll_t cid, const ghobject_t& oid) { __u32 op = OP_COLL_REMOVE; ::encode(op, tbl); ::encode(cid, tbl); ::encode(oid, tbl); ops++; } - void collection_move(coll_t cid, coll_t oldcid, const hobject_t& oid) { + void collection_move(coll_t cid, coll_t oldcid, const ghobject_t& oid) { collection_add(cid, oldcid, oid); collection_remove(oldcid, oid); return; } - void collection_move_rename(coll_t oldcid, const hobject_t& oldoid, - coll_t cid, const hobject_t& oid) { + void collection_move_rename(coll_t oldcid, const ghobject_t& oldoid, + coll_t cid, const ghobject_t& oid) { __u32 op = OP_COLL_MOVE_RENAME; ::encode(op, tbl); ::encode(oldcid, tbl); @@ -611,55 +613,55 @@ public: ops++; } - /// Remove omap from hoid + /// Remove omap from oid void omap_clear( - coll_t cid, ///< [in] Collection containing hoid - const hobject_t &hoid ///< [in] Object from which to remove omap + coll_t cid, ///< [in] Collection containing oid + const ghobject_t &oid ///< [in] Object from which to remove omap ) { __u32 op = OP_OMAP_CLEAR; ::encode(op, tbl); ::encode(cid, tbl); - ::encode(hoid, tbl); + ::encode(oid, tbl); ops++; } - /// Set keys on hoid omap. Replaces duplicate keys. + /// Set keys on oid omap. Replaces duplicate keys. 
void omap_setkeys( - coll_t cid, ///< [in] Collection containing hoid - const hobject_t &hoid, ///< [in] Object to update + coll_t cid, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object to update const map<string, bufferlist> &attrset ///< [in] Replacement keys and values ) { __u32 op = OP_OMAP_SETKEYS; ::encode(op, tbl); ::encode(cid, tbl); - ::encode(hoid, tbl); + ::encode(oid, tbl); ::encode(attrset, tbl); ops++; } - /// Remove keys from hoid omap + /// Remove keys from oid omap void omap_rmkeys( - coll_t cid, ///< [in] Collection containing hoid - const hobject_t &hoid, ///< [in] Object from which to remove the omap + coll_t cid, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object from which to remove the omap const set<string> &keys ///< [in] Keys to clear ) { __u32 op = OP_OMAP_RMKEYS; ::encode(op, tbl); ::encode(cid, tbl); - ::encode(hoid, tbl); + ::encode(oid, tbl); ::encode(keys, tbl); ops++; } - /// Remove key range from hoid omap + /// Remove key range from oid omap void omap_rmkeyrange( - coll_t cid, ///< [in] Collection containing hoid - const hobject_t &hoid, ///< [in] Object from which to remove the omap + coll_t cid, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object from which to remove the omap const string& first, ///< [in] first key in range const string& last ///< [in] first key past range ) { __u32 op = OP_OMAP_RMKEYRANGE; ::encode(op, tbl); ::encode(cid, tbl); - ::encode(hoid, tbl); + ::encode(oid, tbl); ::encode(first, tbl); ::encode(last, tbl); ops++; @@ -667,14 +669,14 @@ public: /// Set omap header void omap_setheader( - coll_t cid, ///< [in] Collection containing hoid - const hobject_t &hoid, ///< [in] Object from which to remove the omap + coll_t cid, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object from which to remove the omap const bufferlist &bl ///< [in] Header value ) { __u32 op = OP_OMAP_SETHEADER; ::encode(op, tbl); ::encode(cid, tbl); - ::encode(hoid, tbl); + ::encode(oid, tbl); ::encode(bl, tbl); ops++; } @@ -857,6 +859,8 @@ public: virtual int get_max_object_name_length() = 0; virtual int mkfs() = 0; // wipe virtual int mkjournal() = 0; // journal only + virtual void set_allow_sharded_objects() = 0; + virtual bool get_allow_sharded_objects() = 0; virtual int statfs(struct statfs *buf) = 0; @@ -875,32 +879,32 @@ public: virtual int get_ideal_list_max() { return 64; } // objects - virtual bool exists(coll_t cid, const hobject_t& oid) = 0; // useful? + virtual bool exists(coll_t cid, const ghobject_t& oid) = 0; // useful? virtual int stat( coll_t cid, - const hobject_t& oid, + const ghobject_t& oid, struct stat *st, bool allow_eio = false) = 0; // struct stat? 
virtual int read( coll_t cid, - const hobject_t& oid, + const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl, bool allow_eio = false) = 0; - virtual int fiemap(coll_t cid, const hobject_t& oid, uint64_t offset, size_t len, bufferlist& bl) = 0; + virtual int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl) = 0; - virtual int getattr(coll_t cid, const hobject_t& oid, const char *name, bufferptr& value) = 0; - int getattr(coll_t cid, const hobject_t& oid, const char *name, bufferlist& value) { + virtual int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferptr& value) = 0; + int getattr(coll_t cid, const ghobject_t& oid, const char *name, bufferlist& value) { bufferptr bp; int r = getattr(cid, oid, name, bp); if (bp.length()) value.push_back(bp); return r; } - virtual int getattrs(coll_t cid, const hobject_t& oid, map<string,bufferptr>& aset, bool user_only = false) {return 0;}; + virtual int getattrs(coll_t cid, const ghobject_t& oid, map<string,bufferptr>& aset, bool user_only = false) {return 0;}; // collections @@ -915,7 +919,7 @@ public: virtual int collection_getattr(coll_t cid, const char *name, bufferlist& bl) = 0; virtual int collection_getattrs(coll_t cid, map<string,bufferptr> &aset) = 0; virtual bool collection_empty(coll_t c) = 0; - virtual int collection_list(coll_t c, vector<hobject_t>& o) = 0; + virtual int collection_list(coll_t c, vector<ghobject_t>& o) = 0; /** * list partial contents of collection relative to a hash offset/position @@ -929,9 +933,9 @@ public: * @param next [out] next item sorts >= this value * @return zero on success, or negative error */ - virtual int collection_list_partial(coll_t c, hobject_t start, + virtual int collection_list_partial(coll_t c, ghobject_t start, int min, int max, snapid_t snap, - vector<hobject_t> *ls, hobject_t *next) = 0; + vector<ghobject_t> *ls, ghobject_t *next) = 0; /** * list contents of a collection that fall in the range [start, end) @@ -943,47 +947,57 @@ public: * @param ls [out] result * @return zero on success, or negative error */ - virtual int collection_list_range(coll_t c, hobject_t start, hobject_t end, - snapid_t seq, vector<hobject_t> *ls) = 0; + virtual int collection_list_range(coll_t c, ghobject_t start, ghobject_t end, + snapid_t seq, vector<ghobject_t> *ls) = 0; + + //TODO: Remove + int collection_list(coll_t c, vector<hobject_t>& o); + + int collection_list_partial(coll_t c, hobject_t start, + int min, int max, snapid_t snap, + vector<hobject_t> *ls, hobject_t *next); + + int collection_list_range(coll_t c, hobject_t start, hobject_t end, + snapid_t seq, vector<hobject_t> *ls); /// OMAP /// Get omap contents virtual int omap_get( - coll_t c, ///< [in] Collection containing hoid - const hobject_t &hoid, ///< [in] Object containing omap + coll_t c, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object containing omap bufferlist *header, ///< [out] omap header map<string, bufferlist> *out /// < [out] Key to value map ) = 0; /// Get omap header virtual int omap_get_header( - coll_t c, ///< [in] Collection containing hoid - const hobject_t &hoid, ///< [in] Object containing omap + coll_t c, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object containing omap bufferlist *header, ///< [out] omap header bool allow_eio = false ///< [in] don't assert on eio ) = 0; - /// Get keys defined on hoid + /// Get keys defined on oid virtual int omap_get_keys( - coll_t c, ///< [in] Collection containing hoid - 
const hobject_t &hoid, ///< [in] Object containing omap - set<string> *keys ///< [out] Keys defined on hoid + coll_t c, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object containing omap + set<string> *keys ///< [out] Keys defined on oid ) = 0; /// Get key values virtual int omap_get_values( - coll_t c, ///< [in] Collection containing hoid - const hobject_t &hoid, ///< [in] Object containing omap + coll_t c, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object containing omap const set<string> &keys, ///< [in] Keys to get map<string, bufferlist> *out ///< [out] Returned keys and values ) = 0; - /// Filters keys into out which are defined on hoid + /// Filters keys into out which are defined on oid virtual int omap_check_keys( - coll_t c, ///< [in] Collection containing hoid - const hobject_t &hoid, ///< [in] Object containing omap + coll_t c, ///< [in] Collection containing oid + const ghobject_t &oid, ///< [in] Object containing omap const set<string> &keys, ///< [in] Keys to check - set<string> *out ///< [out] Subset of keys defined on hoid + set<string> *out ///< [out] Subset of keys defined on oid ) = 0; /** @@ -997,7 +1011,7 @@ public: */ virtual ObjectMap::ObjectMapIterator get_omap_iterator( coll_t c, ///< [in] collection - const hobject_t &hoid ///< [in] object + const ghobject_t &oid ///< [in] object ) = 0; virtual void sync(Context *onsync) {} @@ -1013,8 +1027,8 @@ public: virtual uuid_d get_fsid() = 0; // DEBUG - virtual void inject_data_error(const hobject_t &oid) {} - virtual void inject_mdata_error(const hobject_t &oid) {} + virtual void inject_data_error(const ghobject_t &oid) {} + virtual void inject_mdata_error(const ghobject_t &oid) {} }; diff --git a/src/os/WBThrottle.cc b/src/os/WBThrottle.cc index 8479b3c878d..e02c17677bb 100644 --- a/src/os/WBThrottle.cc +++ b/src/os/WBThrottle.cc @@ -116,7 +116,7 @@ void WBThrottle::handle_conf_change(const md_config_t *conf, } bool WBThrottle::get_next_should_flush( - boost::tuple<hobject_t, FDRef, PendingWB> *next) + boost::tuple<ghobject_t, FDRef, PendingWB> *next) { assert(lock.is_locked()); assert(next); @@ -128,9 +128,9 @@ bool WBThrottle::get_next_should_flush( if (stopping) return false; assert(!pending_wbs.empty()); - hobject_t obj(pop_object()); + ghobject_t obj(pop_object()); - map<hobject_t, pair<PendingWB, FDRef> >::iterator i = + map<ghobject_t, pair<PendingWB, FDRef> >::iterator i = pending_wbs.find(obj); *next = boost::make_tuple(obj, i->second.second, i->second.first); pending_wbs.erase(i); @@ -141,7 +141,7 @@ bool WBThrottle::get_next_should_flush( void *WBThrottle::entry() { Mutex::Locker l(lock); - boost::tuple<hobject_t, FDRef, PendingWB> wb; + boost::tuple<ghobject_t, FDRef, PendingWB> wb; while (get_next_should_flush(&wb)) { clearing = wb.get<0>(); lock.Unlock(); @@ -149,24 +149,24 @@ void *WBThrottle::entry() if (wb.get<2>().nocache) posix_fadvise(**wb.get<1>(), 0, 0, POSIX_FADV_DONTNEED); lock.Lock(); - clearing = hobject_t(); + clearing = ghobject_t(); cur_ios -= wb.get<2>().ios; logger->dec(l_wbthrottle_ios_dirtied, wb.get<2>().ios); cur_size -= wb.get<2>().size; logger->dec(l_wbthrottle_bytes_dirtied, wb.get<2>().size); logger->dec(l_wbthrottle_inodes_dirtied); cond.Signal(); - wb = boost::tuple<hobject_t, FDRef, PendingWB>(); + wb = boost::tuple<ghobject_t, FDRef, PendingWB>(); } return 0; } void WBThrottle::queue_wb( - FDRef fd, const hobject_t &hoid, uint64_t offset, uint64_t len, + FDRef fd, const ghobject_t &hoid, uint64_t offset, uint64_t len, bool 
nocache) { Mutex::Locker l(lock); - map<hobject_t, pair<PendingWB, FDRef> >::iterator wbiter = + map<ghobject_t, pair<PendingWB, FDRef> >::iterator wbiter = pending_wbs.find(hoid); if (wbiter == pending_wbs.end()) { wbiter = pending_wbs.insert( @@ -192,7 +192,7 @@ void WBThrottle::queue_wb( void WBThrottle::clear() { Mutex::Locker l(lock); - for (map<hobject_t, pair<PendingWB, FDRef> >::iterator i = + for (map<ghobject_t, pair<PendingWB, FDRef> >::iterator i = pending_wbs.begin(); i != pending_wbs.end(); ++i) { @@ -208,12 +208,12 @@ void WBThrottle::clear() cond.Signal(); } -void WBThrottle::clear_object(const hobject_t &hoid) +void WBThrottle::clear_object(const ghobject_t &hoid) { Mutex::Locker l(lock); while (clearing == hoid) cond.Wait(lock); - map<hobject_t, pair<PendingWB, FDRef> >::iterator i = + map<ghobject_t, pair<PendingWB, FDRef> >::iterator i = pending_wbs.find(hoid); if (i == pending_wbs.end()) return; diff --git a/src/os/WBThrottle.h b/src/os/WBThrottle.h index d480a6b751c..e418cf98d2a 100644 --- a/src/os/WBThrottle.h +++ b/src/os/WBThrottle.h @@ -44,7 +44,7 @@ enum { * Tracks, throttles, and flushes outstanding IO */ class WBThrottle : Thread, public md_config_obs_t { - hobject_t clearing; + ghobject_t clearing; /* *_limits.first is the start_flusher limit and * *_limits.second is the hard limit @@ -89,36 +89,36 @@ class WBThrottle : Thread, public md_config_obs_t { /** * Flush objects in lru order */ - list<hobject_t> lru; - map<hobject_t, list<hobject_t>::iterator> rev_lru; - void remove_object(const hobject_t &hoid) { + list<ghobject_t> lru; + map<ghobject_t, list<ghobject_t>::iterator> rev_lru; + void remove_object(const ghobject_t &oid) { assert(lock.is_locked()); - map<hobject_t, list<hobject_t>::iterator>::iterator iter = - rev_lru.find(hoid); + map<ghobject_t, list<ghobject_t>::iterator>::iterator iter = + rev_lru.find(oid); if (iter == rev_lru.end()) return; lru.erase(iter->second); rev_lru.erase(iter); } - hobject_t pop_object() { + ghobject_t pop_object() { assert(!lru.empty()); - hobject_t hoid(lru.front()); + ghobject_t oid(lru.front()); lru.pop_front(); - rev_lru.erase(hoid); - return hoid; + rev_lru.erase(oid); + return oid; } - void insert_object(const hobject_t &hoid) { - assert(rev_lru.find(hoid) == rev_lru.end()); - lru.push_back(hoid); - rev_lru.insert(make_pair(hoid, --lru.end())); + void insert_object(const ghobject_t &oid) { + assert(rev_lru.find(oid) == rev_lru.end()); + lru.push_back(oid); + rev_lru.insert(make_pair(oid, --lru.end())); } - map<hobject_t, pair<PendingWB, FDRef> > pending_wbs; + map<ghobject_t, pair<PendingWB, FDRef> > pending_wbs; /// get next flush to perform bool get_next_should_flush( - boost::tuple<hobject_t, FDRef, PendingWB> *next ///< [out] next to flush + boost::tuple<ghobject_t, FDRef, PendingWB> *next ///< [out] next to flush ); ///< @return false if we are shutting down public: enum FS { @@ -141,10 +141,10 @@ public: set_from_conf(); } - /// Queue wb on hoid, fd taking throttle (does not block) + /// Queue wb on oid, fd taking throttle (does not block) void queue_wb( - FDRef fd, ///< [in] FDRef to hoid - const hobject_t &hoid, ///< [in] object + FDRef fd, ///< [in] FDRef to oid + const ghobject_t &oid, ///< [in] object uint64_t offset, ///< [in] offset written uint64_t len, ///< [in] length written bool nocache ///< [in] try to clear out of cache after write @@ -154,7 +154,7 @@ public: void clear(); /// Clear object - void clear_object(const hobject_t &hoid); + void clear_object(const ghobject_t &oid); /// Block until 
there is throttle available void throttle(); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 822596cd997..9a2fbb5c576 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -134,7 +134,9 @@ static ostream& _prefix(std::ostream* _dout, int whoami, OSDMapRef osdmap) { << " "; } -static CompatSet get_osd_compat_set() { +//Initial features in new superblock. +//Features here are also automatically upgraded +CompatSet OSD::get_osd_initial_compat_set() { CompatSet::FeatureSet ceph_osd_feature_compat; CompatSet::FeatureSet ceph_osd_feature_ro_compat; CompatSet::FeatureSet ceph_osd_feature_incompat; @@ -152,6 +154,14 @@ static CompatSet get_osd_compat_set() { ceph_osd_feature_incompat); } +//Features are added here that this OSD supports. +CompatSet OSD::get_osd_compat_set() { + CompatSet compat = get_osd_initial_compat_set(); + //Any features here can be set in code, but not in initial superblock + compat.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS); + return compat; +} + OSDService::OSDService(OSD *osd) : osd(osd), cct(osd->cct), @@ -450,7 +460,7 @@ int OSD::convert_collection(ObjectStore *store, coll_t cid) { coll_t tmp0("convertfs_temp"); coll_t tmp1("convertfs_temp1"); - vector<hobject_t> objects; + vector<ghobject_t> objects; map<string, bufferptr> aset; int r = store->collection_getattrs(cid, aset); @@ -470,10 +480,10 @@ int OSD::convert_collection(ObjectStore *store, coll_t cid) store->apply_transaction(t); } - hobject_t next; + ghobject_t next; while (!next.is_max()) { objects.clear(); - hobject_t start = next; + ghobject_t start = next; r = store->collection_list_partial(cid, start, 200, 300, 0, &objects, &next); @@ -481,7 +491,7 @@ int OSD::convert_collection(ObjectStore *store, coll_t cid) return r; ObjectStore::Transaction t; - for (vector<hobject_t>::iterator i = objects.begin(); + for (vector<ghobject_t>::iterator i = objects.begin(); i != objects.end(); ++i) { t.collection_add(tmp0, cid, *i); @@ -647,7 +657,7 @@ int OSD::mkfs(CephContext *cct, const std::string &dev, const std::string &jdev, sb.cluster_fsid = fsid; sb.osd_fsid = store->get_fsid(); sb.whoami = whoami; - sb.compat_features = get_osd_compat_set(); + sb.compat_features = get_osd_initial_compat_set(); // benchmark? if (cct->_conf->osd_auto_weight) { @@ -1140,6 +1150,7 @@ public: int OSD::init() { + CompatSet initial, diff; Mutex::Locker lock(osd_lock); if (is_stopping()) return 0; @@ -1164,9 +1175,48 @@ int OSD::init() r = read_superblock(); if (r < 0) { derr << "OSD::init() : unable to read osd superblock" << dendl; - store->umount(); - delete store; - return -EINVAL; + r = -EINVAL; + goto out; + } + + if (osd_compat.compare(superblock.compat_features) < 0) { + derr << "The disk uses features unsupported by the executable." << dendl; + derr << " ondisk features " << superblock.compat_features << dendl; + derr << " daemon features " << osd_compat << dendl; + + if (osd_compat.writeable(superblock.compat_features)) { + CompatSet diff = osd_compat.unsupported(superblock.compat_features); + derr << "it is still writeable, though. Missing features: " << diff << dendl; + r = -EOPNOTSUPP; + goto out; + } + else { + CompatSet diff = osd_compat.unsupported(superblock.compat_features); + derr << "Cannot write to disk! Missing features: " << diff << dendl; + r = -EOPNOTSUPP; + goto out; + } + } + + assert_warn(whoami == superblock.whoami); + if (whoami != superblock.whoami) { + derr << "OSD::init: superblock says osd" + << superblock.whoami << " but i am osd." 
<< whoami << dendl; + r = -EINVAL; + goto out; + } + + initial = get_osd_initial_compat_set(); + diff = superblock.compat_features.unsupported(initial); + if (superblock.compat_features.merge(initial)) { + // We need to persist the new compat_set before we + // do anything else + dout(5) << "Upgrading superblock adding: " << diff << dendl; + ObjectStore::Transaction t; + write_superblock(t); + r = store->apply_transaction(t); + if (r < 0) + goto out; } // make sure info object exists @@ -1176,7 +1226,7 @@ int OSD::init() t.touch(coll_t::META_COLL, service.infos_oid); r = store->apply_transaction(t); if (r < 0) - return r; + goto out; } // make sure snap mapper object exists @@ -1186,19 +1236,7 @@ int OSD::init() t.touch(coll_t::META_COLL, OSD::make_snapmapper_oid()); r = store->apply_transaction(t); if (r < 0) - return r; - } - - if (osd_compat.compare(superblock.compat_features) != 0) { - // We need to persist the new compat_set before we - // do anything else - dout(5) << "Upgrading superblock compat_set" << dendl; - superblock.compat_features = osd_compat; - ObjectStore::Transaction t; - write_superblock(t); - r = store->apply_transaction(t); - if (r < 0) - return r; + goto out; } class_handler = new ClassHandler(cct); @@ -1214,7 +1252,8 @@ int OSD::init() assert_warn(!osdmap); if (osdmap) { derr << "OSD::init: unable to read current osdmap" << dendl; - return -EINVAL; + r = -EINVAL; + goto out; } osdmap = get_map(superblock.current_epoch); check_osdmap_features(); @@ -1227,12 +1266,6 @@ int OSD::init() load_pgs(); dout(2) << "superblock: i am osd." << superblock.whoami << dendl; - assert_warn(whoami == superblock.whoami); - if (whoami != superblock.whoami) { - derr << "OSD::init: logic error: superblock says osd" - << superblock.whoami << " but i am osd." << whoami << dendl; - return -EINVAL; - } create_logger(); @@ -1249,7 +1282,7 @@ int OSD::init() monc->set_want_keys(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD); r = monc->init(); if (r < 0) - return r; + goto out; // tell monc about log_client so it will know about mon session resets monc->set_log_client(&clog); @@ -1273,12 +1306,10 @@ int OSD::init() r = monc->authenticate(); if (r < 0) { - monc->shutdown(); - store->umount(); osd_lock.Lock(); // locker is going to unlock this on function exit if (is_stopping()) - return 0; - return r; + r = 0; + goto monout; } while (monc->wait_auth_rotating(30.0) < 0) { @@ -1298,6 +1329,13 @@ int OSD::init() start_boot(); return 0; +monout: + monc->shutdown(); + +out: + store->umount(); + delete store; + return r; } void OSD::final_init() @@ -1716,28 +1754,6 @@ int OSD::read_superblock() ::decode(superblock, p); dout(10) << "read_superblock " << superblock << dendl; - if (osd_compat.compare(superblock.compat_features) < 0) { - derr << "The disk uses features unsupported by the executable." << dendl; - derr << " ondisk features " << superblock.compat_features << dendl; - derr << " daemon features " << osd_compat << dendl; - - if (osd_compat.writeable(superblock.compat_features)) { - derr << "it is still writeable, though. Missing features:" << dendl; - CompatSet diff = osd_compat.unsupported(superblock.compat_features); - return -EOPNOTSUPP; - } - else { - derr << "Cannot write to disk! Missing features:" << dendl; - CompatSet diff = osd_compat.unsupported(superblock.compat_features); - return -EOPNOTSUPP; - } - } - - if (whoami != superblock.whoami) { - derr << "read_superblock superblock says osd." << superblock.whoami - << ", but i (think i) am osd." 
<< whoami << dendl; - return -1; - } return 0; } @@ -1752,17 +1768,17 @@ void OSD::recursive_remove_collection(ObjectStore *store, coll_t tmp) make_snapmapper_oid()); SnapMapper mapper(&driver, 0, 0, 0); - vector<hobject_t> objects; + vector<ghobject_t> objects; store->collection_list(tmp, objects); // delete them. ObjectStore::Transaction t; unsigned removed = 0; - for (vector<hobject_t>::iterator p = objects.begin(); + for (vector<ghobject_t>::iterator p = objects.begin(); p != objects.end(); ++p, removed++) { OSDriver::OSTransaction _t(driver.get_transaction(&t)); - int r = mapper.remove_oid(*p, &_t); + int r = mapper.remove_oid(p->hobj, &_t); if (r != 0 && r != -ENOENT) assert(0); t.collection_remove(tmp, *p); @@ -3343,10 +3359,10 @@ bool remove_dir( ObjectStore::Sequencer *osr, coll_t coll, DeletingStateRef dstate) { - vector<hobject_t> olist; + vector<ghobject_t> olist; int64_t num = 0; ObjectStore::Transaction *t = new ObjectStore::Transaction; - hobject_t next; + ghobject_t next; while (!next.is_max()) { store->collection_list_partial( coll, @@ -3356,11 +3372,11 @@ bool remove_dir( 0, &olist, &next); - for (vector<hobject_t>::iterator i = olist.begin(); + for (vector<ghobject_t>::iterator i = olist.begin(); i != olist.end(); ++i, ++num) { OSDriver::OSTransaction _t(osdriver->get_transaction(t)); - int r = mapper->remove_oid(*i, &_t); + int r = mapper->remove_oid(i->hobj, &_t); if (r != 0 && r != -ENOENT) { assert(0); } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index f906573e5ad..5fe667344a9 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -746,6 +746,25 @@ public: return oid; } static void recursive_remove_collection(ObjectStore *store, coll_t tmp); + + /** + * get_osd_initial_compat_set() + * + * Get the initial feature set for this OSD. Features + * here are automatically upgraded. 
+ * + * Return value: Initial osd CompatSet + */ + static CompatSet get_osd_initial_compat_set(); + + /** + * get_osd_compat_set() + * + * Get all features supported by this OSD + * + * Return value: CompatSet of all supported features + */ + static CompatSet get_osd_compat_set(); private: diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 919d3e3913a..f1985bf961b 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2184,7 +2184,8 @@ epoch_t PG::peek_map_epoch(ObjectStore *store, coll_t coll, hobject_t &infos_oid snapid_t snap; bool ok = coll.is_pg(pgid, snap); assert(ok); - store->collection_getattr(coll, "info", *bl); + int r = store->collection_getattr(coll, "info", *bl); + assert(r > 0); bufferlist::iterator bp = bl->begin(); __u8 struct_v = 0; ::decode(struct_v, bp); diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 486d64302b9..6e025f289bc 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -782,10 +782,6 @@ void PGLog::read_log_old(ObjectStore *store, coll_t coll, hobject_t log_oid, log.tail = info.log_tail; - // In case of sobject_t based encoding, may need to list objects in the store - // to find hashes - vector<hobject_t> ls; - if (ondisklog_head > 0) { // read bufferlist bl; @@ -803,7 +799,6 @@ void PGLog::read_log_old(ObjectStore *store, coll_t coll, hobject_t log_oid, assert(log.empty()); eversion_t last; bool reorder = false; - bool listed_collection = false; while (!p.end()) { uint64_t pos = ondisklog_tail + p.get_off(); @@ -846,29 +841,7 @@ void PGLog::read_log_old(ObjectStore *store, coll_t coll, hobject_t log_oid, << e.version << " after " << last << "\n"; } - if (e.invalid_hash) { - // We need to find the object in the store to get the hash - if (!listed_collection) { - store->collection_list(coll, ls); - listed_collection = true; - } - bool found = false; - for (vector<hobject_t>::iterator i = ls.begin(); - i != ls.end(); - ++i) { - if (i->oid == e.soid.oid && i->snap == e.soid.snap) { - e.soid = *i; - found = true; - break; - } - } - if (!found) { - // Didn't find the correct hash - std::ostringstream oss; - oss << "Could not find hash for hoid " << e.soid << std::endl; - throw read_log_error(oss.str().c_str()); - } - } + assert(!e.invalid_hash); if (e.invalid_pool) { e.soid.pool = info.pgid.pool(); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 191e7deb77a..7831f95818d 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -869,13 +869,21 @@ void ReplicatedPG::do_op(OpRequestRef op) return do_pg_op(op); } - dout(10) << "do_op " << *m << (op->may_write() ? " may_write" : "") << dendl; + // order this op as a write? + bool write_ordered = op->may_write() || (m->get_flags() & CEPH_OSD_FLAG_RWORDERED); + + dout(10) << "do_op " << *m + << (op->may_write() ? " may_write" : "") + << (op->may_read() ? " may_read" : "") + << " -> " << (write_ordered ? "write-ordered" : "read-ordered") + << dendl; hobject_t head(m->get_oid(), m->get_object_locator().key, CEPH_NOSNAP, m->get_pg().ps(), info.pgid.pool(), m->get_object_locator().nspace); - if (op->may_write() && scrubber.write_blocked_by_scrub(head)) { + + if (write_ordered && scrubber.write_blocked_by_scrub(head)) { dout(20) << __func__ << ": waiting for scrub" << dendl; waiting_for_active.push_back(op); op->mark_delayed("waiting for scrub"); @@ -889,7 +897,7 @@ void ReplicatedPG::do_op(OpRequestRef op) } // degraded object? 
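Aside on the ReplicatedPG::do_op hunk being patched here: a read that carries the client's read/write-ordering flag is now gated exactly like a write (blocked by an in-progress scrub, delayed on degraded objects), so it cannot be reordered ahead of an in-flight write on the same object. A minimal standalone sketch of that predicate follows; the Op struct is a simplified stand-in for OpRequest/MOSDOp and the flag value is illustrative only, not the real CEPH_OSD_FLAG_RWORDERED definition.

#include <cstdint>
#include <cassert>

// Illustrative flag value, not the actual CEPH_OSD_FLAG_RWORDERED constant.
constexpr uint32_t FLAG_RWORDERED = 1u << 5;

// Simplified stand-in for OpRequest/MOSDOp.
struct Op {
  bool may_write;
  bool may_read;
  uint32_t flags;
};

bool write_ordered(const Op& op) {
  // Order as a write if the op writes, or if the client explicitly asked
  // for read-after-write ordering.
  return op.may_write || (op.flags & FLAG_RWORDERED);
}

int main() {
  assert(!write_ordered({false, true, 0}));               // plain read: read-ordered
  assert(write_ordered({false, true, FLAG_RWORDERED}));   // flagged read: write-ordered
  assert(write_ordered({true, false, 0}));                // write: always write-ordered
  return 0;
}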
- if (op->may_write() && is_degraded_object(head)) { + if (write_ordered && is_degraded_object(head)) { wait_for_degraded_object(head, op); return; } @@ -909,7 +917,7 @@ void ReplicatedPG::do_op(OpRequestRef op) } // degraded object? - if (op->may_write() && is_degraded_object(snapdir)) { + if (write_ordered && is_degraded_object(snapdir)) { wait_for_degraded_object(snapdir, op); return; } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 1df55ce5cab..884b8ada8cc 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -41,6 +41,7 @@ #define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO CompatSet::Feature(8, "leveldbinfo") #define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG CompatSet::Feature(9, "leveldblog") #define CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER CompatSet::Feature(10, "snapmapper") +#define CEPH_OSD_FEATURE_INCOMPAT_SHARDS CompatSet::Feature(11, "sharded objects") typedef hobject_t collection_list_handle_t; diff --git a/src/test/Makefile.am b/src/test/Makefile.am index 88cf1ce970f..32fee301e4c 100644 --- a/src/test/Makefile.am +++ b/src/test/Makefile.am @@ -308,6 +308,11 @@ unittest_ceph_argparse_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) unittest_ceph_argparse_CXXFLAGS = $(UNITTEST_CXXFLAGS) check_PROGRAMS += unittest_ceph_argparse +unittest_ceph_compatset_SOURCES = test/ceph_compatset.cc +unittest_ceph_compatset_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) +unittest_ceph_compatset_CXXFLAGS = $(UNITTEST_CXXFLAGS) +check_PROGRAMS += unittest_ceph_compatset + libec_example_la_SOURCES = test/osd/ErasureCodePluginExample.cc libec_example_la_CFLAGS = ${AM_CFLAGS} libec_example_la_CXXFLAGS= ${AM_CXXFLAGS} diff --git a/src/test/ObjectMap/test_object_map.cc b/src/test/ObjectMap/test_object_map.cc index 1b39c8068fb..23f220daf45 100644 --- a/src/test/ObjectMap/test_object_map.cc +++ b/src/test/ObjectMap/test_object_map.cc @@ -55,16 +55,16 @@ public: } void set_key(const string &objname, const string &key, const string &value) { - set_key(hobject_t(sobject_t(objname, CEPH_NOSNAP)), + set_key(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), key, value); } void set_xattr(const string &objname, const string &key, const string &value) { - set_xattr(hobject_t(sobject_t(objname, CEPH_NOSNAP)), + set_xattr(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), key, value); } - void set_key(hobject_t hoid, + void set_key(ghobject_t hoid, string key, string value) { map<string, bufferlist> to_write; bufferptr bp(value.c_str(), value.size()); @@ -74,7 +74,7 @@ public: db->set_keys(hoid, to_write); } - void set_xattr(hobject_t hoid, + void set_xattr(ghobject_t hoid, string key, string value) { map<string, bufferlist> to_write; bufferptr bp(value.c_str(), value.size()); @@ -85,11 +85,11 @@ public: } void set_header(const string &objname, const string &value) { - set_header(hobject_t(sobject_t(objname, CEPH_NOSNAP)), + set_header(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), value); } - void set_header(hobject_t hoid, + void set_header(ghobject_t hoid, const string &value) { bufferlist header; header.append(bufferptr(value.c_str(), value.size() + 1)); @@ -97,11 +97,11 @@ public: } int get_header(const string &objname, string *value) { - return get_header(hobject_t(sobject_t(objname, CEPH_NOSNAP)), + return get_header(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), value); } - int get_header(hobject_t hoid, + int get_header(ghobject_t hoid, string *value) { bufferlist header; int r = db->get_header(hoid, &header); @@ -115,11 +115,11 @@ public: } int get_xattr(const string &objname, 
const string &key, string *value) { - return get_xattr(hobject_t(sobject_t(objname, CEPH_NOSNAP)), + return get_xattr(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), key, value); } - int get_xattr(hobject_t hoid, + int get_xattr(ghobject_t hoid, string key, string *value) { set<string> to_get; to_get.insert(key); @@ -135,11 +135,11 @@ public: } int get_key(const string &objname, const string &key, string *value) { - return get_key(hobject_t(sobject_t(objname, CEPH_NOSNAP)), + return get_key(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), key, value); } - int get_key(hobject_t hoid, + int get_key(ghobject_t hoid, string key, string *value) { set<string> to_get; to_get.insert(key); @@ -155,11 +155,11 @@ public: } void remove_key(const string &objname, const string &key) { - remove_key(hobject_t(sobject_t(objname, CEPH_NOSNAP)), + remove_key(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), key); } - void remove_key(hobject_t hoid, + void remove_key(ghobject_t hoid, string key) { set<string> to_remove; to_remove.insert(key); @@ -167,11 +167,11 @@ public: } void remove_xattr(const string &objname, const string &key) { - remove_xattr(hobject_t(sobject_t(objname, CEPH_NOSNAP)), + remove_xattr(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), key); } - void remove_xattr(hobject_t hoid, + void remove_xattr(ghobject_t hoid, string key) { set<string> to_remove; to_remove.insert(key); @@ -179,20 +179,20 @@ public: } void clone(const string &objname, const string &target) { - clone(hobject_t(sobject_t(objname, CEPH_NOSNAP)), - hobject_t(sobject_t(target, CEPH_NOSNAP))); + clone(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), + ghobject_t(hobject_t(sobject_t(target, CEPH_NOSNAP)))); } - void clone(hobject_t hoid, - hobject_t hoid2) { + void clone(ghobject_t hoid, + ghobject_t hoid2) { db->clone(hoid, hoid2); } void clear(const string &objname) { - clear(hobject_t(sobject_t(objname, CEPH_NOSNAP))); + clear(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP)))); } - void clear(hobject_t hoid) { + void clear(ghobject_t hoid) { db->clear(hoid); } @@ -543,7 +543,7 @@ int main(int argc, char **argv) { } TEST_F(ObjectMapTest, CreateOneObject) { - hobject_t hoid(sobject_t("foo", CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP)), 100, 0); map<string, bufferlist> to_set; string key("test"); string val("test_val"); @@ -579,8 +579,8 @@ TEST_F(ObjectMapTest, CreateOneObject) { } TEST_F(ObjectMapTest, CloneOneObject) { - hobject_t hoid(sobject_t("foo", CEPH_NOSNAP)); - hobject_t hoid2(sobject_t("foo2", CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP)), 200, 0); + ghobject_t hoid2(hobject_t(sobject_t("foo2", CEPH_NOSNAP)), 201, 1); tester.set_key(hoid, "foo", "bar"); tester.set_key(hoid, "foo2", "bar2"); @@ -640,8 +640,8 @@ TEST_F(ObjectMapTest, CloneOneObject) { } TEST_F(ObjectMapTest, OddEvenClone) { - hobject_t hoid(sobject_t("foo", CEPH_NOSNAP)); - hobject_t hoid2(sobject_t("foo2", CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP))); + ghobject_t hoid2(hobject_t(sobject_t("foo2", CEPH_NOSNAP))); for (unsigned i = 0; i < 1000; ++i) { tester.set_key(hoid, "foo" + num_str(i), "bar" + num_str(i)); diff --git a/src/test/ceph_compatset.cc b/src/test/ceph_compatset.cc new file mode 100644 index 00000000000..2b57db01ab9 --- /dev/null +++ b/src/test/ceph_compatset.cc @@ -0,0 +1,164 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable 
distributed file system + * + * Copyright (C) 2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <fstream> +#include <iostream> +#include <errno.h> +#include <sys/stat.h> +#include <signal.h> +#include <ctype.h> +#include <boost/scoped_ptr.hpp> +#include <string> + +#include "include/types.h" +#include "include/compat.h" + +//#undef assert +//#define assert(foo) if (!(foo)) abort(); + +#include "include/CompatSet.h" + +#include "gtest/gtest.h" +#include <vector> + +TEST(CephCompatSet, AllSet) { + CompatSet::FeatureSet compat; + CompatSet::FeatureSet ro; + CompatSet::FeatureSet incompat; + + EXPECT_THROW(compat.insert(CompatSet::Feature(0, "test")), FailedAssertion); + EXPECT_THROW(compat.insert(CompatSet::Feature(64, "test")), FailedAssertion); + + for (int i = 1; i < 64; i++) { + stringstream cname; + cname << string("c") << i; + compat.insert(CompatSet::Feature(i,cname.str().c_str())); + stringstream roname; + roname << string("r") << i; + ro.insert(CompatSet::Feature(i,roname.str().c_str())); + stringstream iname; + iname << string("i") << i; + incompat.insert(CompatSet::Feature(i,iname.str().c_str())); + } + CompatSet tcs(compat, ro, incompat); + + //cout << tcs << std::endl; + + //Due to a workaround for a bug bit 0 is always set even though it is + //not a legal feature. + EXPECT_EQ(tcs.compat.mask, (uint64_t)0xffffffffffffffff); + EXPECT_EQ(tcs.ro_compat.mask, (uint64_t)0xffffffffffffffff); + EXPECT_EQ(tcs.incompat.mask, (uint64_t)0xffffffffffffffff); + + for (int i = 1; i < 64; i++) { + EXPECT_TRUE(tcs.compat.contains(i)); + stringstream cname; + cname << string("c") << i; + EXPECT_TRUE(tcs.compat.contains(CompatSet::Feature(i,cname.str().c_str()))); + tcs.compat.remove(i); + + EXPECT_TRUE(tcs.ro_compat.contains(i)); + stringstream roname; + roname << string("r") << i; + EXPECT_TRUE(tcs.ro_compat.contains(CompatSet::Feature(i,roname.str().c_str()))); + tcs.ro_compat.remove(i); + + EXPECT_TRUE(tcs.incompat.contains(i)); + stringstream iname; + iname << string("i") << i; + EXPECT_TRUE(tcs.incompat.contains(CompatSet::Feature(i,iname.str().c_str()))); + tcs.incompat.remove(i); + } + //Due to a workaround for a bug bit 0 is always set even though it is + //not a legal feature. 
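For readers following the mask arithmetic in this new unit test: each CompatSet::Feature id selects one bit of a 64-bit mask, and because bit 0 is always kept set (the workaround the comment above refers to), every expected value ORs in a trailing 1. These are the same mask operations behind the compare()/merge()/unsupported() calls that OSD::init now uses when upgrading the on-disk superblock. A small standalone illustration of the arithmetic, using plain uint64_t math rather than the real FeatureSet type:

#include <cstdint>
#include <cassert>

int main() {
  uint64_t mask = 1;            // bit 0 stays set; 0 is not a legal feature id
  auto insert = [&](int id) { mask |= uint64_t(1) << id; };
  auto remove = [&](int id) { mask &= ~(uint64_t(1) << id); };

  insert(2);                    // like CompatSet::Feature(2, "c2")
  assert(mask == ((uint64_t(1) << 2) | 1));

  remove(2);
  assert(mask == 1);            // matches the EXPECT_EQ(..., (uint64_t)1) checks below
  return 0;
}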
+ EXPECT_EQ(tcs.compat.mask, (uint64_t)1); + EXPECT_TRUE(tcs.compat.names.empty()); + EXPECT_EQ(tcs.ro_compat.mask, (uint64_t)1); + EXPECT_TRUE(tcs.ro_compat.names.empty()); + EXPECT_EQ(tcs.incompat.mask, (uint64_t)1); + EXPECT_TRUE(tcs.incompat.names.empty()); +} + +TEST(CephCompatSet, other) { + CompatSet s1, s2, s1dup; + + s1.compat.insert(CompatSet::Feature(1, "c1")); + s1.compat.insert(CompatSet::Feature(2, "c2")); + s1.compat.insert(CompatSet::Feature(32, "c32")); + s1.ro_compat.insert(CompatSet::Feature(63, "r63")); + s1.incompat.insert(CompatSet::Feature(1, "i1")); + + s2.compat.insert(CompatSet::Feature(1, "c1")); + s2.compat.insert(CompatSet::Feature(32, "c32")); + s2.ro_compat.insert(CompatSet::Feature(63, "r63")); + s2.incompat.insert(CompatSet::Feature(1, "i1")); + + s1dup = s1; + + //Check exact match + EXPECT_EQ(s1.compare(s1dup), 0); + + //Check superset + EXPECT_EQ(s1.compare(s2), 1); + + //Check missing features + EXPECT_EQ(s2.compare(s1), -1); + + CompatSet diff = s2.unsupported(s1); + EXPECT_EQ(diff.compat.mask, (uint64_t)1<<2 | 1); + EXPECT_EQ(diff.ro_compat.mask, (uint64_t)1); + EXPECT_EQ(diff.incompat.mask, (uint64_t)1); + + CompatSet s3 = s1; + s3.incompat.insert(CompatSet::Feature(4, "i4")); + + diff = s1.unsupported(s3); + EXPECT_EQ(diff.compat.mask, (uint64_t)1); + EXPECT_EQ(diff.ro_compat.mask, (uint64_t)1); + EXPECT_EQ(diff.incompat.mask, (uint64_t)1<<4 | 1); +} + +TEST(CephCompatSet, merge) { + CompatSet s1, s2, s1dup, s2dup; + + s1.compat.insert(CompatSet::Feature(1, "c1")); + s1.compat.insert(CompatSet::Feature(2, "c2")); + s1.compat.insert(CompatSet::Feature(32, "c32")); + s1.ro_compat.insert(CompatSet::Feature(63, "r63")); + s1.incompat.insert(CompatSet::Feature(1, "i1")); + + s1dup = s1; + + s2.compat.insert(CompatSet::Feature(1, "c1")); + s2.compat.insert(CompatSet::Feature(32, "c32")); + s2.ro_compat.insert(CompatSet::Feature(1, "r1")); + s2.ro_compat.insert(CompatSet::Feature(63, "r63")); + s2.incompat.insert(CompatSet::Feature(1, "i1")); + + s2dup = s2; + + //Nothing to merge if they are the same + EXPECT_FALSE(s1.merge(s1dup)); + EXPECT_FALSE(s2.merge(s2dup)); + + EXPECT_TRUE(s1.merge(s2)); + EXPECT_EQ(s1.compat.mask, (uint64_t)1<<1 | (uint64_t)1<<2 | (uint64_t)1<<32 | 1); + EXPECT_EQ(s1.ro_compat.mask, (uint64_t)1<<1 | (uint64_t)1<<63 | 1); + EXPECT_EQ(s1.incompat.mask, (uint64_t)1<<1 | 1); + + EXPECT_TRUE(s2.merge(s1dup)); + EXPECT_EQ(s2.compat.mask, (uint64_t)1<<1 | (uint64_t)1<<2 | (uint64_t)1<<32 | 1); + EXPECT_EQ(s2.ro_compat.mask, (uint64_t)1<<1 | (uint64_t)1<<63 | 1); + EXPECT_EQ(s2.incompat.mask, (uint64_t)1<<1 | 1); +} diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index fe17f077d8e..7f2b4d9db5d 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -78,6 +78,7 @@ TYPE(SequencerPosition) #include "common/hobject.h" TYPE(hobject_t) +TYPE(ghobject_t) #include "mon/AuthMonitor.h" TYPE(AuthMonitor::Incremental) diff --git a/src/test/filestore/FileStoreDiff.cc b/src/test/filestore/FileStoreDiff.cc index b2419f5e298..40c0b32d30c 100644 --- a/src/test/filestore/FileStoreDiff.cc +++ b/src/test/filestore/FileStoreDiff.cc @@ -131,7 +131,7 @@ bool FileStoreDiff::diff_objects(FileStore *a_store, FileStore *b_store, coll_t bool ret = false; int err; - std::vector<hobject_t> b_objects, a_objects; + std::vector<ghobject_t> b_objects, a_objects; err = b_store->collection_list(coll, b_objects); if (err < 0) { dout(0) << "diff_objects list on verify coll " << coll.to_str() @@ -151,11 +151,11 @@ bool 
FileStoreDiff::diff_objects(FileStore *a_store, FileStore *b_store, coll_t ret = true; } - std::vector<hobject_t>::iterator b_it = b_objects.begin(); - std::vector<hobject_t>::iterator a_it = b_objects.begin(); + std::vector<ghobject_t>::iterator b_it = b_objects.begin(); + std::vector<ghobject_t>::iterator a_it = b_objects.begin(); for (; b_it != b_objects.end(); ++b_it, ++a_it) { - hobject_t b_obj = *b_it, a_obj = *a_it; - if (b_obj.oid.name != a_obj.oid.name) { + ghobject_t b_obj = *b_it, a_obj = *a_it; + if (b_obj.hobj.oid.name != a_obj.hobj.oid.name) { dout(0) << "diff_objects name mismatch on A object " << coll << "/" << a_obj << " and B object " << coll << "/" << b_obj << dendl; @@ -167,7 +167,7 @@ bool FileStoreDiff::diff_objects(FileStore *a_store, FileStore *b_store, coll_t err = b_store->stat(coll, b_obj, &b_stat); if (err < 0) { dout(0) << "diff_objects error stating B object " - << coll.to_str() << "/" << b_obj.oid.name << dendl; + << coll.to_str() << "/" << b_obj.hobj.oid.name << dendl; ret = true; } err = a_store->stat(coll, a_obj, &a_stat); diff --git a/src/test/filestore/store_test.cc b/src/test/filestore/store_test.cc index 92104960127..50450f467ff 100644 --- a/src/test/filestore/store_test.cc +++ b/src/test/filestore/store_test.cc @@ -51,9 +51,9 @@ public: } }; -bool sorted(const vector<hobject_t> &in) { - hobject_t start; - for (vector<hobject_t>::const_iterator i = in.begin(); +bool sorted(const vector<ghobject_t> &in) { + ghobject_t start; + for (vector<ghobject_t>::const_iterator i = in.begin(); i != in.end(); ++i) { if (start > *i) return false; @@ -105,7 +105,7 @@ TEST_F(StoreTest, SimpleObjectTest) { r = store->apply_transaction(t); ASSERT_EQ(r, 0); } - hobject_t hoid(sobject_t("Object 1", CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t("Object 1", CEPH_NOSNAP))); { ObjectStore::Transaction t; t.touch(cid, hoid); @@ -133,7 +133,7 @@ TEST_F(StoreTest, SimpleObjectLongnameTest) { r = store->apply_transaction(t); ASSERT_EQ(r, 0); } - hobject_t hoid(sobject_t("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaObjectaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1", CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaObjectaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1", CEPH_NOSNAP))); { ObjectStore::Transaction t; t.touch(cid, hoid); @@ -157,7 +157,7 @@ TEST_F(StoreTest, ManyObjectTest) { coll_t cid("blah"); string base = ""; for (int i = 0; i < 100; ++i) base.append("aaaaa"); - set<hobject_t> created; + set<ghobject_t> created; { ObjectStore::Transaction t; t.create_collection(cid); @@ -171,27 +171,27 @@ TEST_F(StoreTest, ManyObjectTest) { ObjectStore::Transaction t; char buf[100]; snprintf(buf, sizeof(buf), "%d", i); - hobject_t hoid(sobject_t(string(buf) + base, CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t(string(buf) + base, CEPH_NOSNAP))); t.touch(cid, hoid); created.insert(hoid); r = store->apply_transaction(t); ASSERT_EQ(r, 0); } - for (set<hobject_t>::iterator i = created.begin(); + for (set<ghobject_t>::iterator i = created.begin(); i != created.end(); ++i) { struct stat buf; 
ASSERT_TRUE(!store->stat(cid, *i, &buf)); } - set<hobject_t> listed; - vector<hobject_t> objects; + set<ghobject_t> listed; + vector<ghobject_t> objects; r = store->collection_list(cid, objects); ASSERT_EQ(r, 0); cerr << "objects.size() is " << objects.size() << std::endl; - for (vector<hobject_t> ::iterator i = objects.begin(); + for (vector<ghobject_t> ::iterator i = objects.begin(); i != objects.end(); ++i) { listed.insert(*i); @@ -199,11 +199,11 @@ TEST_F(StoreTest, ManyObjectTest) { } ASSERT_TRUE(listed.size() == created.size()); - hobject_t start, next; + ghobject_t start, next; objects.clear(); r = store->collection_list_partial( cid, - hobject_t::get_max(), + ghobject_t::get_max(), 50, 60, 0, @@ -234,13 +234,13 @@ TEST_F(StoreTest, ManyObjectTest) { } cerr << "listed.size() is " << listed.size() << std::endl; ASSERT_TRUE(listed.size() == created.size()); - for (set<hobject_t>::iterator i = listed.begin(); + for (set<ghobject_t>::iterator i = listed.begin(); i != listed.end(); ++i) { ASSERT_TRUE(created.count(*i)); } - for (set<hobject_t>::iterator i = created.begin(); + for (set<ghobject_t>::iterator i = created.begin(); i != created.end(); ++i) { ObjectStore::Transaction t; @@ -259,7 +259,7 @@ TEST_F(StoreTest, ManyObjectTest) { class ObjectGenerator { public: - virtual hobject_t create_object(gen_type *gen) = 0; + virtual ghobject_t create_object(gen_type *gen) = 0; virtual ~ObjectGenerator() {} }; @@ -267,7 +267,7 @@ class MixedGenerator : public ObjectGenerator { public: unsigned seq; MixedGenerator() : seq(0) {} - hobject_t create_object(gen_type *gen) { + ghobject_t create_object(gen_type *gen) { char buf[100]; snprintf(buf, sizeof(buf), "%u", seq); @@ -283,7 +283,7 @@ public: // hash //boost::binomial_distribution<uint32_t> bin(0xFFFFFF, 0.5); ++seq; - return hobject_t(name, string(), rand() & 2 ? CEPH_NOSNAP : rand(), rand() & 0xFF, 0, ""); + return ghobject_t(hobject_t(name, string(), rand() & 2 ? 
CEPH_NOSNAP : rand(), rand() & 0xFF, 0, "")); } }; @@ -293,8 +293,8 @@ public: static const unsigned max_objects = 3000; coll_t cid; unsigned in_flight; - set<hobject_t> available_objects; - set<hobject_t> in_use_objects; + set<ghobject_t> available_objects; + set<ghobject_t> in_use_objects; ObjectGenerator *object_gen; gen_type *rng; ObjectStore *store; @@ -307,9 +307,9 @@ public: public: SyntheticWorkloadState *state; ObjectStore::Transaction *t; - hobject_t hoid; + ghobject_t hoid; C_SyntheticOnReadable(SyntheticWorkloadState *state, - ObjectStore::Transaction *t, hobject_t hoid) + ObjectStore::Transaction *t, ghobject_t hoid) : state(state), t(t), hoid(hoid) {} void finish(int r) { @@ -339,14 +339,14 @@ public: return store->apply_transaction(t); } - hobject_t get_uniform_random_object() { + ghobject_t get_uniform_random_object() { while (in_flight >= max_in_flight || available_objects.empty()) cond.Wait(lock); boost::uniform_int<> choose(0, available_objects.size() - 1); int index = choose(*rng); - set<hobject_t>::iterator i = available_objects.begin(); + set<ghobject_t>::iterator i = available_objects.begin(); for ( ; index > 0; --index, ++i) ; - hobject_t ret = *i; + ghobject_t ret = *i; available_objects.erase(i); return ret; } @@ -375,7 +375,7 @@ public: if (!can_create()) return -ENOSPC; wait_for_ready(); - hobject_t new_obj = object_gen->create_object(rng); + ghobject_t new_obj = object_gen->create_object(rng); in_use_objects.insert(new_obj); available_objects.erase(new_obj); ObjectStore::Transaction *t = new ObjectStore::Transaction; @@ -388,9 +388,9 @@ public: Mutex::Locker locker(lock); while (in_flight) cond.Wait(lock); - vector<hobject_t> objects; - set<hobject_t> objects_set, objects_set2; - hobject_t next, current; + vector<ghobject_t> objects; + set<ghobject_t> objects_set, objects_set2; + ghobject_t next, current; while (1) { cerr << "scanning..." 
<< std::endl; int r = store->collection_list_partial(cid, current, 50, 100, @@ -403,7 +403,7 @@ public: current = next; } ASSERT_EQ(objects_set.size(), available_objects.size()); - for (set<hobject_t>::iterator i = objects_set.begin(); + for (set<ghobject_t>::iterator i = objects_set.begin(); i != objects_set.end(); ++i) { ASSERT_GT(available_objects.count(*i), (unsigned)0); @@ -413,7 +413,7 @@ public: ASSERT_EQ(r, 0); objects_set2.insert(objects.begin(), objects.end()); ASSERT_EQ(objects_set2.size(), available_objects.size()); - for (set<hobject_t>::iterator i = objects_set2.begin(); + for (set<ghobject_t>::iterator i = objects_set2.begin(); i != objects_set2.end(); ++i) { ASSERT_GT(available_objects.count(*i), (unsigned)0); @@ -421,7 +421,7 @@ public: } int stat() { - hobject_t hoid; + ghobject_t hoid; { Mutex::Locker locker(lock); if (!can_unlink()) @@ -446,7 +446,7 @@ public: Mutex::Locker locker(lock); if (!can_unlink()) return -ENOENT; - hobject_t to_remove = get_uniform_random_object(); + ghobject_t to_remove = get_uniform_random_object(); ObjectStore::Transaction *t = new ObjectStore::Transaction; t->remove(cid, to_remove); ++in_flight; @@ -505,7 +505,7 @@ TEST_F(StoreTest, HashCollisionTest) { } string base = ""; for (int i = 0; i < 100; ++i) base.append("aaaaa"); - set<hobject_t> created; + set<ghobject_t> created; for (int n = 0; n < 10; ++n) { char nbuf[100]; sprintf(nbuf, "n%d", n); @@ -515,7 +515,7 @@ TEST_F(StoreTest, HashCollisionTest) { if (!(i % 5)) { cerr << "Object n" << n << " "<< i << std::endl; } - hobject_t hoid(string(buf) + base, string(), CEPH_NOSNAP, 0, 0, string(nbuf)); + ghobject_t hoid(hobject_t(string(buf) + base, string(), CEPH_NOSNAP, 0, 0, string(nbuf))); { ObjectStore::Transaction t; t.touch(cid, hoid); @@ -525,21 +525,21 @@ TEST_F(StoreTest, HashCollisionTest) { created.insert(hoid); } } - vector<hobject_t> objects; + vector<ghobject_t> objects; r = store->collection_list(cid, objects); ASSERT_EQ(r, 0); - set<hobject_t> listed(objects.begin(), objects.end()); + set<ghobject_t> listed(objects.begin(), objects.end()); cerr << "listed.size() is " << listed.size() << " and created.size() is " << created.size() << std::endl; ASSERT_TRUE(listed.size() == created.size()); objects.clear(); listed.clear(); - hobject_t current, next; + ghobject_t current, next; while (1) { r = store->collection_list_partial(cid, current, 50, 60, 0, &objects, &next); ASSERT_EQ(r, 0); ASSERT_TRUE(sorted(objects)); - for (vector<hobject_t>::iterator i = objects.begin(); + for (vector<ghobject_t>::iterator i = objects.begin(); i != objects.end(); ++i) { if (listed.count(*i)) @@ -555,13 +555,13 @@ TEST_F(StoreTest, HashCollisionTest) { } cerr << "listed.size() is " << listed.size() << std::endl; ASSERT_TRUE(listed.size() == created.size()); - for (set<hobject_t>::iterator i = listed.begin(); + for (set<ghobject_t>::iterator i = listed.begin(); i != listed.end(); ++i) { ASSERT_TRUE(created.count(*i)); } - for (set<hobject_t>::iterator i = created.begin(); + for (set<ghobject_t>::iterator i = created.begin(); i != created.end(); ++i) { ObjectStore::Transaction t; @@ -576,7 +576,7 @@ TEST_F(StoreTest, HashCollisionTest) { TEST_F(StoreTest, OMapTest) { coll_t cid("blah"); - hobject_t hoid("tesomap", "", CEPH_NOSNAP, 0, 0, ""); + ghobject_t hoid(hobject_t("tesomap", "", CEPH_NOSNAP, 0, 0, "")); int r; { ObjectStore::Transaction t; @@ -672,7 +672,7 @@ TEST_F(StoreTest, OMapTest) { TEST_F(StoreTest, XattrTest) { coll_t cid("blah"); - hobject_t hoid("tesomap", "", CEPH_NOSNAP, 0, 0, ""); + 
ghobject_t hoid(hobject_t("tesomap", "", CEPH_NOSNAP, 0, 0, "")); bufferlist big; for (unsigned i = 0; i < 10000; ++i) { big.append('\0'); @@ -769,12 +769,12 @@ void colsplittest( for (uint32_t i = 0; i < 2*num_objects; ++i) { stringstream objname; objname << "obj" << i; - t.touch(cid, hobject_t( + t.touch(cid, ghobject_t(hobject_t( objname.str(), "", CEPH_NOSNAP, i<<common_suffix_size, - 0, "")); + 0, ""))); } r = store->apply_transaction(t); ASSERT_EQ(r, 0); @@ -788,14 +788,14 @@ void colsplittest( } ObjectStore::Transaction t; - vector<hobject_t> objects; + vector<ghobject_t> objects; r = store->collection_list(cid, objects); ASSERT_EQ(r, 0); ASSERT_EQ(objects.size(), num_objects); - for (vector<hobject_t>::iterator i = objects.begin(); + for (vector<ghobject_t>::iterator i = objects.begin(); i != objects.end(); ++i) { - ASSERT_EQ(!(i->hash & (1<<common_suffix_size)), 0u); + ASSERT_EQ(!(i->hobj.hash & (1<<common_suffix_size)), 0u); t.remove(cid, *i); } @@ -803,10 +803,10 @@ void colsplittest( r = store->collection_list(tid, objects); ASSERT_EQ(r, 0); ASSERT_EQ(objects.size(), num_objects); - for (vector<hobject_t>::iterator i = objects.begin(); + for (vector<ghobject_t>::iterator i = objects.begin(); i != objects.end(); ++i) { - ASSERT_EQ(i->hash & (1<<common_suffix_size), 0u); + ASSERT_EQ(i->hobj.hash & (1<<common_suffix_size), 0u); t.remove(tid, *i); } @@ -848,12 +848,12 @@ TEST_F(StoreTest, TwoHash) { std::cout << "Making objects" << std::endl; for (int i = 0; i < 360; ++i) { ObjectStore::Transaction t; - hobject_t o; + ghobject_t o; if (i < 8) { - o.hash = (i << 16) | 0xA1; + o.hobj.hash = (i << 16) | 0xA1; t.touch(cid, o); } - o.hash = (i << 16) | 0xB1; + o.hobj.hash = (i << 16) | 0xB1; t.touch(cid, o); r = store->apply_transaction(t); ASSERT_EQ(r, 0); @@ -861,8 +861,8 @@ TEST_F(StoreTest, TwoHash) { std::cout << "Removing half" << std::endl; for (int i = 1; i < 8; ++i) { ObjectStore::Transaction t; - hobject_t o; - o.hash = (i << 16) | 0xA1; + ghobject_t o; + o.hobj.hash = (i << 16) | 0xA1; t.remove(cid, o); r = store->apply_transaction(t); ASSERT_EQ(r, 0); @@ -870,24 +870,24 @@ TEST_F(StoreTest, TwoHash) { std::cout << "Checking" << std::endl; for (int i = 1; i < 8; ++i) { ObjectStore::Transaction t; - hobject_t o; - o.hash = (i << 16) | 0xA1; + ghobject_t o; + o.hobj.hash = (i << 16) | 0xA1; bool exists = store->exists(cid, o); ASSERT_EQ(exists, false); } { - hobject_t o; - o.hash = 0xA1; + ghobject_t o; + o.hobj.hash = 0xA1; bool exists = store->exists(cid, o); ASSERT_EQ(exists, true); } std::cout << "Cleanup" << std::endl; for (int i = 0; i < 360; ++i) { ObjectStore::Transaction t; - hobject_t o; - o.hash = (i << 16) | 0xA1; + ghobject_t o; + o.hobj.hash = (i << 16) | 0xA1; t.remove(cid, o); - o.hash = (i << 16) | 0xB1; + o.hobj.hash = (i << 16) | 0xB1; t.remove(cid, o); r = store->apply_transaction(t); ASSERT_EQ(r, 0); diff --git a/src/test/filestore/workload_generator.cc b/src/test/filestore/workload_generator.cc index 496379d7ad1..704d93021e2 100644 --- a/src/test/filestore/workload_generator.cc +++ b/src/test/filestore/workload_generator.cc @@ -344,12 +344,12 @@ void WorkloadGenerator::do_destroy_collection(ObjectStore::Transaction *t, { m_nr_runs.set(0); entry->m_osr.flush(); - vector<hobject_t> ls; + vector<ghobject_t> ls; m_store->collection_list(entry->m_coll, ls); dout(2) << __func__ << " coll " << entry->m_coll << " (" << ls.size() << " objects)" << dendl; - for (vector<hobject_t>::iterator it = ls.begin(); it < ls.end(); ++it) { + for (vector<ghobject_t>::iterator 
it = ls.begin(); it < ls.end(); ++it) { t->remove(entry->m_coll, *it); } diff --git a/src/test/os/TestFlatIndex.cc b/src/test/os/TestFlatIndex.cc index 6db4f6c4aa5..53d2bbe6376 100644 --- a/src/test/os/TestFlatIndex.cc +++ b/src/test/os/TestFlatIndex.cc @@ -49,8 +49,8 @@ TEST(FlatIndex, collection) { uint64_t hash = 111; uint64_t pool = 222; const std::string object_name(10, 'A'); - hobject_t hoid(object_t(object_name), key, CEPH_NOSNAP, hash, pool, ""); - vector<hobject_t> ls; + ghobject_t hoid(hobject_t(object_t(object_name), key, CEPH_NOSNAP, hash, pool, "")); + vector<ghobject_t> ls; ASSERT_DEATH(index.collection_list_partial(hoid, 0, 0, 0, &ls, &hoid), "0"); } @@ -70,7 +70,7 @@ TEST(FlatIndex, created_unlink) { CollectionIndex::IndexedPath indexed_path; index->set_ref(index); const std::string object_name(10, 'A'); - hobject_t hoid(object_t(object_name), key, CEPH_NOSNAP, hash, pool, ""); + ghobject_t hoid(hobject_t(object_t(object_name), key, CEPH_NOSNAP, hash, pool, "")); int exists; EXPECT_EQ(0, index->lookup(hoid, &indexed_path, &exists)); EXPECT_EQ(0, exists); @@ -88,7 +88,7 @@ TEST(FlatIndex, created_unlink) { CollectionIndex::IndexedPath indexed_path; index->set_ref(index); const std::string object_name(1024, 'A'); - hobject_t hoid(object_t(object_name), key, CEPH_NOSNAP, hash, pool, ""); + ghobject_t hoid(hobject_t(object_t(object_name), key, CEPH_NOSNAP, hash, pool, "")); int exists; EXPECT_EQ(0, index->lookup(hoid, &indexed_path, &exists)); EXPECT_EQ(0, exists); @@ -110,10 +110,10 @@ TEST(FlatIndex, collection_list) { const std::string filename("PATH/" + object_name + "_head"); EXPECT_EQ(0, ::close(::creat(filename.c_str(), 0600))); std::tr1::shared_ptr<CollectionIndex> index(new FlatIndex(collection, base_path)); - vector<hobject_t> ls; + vector<ghobject_t> ls; index->collection_list(&ls); EXPECT_EQ((unsigned)1, ls.size()); - EXPECT_EQ(object_name, ls[0].oid.name); + EXPECT_EQ(object_name, ls[0].hobj.oid.name); EXPECT_EQ(0, ::system("rm -fr PATH")); } diff --git a/src/test/os/TestLFNIndex.cc b/src/test/os/TestLFNIndex.cc index b5dd4a4f3d0..02578eb4a71 100644 --- a/src/test/os/TestLFNIndex.cc +++ b/src/test/os/TestLFNIndex.cc @@ -45,10 +45,10 @@ public: std::tr1::shared_ptr<CollectionIndex> dest ) { return 0; } - void test_generate_and_parse(const hobject_t &hoid, const std::string &mangled_expected) { + void test_generate_and_parse(const ghobject_t &hoid, const std::string &mangled_expected) { const std::string mangled_name = lfn_generate_object_name(hoid); EXPECT_EQ(mangled_expected, mangled_name); - hobject_t hoid_parsed; + ghobject_t hoid_parsed; EXPECT_TRUE(lfn_parse_object_name(mangled_name, &hoid_parsed)); EXPECT_EQ(hoid, hoid_parsed); } @@ -58,34 +58,34 @@ protected: virtual int _created( const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &hoid, const string &mangled_name ) { return 0; } virtual int _remove( const vector<string> &path, - const hobject_t &hoid, + const ghobject_t &hoid, const string &mangled_name ) { return 0; } virtual int _lookup( - const hobject_t &hoid, + const ghobject_t &hoid, vector<string> *path, string *mangled_name, int *exists ) { return 0; } virtual int _collection_list( - vector<hobject_t> *ls + vector<ghobject_t> *ls ) { return 0; } virtual int _collection_list_partial( - const hobject_t &start, + const ghobject_t &start, int min_count, int max_count, snapid_t seq, - vector<hobject_t> *ls, - hobject_t *next + vector<ghobject_t> *ls, + ghobject_t *next ) { return 0; } }; @@ -101,9 +101,9 @@ TEST_F(TestHASH_INDEX_TAG, 
generate_and_parse_name) { uint64_t hash = 0xABABABAB; uint64_t pool = -1; - test_generate_and_parse(hobject_t(object_t(".A/B_\\C.D"), key, CEPH_NOSNAP, hash, pool, ""), + test_generate_and_parse(ghobject_t(hobject_t(object_t(".A/B_\\C.D"), key, CEPH_NOSNAP, hash, pool, "")), "\\.A\\sB_\\\\C.D_head_ABABABAB"); - test_generate_and_parse(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, ""), + test_generate_and_parse(ghobject_t(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, "")), "\\dA_head_ABABABAB"); } @@ -123,11 +123,11 @@ TEST_F(TestHASH_INDEX_TAG_2, generate_and_parse_name) { { std::string name(".XA/B_\\C.D"); name[1] = '\0'; - hobject_t hoid(object_t(name), key, CEPH_NOSNAP, hash, pool, ""); + ghobject_t hoid(hobject_t(object_t(name), key, CEPH_NOSNAP, hash, pool, "")); test_generate_and_parse(hoid, "\\.\\nA\\sB\\u\\\\C.D_KEY_head_ABABABAB"); } - test_generate_and_parse(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, ""), + test_generate_and_parse(ghobject_t(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, "")), "\\dA_KEY_head_ABABABAB"); } @@ -143,21 +143,37 @@ TEST_F(TestHOBJECT_WITH_POOL, generate_and_parse_name) { const std::string key("KEY"); uint64_t hash = 0xABABABAB; uint64_t pool = 0xCDCDCDCD; + int64_t gen = 0xefefefefef; + int8_t shard_id = 0xb; { std::string name(".XA/B_\\C.D"); name[1] = '\0'; - hobject_t hoid(object_t(name), key, CEPH_NOSNAP, hash, pool, ""); - hoid.nspace = "NSPACE"; + ghobject_t hoid(hobject_t(object_t(name), key, CEPH_NOSNAP, hash, pool, "")); + hoid.hobj.nspace = "NSPACE"; test_generate_and_parse(hoid, "\\.\\nA\\sB\\u\\\\C.D_KEY_head_ABABABAB_NSPACE_cdcdcdcd"); } { - hobject_t hoid(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, ""); - hoid.nspace = "NSPACE"; + ghobject_t hoid(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, "")); + hoid.hobj.nspace = "NSPACE"; test_generate_and_parse(hoid, "\\dA_KEY_head_ABABABAB_NSPACE_cdcdcdcd"); } + { + std::string name(".XA/B_\\C.D"); + name[1] = '\0'; + ghobject_t hoid(hobject_t(object_t(name), key, CEPH_NOSNAP, hash, pool, ""), gen, shard_id); + hoid.hobj.nspace = "NSPACE"; + + test_generate_and_parse(hoid, "\\.\\nA\\sB\\u\\\\C.D_KEY_head_ABABABAB_NSPACE_cdcdcdcd_efefefefef_b"); + } + { + ghobject_t hoid(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, ""), gen, shard_id); + hoid.hobj.nspace = "NSPACE"; + + test_generate_and_parse(hoid, "\\dA_KEY_head_ABABABAB_NSPACE_cdcdcdcd_efefefefef_b"); + } } class TestLFNIndex : public TestWrapLFNIndex, public ::testing::Test { @@ -185,7 +201,7 @@ TEST_F(TestLFNIndex, remove_object) { { std::string mangled_name; int exists = 666; - hobject_t hoid(sobject_t("ABC", CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t("ABC", CEPH_NOSNAP))); EXPECT_EQ(0, ::chmod("PATH", 0000)); EXPECT_EQ(-EACCES, remove_object(path, hoid)); @@ -205,7 +221,7 @@ TEST_F(TestLFNIndex, remove_object) { std::string mangled_name; int exists; const std::string object_name(1024, 'A'); - hobject_t hoid(sobject_t(object_name, CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t(object_name, CEPH_NOSNAP))); EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); EXPECT_EQ(0, exists); @@ -226,7 +242,7 @@ TEST_F(TestLFNIndex, remove_object) { std::string mangled_name; int exists; const std::string object_name(1024, 'A'); - hobject_t hoid(sobject_t(object_name, CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t(object_name, CEPH_NOSNAP))); // // PATH/AAA..._0_long => does not match long object name @@ -275,7 +291,7 @@ 
TEST_F(TestLFNIndex, remove_object) { std::string mangled_name; int exists; const std::string object_name(1024, 'A'); - hobject_t hoid(sobject_t(object_name, CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t(object_name, CEPH_NOSNAP))); // // PATH/AAA..._0_long => matches long object name @@ -323,7 +339,7 @@ TEST_F(TestLFNIndex, get_mangled_name) { { std::string mangled_name; int exists = 666; - hobject_t hoid(sobject_t("ABC", CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t("ABC", CEPH_NOSNAP))); EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); EXPECT_NE(std::string::npos, mangled_name.find("ABC__head")); @@ -343,7 +359,7 @@ TEST_F(TestLFNIndex, get_mangled_name) { std::string mangled_name; int exists; const std::string object_name(1024, 'A'); - hobject_t hoid(sobject_t(object_name, CEPH_NOSNAP)); + ghobject_t hoid(hobject_t(sobject_t(object_name, CEPH_NOSNAP))); // // long version of the mangled name and no matching diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h index ab0b13d73e4..7bda0fe9a56 100644 --- a/src/test/osd/RadosModel.h +++ b/src/test/osd/RadosModel.h @@ -1402,6 +1402,7 @@ public: string oid, oid_src; ObjectDesc src_value; librados::ObjectWriteOperation op; + librados::ObjectReadOperation rd_op; librados::AioCompletion *comp; librados::AioCompletion *comp_racing_read; int snap; @@ -1456,7 +1457,11 @@ public: new TestOp::CallbackInfo(1)); comp_racing_read = context->rados.aio_create_completion((void*) read_cb_arg, &write_callback, NULL); - context->io_ctx.aio_stat(context->prefix+oid, comp_racing_read, NULL, NULL); + rd_op.stat(NULL, NULL, NULL); + context->io_ctx.aio_operate(context->prefix+oid, comp_racing_read, &rd_op, + librados::SNAP_HEAD, + librados::OPERATION_ORDER_READS_WRITES, // order wrt previous write/update + NULL); } void _finish(CallbackInfo *info) diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py index 85af54d6f75..fe7c9bacccc 100755 --- a/src/test/pybind/test_ceph_argparse.py +++ b/src/test/pybind/test_ceph_argparse.py @@ -290,13 +290,13 @@ class TestMonitor(TestArgparse): 'force', '--yes-i-really-mean-it', '--i-know-what-i-am-doing']) + self.assert_valid_command(['sync', + 'force', + '--yes-i-really-mean-it']) + self.assert_valid_command(['sync', + 'force']) assert_equal({}, validate_command(sigdict, ['sync'])) assert_equal({}, validate_command(sigdict, ['sync', - 'force'])) - assert_equal({}, validate_command(sigdict, ['sync', - 'force', - '--yes-i-really-mean-it'])) - assert_equal({}, validate_command(sigdict, ['sync', 'force', '--yes-i-really-mean-it', '--i-know-what-i-am-doing', @@ -435,6 +435,30 @@ class TestMDS(TestArgparse): 'compat', 'rm_incompat', '1', '1'])) + def test_mds_set(self): + self.assert_valid_command(['mds', 'set', 'allow_new_snaps']) + self.assert_valid_command(['mds', 'set', 'allow_new_snaps', 'sure']) + assert_equal({}, validate_command(sigdict, ['mds', + 'set', + 'invalid'])) + assert_equal({}, validate_command(sigdict, ['mds', + 'set', + 'allow_new_snaps', + 'sure', + 'toomany'])) + + def test_mds_unset(self): + self.assert_valid_command(['mds', 'unset', 'allow_new_snaps']) + self.assert_valid_command(['mds', 'unset', 'allow_new_snaps', 'sure']) + assert_equal({}, validate_command(sigdict, ['mds', + 'unset', + 'invalid'])) + assert_equal({}, validate_command(sigdict, ['mds', + 'unset', + 'allow_new_snaps', + 'sure', + 'toomany'])) + def test_add_data_pool(self): self.check_1_natural_arg('mds', 'add_data_pool') @@ -444,11 +468,9 @@ class 
TestMDS(TestArgparse): def test_newfs(self): self.assert_valid_command(['mds', 'newfs', '1', '2', '--yes-i-really-mean-it']) + self.assert_valid_command(['mds', 'newfs', '1', '2']) assert_equal({}, validate_command(sigdict, ['mds', 'newfs'])) assert_equal({}, validate_command(sigdict, ['mds', 'newfs', '1'])) - assert_equal({}, validate_command(sigdict, ['mds', 'newfs', '1', '1'])) - assert_equal({}, validate_command(sigdict, ['mds', 'newfs', '1', '1', - 'no I dont'])) assert_equal({}, validate_command(sigdict, ['mds', 'newfs', '1', @@ -785,10 +807,9 @@ class TestOSD(TestArgparse): def test_lost(self): self.assert_valid_command(['osd', 'lost', '1', '--yes-i-really-mean-it']) + self.assert_valid_command(['osd', 'lost', '1']) assert_equal({}, validate_command(sigdict, ['osd', 'lost'])) assert_equal({}, validate_command(sigdict, ['osd', 'lost', - '1'])) - assert_equal({}, validate_command(sigdict, ['osd', 'lost', '1', 'what?'])) assert_equal({}, validate_command(sigdict, ['osd', 'lost', @@ -874,12 +895,12 @@ class TestOSD(TestArgparse): self.assert_valid_command(['osd', 'pool', 'delete', 'poolname', 'poolname', '--yes-i-really-really-mean-it']) + self.assert_valid_command(['osd', 'pool', 'delete', + 'poolname', 'poolname']) + self.assert_valid_command(['osd', 'pool', 'delete', + 'poolname']) assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'delete'])) assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'delete', - 'poolname'])) - assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'delete', - 'poolname', 'poolname'])) - assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'delete', 'poolname', 'poolname', 'not really'])) assert_equal({}, validate_command(sigdict, diff --git a/src/tools/ceph-filestore-dump.cc b/src/tools/ceph-filestore-dump.cc index 49b8d10bdba..b4220bae307 100644 --- a/src/tools/ceph-filestore-dump.cc +++ b/src/tools/ceph-filestore-dump.cc @@ -52,6 +52,32 @@ enum { END_OF_TYPES, //Keep at the end }; +//#define INTERNAL_TEST +//#define INTERNAL_TEST2 + +#ifdef INTERNAL_TEST +CompatSet get_test_compat_set() { + CompatSet::FeatureSet ceph_osd_feature_compat; + CompatSet::FeatureSet ceph_osd_feature_ro_compat; + CompatSet::FeatureSet ceph_osd_feature_incompat; + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG); +#ifdef INTERNAL_TEST2 + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS); +#endif + return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat, + ceph_osd_feature_incompat); +} +#endif + typedef uint8_t sectiontype_t; typedef uint32_t mymagic_t; typedef int64_t mysize_t; @@ -69,7 +95,7 @@ const int fd_none = INT_MIN; //can be added to the export format. 
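The export-format changes that follow bump super_ver to 2 and embed the exporting OSD's superblock in pg_begin, so do_import can refuse a dump whose compat features the local OSD does not support. A minimal standalone sketch of the version-gated decode pattern used there; ToyPGBegin and the raw-byte encoding are illustrative stand-ins for Ceph's bufferlist ENCODE_START/DECODE_START macros, not the real types.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

struct ToyPGBegin {
  uint32_t pgid = 0;
  uint64_t features = 0;   // stands in for the embedded OSDSuperblock compat set

  void encode(std::vector<uint8_t>& out) const {
    uint8_t struct_v = 2;  // bumped when the features field was added
    out.push_back(struct_v);
    out.insert(out.end(), (const uint8_t*)&pgid, (const uint8_t*)&pgid + sizeof(pgid));
    out.insert(out.end(), (const uint8_t*)&features, (const uint8_t*)&features + sizeof(features));
  }

  void decode(const std::vector<uint8_t>& in) {
    size_t off = 0;
    uint8_t struct_v = in[off++];
    std::memcpy(&pgid, &in[off], sizeof(pgid)); off += sizeof(pgid);
    if (struct_v > 1) {    // older (version 1) encodings simply lack this field
      std::memcpy(&features, &in[off], sizeof(features));
    }
  }
};

int main() {
  ToyPGBegin a; a.pgid = 7; a.features = 0x3;
  std::vector<uint8_t> bl;
  a.encode(bl);
  ToyPGBegin b; b.decode(bl);
  std::cout << b.pgid << " " << std::hex << b.features << std::endl;  // 7 3
  return 0;
}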
@@ -69,7 +95,7 @@ const int fd_none = INT_MIN;  //can be added to the export format
 
 struct super_header {
   static const uint32_t super_magic = (shortmagic << 16) | shortmagic;
-  static const uint32_t super_ver = 1;
+  static const uint32_t super_ver = 2;
   static const uint32_t FIXED_LENGTH = 16;
   uint32_t magic;
   uint32_t version;
@@ -139,18 +165,25 @@ struct footer {
 
 struct pg_begin {
   pg_t pgid;
+  OSDSuperblock superblock;
 
-  pg_begin(pg_t pg): pgid(pg) { }
+  pg_begin(pg_t pg, OSDSuperblock sb):
+    pgid(pg), superblock(sb) { }
   pg_begin() { }
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(1, 1, bl);
+    // New super_ver prevents decode from ver 1
+    ENCODE_START(2, 2, bl);
     ::encode(pgid, bl);
+    ::encode(superblock, bl);
     ENCODE_FINISH(bl);
   }
   void decode(bufferlist::iterator& bl) {
-    DECODE_START(1, bl);
+    DECODE_START(2, bl);
     ::decode(pgid, bl);
+    if (struct_v > 1) {
+      ::decode(superblock, bl);
+    }
     DECODE_FINISH(bl);
   }
 };
@@ -347,8 +380,8 @@ void remove_coll(ObjectStore *store, const coll_t &coll)
                              OSD::make_snapmapper_oid());
   SnapMapper mapper(&driver, 0, 0, 0);
 
-  vector<hobject_t> objects;
-  hobject_t next;
+  vector<ghobject_t> objects;
+  ghobject_t next;
   int r = 0;
   int64_t num = 0;
   ObjectStore::Transaction *t = new ObjectStore::Transaction;
@@ -358,13 +391,14 @@ void remove_coll(ObjectStore *store, const coll_t &coll)
                                      &objects,
                                      &next);
     if (r < 0)
      goto out;
-    for (vector<hobject_t>::iterator i = objects.begin();
+    for (vector<ghobject_t>::iterator i = objects.begin();
         i != objects.end();
         ++i, ++num) {
+      assert(i->generation == ghobject_t::NO_GEN);
       OSDriver::OSTransaction _t(driver.get_transaction(t));
       cout << "remove " << *i << std::endl;
-      int r = mapper.remove_oid(*i, &_t);
+      int r = mapper.remove_oid(i->hobj, &_t);
       if (r != 0 && r != -ENOENT) {
         assert(0);
       }
@@ -621,18 +655,19 @@ int export_file(ObjectStore *store, coll_t cid, hobject_t &obj)
 
 int export_files(ObjectStore *store, coll_t coll)
 {
-  vector<hobject_t> objects;
-  hobject_t next;
+  vector<ghobject_t> objects;
+  ghobject_t next;
 
   while (!next.is_max()) {
     int r = store->collection_list_partial(coll, next, 200, 300, 0,
                                            &objects, &next);
     if (r < 0)
       return r;
-    for (vector<hobject_t>::iterator i = objects.begin();
+    for (vector<ghobject_t>::iterator i = objects.begin();
         i != objects.end();
         ++i) {
-      r = export_file(store, coll, *i);
+      assert(i->generation == ghobject_t::NO_GEN);
+      r = export_file(store, coll, i->hobj);
       if (r < 0)
         return r;
     }
@@ -664,7 +699,7 @@ void write_super()
 }
 
 int do_export(ObjectStore *fs, coll_t coll, pg_t pgid, pg_info_t &info,
-    epoch_t map_epoch, __u8 struct_ver)
+    epoch_t map_epoch, __u8 struct_ver, OSDSuperblock superblock)
 {
   PGLog::IndexedLog log;
   pg_missing_t missing;
@@ -675,7 +710,7 @@ int do_export(ObjectStore *fs, coll_t coll, pg_t pgid, pg_info_t &info,
 
   write_super();
 
-  pg_begin pgb(pgid);
+  pg_begin pgb(pgid, superblock);
   ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
   if (ret)
     return ret;
@@ -909,7 +944,7 @@ int get_pg_metadata(ObjectStore *store, coll_t coll, bufferlist &bl)
   return 0;
 }
 
-int do_import(ObjectStore *store)
+int do_import(ObjectStore *store, OSDSuperblock sb)
 {
   bufferlist ebl;
   pg_info_t info;
@@ -943,7 +978,16 @@ int do_import(ObjectStore *store)
   pg_begin pgb;
   pgb.decode(ebliter);
   pg_t pgid = pgb.pgid;
-  
+
+  if (debug) {
+    cout << "Exported features: " << pgb.superblock.compat_features << std::endl;
+  }
+  if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
+    cout << "Export has incompatible features set "
+      << pgb.superblock.compat_features << std::endl;
+    return 1;
+  }
+
   log_oid = OSD::make_pg_log_oid(pgid);
   biginfo_oid = OSD::make_pg_biginfo_oid(pgid);
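The pg_begin change above is the usual Ceph versioned-encoding move: add the field, bump the struct version, and guard the decode of the new field on the version actually read back; bumping super_ver to 2 additionally keeps an older tool from misreading a new dump. A standalone sketch of the idea, with a toy byte buffer standing in for bufferlist and the ENCODE_START/DECODE_START macros:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Minimal stand-in for bufferlist: append-only bytes plus a read offset.
    struct Blob {
      std::vector<uint8_t> data;
      size_t off = 0;
      template <typename T> void put(const T& v) {
        const uint8_t* p = reinterpret_cast<const uint8_t*>(&v);
        data.insert(data.end(), p, p + sizeof(T));
      }
      template <typename T> void get(T& v) {
        std::memcpy(&v, data.data() + off, sizeof(T));
        off += sizeof(T);
      }
    };

    struct PgBeginV2 {
      uint64_t pgid = 0;
      uint64_t superblock_stub = 0;   // stands in for the embedded OSDSuperblock

      void encode(Blob& bl) const {
        uint8_t struct_v = 2;          // version 2 carries the superblock
        bl.put(struct_v);
        bl.put(pgid);
        bl.put(superblock_stub);
      }
      void decode(Blob& bl) {
        uint8_t struct_v = 0;
        bl.get(struct_v);
        bl.get(pgid);
        if (struct_v > 1)              // v1 blobs simply lack the field
          bl.get(superblock_stub);
      }
    };

    int main() {
      PgBeginV2 in, out;
      in.pgid = 42;
      in.superblock_stub = 7;
      Blob bl;
      in.encode(bl);
      out.decode(bl);
      assert(out.pgid == 42 && out.superblock_stub == 7);

      // A v1 blob (no superblock) still decodes: the guarded field is skipped.
      Blob v1;
      uint8_t old_ver = 1;
      v1.put(old_ver);
      v1.put(uint64_t(7));
      PgBeginV2 legacy;
      legacy.decode(v1);
      assert(legacy.pgid == 7 && legacy.superblock_stub == 0);
      return 0;
    }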
@@ -1017,7 +1061,7 @@ int main(int argc, char **argv)
     ("pgid", po::value<string>(&pgidstr),
      "PG id, mandatory")
     ("type", po::value<string>(&type),
-     "Type one of info, log, export, or import, mandatory")
+     "Type one of info, log, remove, export, or import, mandatory")
     ("file", po::value<string>(&file),
      "path of file to export or import")
     ("debug", "Enable diagnostic output to stderr")
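The long main() hunk that follows reads the OSD superblock out of the meta collection, compares its feature set against what the tool supports, and then insists that the FileStore-level "allow sharded objects" flag and the superblock's SHARDS incompat bit agree, so a store caught mid-transition is refused rather than half-finished. A small sketch of that last consistency check in isolation; the two booleans correspond to FileStore::get_allow_sharded_objects() and the SHARDS bit in the superblock, and the helper itself is illustrative:

    #include <cerrno>
    #include <iostream>

    // Returns 0 when the two views of "sharded objects" agree, EINVAL otherwise,
    // mirroring the refusal added to main() below.
    int check_sharded_transition(bool fs_sharded_objects, bool superblock_has_shards)
    {
      if (superblock_has_shards == fs_sharded_objects)
        return 0;                        // consistent: safe to proceed
      if (fs_sharded_objects)
        std::cerr << "FileStore sharded but OSD not set, corruption?" << std::endl;
      else
        std::cerr << "Found incomplete transition to sharded objects" << std::endl;
      return EINVAL;                     // caller treats this as a hard error
    }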
@@ -1170,14 +1214,67 @@ int main(int argc, char **argv)
     return 1;
   }
 
+  bool fs_sharded_objects = fs->get_allow_sharded_objects();
+
   int ret = 0;
   vector<coll_t> ls;
   vector<coll_t>::iterator it;
+  CompatSet supported;
+
+#ifdef INTERNAL_TEST
+  supported = get_test_compat_set();
+#else
+  supported = OSD::get_osd_compat_set();
+#endif
+
+  bufferlist bl;
+  OSDSuperblock superblock;
+  bufferlist::iterator p;
+  ret = fs->read(coll_t::META_COLL, OSD_SUPERBLOCK_POBJECT, 0, 0, bl);
+  if (ret < 0) {
+    cout << "Failure to read OSD superblock error= " << r << std::endl;
+    goto out;
+  }
+
+  p = bl.begin();
+  ::decode(superblock, p);
+
+#ifdef INTERNAL_TEST2
+  fs->set_allow_sharded_objects();
+  assert(fs->get_allow_sharded_objects());
+  fs_sharded_objects = true;
+  superblock.compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
+#endif
+
+  if (debug && file_fd != STDOUT_FILENO) {
+    cout << "Supported features: " << supported << std::endl;
+    cout << "On-disk features: " << superblock.compat_features << std::endl;
+  }
+  if (supported.compare(superblock.compat_features) == -1) {
+    cout << "On-disk OSD incompatible features set "
+      << superblock.compat_features << std::endl;
+    ret = EINVAL;
+    goto out;
+  }
+
+  // If there was a crash as an OSD was transitioning to sharded objects
+  // and hadn't completed a set_allow_sharded_objects().
+  // This utility does not want to attempt to finish that transition.
+  if (superblock.compat_features.incompat.contains(CEPH_OSD_FEATURE_INCOMPAT_SHARDS) != fs_sharded_objects) {
+    // An OSD should never have call set_allow_sharded_objects() before
+    // updating its own OSD features.
+    if (fs_sharded_objects)
+      cout << "FileStore sharded but OSD not set, Corruption?" << std::endl;
+    else
+      cout << "Found incomplete transition to sharded objects" << std::endl;
+    ret = EINVAL;
+    goto out;
+  }
 
   if (type == "import") {
     try {
-      ret = do_import(fs);
+      ret = do_import(fs, superblock);
     }
     catch (const buffer::error &e) {
       cout << "do_import threw exception error " << e.what() << std::endl;
@@ -1260,7 +1357,7 @@ int main(int argc, char **argv)
       cerr << "struct_v " << (int)struct_ver << std::endl;
 
     if (type == "export") {
-      ret = do_export(fs, coll, pgid, info, map_epoch, struct_ver);
+      ret = do_export(fs, coll, pgid, info, map_epoch, struct_ver, superblock);
     } else if (type == "info") {
       formatter->open_object_section("info");
       info.dump(formatter);
diff --git a/src/tools/ceph-osdomap-tool.cc b/src/tools/ceph-osdomap-tool.cc
index aedc4c824e7..bde4b28b45f 100644
--- a/src/tools/ceph-osdomap-tool.cc
+++ b/src/tools/ceph-osdomap-tool.cc
@@ -115,30 +115,30 @@ int main(int argc, char **argv) {
       i->value().hexdump(std::cout);
     }
   } else if (cmd == "dump-objects") {
-    vector<hobject_t> objects;
+    vector<ghobject_t> objects;
     r = omap.list_objects(&objects);
     if (r < 0) {
       std::cerr << "list_objects got: " << cpp_strerror(r) << std::endl;
       goto done;
     }
-    for (vector<hobject_t>::iterator i = objects.begin();
+    for (vector<ghobject_t>::iterator i = objects.begin();
         i != objects.end();
         ++i) {
       std::cout << *i << std::endl;
     }
     r = 0;
   } else if (cmd == "dump-objects-with-keys") {
-    vector<hobject_t> objects;
+    vector<ghobject_t> objects;
     r = omap.list_objects(&objects);
     if (r < 0) {
       std::cerr << "list_objects got: " << cpp_strerror(r) << std::endl;
       goto done;
     }
-    for (vector<hobject_t>::iterator i = objects.begin();
+    for (vector<ghobject_t>::iterator i = objects.begin();
         i != objects.end();
         ++i) {
       std::cout << "Object: " << *i << std::endl;
-      ObjectMap::ObjectMapIterator j = omap.get_iterator(*i);
+      ObjectMap::ObjectMapIterator j = omap.get_iterator(i->hobj);
       for (j->seek_to_first(); j->valid(); j->next()) {
         std::cout << j->key() << std::endl;
         j->value().hexdump(std::cout);
diff --git a/src/tools/dupstore.cc b/src/tools/dupstore.cc
index e17eb2201a7..c8b8ece31c8 100644
--- a/src/tools/dupstore.cc
+++ b/src/tools/dupstore.cc
@@ -27,7 +27,7 @@ int dupstore(ObjectStore* src, ObjectStore* dst)
   if (dst->mount() < 0) return 1;
 
   // objects
-  hash_map<hobject_t, coll_t> did_object;
+  hash_map<ghobject_t, coll_t> did_object;
 
   // collections
   vector<coll_t> collections;
@@ -54,11 +54,11 @@
     dst->apply_transaction(t);
   }
 
-  vector<hobject_t> o;
+  vector<ghobject_t> o;
   src->collection_list(*p, o);
   int numo = o.size();
   int j = 1;
-  for (vector<hobject_t>::iterator q = o.begin(); q != o.end(); ++q) {
+  for (vector<ghobject_t>::iterator q = o.begin(); q != o.end(); ++q) {
     ObjectStore::Transaction t;
     if (did_object.count(*q))
       t.collection_add(*p, did_object[*q], *q);
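Taken together, the tool hunks above (remove_coll, export_files, ceph-osdomap-tool, dupstore) are the mechanical side of the hobject_t to ghobject_t migration: object-listing interfaces now hand back a wrapper that also carries a generation and shard id, and callers that only understand plain objects assert generation == NO_GEN and reach through ->hobj. A standalone model of that calling pattern; the types are simplified and only the field names follow the patch:

    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    struct hobject_model {                 // the plain, unsharded object id
      std::string oid;
    };

    struct ghobject_model {                // wrapper: object id + generation + shard
      static constexpr uint64_t NO_GEN = UINT64_MAX;
      hobject_model hobj;
      uint64_t generation = NO_GEN;
      int8_t shard_id = -1;                // "no shard" in the real type
    };

    // Same shape as the loops in remove_coll()/export_files(): refuse anything
    // carrying a generation, then operate on the wrapped plain object.
    void process_all(const std::vector<ghobject_model>& objects)
    {
      for (const auto& g : objects) {
        assert(g.generation == ghobject_model::NO_GEN);
        const hobject_model& plain = g.hobj;   // what the pre-ghobject code expects
        (void)plain;
      }
    }

    int main() {
      std::vector<ghobject_model> objs(2);
      objs[0].hobj.oid = "foo";
      objs[1].hobj.oid = "bar";
      process_all(objs);
      return 0;
    }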